Beispiel #1
0
        /// <summary>
        /// Reads the heat-list table of the crawled page and produces one <see cref="Event"/>
        /// per selected event-name cell, filled in with time, session and heat text.
        /// </summary>
        /// <param name="result">Crawl result whose DOM is queried.</param>
        /// <returns>
        /// The sequence returned by combining the name, time, session and heat selections
        /// through the four-sequence <c>Zip</c> call.
        /// </returns>
        public IEnumerable <Event> Extract(CrawlResult result)
        {
            var document = result.DOM;

            var nameCells    = document["#heat_lists > table > tbody > tr:nth-child(n + 1) > td:nth-child(4) > p"];
            var timeCells    = document["#heat_lists > table > tbody > tr > td.heatlist_datetime > p"];
            var sessionCells = document["#heat_lists > table > tbody > tr > td[valign='top']:nth-child(1) > p"];
            var heatCells    = document["#heat_lists > table > tbody > tr > td[valign='top']:nth-child(2) > p"];

            // The trimmed cell text is passed through the `regex` member (declared outside this
            // method); every match is replaced by a single space before it becomes the event name.
            var namedEvents = nameCells.Select(cell => new Event
            {
                Name = regex.Replace(cell.InnerText.Trim(), " ")
            });

            var timeTexts    = timeCells.Select(cell => cell.InnerText.Trim());
            var sessionTexts = sessionCells.Select(cell => cell.InnerText.Trim());
            var heatTexts    = heatCells.Select(cell => cell.InnerText.Trim());

            // NOTE(review): this four-sequence Zip overload is not defined in the visible code;
            // presumably a project extension that walks the sequences in lock-step - confirm.
            return namedEvents.Zip(timeTexts, sessionTexts, heatTexts, (evt, timeText, sessionText, heatText) =>
            {
                evt.Session = sessionText;
                evt.Heat = heatText;
                evt.Time = timeText;
                return evt;
            });
        }
Beispiel #2
0
 /// <summary>
 /// Follows every URI the extractor finds on the given page, delegating each one to the
 /// base crawler, but only when the extractor reports that it can handle the page.
 /// </summary>
 /// <param name="result">The crawled page to extract follow-up URIs from.</param>
 /// <returns>The observable produced by <c>Observable.If</c> over the follow-up crawls.</returns>
 public override IObservable <CrawlResult> Crawl(CrawlResult result)
 {
     // Extractor.Extract is invoked here, when the pipeline is built, i.e. before the
     // CanExtract condition below is ever evaluated.
     var followUps = Extractor.Extract(result)
                     .ToObservable()
                     .Do(x => Console.WriteLine("Crawling: " + x))
                     .SelectMany(uri => base.Crawl(new CrawlResult(uri, null)));

     return Observable.If(() => Extractor.CanExtract(result), followUps);
 }
 /// <summary>
 /// Logs the page being processed and collects the link targets found in the second
 /// column of the <c>#main_tbl</c> table.
 /// </summary>
 /// <param name="result">Crawl result providing the DOM and the base URI.</param>
 /// <returns>One <see cref="Uri"/> per matched anchor, built from its <c>href</c> relative to <c>result.Uri</c>.</returns>
 public IEnumerable <Uri> Extract(CrawlResult result)
 {
     Console.WriteLine("Extracting: " + result.Uri);

     var anchors = result.DOM["#main_tbl > tbody > tr > td:nth-child(2) > a"];

     // Each href is resolved against the URI of the page it was found on.
     return anchors.Select(anchor => new Uri(result.Uri, anchor["href"]));
 }
        /// <summary>
        /// Fetches the page at <c>result.Uri</c> and hands every fetched page to each
        /// sub-crawler whose <c>CanCrawl</c> accepts it, merging all of their outputs.
        /// </summary>
        /// <param name="result">Crawl result whose URI is fetched.</param>
        /// <returns>The merged stream of results from all matching sub-crawlers.</returns>
        public virtual IObservable <CrawlResult> Crawl(CrawlResult result)
        {
            // Wrap each fetched document together with the URI it came from.
            var pages = this.CreateCrawlerForUri(result.Uri)
                        .Select(cq => new CrawlResult(result.Uri, cq));

            // For every page, pick the sub-crawlers that accept it and start their crawl.
            var perSubCrawler = pages.SelectMany(
                page => this.SubCrawlers.Where(candidate => candidate.CanCrawl(page)),
                (page, chosen) => chosen.Crawl(page));

            return perSubCrawler.Merge();
        }
Beispiel #5
0
 /// <summary>
 /// Collects the links in the placement table of the given page and fetches each of them.
 /// </summary>
 /// <param name="result">Crawl result providing the DOM and the base URI for the links.</param>
 /// <returns>One <c>CrawlResult</c> per document produced by the crawler created for each link.</returns>
 public override IObservable <CrawlResult> Crawl(CrawlResult result)
 {
     var links = result.DOM["#placement > form > table:nth-child(2) > tbody > tr > td.h5b > a"];

     // Resolve each href against the current page and wrap it in a result without content.
     var pending = links.Select(link => new CrawlResult(new Uri(result.Uri, link["href"]), null));

     return pending
            .ToObservable()
            .SelectMany(target => this.CreateCrawlerForUri(target.Uri)
                                  .Select(content => new CrawlResult(target.Uri, content)));
 }
        /// <summary>
        /// Finds the competitor links on the given page, pulls the target URL out of each
        /// anchor's <c>onclick</c> attribute and fetches every such URL.
        /// </summary>
        /// <param name="result">Crawl result whose DOM contains the competitor list.</param>
        /// <returns>
        /// One <c>CrawlResult</c> per document produced by the crawler created for each URL.
        /// Anchors whose <c>onclick</c> value holds no single-quoted text are skipped.
        /// </returns>
        public IObservable <CrawlResult> Crawl(CrawlResult result)
        {
            var dom = result.DOM;

            var uris = dom["#results_competitor_list > a[onclick]"]
                       .Select(participant =>
            {
                // The URL is the text between the first pair of single quotes in the handler.
                var clickAttr = participant.GetAttribute("onclick");
                if (clickAttr == null)
                {
                    return null;
                }

                var firstQuote = clickAttr.IndexOf('\'');
                if (firstQuote < 0)
                {
                    return null;
                }

                var secondQuote = clickAttr.IndexOf('\'', firstQuote + 1);
                if (secondQuote < 0)
                {
                    // Without a closing quote the Substring length would be negative and throw.
                    return null;
                }

                return clickAttr.Substring(firstQuote + 1, secondQuote - firstQuote - 1);
            })
                       // Drop anchors that carried no usable quoted URL instead of failing the whole crawl.
                       .Where(url => !string.IsNullOrEmpty(url))
                       // Build the Uri once so the crawler and the result share the same instance.
                       .Select(url => new Uri(url));

            return uris.ToObservable().SelectMany(
                uri => this.CreateCrawlerForUri(uri, scheduler: this.scheduler),
                (uri, cq) => new CrawlResult(uri, cq));
        }
        /// <summary>
        /// Builds a <see cref="Dancer"/> for every name found in the page header, attaching the
        /// page URI, the events extracted from the same page and the distinct partner names.
        /// </summary>
        /// <param name="crawlResult">Crawl result providing the DOM and the page URI.</param>
        /// <returns>
        /// A lazily evaluated sequence with one dancer per <c>#header &gt; span</c> element;
        /// the event and partner lists are materialised for each dancer as it is produced.
        /// </returns>
        public IEnumerable <Dancer> Extract(CrawlResult crawlResult)
        {
            const string partnerMarker = "with";

            var dom = crawlResult.DOM;
            var uri = crawlResult.Uri;

            var dancerName   = dom["#header > span"].Select(x => x.InnerText.Trim());
            var partnerNames = dom["#heat_lists > table > tbody > tr > td[colspan='4'] > p"]
                               .Select(pn =>
            {
                var trimmedText = pn.InnerText.Trim(':', ' ');

                // Ordinal search: the marker is a fixed token, not culture-dependent text.
                var withIndex = trimmedText.IndexOf(partnerMarker, System.StringComparison.Ordinal);
                if (withIndex < 0)
                {
                    // No marker present: keep the whole text rather than chopping off
                    // its first characters (or throwing on very short text).
                    return trimmedText;
                }

                // The partner name is whatever follows the marker.
                return trimmedText.Substring(withIndex + partnerMarker.Length).Trim();
            });

            return(from name in dancerName
                   select new Dancer
            {
                Name = name,
                Uri = uri.AbsoluteUri,
                Events = eventExtractor.Extract(crawlResult).ToList(),
                Partners = partnerNames.Distinct().ToList()
            });
        }
 /// <summary>Accepts every crawl result unconditionally.</summary>
 /// <param name="result">The crawl result being offered; not inspected.</param>
 /// <returns>Always <c>true</c>.</returns>
 public bool CanCrawl(CrawlResult result)
 {
     return true;
 }
 /// <summary>Reports that extraction is possible for every crawl result.</summary>
 /// <param name="result">The crawl result being offered; not inspected.</param>
 /// <returns>Always <c>true</c>.</returns>
 public bool CanExtract(CrawlResult result)
 {
     return true;
 }
Beispiel #10
0
 /// <summary>
 /// Accepts a crawl result only when its URI host contains <c>results.o2cm.com</c>.
 /// </summary>
 /// <param name="result">The crawl result whose URI host is checked.</param>
 /// <returns><c>true</c> when the host contains the expected name; otherwise <c>false</c>.</returns>
 public override bool CanCrawl(CrawlResult result)
 {
     var host = result.Uri.Host;

     return host.Contains("results.o2cm.com");
 }
 /// <summary>
 /// Reports whether links can be extracted from the page: the URI host must contain
 /// <c>results.o2cm.com</c> and the DOM must contain at least one <c>#main_tbl</c> element.
 /// </summary>
 /// <param name="result">The crawl result to inspect.</param>
 /// <returns><c>true</c> when both conditions hold; otherwise <c>false</c>.</returns>
 public bool CanExtract(CrawlResult result)
 {
     // Check the host first so the DOM is not queried for pages from other hosts.
     if (!result.Uri.Host.Contains("results.o2cm.com"))
     {
         return false;
     }

     return result.DOM["#main_tbl"].Any();
 }
 /// <summary>
 /// Default acceptance check: every crawl result is accepted. Declared virtual so
 /// derived crawlers can supply their own condition.
 /// </summary>
 /// <param name="result">The crawl result being offered; not inspected here.</param>
 /// <returns>Always <c>true</c> in this implementation.</returns>
 public virtual bool CanCrawl(CrawlResult result)
 {
     return true;
 }