예제 #1
0
        private static List <MatchClassification> CrawlLocation(CrawlerLinkDetails locationLink, string season)
        {
            List <MatchClassification> matchClassifications = new List <MatchClassification>();
            var taskQueue = new Queue <Task <MatchClassification> >();

            foreach (var link in PageCrawler.CrawlLinksPage(locationLink))
            {
                CrawlerLinkDetails l = link;
                taskQueue.Enqueue(Task <MatchClassification> .Factory.StartNew(() => CrawlClassification(l, l.LinkText, season)));
            }

            Task.Factory.ContinueWhenAll(taskQueue.ToArray(),
                                         completedTasks =>
            {
                foreach (Task <MatchClassification> task in completedTasks)
                {
                    if (task.Exception == null)
                    {
                        matchClassifications.Add(task.Result);
                    }
                    else
                    {
                        Log.Error("Unexpected exception",
                                  task.Exception);
                    }
                }
            })
            .Wait();

            return(matchClassifications);
        }
예제 #2
0
        public CrawlResults Recheck(CrawlResults crawlResults)
        {
            Console.Write("Rechecking {0} ", crawlResults.Season);
            var classificationLinks = from c in crawlResults.Classifications
                                      select
                                      new CrawlerLinkDetails
            {
                DestinationPageType = PageType.MatchList,
                DestinationUrl      = c.Url,
                LinkText            = c.Name,
                SourcePageType      = PageType.MatchClassification,
                SourcePageUrl       = c.LocationIndexUrl
            };

            List <MatchClassification> matchClassifications = new List <MatchClassification>();
            var taskQueue = new Queue <Task <MatchClassification> >();

            foreach (var link in classificationLinks)
            {
                CrawlerLinkDetails l = link;
                taskQueue.Enqueue(Task <MatchClassification> .Factory.StartNew(() => CrawlClassification(l, l.LinkText, crawlResults.Season)));
            }

            if (taskQueue.Count == 0)
            {
                Console.WriteLine(" done.");
                Log.InfoFormat("No URLs to check for season {0}", crawlResults.Season);
                return(crawlResults);
            }

            Task.Factory.ContinueWhenAll(taskQueue.ToArray(),
                                         completedTasks =>
            {
                foreach (Task <MatchClassification> task in completedTasks)
                {
                    if (task.Exception == null)
                    {
                        matchClassifications.Add(task.Result);
                    }
                    else
                    {
                        Log.Error("Unexpected exception",
                                  task.Exception);
                    }
                }
            })
            .Wait();

            Console.WriteLine(" done.");

            return(new CrawlResults
            {
                Id = crawlResults.Id,
                Season = crawlResults.Season,
                Classifications = matchClassifications
            });
        }
예제 #3
0
 private static MatchClassification CrawlClassification(CrawlerLinkDetails classificationLink, string location, string season)
 {
     return(new MatchClassification
     {
         Location = location,
         LocationIndexUrl = classificationLink.SourcePageUrl,
         Name = classificationLink.LinkText,
         Url = classificationLink.DestinationUrl,
         Scorecards = PageCrawler.CrawlMatchListPage(classificationLink, season)
     });
 }
예제 #4
0
        public CrawlResults Crawl(Season season)
        {
            Console.Write("Crawling " + season.Name + " ");
            CrawlResults results = new CrawlResults {
                Season = season.Name
            };

            CrawlerLinkDetails seasonPage = new CrawlerLinkDetails
            {
                SourcePageType      = PageType.SeasonList,
                SourcePageUrl       = "http://cricketarchive.com/Archive/Seasons/index.html",
                DestinationPageType = PageType.LocationList,
                DestinationUrl      = season.Url,
                LinkText            = season.Name
            };

            List <CrawlerLinkDetails> locationLinks = PageCrawler.CrawlLinksPage(seasonPage);

            List <MatchClassification> classifications = new List <MatchClassification>();

            var taskQueue = new Queue <Task <List <MatchClassification> > >();

            foreach (var link in locationLinks)
            {
                CrawlerLinkDetails l = link;
                taskQueue.Enqueue(Task <List <MatchClassification> > .Factory.StartNew(() => CrawlLocation(l, season.Name)));
            }

            Task.Factory.ContinueWhenAll(taskQueue.ToArray(),
                                         completedTasks =>
            {
                foreach (Task <List <MatchClassification> > task in completedTasks)
                {
                    if (task.Exception == null)
                    {
                        classifications.AddRange(task.Result);
                    }
                    else
                    {
                        Log.Error("Unexpected exception",
                                  task.Exception);
                    }
                }
            })
            .Wait();

            results.Classifications = classifications;
            Console.WriteLine(" done.");
            return(results);
        }
예제 #5
0
파일: Spider.cs 프로젝트: bugedone/somerset
        public CrawlResults Crawl(Season season)
        {
            Console.Write("Crawling " + season.Name + " ");
            CrawlResults results = new CrawlResults { Season = season.Name};

            CrawlerLinkDetails seasonPage = new CrawlerLinkDetails
                                                {
                                                    SourcePageType = PageType.SeasonList,
                                                    SourcePageUrl = "http://cricketarchive.com/Archive/Seasons/index.html",
                                                    DestinationPageType = PageType.LocationList,
                                                    DestinationUrl = season.Url,
                                                    LinkText = season.Name
                                                };

            List<CrawlerLinkDetails> locationLinks = PageCrawler.CrawlLinksPage(seasonPage);

            List<MatchClassification> classifications = new List<MatchClassification>();

            var taskQueue = new Queue<Task<List<MatchClassification>>>();

            foreach (var link in locationLinks)
            {
                CrawlerLinkDetails l = link;
                taskQueue.Enqueue(Task<List<MatchClassification>>.Factory.StartNew(() => CrawlLocation(l, season.Name)));
            }

            Task.Factory.ContinueWhenAll(taskQueue.ToArray(),
                    completedTasks =>
                        {
                            foreach (Task<List<MatchClassification>> task in completedTasks)
                            {
                                if (task.Exception == null)
                                    classifications.AddRange(task.Result);
                                else
                                {
                                    Log.Error("Unexpected exception",
                                            task.Exception);
                                }
                            }
                        })
                    .Wait();

            results.Classifications = classifications;
            Console.WriteLine(" done.");
            return results;
        }
예제 #6
0
파일: Spider.cs 프로젝트: bugedone/somerset
        private static List<MatchClassification> CrawlLocation(CrawlerLinkDetails locationLink, string season)
        {
            List<MatchClassification> matchClassifications = new List<MatchClassification>();
            var taskQueue = new Queue<Task<MatchClassification>>();

            foreach (var link in PageCrawler.CrawlLinksPage(locationLink))
            {
                CrawlerLinkDetails l = link;
                taskQueue.Enqueue(Task<MatchClassification>.Factory.StartNew(() => CrawlClassification(l, l.LinkText, season)));
            }

            Task.Factory.ContinueWhenAll(taskQueue.ToArray(),
                    completedTasks =>
                    {
                        foreach (Task<MatchClassification> task in completedTasks)
                        {
                            if (task.Exception == null)
                                matchClassifications.Add(task.Result);
                            else
                            {
                                Log.Error("Unexpected exception",
                                        task.Exception);
                            }
                        }
                    })
                    .Wait();

            return matchClassifications;
        }
예제 #7
0
파일: Spider.cs 프로젝트: bugedone/somerset
 private static MatchClassification CrawlClassification(CrawlerLinkDetails classificationLink, string location, string season)
 {
     return new MatchClassification
                {
                    Location = location,
                    LocationIndexUrl = classificationLink.SourcePageUrl,
                    Name = classificationLink.LinkText,
                    Url = classificationLink.DestinationUrl,
                    Scorecards = PageCrawler.CrawlMatchListPage(classificationLink, season)
                };
 }