コード例 #1
0
ファイル: Spider.cs プロジェクト: bugedone/somerset
        /// <summary>
        /// Crawls one season: resolves the season's location-list page, then crawls
        /// each location in parallel and gathers the match classifications found.
        /// </summary>
        /// <param name="season">Season whose archive pages should be crawled.</param>
        /// <returns>A <see cref="CrawlResults"/> holding the season name and all classifications.</returns>
        public CrawlResults Crawl(Season season)
        {
            // Progress banner; the closing " done." is written at the end.
            Console.Write("Crawling " + season.Name + " ");

            var results = new CrawlResults { Season = season.Name };

            // Link from the master season index to this season's location list.
            var seasonPage = new CrawlerLinkDetails
            {
                SourcePageType = PageType.SeasonList,
                SourcePageUrl = "http://cricketarchive.com/Archive/Seasons/index.html",
                DestinationPageType = PageType.LocationList,
                DestinationUrl = season.Url,
                LinkText = season.Name
            };

            List<CrawlerLinkDetails> locationLinks = PageCrawler.CrawlLinksPage(seasonPage);

            var classifications = new List<MatchClassification>();

            // Kick off one crawl task per location. The per-iteration copy keeps
            // each closure bound to its own link rather than the loop variable.
            var crawlTasks = new List<Task<List<MatchClassification>>>();
            foreach (var link in locationLinks)
            {
                var current = link;
                crawlTasks.Add(Task<List<MatchClassification>>.Factory.StartNew(
                        () => CrawlLocation(current, season.Name)));
            }

            // Merge results once all tasks complete. Faulted tasks are logged
            // inside the continuation (observing the exception) instead of being
            // allowed to propagate out of the Wait().
            Task.Factory.ContinueWhenAll(crawlTasks.ToArray(), finished =>
            {
                foreach (var task in finished)
                {
                    if (task.Exception != null)
                    {
                        Log.Error("Unexpected exception", task.Exception);
                    }
                    else
                    {
                        classifications.AddRange(task.Result);
                    }
                }
            }).Wait();

            results.Classifications = classifications;
            Console.WriteLine(" done.");
            return results;
        }
コード例 #2
0
ファイル: BaseCrawlCommand.cs プロジェクト: bugedone/somerset
        /// <summary>
        /// Renders the crawl results as a fixed-width (120 column) plain-text table,
        /// one row per scorecard across all classifications.
        /// </summary>
        /// <param name="results">Crawl results to render; must have a non-null Classifications list.</param>
        /// <returns>The formatted table, or a "no matches" message when nothing was found.</returns>
        protected static string DumpResults(CrawlResults results)
        {
            if (results.Classifications.Count == 0)
                return "No Somerset matches found in " + results.Season;

            // Table geometry: 120 total columns = "| " + 116 content chars + " |".
            // Named once here instead of repeating the magic numbers per row.
            const int contentWidth = 116;
            string separator = new string('-', 120);

            StringBuilder sb = new StringBuilder();
            sb.AppendLine(separator);
            string title = "Somerset matches in " + results.Season;
            sb.Append("| ").Append(title.PadRight(contentWidth)).AppendLine(" |");
            sb.AppendLine(separator);

            // Flatten every classification's scorecards into one row sequence.
            var matches = from c in results.Classifications
                          from sc in c.Scorecards
                          select sc;

            foreach (ScorecardDetails match in matches)
            {
                sb.Append("| ").Append(FormatValue(match, contentWidth)).AppendLine(" |");
            }
            sb.AppendLine(separator);

            return sb.ToString();
        }
コード例 #3
0
ファイル: ScorecardDetails.cs プロジェクト: bugedone/somerset
 /// <summary>
 /// Builds the document key under which a season's crawl results are stored
 /// (the crawler prefix followed by the season identifier).
 /// </summary>
 public static string GenerateKey(CrawlResults crawlResults) => "crawler/" + crawlResults.Season;
コード例 #4
0
ファイル: ScorecardDetails.cs プロジェクト: bugedone/somerset
 /// <summary>
 /// Builds the document key for a season's crawl results: the crawler
 /// prefix followed by the season identifier.
 /// </summary>
 public static string GenerateKey(CrawlResults crawlResults)
 {
     string key = "crawler/" + crawlResults.Season;
     return key;
 }
コード例 #5
0
ファイル: Spider.cs プロジェクト: bugedone/somerset
        /// <summary>
        /// Re-crawls every classification recorded in an earlier crawl and returns a
        /// fresh <see cref="CrawlResults"/> with the newly gathered classifications.
        /// Returns the input unchanged when there is nothing to recheck.
        /// </summary>
        /// <param name="crawlResults">Results of a previous crawl to verify.</param>
        public CrawlResults Recheck(CrawlResults crawlResults)
        {
            // Progress banner; the closing " done." is written before returning.
            Console.Write("Rechecking {0} ", crawlResults.Season);

            // One recrawl link per previously-found classification.
            var classificationLinks = crawlResults.Classifications.Select(c => new CrawlerLinkDetails
            {
                DestinationPageType = PageType.MatchList,
                DestinationUrl = c.Url,
                LinkText = c.Name,
                SourcePageType = PageType.MatchClassification,
                SourcePageUrl = c.LocationIndexUrl
            });

            var matchClassifications = new List<MatchClassification>();

            // Start one crawl task per link. The per-iteration copy keeps each
            // closure bound to its own link rather than the loop variable.
            var recheckTasks = new List<Task<MatchClassification>>();
            foreach (var link in classificationLinks)
            {
                var current = link;
                recheckTasks.Add(Task<MatchClassification>.Factory.StartNew(
                        () => CrawlClassification(current, current.LinkText, crawlResults.Season)));
            }

            // Nothing to recheck: report it and hand back the input untouched.
            if (recheckTasks.Count == 0)
            {
                Console.WriteLine(" done.");
                Log.InfoFormat("No URLs to check for season {0}", crawlResults.Season);
                return crawlResults;
            }

            // Collect results once all tasks complete. Faulted tasks are logged
            // inside the continuation (observing the exception) instead of being
            // allowed to propagate out of the Wait().
            Task.Factory.ContinueWhenAll(recheckTasks.ToArray(), finished =>
            {
                foreach (var task in finished)
                {
                    if (task.Exception != null)
                    {
                        Log.Error("Unexpected exception", task.Exception);
                    }
                    else
                    {
                        matchClassifications.Add(task.Result);
                    }
                }
            }).Wait();

            Console.WriteLine(" done.");

            return new CrawlResults
            {
                Id = crawlResults.Id,
                Season = crawlResults.Season,
                Classifications = matchClassifications
            };
        }