/// <summary>
/// Crawls the location list page of <paramref name="season"/> and collects the match
/// classifications returned for each location link found there.
/// </summary>
/// <remarks>
/// Every location link is crawled on its own task through <c>CrawlLocation</c>. A task that
/// faults is logged and its location is left out of the results; it does not abort the crawl.
/// Progress text is written to the console.
/// </remarks>
/// <param name="season">Season whose <c>Url</c> is crawled and whose <c>Name</c> labels the results.</param>
/// <returns>
/// Results carrying the season name and the classifications from every location task that
/// finished without an exception. The classification list is empty when the season page
/// yields no location links.
/// </returns>
public CrawlResults Crawl(Season season)
{
    Console.Write("Crawling " + season.Name + " ");

    CrawlResults results = new CrawlResults { Season = season.Name };

    CrawlerLinkDetails seasonPage = new CrawlerLinkDetails
    {
        SourcePageType = PageType.SeasonList,
        SourcePageUrl = "http://cricketarchive.com/Archive/Seasons/index.html",
        DestinationPageType = PageType.LocationList,
        DestinationUrl = season.Url,
        LinkText = season.Name
    };

    List<CrawlerLinkDetails> locationLinks = PageCrawler.CrawlLinksPage(seasonPage);
    List<MatchClassification> classifications = new List<MatchClassification>();

    var taskQueue = new Queue<Task<List<MatchClassification>>>();
    foreach (var link in locationLinks)
    {
        // Copy the loop variable so each lambda captures its own link
        // (needed by compilers that share one foreach variable across iterations).
        CrawlerLinkDetails l = link;
        taskQueue.Enqueue(Task<List<MatchClassification>>.Factory.StartNew(() => CrawlLocation(l, season.Name)));
    }

    // ContinueWhenAll throws ArgumentException when handed an empty task array, so a
    // season page without any location links must be short-circuited here.
    if (taskQueue.Count == 0)
    {
        Console.WriteLine(" done.");
        Log.InfoFormat("No location links found for season {0}", season.Name);
        results.Classifications = classifications;
        return results;
    }

    Task.Factory.ContinueWhenAll(taskQueue.ToArray(), completedTasks =>
        {
            // Runs once, after every location task has finished, so the shared list
            // is only ever touched from this single continuation.
            foreach (Task<List<MatchClassification>> task in completedTasks)
            {
                if (task.Exception == null)
                {
                    classifications.AddRange(task.Result);
                }
                else
                {
                    Log.Error("Unexpected exception", task.Exception);
                }
            }
        })
        .Wait();

    results.Classifications = classifications;
    Console.WriteLine(" done.");
    return results;
}
/// <summary>
/// Renders the scorecards held in <paramref name="results"/> as a bordered text table.
/// </summary>
/// <param name="results">Crawl results whose classifications supply the scorecards to list.</param>
/// <returns>
/// A single "No Somerset matches found in ..." line when there are no classifications.
/// Otherwise a table made of 120-dash rules, a title row, and one <c>FormatValue</c>
/// entry per scorecard across all classifications.
/// </returns>
protected static string DumpResults(CrawlResults results)
{
    const int ruleWidth = 120;
    const int cellWidth = 116; // rule width less the "| " and " |" borders

    if (results.Classifications.Count == 0)
    {
        return "No Somerset matches found in " + results.Season;
    }

    string rule = new string('-', ruleWidth);
    StringBuilder table = new StringBuilder();

    // Header: rule, padded title row, rule.
    table.AppendLine(rule);
    string heading = "Somerset matches in " + results.Season;
    table.Append("| ");
    table.Append(heading.PadRight(cellWidth));
    table.AppendLine(" |");
    table.AppendLine(rule);

    // Flatten every classification's scorecards into one sequence of rows.
    var scorecards = results.Classifications.SelectMany(classification => classification.Scorecards);
    foreach (ScorecardDetails match in scorecards)
    {
        table.Append("| ");
        table.Append(FormatValue(match, cellWidth));
        table.AppendLine(" |");
    }

    table.AppendLine(rule);
    return table.ToString();
}
/// <summary>
/// Builds the key for a set of crawl results: the season prefixed with "crawler/".
/// </summary>
/// <param name="crawlResults">Crawl results whose <c>Season</c> forms the tail of the key.</param>
/// <returns>The string "crawler/" followed by the season.</returns>
public static string GenerateKey(CrawlResults crawlResults)
{
    return string.Concat("crawler/", crawlResults.Season);
}
/// <summary>
/// Produces the key under which a set of crawl results is identified.
/// </summary>
/// <param name="crawlResults">Crawl results whose <c>Season</c> is appended to the key prefix.</param>
/// <returns>"crawler/" followed by the season of <paramref name="crawlResults"/>.</returns>
public static string GenerateKey(CrawlResults crawlResults)
{
    const string keyPrefix = "crawler/";

    string key = keyPrefix + crawlResults.Season;
    return key;
}
/// <summary>
/// Re-crawls every classification recorded in <paramref name="crawlResults"/> and returns
/// a fresh result set for the same season.
/// </summary>
/// <remarks>
/// Each classification is turned into a link and crawled on its own task through
/// <c>CrawlClassification</c>. A task that faults is logged and its classification is
/// omitted from the returned results. Progress text is written to the console.
/// </remarks>
/// <param name="crawlResults">Previously stored results whose classifications are to be rechecked.</param>
/// <returns>
/// The same <paramref name="crawlResults"/> instance when it holds no classifications.
/// Otherwise a new <c>CrawlResults</c> with the original <c>Id</c> and <c>Season</c> and the
/// classifications from every task that finished without an exception.
/// </returns>
public CrawlResults Recheck(CrawlResults crawlResults)
{
    Console.Write("Rechecking {0} ", crawlResults.Season);

    // Lazily describe one link per stored classification.
    var links = crawlResults.Classifications.Select(c => new CrawlerLinkDetails
    {
        DestinationPageType = PageType.MatchList,
        DestinationUrl = c.Url,
        LinkText = c.Name,
        SourcePageType = PageType.MatchClassification,
        SourcePageUrl = c.LocationIndexUrl
    });

    List<MatchClassification> rechecked = new List<MatchClassification>();

    // Enumerating the pipeline starts one crawl task per link, in order.
    Task<MatchClassification>[] pending = links
        .Select(link => Task<MatchClassification>.Factory.StartNew(
            () => CrawlClassification(link, link.LinkText, crawlResults.Season)))
        .ToArray();

    // ContinueWhenAll cannot be given an empty array, so bail out early.
    if (pending.Length == 0)
    {
        Console.WriteLine(" done.");
        Log.InfoFormat("No URLs to check for season {0}", crawlResults.Season);
        return crawlResults;
    }

    Task gather = Task.Factory.ContinueWhenAll(pending, finished =>
    {
        foreach (Task<MatchClassification> crawl in finished)
        {
            if (crawl.Exception != null)
            {
                Log.Error("Unexpected exception", crawl.Exception);
                continue;
            }

            rechecked.Add(crawl.Result);
        }
    });
    gather.Wait();

    Console.WriteLine(" done.");

    return new CrawlResults
    {
        Id = crawlResults.Id,
        Season = crawlResults.Season,
        Classifications = rechecked
    };
}