Beispiel #1
0
        /// <summary>
        /// Starts a download for every scorecard of the season that is flagged as available and has a
        /// non-empty URL, blocks until all of those downloads have completed, and then hands the crawl
        /// results to <c>SaveCrawlerResults</c>.
        /// </summary>
        /// <param name="dataStore">Store passed to the crawl-result lookup, to each download and to the final save.</param>
        /// <param name="season">Season whose scorecards are downloaded.</param>
        private static void DownloadScorecards(FileStore dataStore, Season season)
        {
            CrawlResults seasonCrawl = GetCrawlResultsForSeason(dataStore, season);
            if (seasonCrawl == null)
            {
                Log.WarnFormat("Season {0} has not been crawled yet.", season.Name);
                return;
            }

            Log.InfoFormat("Scorecard download started at {0} for season {1}", DateTime.Now.ToShortTimeString(), seasonCrawl.Season);

            var pendingDownloads = new List<Task>();

            foreach (ScorecardDetails scorecard in seasonCrawl.Classifications.SelectMany(c => c.Scorecards))
            {
                // Skip anything that is not flagged as available or has no URL to fetch.
                if (!scorecard.ScorecardAvailable || string.IsNullOrEmpty(scorecard.ScorecardUrl))
                {
                    continue;
                }

                Log.InfoFormat("Downloading scorecard for {0}", scorecard);
                pendingDownloads.Add(DownloadScorecardAsync(scorecard, dataStore));
            }

            // Block here so the results are only saved once every started download has finished.
            Task.WaitAll(pendingDownloads.ToArray());

            SaveCrawlerResults(seasonCrawl, dataStore);

            Log.InfoFormat("Scorecard download finished at {0} for season {1}", DateTime.Now.ToShortTimeString(), seasonCrawl.Season);
        }
Beispiel #2
0
        /// <summary>
        /// Loads the list of batting records stored under the id generated from the season name and
        /// passes it, together with the store, to <c>Batting.Reduce</c>.
        /// </summary>
        /// <param name="season">Season whose name is used to generate the record id.</param>
        /// <param name="dataStore">Store the records are loaded from; also forwarded to the reduce step.</param>
        private void ReduceBatting(Season season, FileStore dataStore)
        {
            var battingMapId = IndividualBattingMap.GenerateId(season.Name);
            List<BattingRecord> seasonBatting = dataStore.Load<List<BattingRecord>>(battingMapId);

            Batting.Reduce(seasonBatting, dataStore);
        }
Beispiel #3
0
        /// <summary>
        /// Crawls the given season with a new <c>Spider</c>, logging the start time, a dump of the
        /// results and the finish time.
        /// </summary>
        /// <param name="season">Season to crawl.</param>
        /// <returns>The results returned by the spider's crawl.</returns>
        private static CrawlResults RunCrawler(Season season)
        {
            Log.InfoFormat("Crawler started at {0} for season {1}", DateTime.Now.ToShortTimeString(), season.Name);

            CrawlResults crawled = new Spider().Crawl(season);

            Log.InfoFormat("\n{0}", DumpResults(crawled));
            Log.InfoFormat("Crawler finished at {0}.", DateTime.Now.ToShortTimeString());

            return crawled;
        }
Beispiel #4
0
        /// <summary>
        /// Re-checks a season that already has crawl results: the existing results are passed to a new
        /// <c>Spider</c>'s <c>Recheck</c>, the outcome is logged and then handed to <c>SaveCrawlerResults</c>.
        /// Logs a warning and does nothing else when the season has no crawl results.
        /// </summary>
        /// <param name="dataStore">Store the existing results are looked up in and the new results are saved with.</param>
        /// <param name="season">Season to re-check.</param>
        private static void RecheckSeason(FileStore dataStore, Season season)
        {
            CrawlResults previousCrawl = GetCrawlResultsForSeason(dataStore, season);
            if (previousCrawl != null)
            {
                CrawlResults rechecked = new Spider().Recheck(previousCrawl);

                Log.InfoFormat("\n{0}", DumpResults(rechecked));
                Log.InfoFormat("Crawler finished at {0}.", DateTime.Now.ToShortTimeString());

                SaveCrawlerResults(rechecked, dataStore);
                return;
            }

            Log.WarnFormat("Season {0} has not been crawled yet.", season.Name);
        }
Beispiel #5
0
        /// <summary>
        /// Crawls the season page for its location links, crawls every location on its own task and
        /// gathers the match classifications from all of them into a single <see cref="CrawlResults"/>.
        /// </summary>
        /// <param name="season">Season to crawl; its <c>Url</c> is the page that is crawled and its
        /// <c>Name</c> labels the results.</param>
        /// <returns>
        /// Results whose <c>Season</c> is the season name and whose <c>Classifications</c> contains the
        /// output of every location crawl that finished without an exception. The list is empty when
        /// the season page yields no location links.
        /// </returns>
        public CrawlResults Crawl(Season season)
        {
            Console.Write("Crawling " + season.Name + " ");
            CrawlResults results = new CrawlResults { Season = season.Name };

            CrawlerLinkDetails seasonPage = new CrawlerLinkDetails
                                                {
                                                    SourcePageType = PageType.SeasonList,
                                                    SourcePageUrl = "http://cricketarchive.com/Archive/Seasons/index.html",
                                                    DestinationPageType = PageType.LocationList,
                                                    DestinationUrl = season.Url,
                                                    LinkText = season.Name
                                                };

            List<CrawlerLinkDetails> locationLinks = PageCrawler.CrawlLinksPage(seasonPage);

            List<MatchClassification> classifications = new List<MatchClassification>();

            // TaskFactory.ContinueWhenAll throws ArgumentException for an empty task array, so a
            // season page without any location links must skip the fan-out entirely.
            // NOTE(review): the null check is defensive; whether CrawlLinksPage can return null is not visible here.
            if (locationLinks != null && locationLinks.Count > 0)
            {
                var locationTasks = new List<Task<List<MatchClassification>>>();

                foreach (var link in locationLinks)
                {
                    // Per-iteration copy so each closure captures its own link
                    // (older compilers share the foreach variable across iterations).
                    CrawlerLinkDetails l = link;
                    locationTasks.Add(Task<List<MatchClassification>>.Factory.StartNew(() => CrawlLocation(l, season.Name)));
                }

                // The continuation runs once, after every location task has finished, so the
                // shared list is only ever touched from a single thread.
                Task.Factory.ContinueWhenAll(locationTasks.ToArray(),
                        completedTasks =>
                            {
                                foreach (Task<List<MatchClassification>> task in completedTasks)
                                {
                                    if (task.Exception == null)
                                    {
                                        classifications.AddRange(task.Result);
                                    }
                                    else
                                    {
                                        // A failed location is logged and skipped; the others still count.
                                        Log.Error("Unexpected exception",
                                                task.Exception);
                                    }
                                }
                            })
                        .Wait();
            }

            results.Classifications = classifications;
            Console.WriteLine(" done.");
            return results;
        }
Beispiel #6
0
        /// <summary>
        /// Calls <c>ParseScorecard</c> for every scorecard listed in the season's crawl results,
        /// logging when the run starts and finishes. Logs a warning and does nothing else when the
        /// season has no crawl results.
        /// </summary>
        /// <param name="dataStore">Store the crawl results are looked up in; also passed to each parse call.</param>
        /// <param name="season">Season whose scorecards are parsed.</param>
        private static void ParseScorecards(FileStore dataStore, Season season)
        {
            CrawlResults seasonCrawl = GetCrawlResultsForSeason(dataStore, season);
            if (seasonCrawl != null)
            {
                Log.InfoFormat("Scorecard parsing started at {0} for season {1}", DateTime.Now.ToShortTimeString(), seasonCrawl.Season);

                // Flatten the per-classification scorecard lists and parse them one after another.
                foreach (ScorecardDetails scorecard in seasonCrawl.Classifications.SelectMany(c => c.Scorecards))
                {
                    ParseScorecard(dataStore, scorecard);
                }

                Log.InfoFormat("Scorecard parsing finished at {0} for season {1}", DateTime.Now.ToShortTimeString(), season.Name);
                return;
            }

            Log.WarnFormat("Season {0} has not been crawled yet.", season.Name);
        }
Beispiel #7
0
        /// <summary>
        /// Runs the individual batting, bowling and fielding map functions over every match of the
        /// season that can be loaded from the store, and saves the three combined record lists under
        /// the ids generated from the season name.
        /// </summary>
        /// <remarks>
        /// Returns without saving anything when the season has no crawl results, when the crawl
        /// results list no scorecards, or when none of the listed matches can be loaded.
        /// A map task that fails is logged and its output is left out of the saved list.
        /// </remarks>
        /// <param name="dataStore">Store the matches are loaded from and the map output is saved to.</param>
        /// <param name="season">Season to run the map functions for.</param>
        private void RunMapFunctions(FileStore dataStore, Season season)
        {
            CrawlResults crawlResults = GetCrawlResultsForSeason(dataStore, season);
            if (crawlResults == null)
            {
                Log.WarnFormat("Season {0} has not been crawled yet.", season.Name);
                return;
            }

            Log.InfoFormat("Map started at {0} for season {1}", DateTime.Now.ToShortTimeString(), crawlResults.Season);

            // Materialise once: the SelectMany query is deferred and would otherwise be
            // re-evaluated by both the emptiness check and the loop below.
            var matchRecords = crawlResults.Classifications.SelectMany(m => m.Scorecards).ToList();

            if (matchRecords.Count == 0)
            {
                Log.InfoFormat("No match records found for {0}", season.Name);
                return;
            }

            var battingTasks = new List<Task<List<BattingRecord>>>();
            var bowlingTasks = new List<Task<List<BowlingRecord>>>();
            var fieldingTasks = new List<Task<List<FieldingRecord>>>();

            foreach (ScorecardDetails details in matchRecords)
            {
                string id = CricketMatch.GenerateId(details.Season, details.MatchCode);
                CricketMatch match = dataStore.Load<CricketMatch>(id);
                if (match == null)
                {
                    // Nothing stored under this id, so there is nothing to map for this scorecard.
                    continue;
                }

                battingTasks.Add(Task<List<BattingRecord>>.Factory.StartNew(() => IndividualBattingMap.Run(match)));
                bowlingTasks.Add(Task<List<BowlingRecord>>.Factory.StartNew(() => IndividualBowlingMap.Run(match)));
                fieldingTasks.Add(Task<List<FieldingRecord>>.Factory.StartNew(() => IndividualFieldingMap.Run(match)));
            }

            // The three lists always grow together, so checking one is enough. Without this guard
            // ContinueWhenAll would throw ArgumentException for the empty task arrays.
            if (battingTasks.Count == 0)
            {
                Log.WarnFormat("No parsed matches found for {0}", season.Name);
                return;
            }

            // All map tasks are already running; gathering the three groups one after another only
            // changes the order in which this thread waits for them.
            List<BattingRecord> battingRecords = GatherMapResults(battingTasks.ToArray());
            List<BowlingRecord> bowlingRecords = GatherMapResults(bowlingTasks.ToArray());
            List<FieldingRecord> fieldingRecords = GatherMapResults(fieldingTasks.ToArray());

            dataStore.Save(battingRecords, IndividualBattingMap.GenerateId(season.Name));
            dataStore.Save(bowlingRecords, IndividualBowlingMap.GenerateId(season.Name));
            dataStore.Save(fieldingRecords, IndividualFieldingMap.GenerateId(season.Name));

            Log.InfoFormat("Map finished at {0} for season {1}", DateTime.Now.ToShortTimeString(), season.Name);
        }

        /// <summary>
        /// Waits for every map task to finish and concatenates the results of the ones that did not
        /// fault; faulted tasks are logged and skipped.
        /// </summary>
        /// <typeparam name="TRecord">Record type produced by the map tasks.</typeparam>
        /// <param name="mapTasks">Already-started map tasks; may be empty.</param>
        /// <returns>A new list holding the records of all successful tasks, in task order.</returns>
        private static List<TRecord> GatherMapResults<TRecord>(Task<List<TRecord>>[] mapTasks)
        {
            var gathered = new List<TRecord>();

            // ContinueWhenAll rejects an empty array, and there is nothing to wait for anyway.
            if (mapTasks.Length == 0)
            {
                return gathered;
            }

            Task.Factory.ContinueWhenAll(mapTasks,
                                         completedTasks =>
                                         {
                                             foreach (var task in completedTasks)
                                             {
                                                 if (task.Exception == null)
                                                 {
                                                     gathered.AddRange(task.Result);
                                                 }
                                                 else
                                                 {
                                                     Log.Error("Unexpected exception", task.Exception);
                                                 }
                                             }
                                         })
                        .Wait();

            return gathered;
        }