Beispiel #1
0
        /// <summary>
        /// Renders the scorecards held in a set of crawl results as a boxed, fixed-width text table.
        /// </summary>
        /// <param name="results">Crawl results whose classifications supply the scorecards to list.</param>
        /// <returns>
        /// A single-line message when the results contain no classifications; otherwise a
        /// 120-column table with a title row followed by one row per scorecard.
        /// </returns>
        protected static string DumpResults(CrawlResults results)
        {
            if (results.Classifications.Count == 0)
            {
                return "No Somerset matches found in " + results.Season;
            }

            // Each row is "| " + 116 characters of content + " |", i.e. 120 columns in total.
            const int tableWidth = 120;
            const int cellWidth = 116;
            string rule = new string('-', tableWidth);

            StringBuilder table = new StringBuilder();

            table.AppendLine(rule);
            table.Append("| ");
            table.Append(("Somerset matches in " + results.Season).PadRight(cellWidth));
            table.AppendLine(" |");
            table.AppendLine(rule);

            // Flatten every classification's scorecards into one sequence of rows.
            foreach (ScorecardDetails scorecard in results.Classifications.SelectMany(c => c.Scorecards))
            {
                table.Append("| ");
                table.Append(FormatValue(scorecard, cellWidth));
                table.AppendLine(" |");
            }

            table.AppendLine(rule);

            return table.ToString();
        }
Beispiel #2
0
        /// <summary>
        /// Downloads every available scorecard recorded in the stored crawl results for a season,
        /// waits for all downloads to complete and then saves the crawl results back to the store.
        /// </summary>
        /// <param name="dataStore">Store the crawl results are loaded from and saved to.</param>
        /// <param name="season">Season whose scorecards should be downloaded.</param>
        private static void DownloadScorecards(FileStore dataStore, Season season)
        {
            CrawlResults seasonResults = GetCrawlResultsForSeason(dataStore, season);

            if (seasonResults == null)
            {
                Log.WarnFormat("Season {0} has not been crawled yet.", season.Name);
                return;
            }

            Log.InfoFormat("Scorecard download started at {0} for season {1}", DateTime.Now.ToShortTimeString(), seasonResults.Season);

            List<Task> downloads = new List<Task>();

            foreach (ScorecardDetails scorecard in seasonResults.Classifications.SelectMany(c => c.Scorecards))
            {
                // Skip scorecards that are flagged unavailable or have no URL to fetch.
                if (!scorecard.ScorecardAvailable || string.IsNullOrEmpty(scorecard.ScorecardUrl))
                {
                    continue;
                }

                Log.InfoFormat("Downloading scorecard for {0}", scorecard);

                downloads.Add(DownloadScorecardAsync(scorecard, dataStore));
            }

            // Block until every started download has completed.
            Task.WaitAll(downloads.ToArray());

            SaveCrawlerResults(seasonResults, dataStore);

            Log.InfoFormat("Scorecard download finished at {0} for season {1}", DateTime.Now.ToShortTimeString(), seasonResults.Season);
        }
Beispiel #3
0
        /// <summary>
        /// Polls the crawl status endpoint once and returns the crawl output if it has completed.
        /// </summary>
        /// <param name="client">HTTP client used to send the status request.</param>
        /// <param name="statusUri">URI of the status endpoint to query.</param>
        /// <returns>
        /// The crawl output when the endpoint answers 200 OK with a runtime status of "Completed";
        /// <c>null</c> when the endpoint answers 202 Accepted.
        /// </returns>
        /// <exception cref="Exception">
        /// The endpoint returned any other status code, the response body could not be read as a
        /// status response, or the runtime status was something other than "Completed".
        /// </exception>
        private CrawlResults GetResults(HttpClient client, string statusUri)
        {
            using (HttpRequestMessage request = new HttpRequestMessage(HttpMethod.Get, statusUri))
            using (HttpResponseMessage response = client.SendAsync(request).Result)
            {
                switch (response.StatusCode)
                {
                    case HttpStatusCode.Accepted:
                        // Nothing to return yet; the caller sees null.
                        return null;

                    case HttpStatusCode.OK:
                        string rawResults = response.Content.ReadAsStringAsync().Result;
                        StatusResponse statusResponse = JsonConvert.DeserializeObject<StatusResponse>(rawResults);

                        // An empty or "null" body deserializes to null; report that explicitly
                        // rather than failing with a NullReferenceException on the next line.
                        if (statusResponse == null)
                        {
                            throw new Exception("Failed to get status for Crawl.  Received an empty or unreadable status response");
                        }

                        if (statusResponse.RuntimeStatus != "Completed")
                        {
                            throw new Exception($"Unexpected RuntimeStatus: {statusResponse.RuntimeStatus}");
                        }

                        return statusResponse.Output;

                    default:
                        throw new Exception($"Failed to get status for Crawl.  Received error code {response.StatusCode}");
                }
            }
        }
Beispiel #4
0
        /// <summary>
        /// Saves crawl results to the data store under their id, generating an id first
        /// when the results do not have one yet.
        /// </summary>
        /// <param name="results">Crawl results to save; their <c>Id</c> is assigned if it is null or empty.</param>
        /// <param name="dataStore">Store that receives the results.</param>
        protected static void SaveCrawlerResults(CrawlResults results, FileStore dataStore)
        {
            bool needsKey = string.IsNullOrEmpty(results.Id);

            if (needsKey)
            {
                results.Id = CrawlResults.GenerateKey(results);
            }

            string key = results.Id;
            dataStore.Save(results, key);
        }
Beispiel #5
0
        /// <summary>
        /// Re-crawls every classification in an existing set of crawl results, one task per
        /// classification, and returns a new result set built from the tasks that succeeded.
        /// </summary>
        /// <param name="crawlResults">Previously stored crawl results to re-check.</param>
        /// <returns>
        /// The original <paramref name="crawlResults"/> instance when it has no classifications;
        /// otherwise a new <see cref="CrawlResults"/> carrying the same id and season and the
        /// re-crawled classifications. Classifications whose task failed are logged and left out.
        /// </returns>
        public CrawlResults Recheck(CrawlResults crawlResults)
        {
            Console.Write("Rechecking {0} ", crawlResults.Season);

            List<Task<MatchClassification>> pending = new List<Task<MatchClassification>>();

            foreach (var classification in crawlResults.Classifications)
            {
                // A fresh local per iteration so each task's lambda captures its own link.
                CrawlerLinkDetails target = new CrawlerLinkDetails
                {
                    DestinationPageType = PageType.MatchList,
                    DestinationUrl      = classification.Url,
                    LinkText            = classification.Name,
                    SourcePageType      = PageType.MatchClassification,
                    SourcePageUrl       = classification.LocationIndexUrl
                };

                pending.Add(Task<MatchClassification>.Factory.StartNew(
                    () => CrawlClassification(target, target.LinkText, crawlResults.Season)));
            }

            if (pending.Count == 0)
            {
                Console.WriteLine(" done.");
                Log.InfoFormat("No URLs to check for season {0}", crawlResults.Season);
                return crawlResults;
            }

            List<MatchClassification> rechecked = new List<MatchClassification>();

            // Runs once every crawl task has finished, gathering results and logging failures.
            Task gather = Task.Factory.ContinueWhenAll(
                pending.ToArray(),
                finished =>
                {
                    foreach (Task<MatchClassification> crawl in finished)
                    {
                        if (crawl.Exception != null)
                        {
                            Log.Error("Unexpected exception",
                                      crawl.Exception);
                            continue;
                        }

                        rechecked.Add(crawl.Result);
                    }
                });

            gather.Wait();

            Console.WriteLine(" done.");

            CrawlResults refreshed = new CrawlResults
            {
                Id = crawlResults.Id,
                Season = crawlResults.Season,
                Classifications = rechecked
            };

            return refreshed;
        }
Beispiel #6
0
        /// <summary>
        /// Starts a crawl for the given request and stores its results in <c>_results</c>.
        /// </summary>
        /// <param name="crawlRequest">Request describing the crawl to start.</param>
        public void Crawl(CrawlRequest crawlRequest)
        {
            // Clear any previous results first so a failure below does not leave stale data behind.
            _results = null;

            using (HttpClient http = new HttpClient())
            {
                // Start() yields the status URI, which is then polled for the results.
                _results = GetResultsWithRetry(http, Start(http, crawlRequest), RESULT_RETRIES);
            }
        }
Beispiel #7
0
        /// <summary>
        /// Crawls each season returned by <c>GetSeasons</c> for the configured start and end
        /// seasons and saves the results of each crawl to the data store.
        /// </summary>
        /// <param name="dataStore">Store the seasons are read from and the crawl results are saved to.</param>
        public void Execute(FileStore dataStore)
        {
            foreach (Season current in GetSeasons(dataStore, StartSeason, EndSeason))
            {
                Log.DebugFormat("Crawling Season {0} at {1}", current.Name, current.Url);

                // Crawl, then persist immediately so each season is saved as soon as it completes.
                SaveCrawlerResults(RunCrawler(current), dataStore);
            }
        }
Beispiel #8
0
        /// <summary>
        /// Crawls a single season with a new <see cref="Spider"/>, logging the start time,
        /// a dump of the results and the finish time.
        /// </summary>
        /// <param name="season">Season to crawl.</param>
        /// <returns>The results produced by the spider.</returns>
        private static CrawlResults RunCrawler(Season season)
        {
            string startedAt = DateTime.Now.ToShortTimeString();
            Log.InfoFormat("Crawler started at {0} for season {1}", startedAt, season.Name);

            CrawlResults results = new Spider().Crawl(season);

            string summary = DumpResults(results);
            Log.InfoFormat("\n{0}", summary);

            string finishedAt = DateTime.Now.ToShortTimeString();
            Log.InfoFormat("Crawler finished at {0}.", finishedAt);

            return results;
        }
Beispiel #9
0
        /// <summary>
        /// Crawls a season: reads the season page for location links, crawls each location
        /// on its own task and collects the match classifications that were found.
        /// </summary>
        /// <param name="season">Season to crawl; its name and URL identify the season page.</param>
        /// <returns>
        /// Crawl results for the season. Classifications from locations whose task failed are
        /// logged and left out; the list is empty when the season page yields no location links.
        /// </returns>
        public CrawlResults Crawl(Season season)
        {
            Console.Write("Crawling " + season.Name + " ");
            CrawlResults results = new CrawlResults {
                Season = season.Name
            };

            CrawlerLinkDetails seasonPage = new CrawlerLinkDetails
            {
                SourcePageType      = PageType.SeasonList,
                SourcePageUrl       = "http://cricketarchive.com/Archive/Seasons/index.html",
                DestinationPageType = PageType.LocationList,
                DestinationUrl      = season.Url,
                LinkText            = season.Name
            };

            List <CrawlerLinkDetails> locationLinks = PageCrawler.CrawlLinksPage(seasonPage);

            List <MatchClassification> classifications = new List <MatchClassification>();

            // ContinueWhenAll throws ArgumentException when given no tasks, so a season without
            // any location links is handled here, mirroring the empty-queue guard in Recheck.
            if (locationLinks == null || locationLinks.Count == 0)
            {
                results.Classifications = classifications;
                Console.WriteLine(" done.");
                Log.InfoFormat("No location links found for season {0}", season.Name);
                return(results);
            }

            var taskQueue = new Queue <Task <List <MatchClassification> > >();

            foreach (var link in locationLinks)
            {
                // Copy to a local so each lambda captures its own link.
                CrawlerLinkDetails l = link;
                taskQueue.Enqueue(Task <List <MatchClassification> > .Factory.StartNew(() => CrawlLocation(l, season.Name)));
            }

            // Once every location task has finished, gather the successful results and log failures.
            Task.Factory.ContinueWhenAll(taskQueue.ToArray(),
                                         completedTasks =>
            {
                foreach (Task <List <MatchClassification> > task in completedTasks)
                {
                    if (task.Exception == null)
                    {
                        classifications.AddRange(task.Result);
                    }
                    else
                    {
                        Log.Error("Unexpected exception",
                                  task.Exception);
                    }
                }
            })
            .Wait();

            results.Classifications = classifications;
            Console.WriteLine(" done.");
            return(results);
        }
Beispiel #10
0
        /// <summary>
        /// Re-checks the stored crawl results for a season with a new <see cref="Spider"/>,
        /// logs a dump of the outcome and saves the re-checked results to the data store.
        /// </summary>
        /// <param name="dataStore">Store the existing results are loaded from and the new results are saved to.</param>
        /// <param name="season">Season to re-check; a warning is logged and nothing is saved if it has no stored results.</param>
        private static void RecheckSeason(FileStore dataStore, Season season)
        {
            CrawlResults stored = GetCrawlResultsForSeason(dataStore, season);

            if (stored == null)
            {
                Log.WarnFormat("Season {0} has not been crawled yet.", season.Name);
                return;
            }

            CrawlResults refreshed = new Spider().Recheck(stored);

            string summary = DumpResults(refreshed);
            Log.InfoFormat("\n{0}", summary);

            string finishedAt = DateTime.Now.ToShortTimeString();
            Log.InfoFormat("Crawler finished at {0}.", finishedAt);

            SaveCrawlerResults(refreshed, dataStore);
        }
Beispiel #11
0
        /// <summary>
        /// Parses every scorecard listed in the stored crawl results for a season,
        /// one after another, logging the start and finish times.
        /// </summary>
        /// <param name="dataStore">Store the crawl results are loaded from; also passed to each parse.</param>
        /// <param name="season">Season whose scorecards should be parsed.</param>
        private static void ParseScorecards(FileStore dataStore, Season season)
        {
            CrawlResults seasonResults = GetCrawlResultsForSeason(dataStore, season);

            if (seasonResults == null)
            {
                Log.WarnFormat("Season {0} has not been crawled yet.", season.Name);
                return;
            }

            string startedAt = DateTime.Now.ToShortTimeString();
            Log.InfoFormat("Scorecard parsing started at {0} for season {1}", startedAt, seasonResults.Season);

            // Walk the scorecards of every classification in turn.
            foreach (ScorecardDetails scorecard in seasonResults.Classifications.SelectMany(c => c.Scorecards))
            {
                ParseScorecard(dataStore, scorecard);
            }

            string finishedAt = DateTime.Now.ToShortTimeString();
            Log.InfoFormat("Scorecard parsing finished at {0} for season {1}", finishedAt, season.Name);
        }
Beispiel #12
0
        /// <summary>
        /// Polls the status endpoint until crawl results are available or the retry budget
        /// is used up, sleeping before each attempt.
        /// </summary>
        /// <param name="client">HTTP client used for each status request.</param>
        /// <param name="statusUri">URI of the status endpoint to poll.</param>
        /// <param name="retries">Maximum number of attempts to make.</param>
        /// <returns>The crawl results; never <c>null</c>.</returns>
        /// <exception cref="Exception">
        /// No results were obtained within <paramref name="retries"/> attempts, including the
        /// case where <paramref name="retries"/> is zero or negative and no attempt is made.
        /// </exception>
        private CrawlResults GetResultsWithRetry(HttpClient client, string statusUri, int retries)
        {
            CrawlResults results = null;

            int attempts = 0;

            while (results == null && attempts < retries)
            {
                // Sleep between each check.
                // NOTE(review): the constant's name says seconds, but Thread.Sleep(int) takes
                // milliseconds - confirm the constant's value and unit.
                Thread.Sleep(SECONDS_TO_SLEEP_BETWEEN_RESULT_ATTEMPTS);

                attempts++;

                results = GetResults(client, statusUri);
            }

            // Fail whenever there is nothing to return, rather than only when attempts == retries,
            // so a non-positive retry count cannot hand a silent null back to the caller.
            if (results == null)
            {
                throw new Exception(String.Format("Unable to receive crawl results for {0} - have tried {1} times", statusUri, attempts));
            }

            return(results);
        }
Beispiel #13
0
        /// <summary>
        /// Runs the individual batting, bowling and fielding map functions over every stored
        /// match of a season and saves the combined records of each kind to the data store.
        /// </summary>
        /// <param name="dataStore">Store the crawl results and matches are loaded from and the records are saved to.</param>
        /// <param name="season">Season to process.</param>
        /// <remarks>
        /// Nothing is saved when the season has not been crawled, has no match records, or none
        /// of its matches can be loaded from the store. Map tasks that fail are logged and their
        /// records are left out of the saved lists.
        /// </remarks>
        private void RunMapFunctions(FileStore dataStore, Season season)
        {
            CrawlResults crawlResults = GetCrawlResultsForSeason(dataStore, season);

            if (crawlResults == null)
            {
                Log.WarnFormat("Season {0} has not been crawled yet.", season.Name);
                return;
            }

            Log.InfoFormat("Map started at {0} for season {1}", DateTime.Now.ToShortTimeString(), crawlResults.Season);

            // Materialise once: the sequence is both counted and iterated below.
            var matchRecords = crawlResults.Classifications.SelectMany(m => m.Scorecards).ToList();

            if (matchRecords.Count == 0)
            {
                Log.InfoFormat("No match records found for {0}", season.Name);
                return;
            }

            var battingTasks  = new List <Task <List <BattingRecord> > >();
            var bowlingTasks  = new List <Task <List <BowlingRecord> > >();
            var fieldingTasks = new List <Task <List <FieldingRecord> > >();

            foreach (ScorecardDetails details in matchRecords)
            {
                string       id    = CricketMatch.GenerateId(details.Season, details.MatchCode);
                CricketMatch match = dataStore.Load <CricketMatch>(id);
                if (match == null)
                {
                    // No stored match for this scorecard, so there is nothing to map.
                    continue;
                }

                battingTasks.Add(Task <List <BattingRecord> > .Factory.StartNew(() => IndividualBattingMap.Run(match)));
                bowlingTasks.Add(Task <List <BowlingRecord> > .Factory.StartNew(() => IndividualBowlingMap.Run(match)));
                fieldingTasks.Add(Task <List <FieldingRecord> > .Factory.StartNew(() => IndividualFieldingMap.Run(match)));
            }

            // The three lists always grow together, so checking one is enough. ContinueWhenAll
            // throws ArgumentException for an empty task array, so stop here if nothing was started.
            if (battingTasks.Count == 0)
            {
                Log.WarnFormat("No parsed matches could be loaded for {0}", season.Name);
                return;
            }

            List <BattingRecord>  battingRecords  = new List <BattingRecord>();
            List <BowlingRecord>  bowlingRecords  = new List <BowlingRecord>();
            List <FieldingRecord> fieldingRecords = new List <FieldingRecord>();

            Task[] continuations = new[] {
                CollectWhenAll(battingTasks, battingRecords),
                CollectWhenAll(bowlingTasks, bowlingRecords),
                CollectWhenAll(fieldingTasks, fieldingRecords)
            };

            Task.WaitAll(continuations);

            dataStore.Save(battingRecords, IndividualBattingMap.GenerateId(season.Name));
            dataStore.Save(bowlingRecords, IndividualBowlingMap.GenerateId(season.Name));
            dataStore.Save(fieldingRecords, IndividualFieldingMap.GenerateId(season.Name));

            Log.InfoFormat("Map finished at {0} for season {1}", DateTime.Now.ToShortTimeString(), season.Name);
        }

        /// <summary>
        /// Creates a continuation that runs once all <paramref name="tasks"/> have finished,
        /// appending each successful task's records to <paramref name="destination"/> and
        /// logging the exception of each failed task.
        /// </summary>
        /// <param name="tasks">Map tasks to wait for; must contain at least one task.</param>
        /// <param name="destination">List that receives the records of the successful tasks.</param>
        /// <returns>The continuation task; wait on it before reading <paramref name="destination"/>.</returns>
        private Task CollectWhenAll <TRecord>(List <Task <List <TRecord> > > tasks, List <TRecord> destination)
        {
            return Task.Factory.ContinueWhenAll(tasks.ToArray(),
                                                completedTasks =>
            {
                foreach (Task <List <TRecord> > task in completedTasks)
                {
                    if (task.Exception == null)
                    {
                        destination.AddRange(task.Result);
                    }
                    else
                    {
                        Log.Error("Unexpected exception", task.Exception);
                    }
                }
            });
        }