protected override async Task ScrapeData() { // Fetch all teams from web portal var teams = await WebPortalHelper.GetTeams(Helper.GetSportCode()); if (teams == null || teams.Count == 0) { throw new ArgumentNullException(nameof(teams), "Teams from web portal is null"); } FromDate = Helper.ToMinTime(FromDate); ToDate = Helper.ToMaxTime(ToDate); var totalDays = Convert.ToInt32((ToDate - FromDate).TotalDays); for (var date = FromDate.Date; date <= ToDate.Date; date = date.AddDays(1)) { Logger.Information($"Scrape matches: {date.ToShortDateString()}"); await UpdateScrapeStatus(null, $"Scrape matches: {date.ToShortDateString()}"); var url = $"http://site.api.espn.com/apis/site/v2/sports/basketball/nba/scoreboard?lang=en®ion=au&calendartype=blacklist&limit=100&dates={Helper.GetDate(date)}&tz=Australia%2FMelbourne"; var rawResult = await ScrapeHelper.GetDocument(url); var jObject = JObject.Parse(rawResult); if (jObject == null) { throw new ArgumentNullException(nameof(jObject)); } var sportCode = Helper.GetSportCode(); var teamCodes = new List <string>(); var competitions = jObject.SelectTokens("$.events[*].competitions[*]"); foreach (var competition in competitions) { var homeAbbr = competition.SelectToken("$.competitors[0].team.abbreviation").ToString(); var awayAbbr = competition.SelectToken("$.competitors[1].team.abbreviation").ToString(); if (teams.All(x => x.ShortName != homeAbbr)) { throw new Exception($"Cannot find any home team named '{homeAbbr}' in teams"); } if (teams.All(x => x.ShortName != awayAbbr)) { throw new Exception($"Cannot find any away team named '{awayAbbr}' in teams"); } teamCodes.Add(homeAbbr); teamCodes.Add(awayAbbr); var homeTeamId = teams.First(x => x.ShortName.Equals(homeAbbr)).Id; var awayTeamId = teams.First(x => x.ShortName.Equals(awayAbbr)).Id; var homeTeamName = competition.SelectToken("$.competitors[0].team.name").ToString(); var awayTeamName = competition.SelectToken("$.competitors[1].team.name").ToString(); homeTeamName = homeTeamName.Substring(0, Math.Min(homeTeamName.Length, 3)).ToUpper(); awayTeamName = awayTeamName.Substring(0, Math.Min(awayTeamName.Length, 3)).ToUpper(); var gameCode = $"{Helper.GetDate(date, "MMddyyyy")}{homeTeamName}{awayTeamName}"; var gameDate = competition.SelectToken("$.date").ToString(); Logger.Information($"Match: {gameCode}, {homeTeamName} vs {awayTeamName}, {gameDate}"); Matches.Add(new Match { StartTime = DateTime.Parse(gameDate), HomeTeamId = homeTeamId, AwayTeamId = awayTeamId, GameCode = gameCode, SportCode = sportCode }); } Logger.Information("Scraped matches complete"); Logger.Information("Scrape players from teams"); const string baseTeamsUrl = "https://www.espn.com/nba/team/stats/_/name"; const string xPathToPlayers = "/html/body/div[1]/div/div/div/div/div[5]/div[2]/div[5]/div[1]/div/section/div/section[1]/div[2]/table/tbody/tr[*]/td/span/a"; var playerTasks = new List <Task <HtmlNodeCollection> >(); foreach (var teamCode in teamCodes) { url = $"{baseTeamsUrl}/{teamCode}"; playerTasks.Add(ScrapeHelper.GetInnerHtml(url, xPathToPlayers)); } var nodes = await Task.WhenAll(playerTasks); for (var i = 0; i < teamCodes.Count; i++) { var teamId = teams.First(x => x.ShortName == teamCodes[i]).Id; Logger.Information($"Scrape player from: {baseTeamsUrl}/{teamCodes[i]}"); ExtractPlayers(nodes[i], teamId); } Logger.Information("Scrape players complete"); var newProgress = GetScrapingInformation().Progress; newProgress = Math.Min(newProgress + 90 / totalDays, 90); await UpdateScrapeStatus(newProgress, $"Scrape matches: {date.ToShortDateString()} complete"); } }