/// <summary>
/// Reads the home and away team names for a single game node.
/// </summary>
/// <remarks>
/// When the home and away settings differ (expression or attribute), each team is read
/// from its own location. When both are identical, one text is read and split on the
/// first separator it contains, tried in the order " vs. ", " vs ", " v ".
/// </remarks>
/// <param name="game">Node of the game whose teams are extracted.</param>
/// <param name="info">Scraping settings holding the team expressions and attributes.</param>
/// <returns>
/// An array whose first entry is the home team and second entry is the away team.
/// In the single-text case the array may hold more than two entries if the separator
/// occurs more than once.
/// </returns>
/// <exception cref="FormatException">
/// The single text is missing, contains none of the known separators, or yields fewer
/// than two non-empty parts.
/// </exception>
private static string[] ExtractTeams(HtmlNode game, ScrapingInfo info)
{
    bool separateLocations =
        info.HomeTeamNameExpression != info.AwayTeamNameExpression
        || info.HomeTeamAttribute != info.AwayTeamAttribute;

    if (separateLocations)
    {
        return new string[]
        {
            ReadFromNode(game, info.HomeTeamNameExpression, info.HomeTeamAttribute),
            ReadFromNode(game, info.AwayTeamNameExpression, info.AwayTeamAttribute)
        };
    }

    var combined = ReadFromNode(game, info.HomeTeamNameExpression, info.HomeTeamAttribute);
    if (combined == null)
    {
        throw new FormatException("No text found to extract the team names from.");
    }

    // NOTE(review): the original literal looked like a plain space replaced by a plain
    // space; presumably it was a non-breaking space. Written as an escape here so the
    // normalisation is visible and the separators below can match - confirm.
    combined = combined.Replace('\u00A0', ' ');

    var separators = new string[] { " vs. ", " vs ", " v " };
    foreach (var sep in separators)
    {
        if (!combined.Contains(sep))
        {
            continue;
        }

        var teams = combined.Split(new[] { sep }, StringSplitOptions.RemoveEmptyEntries);

        // RemoveEmptyEntries can leave a single entry (e.g. "Team vs "); callers index
        // both [0] and [1], so report that as a format problem here.
        if (teams.Length < 2)
        {
            throw new FormatException(
                "Expected two team names separated by '" + sep + "' in '" + combined + "'.");
        }

        return teams;
    }

    throw new FormatException("No team separator found in '" + combined + "'.");
}
/// <summary>
/// Builds the kick-off timestamp of a game from its separately scraped date and time.
/// </summary>
/// <param name="game">Node of the game to read from.</param>
/// <param name="info">Scraping settings holding the date/time expressions, attributes and formats.</param>
/// <returns>
/// A <see cref="DateTime"/> taking year, month and day from the parsed date and hour,
/// minute and second from the parsed time.
/// </returns>
private static DateTime GetDateTime(HtmlNode game, ScrapingInfo info)
{
    var dateText = ReadFromNode(game, info.DateExpression, info.DateAttribute);
    var day = ParseDateTime(dateText, info.DateFormat);

    var timeText = ReadFromNode(game, info.TimeExpression, info.TimeAttribute);
    var clock = ParseDateTime(timeText, info.TimeFormat);

    // Only the calendar part of the first value and the h/m/s part of the second are kept.
    return new DateTime(
        day.Year, day.Month, day.Day,
        clock.Hour, clock.Minute, clock.Second);
}
/// <summary>
/// Parses a page of HTML and registers every game found on it via <c>AddGame</c>.
/// </summary>
/// <param name="html">Raw HTML of the page.</param>
/// <param name="siteId">Identifier passed through to <c>AddGame</c> for each game.</param>
/// <param name="info">Scraping settings holding the expressions and attributes to read.</param>
/// <returns>
/// <c>false</c> when selecting the game list yields <c>null</c>; otherwise <c>true</c>
/// after all selected games have been processed.
/// </returns>
static bool TryParsePage(string html, int siteId, ScrapingInfo info)
{
    var document = new HtmlDocument();
    document.LoadHtml(html);

    var gameNodes = document.DocumentNode.SelectNodes(info.GameListExpression);
    if (gameNodes == null)
    {
        return false;
    }

    foreach (var node in gameNodes)
    {
        var kickoff = GetDateTime(node, info);

        var names = ExtractTeams(node, info);
        var home = names[0].Trim();
        var away = names[1].Trim();

        // Odds for home win, draw and away win, in that order.
        var homeOdd = ParseOdd(ReadFromNode(node, info.HomeRatioExpression, info.HomeRatioAttribute));
        var drawOdd = ParseOdd(ReadFromNode(node, info.RatioXExpression, info.RatioXAttribute));
        var awayOdd = ParseOdd(ReadFromNode(node, info.AwayRatioExpression, info.AwayRatioAttribute));

        AddGame(siteId, kickoff, home, away, homeOdd, drawOdd, awayOdd);
    }

    return true;
}