/// <summary>
/// Works through the RP_Match_By_Parents view in batches of 500 rows. For each row whose
/// horse has a foal year between 3 and 25 years after both its sire and its dam, a
/// Horse_Merged row is created and the horse is linked to it ("RP Added"); every other
/// horse that is found is marked "Failed".
/// </summary>
/// <remarks>
/// Reassigns the static db_racing_update and db_racing_read contexts. The read context is
/// left open on return because it is a shared field whose other users are not visible here.
/// NOTE(review): the loop only ends if the view stops returning rows once a horse's
/// PPMatchBasis has been written - presumably the view filters on it; confirm.
/// </remarks>
public static void Add_RP_Merge()
{
    string connection_string = ConfigurationManager.ConnectionStrings["Racing"].ToString();

    db_racing_update = new RacingPostRacesDataContext(connection_string);
    db_racing_read = new RacingPostRacesDataContext(connection_string);
    db_racing_read.CommandTimeout = 180;

    while (db_racing_read.RP_Match_By_Parents.Any())
    {
        List<RP_Match_By_Parent> view_recs = db_racing_read.RP_Match_By_Parents.Take(500).ToList();
        bool batch_submitted_changes = false;

        foreach (RP_Match_By_Parent view_rec in view_recs)
        {
            Logger.WriteLog("Attempting RP Merge for " + view_rec.Name + " " + view_rec.Id);

            Horse rp_horse = db_racing_update.Horses.FirstOrDefault(x => x.Id == view_rec.Id);
            if (rp_horse == null)
            {
                // Nothing to update, so this view row will still be there on the next pass.
                continue;
            }

            // Start from "Failed"; only a completed merge below overwrites it.
            rp_horse.PPMatchBasis = "Failed";
            batch_submitted_changes = true;

            if (!HasPlausibleParentFoalYears(rp_horse, view_rec))
            {
                db_racing_update.SubmitChanges();
                continue;
            }

            var merge_horse = new Horse_Merged();
            db_racing_update.Horse_Mergeds.InsertOnSubmit(merge_horse);
            merge_horse.RHId = rp_horse.Id;
            merge_horse.Name = rp_horse.Name;
            merge_horse.Country = rp_horse.Country;
            merge_horse.FoalDate = rp_horse.FoalDate;
            merge_horse.FoalYear = rp_horse.FoalYear;
            merge_horse.Colour = rp_horse.Colour;

            // Unrecognised or missing sex codes leave Sex at whatever a new Horse_Merged starts with.
            string merged_sex = MapToMergedSex(rp_horse.Sex);
            if (merged_sex != null)
            {
                merge_horse.Sex = merged_sex;
            }

            merge_horse.SireId = view_rec.SireId;
            merge_horse.DamId = view_rec.DamId;
            // Haplo is copied from the dam's merged record (null/default when the dam is not found).
            merge_horse.Haplo = db_racing_read.Horse_Mergeds
                .Where(x => x.Id == merge_horse.DamId)
                .Select(x => x.Haplo)
                .FirstOrDefault();
            merge_horse.MergeBasis = "RP Added";

            // First submit inserts the merged row; its Id is read back on the next line.
            db_racing_update.SubmitChanges();

            rp_horse.PPId = merge_horse.Id;
            rp_horse.PPMatchBasis = "RP Added";
            db_racing_update.SubmitChanges();
        }

        // Swap in a fresh update context per batch, releasing the old one instead of
        // leaving it for the finalizer.
        db_racing_update.Dispose();
        db_racing_update = new RacingPostRacesDataContext(connection_string);

        if (!batch_submitted_changes)
        {
            // Every row in the batch lacked a Horse record, so the database is unchanged
            // and the next pass would see the same rows again: stop rather than spin forever.
            Logger.WriteLog("RP Merge stopped: no Horse record found for any row in the current batch");
            break;
        }
    }

    string cmd = "UPDATE Horse SET PPMatchBasis = 'Failed' WHERE PPMatchBasis = 'Failed RP_PQ_PP'";
    db_racing_update.ExecuteCommand(cmd);
}

// True when the horse, sire and dam foal years are all known and the horse was foaled
// between 3 and 25 years (inclusive) after each parent.
private static bool HasPlausibleParentFoalYears(Horse rp_horse, RP_Match_By_Parent view_rec)
{
    if (rp_horse.FoalYear == null || view_rec.SireFoalYear == null || view_rec.DamFoalYear == null)
    {
        return false;
    }

    int sire_diff = (int)rp_horse.FoalYear - (int)view_rec.SireFoalYear;
    int dam_diff = (int)rp_horse.FoalYear - (int)view_rec.DamFoalYear;

    return sire_diff >= 3 && sire_diff <= 25
        && dam_diff >= 3 && dam_diff <= 25;
}

// Collapses a horse sex code to the merged form: f/m -> f, c/h -> c, g -> g, r -> r;
// returns null for null or any other code.
private static string MapToMergedSex(string sex)
{
    switch (sex)
    {
        case "f":
        case "m":
            return "f";
        case "c":
        case "h":
            return "c";
        case "g":
            return "g";
        case "r":
            return "r";
        default:
            return null;
    }
}
/// <summary>
/// Scans a results page for meeting blocks and, for each block with a recognisable course
/// link, upserts the Course, replaces the Meeting for that course and date, and inserts one
/// unscraped ScrapeRace row per race link found.
/// </summary>
/// <param name="page">Page text that the meeting pattern is matched against.</param>
/// <param name="date">Date written to Meeting.DateOfMeeting and ScrapeRace.RaceDate.</param>
private static void ScrapeDatePage(string page, DateTime date)
{
    // Every pattern is loop-invariant, so each Regex is built once rather than per match.
    //
    // NOTE(review): groups 1-3 of the meeting pattern are the literal tokens "<", "div" and " ",
    // yet they are read below as the meeting head, tail and race list. As written the course
    // pattern can never match a lone "<", so no rows are saved. The pattern appears to have
    // lost its content-capturing groups (the empty "()" is a likely remnant). It is kept
    // unchanged because the intended page markup is not visible here - restore the groups
    // against a real page.
    // Earlier pattern, kept for reference:
    //var regex_meeting =
    //    new Regex(@"(<)(div)( )(class)(=)(")(rp)(-)(resultsWrapper__content)(")(>)(<[^>]+>).*?<.*?<.*?(<)(\/)(h1)(>)(<)(\/div)(>)",
    //        RegexOptions.Singleline);
    var regex_meeting = new Regex(
        @"(<)(div)( )(class)(=)(""rp-resultsWrapper__content"").*?().*?(<\/h1>)(<\/div>)",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);
    var regex_course = new Regex(
        @"<a href=""http://www.racingpost.com/horses/course_home.sd\?crs_id=(\d+)[^>]+>([^<]+)</a>");
    var regex_course2 = new Regex(@"([^(]+)(\([^(]+\)){0,1}");
    var regex_going = new Regex(@"<strong>GOING:</strong>([^<\n]+)", RegexOptions.Singleline);
    var regex_weather = new Regex(@"<strong>Weather conditions:</strong>([^<\n]+)", RegexOptions.Singleline);
    var regex_stalls = new Regex(@"<strong>STALLS:</strong>([^<\n]+)", RegexOptions.Singleline);
    var regex_race = new Regex(@"<td(.*?)</td>", RegexOptions.Singleline);
    var regex_race_link = new Regex(
        @"<a href=""(/horses/result_home\.sd\?race_id=(\d+)[^""]+)""",
        RegexOptions.Singleline);

    for (Match match_meeting = regex_meeting.Match(page);
         match_meeting.Success;
         match_meeting = match_meeting.NextMatch())
    {
        string meeting_head = match_meeting.Groups[1].ToString();
        string meeting_tail = match_meeting.Groups[2].ToString();
        string races = match_meeting.Groups[3].ToString();

        Match match_course = regex_course.Match(meeting_head);
        if (!match_course.Success)
        {
            continue;
        }

        int course_id = Convert.ToInt32(match_course.Groups[1].ToString());

        // "(AW)" is masked while the trailing "(country)" suffix is split off, then restored,
        // so it is not mistaken for the country part.
        string course_whole = match_course.Groups[2].ToString().Replace("(AW)", ":AW:");
        string course = "";
        string country = "";
        Match match_course2 = regex_course2.Match(course_whole);
        if (match_course2.Success)
        {
            course = match_course2.Groups[1].ToString().Replace(":AW:", "(AW)").Trim();
            country = match_course2.Groups[2].ToString().Replace("(", "").Replace(")", "").Trim();
        }

        string going = FirstCaptureOrEmpty(regex_going, meeting_tail);
        string weather = FirstCaptureOrEmpty(regex_weather, meeting_tail);
        string stalls = FirstCaptureOrEmpty(regex_stalls, meeting_tail);

        // Upsert the course.
        Course course_rec = db_rph.Courses.FirstOrDefault(x => x.Id == course_id);
        if (course_rec == null)
        {
            course_rec = new Course();
            course_rec.Id = course_id;
            db_rph.Courses.InsertOnSubmit(course_rec);
        }
        course_rec.Name = course;
        course_rec.Country = country;
        db_rph.SubmitChanges();

        // Replace any meeting already stored for this course and date, together with its
        // race links. The id is passed through ExecuteCommand's own {0} placeholder so it
        // travels as a SQL parameter instead of being formatted into the statement text.
        Meeting meeting_rec = db_rph.Meetings
            .FirstOrDefault(x => x.CourseId == course_id && x.DateOfMeeting == date);
        if (meeting_rec != null)
        {
            db_rph.ExecuteCommand("DELETE FROM Meeting WHERE Id = {0}", meeting_rec.Id);
            db_rph.ExecuteCommand("DELETE FROM ScrapeRace WHERE MeetingId = {0}", meeting_rec.Id);
        }

        meeting_rec = new Meeting();
        db_rph.Meetings.InsertOnSubmit(meeting_rec);
        meeting_rec.CourseId = course_id;
        meeting_rec.DateOfMeeting = date;
        meeting_rec.Going = going;
        meeting_rec.Weather = weather;
        meeting_rec.Stalls = stalls;
        db_rph.SubmitChanges();

        // One ScrapeRace row per table cell that carries a result link; cells without one are skipped.
        for (Match match_race = regex_race.Match(races);
             match_race.Success;
             match_race = match_race.NextMatch())
        {
            string race = match_race.Groups[1].ToString();
            Match match_race_link = regex_race_link.Match(race);
            if (!match_race_link.Success)
            {
                continue;
            }

            string race_link = match_race_link.Groups[1].ToString().Trim();
            int race_id = Convert.ToInt32(match_race_link.Groups[2].ToString().Trim());

            var race_rec = new ScrapeRace();
            db_rph.ScrapeRaces.InsertOnSubmit(race_rec);
            race_rec.MeetingId = meeting_rec.Id;
            race_rec.Link = "http://www.racingpost.com" + race_link;
            race_rec.Scraped = false;
            race_rec.RaceId = race_id;
            race_rec.RaceDate = date;
            // Submitted per race, so rows already saved survive a failure on a later one.
            db_rph.SubmitChanges();
        }
    }
}

// Returns the trimmed first capture group of the pattern's first match in input,
// or an empty string when there is no match.
private static string FirstCaptureOrEmpty(Regex pattern, string input)
{
    Match match = pattern.Match(input);
    return match.Success ? match.Groups[1].ToString().Trim() : "";
}