public static void ScrapeHorses() { Common.rand = new Random(); db_rph = new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString()); int count = db_rph.Horses.Count(x => x.DetailProcessed == 0 && x.PriorityProcess); while (db_rph.Horses.Any(x => x.DetailProcessed == 0 && x.PriorityProcess)) { foreach (Horse scrape in db_rph.Horses.Where(x => x.DetailProcessed == 0 && x.PriorityProcess).Take(500)) { Logger.WriteLog((count--).ToString() + " - " + scrape.Name); string status = ""; string page = ""; GetHorse((int)scrape.RPId, ref status, ref page); scrape.DetailRaw = "Error"; scrape.DetailProcessed = -1; if (status == "Complete") { var regex_name = new Regex(@"<h1>\s+([^(<]+)(?:\(([A-Z]+)\) ){0,1}</h1>"); Match match_name = regex_name.Match(page); if (match_name.Success) { string country = match_name.Groups[2].ToString(); if (country == "") { country = "GB"; } scrape.Country = country; } var regex_header = new Regex("<ul id=\"detailedInfo\">(.*?)</ul>", RegexOptions.Singleline); Match match_header = regex_header.Match(page); if (match_header.Success) { scrape.DetailRaw = match_header.Groups[1].ToString(); ProcessDetail(scrape); scrape.DetailProcessed = 1; } else { Logger.WriteLog("Detail not found: " + scrape.Name); } } else { Logger.WriteLog("Horse retrieval error " + status + " - " + scrape.Name); } db_rph.SubmitChanges(); Common.Wait(); } db_rph = new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString()); } }
public static void ScrapePedigree() { rand = new Random(); db_ppdb = new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString()); //var cmd = "UPDATE PQ_Horse SET PQOutcome = 'Rework' WHERE SireId IS NULL OR DamId IS NULL"; //db_ppdb.ExecuteCommand(cmd); foreach ( PQ_Horse scrape in db_ppdb.PQ_Horses.Where(x => x.MergeMatchBasis == null && (x.SireId == null || x.DamId == null)) .OrderBy(x => x.Name)) { Logger.WriteLog("Scrape PQ for " + scrape.Name + " " + scrape.Id); string outcome = ""; string pqid = ""; string status = ""; // get the page string page = ""; GetHorse(scrape.Id, ref status, ref page); if (status == "Complete") { if (page.Contains("can't be found in the database")) { outcome = "not found"; Logger.WriteLog(scrape.Name + " not found\r\n"); } else { ScrapePage(db_ppdb, page, ref outcome, ref pqid); db_ppdb.SubmitChanges(); } } else { outcome = status; Logger.WriteLog(scrape.Name + " " + status + "\r\n"); } scrape.PQOutcome = "Retrieved " + outcome; db_ppdb.SubmitChanges(); } }
private static void ScrapePage(RacingPostRacesDataContext db_ppdb, string page, ref string outcome, ref string pqid) { List <PedigreeHorse> horses = PedigreeExtract.Extract(page); var added = new List <string>(); foreach (PedigreeHorse horse in horses.OrderBy(x => x.Generation).ThenByDescending(x => x.Pedigree)) { if (added.Contains(horse.PQId)) { continue; } PQ_Horse pq_horse = db_ppdb.PQ_Horses.FirstOrDefault(x => x.Id == horse.PQId); if (pq_horse == null) { pq_horse = new PQ_Horse(); db_ppdb.PQ_Horses.InsertOnSubmit(pq_horse); pq_horse.Id = horse.PQId; pq_horse.Name = horse.Name; pq_horse.FlatName = FlattenName(horse.Name, false); added.Add(horse.PQId); } if (pq_horse.Country == null) { pq_horse.Country = horse.Country; } if (pq_horse.FoalYear == null) { pq_horse.FoalYear = horse.FoalYear; } if (pq_horse.Colour == null) { pq_horse.Colour = horse.Colour; } if (pq_horse.Sex == null) { pq_horse.Sex = horse.Sex; } if (pq_horse.SireId == null) { PedigreeHorse sire1 = horses.FirstOrDefault(x => x.Pedigree == horse.Pedigree + "S"); if (sire1 != null) { pq_horse.SireId = sire1.PQId; } } if (pq_horse.DamId == null) { PedigreeHorse dam1 = horses.FirstOrDefault(x => x.Pedigree == horse.Pedigree + "D"); if (dam1 != null) { pq_horse.DamId = dam1.PQId; } } if (horse.Generation == 0) { pq_horse.Starts = horse.Starts; pq_horse.Wins = horse.Wins; pq_horse.Places = horse.Places; pq_horse.Earnings = horse.Earnings; pq_horse.Owner = horse.Owner; pq_horse.Breeder = horse.Breeder; } Logger.WriteLog(horse.Pedigree + " " + horse.PQId + " " + horse.Name + " " + horse.Country + " " + horse.FoalYear + " " + horse.Colour + " " + horse.Starts + " " + horse.Wins + " " + horse.Places + " " + horse.Earnings + " " + horse.Owner + " " + horse.Breeder); } }
public static void ScrapeUnmatched() { rand = new Random(); db_ppdb = new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString()); while (db_ppdb.Horses.Any(x => x.PPMatchBasis == "Failed RP_PP" && x.PriorityProcess)) { foreach (Horse scrape in db_ppdb.Horses.Where(x => x.PPMatchBasis == "Failed RP_PP" && x.PriorityProcess).Take(500)) { Logger.WriteLog("Scrape PQ for " + scrape.Name + " " + scrape.Id); scrape.PPMatchBasis = "Pending RP_PQ"; string outcome = ""; string pqid = ""; string status = ""; string page = ""; SearchHorse(scrape.Name, ref status, ref page); if (status == "Complete" && page.Contains("can't be found in the database")) { var regex = new Regex("(.*) [ivxIVX]+$"); Match match = regex.Match(scrape.Name); if (match.Success) { status = ""; SearchHorse(match.Groups[1].ToString(), ref status, ref page); } } if (status == "Complete") { if (page.Contains("can't be found in the database")) { outcome = "not found"; Logger.WriteLog(scrape.Name + " not found\r\n"); } else if (page.Contains("more than \none horse named ")) { List <string> pqids = ProcessMultiple(scrape, page); foreach (string multi_pqid in pqids) { if (multi_pqid == "skiing" || multi_pqid == "generator") { continue; } GetHorse(multi_pqid, ref status, ref page); if (status == "Complete") { if (page.Contains("can't be found in the database")) { Logger.WriteLog(scrape.Name + " multiple not found\r\n"); } else { ScrapePage(db_ppdb, page, ref outcome, ref pqid); db_ppdb.SubmitChanges(); } } else { Logger.WriteLog(multi_pqid + " Already retrieved\r\n"); } } } else { ScrapePage(db_ppdb, page, ref outcome, ref pqid); db_ppdb.SubmitChanges(); } Logger.WriteLog(" "); } scrape.PQOutcome = "Retrieved " + outcome; db_ppdb.SubmitChanges(); } db_ppdb = new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString()); } }
public static void Merge_RP_PQ_PP() { db_racing_update = new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString()); db_racing_read = new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString()); while (db_racing_update.Horses.Where(x => x.PPMatchBasis == "Pending RP_PQ").Any()) { foreach ( Horse rp_horse in db_racing_update.Horses.Where(x => x.PPMatchBasis == "Pending RP_PQ") .OrderBy(x => x.Id) .ThenBy(x => x.Name) .Take(500)) { rp_horse.PPId = null; rp_horse.PPMatchBasis = "Failed RP_PQ_PP"; Logger.WriteLog("Merging RP/PQ to PP " + rp_horse.Name + " " + rp_horse.Id); PQ_Horse pq_horse = db_racing_update.PQ_Horses.Where(x => x.Id == rp_horse.PQId).FirstOrDefault(); if (pq_horse != null) { if (pq_horse.MergeId != null) { Horse_Merged merge_horse = db_racing_update.Horse_Mergeds.Where(x => x.PPId == pq_horse.MergeId).FirstOrDefault(); if (merge_horse != null) { merge_horse.RHId = rp_horse.Id; merge_horse.Name = rp_horse.Name; if (merge_horse.Country == null) { merge_horse.Country = rp_horse.Country; } if (merge_horse.FoalDate == null) { merge_horse.FoalDate = rp_horse.FoalDate; } if (merge_horse.FoalYear == null) { merge_horse.FoalYear = rp_horse.FoalYear; } merge_horse.MergeBasis = "RP-PQ Matched"; } rp_horse.PPId = pq_horse.MergeId; rp_horse.PPMatchBasis = "PQ Direct"; } else if (Merge_RP_PQ_Merge(pq_horse, 1, rp_horse.Id) != null) { rp_horse.PPId = pq_horse.MergeId; rp_horse.PPMatchBasis = "PQ Added"; } } db_racing_update.SubmitChanges(); } db_racing_update = new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString()); } }
public static void Match_PQ_PP() { db_racing_update = new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString()); db_racing_read = new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString()); pp_horses = db_racing_read.PP_Horse_Selects.ToDictionary(x => x.Id, x => new PPHorse { HorseName = x.HorseName, Id = x.Id, SireId = x.SireId, SireName = x.SireName, DamName = x.DamName, Country = x.Country, FoalYear = x.FoalYear, OrigName = x.OrigName }); pp_horse_lookup = pp_horses.Select(x => x.Value).ToLookup(x => x.HorseName, x => x.Id); pp_sire_lookup = pp_horses.Where(x => x.Value.SireId != null) .Select(x => x.Value) .ToLookup(x => (int)x.SireId, x => x.Id); while (db_racing_update.PQ_Horses.Where(x => x.MergeMatchBasis == null).Any()) { foreach (PQ_Horse pq_horse in db_racing_update.PQ_Horses.Where(x => x.MergeMatchBasis == null).Take(500) ) { pq_horse.MergeMatchBasis = "Failed"; Logger.WriteLog("Matching PQ to PP " + pq_horse.Name + " " + pq_horse.Id); string horse_name = FlattenName(pq_horse.Name); string sire_name = null; string dam_name = null; int? year = null; if (pq_horse.FoalYear != null) { year = pq_horse.FoalYear; } string country = pq_horse.Country == null ? "" : pq_horse.Country; PQ_Horse pq_sire = null; if (pq_horse.SireId != null) { pq_sire = db_racing_read.PQ_Horses.Where(x => x.Id == pq_horse.SireId).FirstOrDefault(); if (pq_sire != null) { sire_name = FlattenName(pq_sire.Name); } } PQ_Horse pq_dam = null; if (pq_horse.DamId != null) { pq_dam = db_racing_read.PQ_Horses.Where(x => x.Id == pq_horse.DamId).FirstOrDefault(); if (pq_dam != null) { dam_name = FlattenName(pq_dam.Name); } } int? ppid = null; string match_basis = null; if (MatchToPPByName(horse_name, sire_name, dam_name, year, country, ref ppid, ref match_basis)) { pq_horse.MergeId = (int)ppid; pq_horse.MergeMatchBasis = match_basis; } if (ppid == null && pq_sire != null) { int?sire_merge_id = null; if (pq_sire.MergeId != null) { sire_merge_id = pq_sire.MergeId; } match_basis = "Sire"; if (MatchToPPBySire(sire_merge_id, horse_name, sire_name, dam_name, year, country, ref ppid, ref match_basis)) { pq_horse.MergeId = (int)ppid; pq_horse.MergeMatchBasis = match_basis; } } } db_racing_update.SubmitChanges(); } db_racing_update = new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString()); }
public static void Match_RP_PQ() { db_racing_update = new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString()); db_racing_read = new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString()); pq_horses = db_racing_read.PQ_Horse_Selects.ToDictionary(x => x.Id, x => new PQHorse { HorseName = x.FlatName, Id = x.Id, SireId = x.SireId, SireName = x.SireName, DamName = x.DamName, Country = x.Country, FoalYear = x.FoalYear }); pq_horse_lookup = pq_horses.Select(x => x.Value).ToLookup(x => x.HorseName, x => x.Id); pq_sire_lookup = pq_horses.Where(x => x.Value.SireId != null) .Select(x => x.Value) .ToLookup(x => x.SireId, x => x.Id); while (db_racing_update.Horses.Where(x => x.PQMatchBasis == null).Any()) { foreach (Horse rp_horse in db_racing_update.Horses.Where(x => x.PQMatchBasis == null).Take(500)) { rp_horse.PQMatchBasis = "Failed"; Logger.WriteLog("Match RP to PQ " + rp_horse.Name + " " + rp_horse.Id); string horse_name = FlattenName(rp_horse.Name); string sire_name = null; string dam_name = null; int? year = null; if (rp_horse.FoalYear != null) { year = rp_horse.FoalYear; } string country = rp_horse.Country == null ? "" : rp_horse.Country; Horse rp_sire = null; if (rp_horse.SireId != null) { rp_sire = db_racing_update.Horses.Where(x => x.Id == rp_horse.SireId).FirstOrDefault(); if (rp_sire != null) { sire_name = FlattenName(rp_sire.Name); } } Horse rp_dam = null; if (rp_horse.DamId != null) { rp_dam = db_racing_update.Horses.Where(x => x.Id == rp_horse.DamId).FirstOrDefault(); if (rp_dam != null) { dam_name = FlattenName(rp_dam.Name); } } string pqid = null; string match_basis = null; if (MatchToPQByName(horse_name, sire_name, dam_name, year, country, ref pqid, ref match_basis)) { rp_horse.PQId = pqid; rp_horse.PQMatchBasis = match_basis; } if (pqid == null && rp_sire != null) { string sire_pqid = null; if (rp_sire.PQId != null) { sire_pqid = rp_sire.PQId; } match_basis = "Sire"; if (MatchToPQBySire(sire_pqid, horse_name, sire_name, dam_name, year, country, ref pqid, ref match_basis)) { rp_horse.PQId = pqid; rp_horse.PQMatchBasis = match_basis; } } // this option for dams for which we have minimal info if (pqid == null && rp_sire != null) { int offspring_year_min = 9999; int offspring_year_max = 0; foreach (Horse offspring in db_racing_read.Horses.Where(x => x.DamId == rp_horse.Id)) { if (offspring.FoalYear != null) { if (offspring.FoalYear < offspring_year_min) { offspring_year_min = (int)offspring.FoalYear; } if (offspring.FoalYear > offspring_year_max) { offspring_year_max = (int)offspring.FoalYear; } } } if (offspring_year_min != 9999) { if (MatchToPQDamSpecial(horse_name, sire_name, offspring_year_min, offspring_year_max, ref pqid, ref match_basis)) { rp_horse.PQId = pqid; rp_horse.PQMatchBasis = match_basis; } } } db_racing_update.SubmitChanges(); } db_racing_update = new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString()); } }
public static void Add_RP_Merge() { db_racing_update = new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString()); db_racing_read = new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString()); db_racing_read.CommandTimeout = 180; while (db_racing_read.RP_Match_By_Parents.Any()) { List <RP_Match_By_Parent> view_recs = db_racing_read.RP_Match_By_Parents.Take(500).ToList(); foreach (RP_Match_By_Parent view_rec in view_recs) { Logger.WriteLog("Attempting RP Merge for " + view_rec.Name + " " + view_rec.Id); Horse rp_horse = db_racing_update.Horses.Where(x => x.Id == view_rec.Id).FirstOrDefault(); if (rp_horse == null) { continue; } rp_horse.PPMatchBasis = "Failed"; if (rp_horse.FoalYear == null || view_rec.SireFoalYear == null || view_rec.DamFoalYear == null) { db_racing_update.SubmitChanges(); continue; } int sire_diff = (int)rp_horse.FoalYear - (int)view_rec.SireFoalYear; if (sire_diff < 3 || sire_diff > 25) { db_racing_update.SubmitChanges(); continue; } int dam_diff = (int)rp_horse.FoalYear - (int)view_rec.DamFoalYear; if (dam_diff < 3 || dam_diff > 25) { db_racing_update.SubmitChanges(); continue; } var merge_horse = new Horse_Merged(); db_racing_update.Horse_Mergeds.InsertOnSubmit(merge_horse); merge_horse.RHId = rp_horse.Id; merge_horse.Name = rp_horse.Name; merge_horse.Country = rp_horse.Country; merge_horse.FoalDate = rp_horse.FoalDate; merge_horse.FoalYear = rp_horse.FoalYear; merge_horse.Colour = rp_horse.Colour; if (rp_horse.Sex != null) { switch (rp_horse.Sex) { case "f": case "m": merge_horse.Sex = "f"; break; case "c": case "h": merge_horse.Sex = "c"; break; case "g": merge_horse.Sex = "g"; break; case "r": merge_horse.Sex = "r"; break; } } merge_horse.SireId = view_rec.SireId; merge_horse.DamId = view_rec.DamId; merge_horse.Haplo = db_racing_read.Horse_Mergeds.Where(x => x.Id == merge_horse.DamId) .Select(x => x.Haplo) .FirstOrDefault(); merge_horse.MergeBasis = "RP Added"; db_racing_update.SubmitChanges(); rp_horse.PPId = merge_horse.Id; rp_horse.PPMatchBasis = "RP Added"; db_racing_update.SubmitChanges(); } db_racing_update = new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString()); } string cmd = "UPDATE Horse SET PPMatchBasis = 'Failed' WHERE PPMatchBasis = 'Failed RP_PQ_PP'"; db_racing_update.ExecuteCommand(cmd); }
public static void ScrapeDates(DateTime to_date) { try { Common.rand = new Random(); db_rph = new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString()); //todo change LastDateScraped on 240 server //todo new scraperace structure on 240 server //todo amend ScrapeRaceView DateTime date_scrape = db_rph.ScrapeCourses.Select(x => x.LastDateScraped).FirstOrDefault(); if (date_scrape == null) { Logger.WriteLog("Last date scraped missing"); Environment.Exit(-1); } else { date_scrape = date_scrape.AddDays(1); } while (date_scrape <= to_date) { Logger.WriteLog("Scraping " + date_scrape.ToShortDateString()); string status = ""; string page = ""; GetDate(date_scrape, ref status, ref page); if (status == "Complete") { ScrapeDatePage(page, date_scrape); RaceScraper.ScrapeRaces(); HorseScraper.ScrapeHorses(); //Matcher.Match_RP_PP(); //PQScraper.ScrapeUnmatched(); //PQScraper.ScrapePedigree(); //Matcher.Match_RP_PQ(); //Matcher.Match_PQ_PP(); //Matcher.Merge_RP_PQ_PP(); //Matcher.Add_RP_Merge(); } else { Logger.WriteLog("Page retrieval failed: " + status); Environment.Exit(-1); } ScrapeCourse scr_rec = db_rph.ScrapeCourses.FirstOrDefault(); if (scr_rec == null) { scr_rec = new ScrapeCourse(); db_rph.ScrapeCourses.InsertOnSubmit(scr_rec); } scr_rec.LastDateScraped = date_scrape; db_rph.SubmitChanges(); Common.Wait(); date_scrape = date_scrape.AddDays(1); db_rph = new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString()); } } catch (Exception exception) { throw exception; } }