예제 #1
0
파일: PQScraper.cs 프로젝트: zubair1599/RP
        /// <summary>
        /// Fetches the pedigree page for every PQ_Horse row that has no MergeMatchBasis and
        /// is missing a sire or dam id (visited in name order), passes successfully retrieved
        /// pages to ScrapePage, and records the result in the row's PQOutcome column.
        /// </summary>
        /// <remarks>
        /// PQOutcome is always written as "Retrieved " followed by the outcome text: the
        /// value ScrapePage reports, "not found", or the non-"Complete" status from GetHorse.
        /// </remarks>
        public static void ScrapePedigree()
        {
            rand    = new Random();
            db_ppdb = new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString());

            var incomplete = db_ppdb.PQ_Horses
                             .Where(x => x.MergeMatchBasis == null && (x.SireId == null || x.DamId == null))
                             .OrderBy(x => x.Name);

            foreach (PQ_Horse horse in incomplete)
            {
                Logger.WriteLog("Scrape PQ for " + horse.Name + " " + horse.Id);

                // All four are handed to the helpers by ref, so they start out empty.
                string result      = "";
                string scraped_id  = "";
                string fetch_state = "";
                string html        = "";

                GetHorse(horse.Id, ref fetch_state, ref html);

                if (fetch_state != "Complete")
                {
                    // Retrieval failed: the status text itself becomes the recorded outcome.
                    result = fetch_state;
                    Logger.WriteLog(horse.Name + " " + fetch_state + "\r\n");
                }
                else if (html.Contains("can't be found in the database"))
                {
                    result = "not found";
                    Logger.WriteLog(horse.Name + " not found\r\n");
                }
                else
                {
                    ScrapePage(db_ppdb, html, ref result, ref scraped_id);
                    db_ppdb.SubmitChanges();
                }

                horse.PQOutcome = "Retrieved " + result;
                db_ppdb.SubmitChanges();
            }
        }
예제 #2
0
        /// <summary>
        /// Downloads and processes the detail page of every priority horse whose
        /// DetailProcessed flag is still 0, working through them in batches of up to 500.
        /// </summary>
        /// <remarks>
        /// Each horse is first marked as failed (DetailRaw "Error", DetailProcessed -1); only a
        /// page that contains the detailedInfo list flips it to processed (1). Either way the
        /// saved row no longer has DetailProcessed == 0, which is what lets the outer loop end.
        /// A fresh data context is created after every batch.
        /// </remarks>
        public static void ScrapeHorses()
        {
            Common.rand = new Random();
            db_rph      = new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString());

            // The patterns never change, so build them once instead of once per horse.
            var regex_name   = new Regex(@"<h1>\s+([^(<]+)(?:\(([A-Z]+)\) ){0,1}</h1>");
            var regex_header = new Regex("<ul id=\"detailedInfo\">(.*?)</ul>", RegexOptions.Singleline);

            // Only used as a countdown in the log output.
            int count = db_rph.Horses.Count(x => x.DetailProcessed == 0 && x.PriorityProcess);

            while (db_rph.Horses.Any(x => x.DetailProcessed == 0 && x.PriorityProcess))
            {
                foreach (Horse scrape in db_rph.Horses.Where(x => x.DetailProcessed == 0 && x.PriorityProcess).Take(500))
                {
                    Logger.WriteLog((count--).ToString() + " - " + scrape.Name);

                    string status = "";
                    string page   = "";
                    GetHorse((int)scrape.RPId, ref status, ref page);

                    // Assume failure until the detail block has actually been extracted.
                    scrape.DetailRaw       = "Error";
                    scrape.DetailProcessed = -1;

                    if (status == "Complete")
                    {
                        Match match_name = regex_name.Match(page);
                        if (match_name.Success)
                        {
                            // Group 2 is the optional "(XX)" suffix in the heading; when it is
                            // absent the country is recorded as "GB".
                            string country = match_name.Groups[2].ToString();
                            if (country == "")
                            {
                                country = "GB";
                            }
                            scrape.Country = country;
                        }

                        Match match_header = regex_header.Match(page);
                        if (match_header.Success)
                        {
                            scrape.DetailRaw = match_header.Groups[1].ToString();
                            ProcessDetail(scrape);
                            scrape.DetailProcessed = 1;
                        }
                        else
                        {
                            Logger.WriteLog("Detail not found: " + scrape.Name);
                        }
                    }
                    else
                    {
                        Logger.WriteLog("Horse retrieval error " + status + " - " + scrape.Name);
                    }

                    db_rph.SubmitChanges();
                    Common.Wait();
                }

                // Start the next batch with a new context.
                db_rph = new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString());
            }
        }
예제 #3
0
파일: PQScraper.cs 프로젝트: zubair1599/RP
        /// <summary>
        /// Searches by name for every priority horse whose PPMatchBasis is "Failed RP_PP",
        /// scrapes whatever pedigree pages are found, and moves the horse on to
        /// "Pending RP_PQ". Horses are handled in batches of up to 500 with a new data
        /// context per batch.
        /// </summary>
        /// <remarks>
        /// When the first search reports the horse as not found and the name ends in a space
        /// followed by roman-numeral letters (i, v, x), the search is retried once without
        /// that suffix. A result page listing several horses is expanded through
        /// ProcessMultiple and every returned id is fetched individually.
        /// PQOutcome is written as "Retrieved " plus the outcome text (which stays empty when
        /// the search itself did not complete).
        /// </remarks>
        public static void ScrapeUnmatched()
        {
            rand    = new Random();
            db_ppdb = new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString());

            // Loop-invariant, so built once: captures the name in front of a trailing
            // " <roman numeral letters>" suffix.
            var regex_suffix = new Regex("(.*) [ivxIVX]+$");

            while (db_ppdb.Horses.Any(x => x.PPMatchBasis == "Failed RP_PP" && x.PriorityProcess))
            {
                foreach (Horse scrape in db_ppdb.Horses.Where(x => x.PPMatchBasis == "Failed RP_PP" && x.PriorityProcess).Take(500))
                {
                    Logger.WriteLog("Scrape PQ for " + scrape.Name + " " + scrape.Id);

                    // Moving the row out of "Failed RP_PP" is what lets the outer loop finish.
                    scrape.PPMatchBasis = "Pending RP_PQ";

                    string outcome = "";
                    string pqid    = "";
                    string status  = "";

                    string page = "";

                    SearchHorse(scrape.Name, ref status, ref page);
                    if (status == "Complete" && page.Contains("can't be found in the database"))
                    {
                        Match match = regex_suffix.Match(scrape.Name);
                        if (match.Success)
                        {
                            status = "";
                            SearchHorse(match.Groups[1].ToString(), ref status, ref page);
                        }
                    }

                    if (status == "Complete")
                    {
                        if (page.Contains("can't be found in the database"))
                        {
                            outcome = "not found";
                            Logger.WriteLog(scrape.Name + " not found\r\n");
                        }
                        else if (page.Contains("more than \none horse named "))
                        {
                            List <string> pqids = ProcessMultiple(scrape, page);
                            foreach (string multi_pqid in pqids)
                            {
                                // NOTE(review): presumably site links that are not horses and
                                // get picked up by ProcessMultiple - confirm.
                                if (multi_pqid == "skiing" || multi_pqid == "generator")
                                {
                                    continue;
                                }

                                // Clear the previous result first, as is done before the retry
                                // search above: status is passed by ref and still holds
                                // "Complete" here, so a call that does not overwrite it would
                                // otherwise cause the stale page to be scraped again.
                                status = "";
                                GetHorse(multi_pqid, ref status, ref page);

                                if (status == "Complete")
                                {
                                    if (page.Contains("can't be found in the database"))
                                    {
                                        Logger.WriteLog(scrape.Name + " multiple not found\r\n");
                                    }
                                    else
                                    {
                                        ScrapePage(db_ppdb, page, ref outcome, ref pqid);
                                        db_ppdb.SubmitChanges();
                                    }
                                }
                                else
                                {
                                    Logger.WriteLog(multi_pqid + " Already retrieved\r\n");
                                }
                            }
                        }
                        else
                        {
                            ScrapePage(db_ppdb, page, ref outcome, ref pqid);
                            db_ppdb.SubmitChanges();
                        }
                        Logger.WriteLog(" ");
                    }

                    scrape.PQOutcome = "Retrieved " + outcome;
                    db_ppdb.SubmitChanges();
                }

                // Start the next batch with a new context.
                db_ppdb = new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString());
            }
        }
예제 #4
0
        /// <summary>
        /// Adds a Horse_Merged row for <paramref name="pq_horse"/>, first making sure that
        /// both of its parents have merged ids (creating rows for them recursively when they
        /// do not).
        /// </summary>
        /// <param name="pq_horse">The PQ horse to add; its MergeId and MergeMatchBasis are set on success.</param>
        /// <param name="gen">Generation counter, incremented on each recursive call; not otherwise used here.</param>
        /// <param name="rh_id">Stored as RHId on the new row unless it is 0 (the value used for ancestors).</param>
        /// <returns>
        /// The Id of the newly inserted Horse_Merged row, or null when the sire or dam cannot
        /// be found or cannot itself be resolved. Nothing is inserted for this horse in that case.
        /// </returns>
        private static int? Merge_RP_PQ_Merge(PQ_Horse pq_horse, int gen, int rh_id)
        {
            // Sire first; the dam is not even looked up when the sire line cannot be resolved.
            PQ_Horse sire          = db_racing_read.PQ_Horses.Where(x => x.Id == pq_horse.SireId).FirstOrDefault();
            int?     sire_merge_id = ResolveParentMergeId(sire, gen);
            if (sire_merge_id == null)
            {
                return null;
            }

            PQ_Horse dam          = db_racing_read.PQ_Horses.Where(x => x.Id == pq_horse.DamId).FirstOrDefault();
            int?     dam_merge_id = ResolveParentMergeId(dam, gen);
            if (dam_merge_id == null)
            {
                return null;
            }

            var merged = new Horse_Merged();
            db_racing_update.Horse_Mergeds.InsertOnSubmit(merged);

            if (rh_id != 0)
            {
                merged.RHId = rh_id;
            }

            // Title-casing follows the current thread's culture.
            TextInfo casing = Thread.CurrentThread.CurrentCulture.TextInfo;

            merged.Name    = casing.ToTitleCase(pq_horse.Name.ToLower());
            merged.Country = pq_horse.Country;
            if (pq_horse.FoalYear != null)
            {
                // Only the year is known, so the foal date is pinned to 1 January.
                merged.FoalDate = new DateTime((int)pq_horse.FoalYear, 1, 1);
                merged.FoalYear = pq_horse.FoalYear;
            }
            merged.Colour = pq_horse.Colour;
            merged.Sex    = pq_horse.Sex;
            merged.SireId = sire_merge_id;
            merged.DamId  = dam_merge_id;

            // The haplotype is copied from the dam's merged row.
            merged.Haplo = db_racing_read.Horse_Mergeds
                           .Where(x => x.Id == dam_merge_id)
                           .Select(x => x.Haplo)
                           .FirstOrDefault();
            merged.MergeBasis = "RP-PQ Added";

            // Submit now so that the generated Id is available below.
            db_racing_update.SubmitChanges();

            pq_horse.MergeId         = merged.Id;
            pq_horse.MergeMatchBasis = "Added";

            return merged.Id;
        }

        /// <summary>
        /// Returns the merged id for a parent row: null when the parent is missing, its
        /// existing MergeId when it has one, otherwise the result of merging the parent
        /// itself one generation further up.
        /// </summary>
        private static int? ResolveParentMergeId(PQ_Horse parent, int gen)
        {
            if (parent == null)
            {
                return null;
            }

            if (parent.MergeId != null)
            {
                return parent.MergeId;
            }

            return Merge_RP_PQ_Merge(parent, gen + 1, 0);
        }
예제 #5
0
        /// <summary>
        /// Works through every horse whose PPMatchBasis is "Pending RP_PQ" (batches of up to
        /// 500, ordered by Id then Name) and tries to give it a PPId via its linked PQ horse.
        /// </summary>
        /// <remarks>
        /// Each horse is first reset to PPId null / "Failed RP_PQ_PP"; a successful link
        /// overwrites that with "PQ Direct" or "PQ Added". Changes are saved per horse and the
        /// update context is replaced after every batch.
        /// </remarks>
        public static void Merge_RP_PQ_PP()
        {
            db_racing_update =
                new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString());
            db_racing_read = new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString());

            while (db_racing_update.Horses.Where(x => x.PPMatchBasis == "Pending RP_PQ").Any())
            {
                var batch = db_racing_update.Horses
                            .Where(x => x.PPMatchBasis == "Pending RP_PQ")
                            .OrderBy(x => x.Id)
                            .ThenBy(x => x.Name)
                            .Take(500);

                foreach (Horse rp_horse in batch)
                {
                    // Pessimistic default; also takes the row out of the pending set.
                    rp_horse.PPId         = null;
                    rp_horse.PPMatchBasis = "Failed RP_PQ_PP";

                    Logger.WriteLog("Merging RP/PQ to PP " + rp_horse.Name + " " + rp_horse.Id);

                    LinkRpHorseViaPQ(rp_horse);

                    db_racing_update.SubmitChanges();
                }

                db_racing_update =
                    new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString());
            }
        }

        /// <summary>
        /// Sets PPId / PPMatchBasis on <paramref name="rp_horse"/> from its PQ horse, either
        /// directly from an existing MergeId or by adding the PQ horse through
        /// Merge_RP_PQ_Merge. Leaves the horse untouched when no link can be made.
        /// </summary>
        private static void LinkRpHorseViaPQ(Horse rp_horse)
        {
            PQ_Horse pq = db_racing_update.PQ_Horses.Where(x => x.Id == rp_horse.PQId).FirstOrDefault();
            if (pq == null)
            {
                return;
            }

            if (pq.MergeId == null)
            {
                // Not merged yet: try to add it (and its ancestors) now.
                if (Merge_RP_PQ_Merge(pq, 1, rp_horse.Id) != null)
                {
                    rp_horse.PPId         = pq.MergeId;
                    rp_horse.PPMatchBasis = "PQ Added";
                }
                return;
            }

            Horse_Merged merged =
                db_racing_update.Horse_Mergeds.Where(x => x.PPId == pq.MergeId).FirstOrDefault();
            if (merged != null)
            {
                // The RP name always wins; country and foal details only fill gaps.
                merged.RHId = rp_horse.Id;
                merged.Name = rp_horse.Name;
                if (merged.Country == null)
                {
                    merged.Country = rp_horse.Country;
                }
                if (merged.FoalDate == null)
                {
                    merged.FoalDate = rp_horse.FoalDate;
                }
                if (merged.FoalYear == null)
                {
                    merged.FoalYear = rp_horse.FoalYear;
                }
                merged.MergeBasis = "RP-PQ Matched";
            }

            // The horse is linked even when no merged row was found for that id.
            rp_horse.PPId         = pq.MergeId;
            rp_horse.PPMatchBasis = "PQ Direct";
        }
예제 #6
0
        /// <summary>
        /// Tries to match every PQ_Horse row that has no MergeMatchBasis to a PP horse, first
        /// by name (MatchToPPByName) and, failing that, through its sire (MatchToPPBySire).
        /// </summary>
        /// <remarks>
        /// The PP horses are loaded once into the pp_horses dictionary and the
        /// pp_horse_lookup / pp_sire_lookup lookups. Rows are processed in batches of up to
        /// 500; each row is marked "Failed" up front so it leaves the unmatched set even when
        /// no match is found. Changes are saved once per batch, after which the update context
        /// is replaced, as the other batch jobs in this file do.
        /// </remarks>
        public static void Match_PQ_PP()
        {
            db_racing_update =
                new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString());
            db_racing_read = new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString());

            pp_horses = db_racing_read.PP_Horse_Selects.ToDictionary(x => x.Id, x => new PPHorse
            {
                HorseName = x.HorseName,
                Id        = x.Id,
                SireId    = x.SireId,
                SireName  = x.SireName,
                DamName   = x.DamName,
                Country   = x.Country,
                FoalYear  = x.FoalYear,
                OrigName  = x.OrigName
            });
            pp_horse_lookup = pp_horses.Select(x => x.Value).ToLookup(x => x.HorseName, x => x.Id);
            pp_sire_lookup  =
                pp_horses.Where(x => x.Value.SireId != null)
                .Select(x => x.Value)
                .ToLookup(x => (int)x.SireId, x => x.Id);

            while (db_racing_update.PQ_Horses.Where(x => x.MergeMatchBasis == null).Any())
            {
                foreach (PQ_Horse pq_horse in db_racing_update.PQ_Horses.Where(x => x.MergeMatchBasis == null).Take(500)
                         )
                {
                    // Pessimistic default; overwritten below when a match is found.
                    pq_horse.MergeMatchBasis = "Failed";

                    Logger.WriteLog("Matching PQ to PP " + pq_horse.Name + " " + pq_horse.Id);

                    string horse_name = FlattenName(pq_horse.Name);
                    string sire_name  = null;
                    string dam_name   = null;
                    int?   year       = pq_horse.FoalYear;
                    string country    = pq_horse.Country == null ? "" : pq_horse.Country;

                    PQ_Horse pq_sire = null;
                    if (pq_horse.SireId != null)
                    {
                        pq_sire = db_racing_read.PQ_Horses.Where(x => x.Id == pq_horse.SireId).FirstOrDefault();
                        if (pq_sire != null)
                        {
                            sire_name = FlattenName(pq_sire.Name);
                        }
                    }

                    PQ_Horse pq_dam = null;
                    if (pq_horse.DamId != null)
                    {
                        pq_dam = db_racing_read.PQ_Horses.Where(x => x.Id == pq_horse.DamId).FirstOrDefault();
                        if (pq_dam != null)
                        {
                            dam_name = FlattenName(pq_dam.Name);
                        }
                    }

                    int?   ppid        = null;
                    string match_basis = null;
                    if (MatchToPPByName(horse_name, sire_name, dam_name, year, country, ref ppid, ref match_basis))
                    {
                        pq_horse.MergeId         = (int)ppid;
                        pq_horse.MergeMatchBasis = match_basis;
                    }

                    // Fall back to matching through the sire when the name match found nothing.
                    if (ppid == null && pq_sire != null)
                    {
                        // NOTE(review): pq_sire is read through db_racing_read, which lives for
                        // the whole run - confirm it reflects MergeIds assigned earlier in
                        // this same run.
                        int?sire_merge_id = pq_sire.MergeId;

                        match_basis = "Sire";
                        if (MatchToPPBySire(sire_merge_id, horse_name, sire_name, dam_name, year, country, ref ppid,
                                            ref match_basis))
                        {
                            pq_horse.MergeId         = (int)ppid;
                            pq_horse.MergeMatchBasis = match_basis;
                        }
                    }
                }

                db_racing_update.SubmitChanges();

                // Replace the context after every batch (previously this happened only once,
                // after the loop), so a single context no longer tracks every row of the run.
                db_racing_update =
                    new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString());
            }
        }
예제 #7
0
        /// <summary>
        /// Tries to find a PQ id for every horse that has no PQMatchBasis, in up to three
        /// stages: by name (MatchToPQByName), through the sire (MatchToPQBySire), and finally
        /// by the foal-year range of the horse's own offspring (MatchToPQDamSpecial).
        /// </summary>
        /// <remarks>
        /// The PQ horses are loaded once into pq_horses and the two lookups. Horses are
        /// processed in batches of up to 500 and marked "Failed" up front, so every visited
        /// row leaves the unmatched set. Changes are saved per horse and the update context is
        /// replaced after every batch.
        /// </remarks>
        public static void Match_RP_PQ()
        {
            db_racing_update =
                new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString());
            db_racing_read = new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString());

            pq_horses = db_racing_read.PQ_Horse_Selects.ToDictionary(row => row.Id, row => new PQHorse
            {
                HorseName = row.FlatName,
                Id        = row.Id,
                SireId    = row.SireId,
                SireName  = row.SireName,
                DamName   = row.DamName,
                Country   = row.Country,
                FoalYear  = row.FoalYear
            });
            pq_horse_lookup = pq_horses.Select(entry => entry.Value).ToLookup(h => h.HorseName, h => h.Id);
            pq_sire_lookup  = pq_horses.Where(entry => entry.Value.SireId != null)
                              .Select(entry => entry.Value)
                              .ToLookup(h => h.SireId, h => h.Id);

            while (db_racing_update.Horses.Where(x => x.PQMatchBasis == null).Any())
            {
                foreach (Horse rp_horse in db_racing_update.Horses.Where(x => x.PQMatchBasis == null).Take(500))
                {
                    // Pessimistic default; overwritten by whichever stage finds a match.
                    rp_horse.PQMatchBasis = "Failed";

                    Logger.WriteLog("Match RP to PQ " + rp_horse.Name + " " + rp_horse.Id);

                    string flat_name = FlattenName(rp_horse.Name);
                    int?   foal_year = rp_horse.FoalYear;
                    string country   = rp_horse.Country ?? "";

                    Horse  sire      = null;
                    string sire_flat = null;
                    if (rp_horse.SireId != null)
                    {
                        sire = db_racing_update.Horses.Where(x => x.Id == rp_horse.SireId).FirstOrDefault();
                        if (sire != null)
                        {
                            sire_flat = FlattenName(sire.Name);
                        }
                    }

                    string dam_flat = null;
                    if (rp_horse.DamId != null)
                    {
                        Horse dam = db_racing_update.Horses.Where(x => x.Id == rp_horse.DamId).FirstOrDefault();
                        if (dam != null)
                        {
                            dam_flat = FlattenName(dam.Name);
                        }
                    }

                    // Stage 1: match on the horse's own name.
                    string found_pqid = null;
                    string basis      = null;
                    if (MatchToPQByName(flat_name, sire_flat, dam_flat, foal_year, country, ref found_pqid, ref basis))
                    {
                        rp_horse.PQId         = found_pqid;
                        rp_horse.PQMatchBasis = basis;
                    }

                    // Stages 2 and 3 both require a known sire and no match so far.
                    if (found_pqid == null && sire != null)
                    {
                        // Stage 2: match through the sire's PQ id (which may itself be null).
                        string sire_pqid = sire.PQId;

                        basis = "Sire";
                        if (MatchToPQBySire(sire_pqid, flat_name, sire_flat, dam_flat, foal_year, country,
                                            ref found_pqid, ref basis))
                        {
                            rp_horse.PQId         = found_pqid;
                            rp_horse.PQMatchBasis = basis;
                        }

                        // Stage 3: this option for dams for which we have minimal info -
                        // use the span of foal years of the horse's offspring instead.
                        if (found_pqid == null)
                        {
                            int earliest = 9999;
                            int latest   = 0;
                            foreach (Horse foal in db_racing_read.Horses.Where(x => x.DamId == rp_horse.Id))
                            {
                                if (foal.FoalYear == null)
                                {
                                    continue;
                                }

                                int born = (int)foal.FoalYear;
                                if (born < earliest)
                                {
                                    earliest = born;
                                }
                                if (born > latest)
                                {
                                    latest = born;
                                }
                            }

                            // 9999 untouched means no offspring with a known foal year.
                            if (earliest != 9999 &&
                                MatchToPQDamSpecial(flat_name, sire_flat, earliest, latest, ref found_pqid, ref basis))
                            {
                                rp_horse.PQId         = found_pqid;
                                rp_horse.PQMatchBasis = basis;
                            }
                        }
                    }

                    db_racing_update.SubmitChanges();
                }

                db_racing_update =
                    new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString());
            }
        }
예제 #8
0
        /// <summary>
        /// Adds a Horse_Merged row for each record of the RP_Match_By_Parents view whose foal
        /// year is plausible relative to both parents, and links the RP horse to the new row.
        /// </summary>
        /// <remarks>
        /// View records are read in batches of up to 500 until the view is empty. A horse is
        /// only added when its own, its sire's and its dam's foal years are all known and the
        /// horse was foaled between 3 and 25 years (inclusive) after each parent; otherwise
        /// it is left with PPMatchBasis "Failed". When the loop is done, every horse still
        /// marked "Failed RP_PQ_PP" is reset to "Failed" with a single UPDATE statement.
        /// </remarks>
        public static void Add_RP_Merge()
        {
            db_racing_update =
                new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString());
            db_racing_read = new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString());

            db_racing_read.CommandTimeout = 180;

            while (db_racing_read.RP_Match_By_Parents.Any())
            {
                List<RP_Match_By_Parent> candidates = db_racing_read.RP_Match_By_Parents.Take(500).ToList();
                foreach (RP_Match_By_Parent candidate in candidates)
                {
                    Logger.WriteLog("Attempting RP Merge for " + candidate.Name + " " + candidate.Id);

                    Horse rp_horse = db_racing_update.Horses.Where(x => x.Id == candidate.Id).FirstOrDefault();
                    if (rp_horse == null)
                    {
                        continue;
                    }

                    // Pessimistic default; replaced by "RP Added" at the end on success.
                    rp_horse.PPMatchBasis = "Failed";

                    bool plausible = false;
                    if (rp_horse.FoalYear != null && candidate.SireFoalYear != null && candidate.DamFoalYear != null)
                    {
                        int sire_gap = (int)rp_horse.FoalYear - (int)candidate.SireFoalYear;
                        int dam_gap  = (int)rp_horse.FoalYear - (int)candidate.DamFoalYear;

                        plausible = sire_gap >= 3 && sire_gap <= 25 &&
                                    dam_gap >= 3 && dam_gap <= 25;
                    }

                    if (!plausible)
                    {
                        // Persist the "Failed" marker and move on.
                        db_racing_update.SubmitChanges();
                        continue;
                    }

                    var merged = new Horse_Merged();
                    db_racing_update.Horse_Mergeds.InsertOnSubmit(merged);

                    merged.RHId = rp_horse.Id;

                    merged.Name     = rp_horse.Name;
                    merged.Country  = rp_horse.Country;
                    merged.FoalDate = rp_horse.FoalDate;
                    merged.FoalYear = rp_horse.FoalYear;
                    merged.Colour   = rp_horse.Colour;

                    // Collapse the RP sex codes: m is stored as f, h as c; g and r are kept.
                    // Any other value (including null) leaves Sex unset.
                    string sex = rp_horse.Sex;
                    if (sex == "f" || sex == "m")
                    {
                        merged.Sex = "f";
                    }
                    else if (sex == "c" || sex == "h")
                    {
                        merged.Sex = "c";
                    }
                    else if (sex == "g" || sex == "r")
                    {
                        merged.Sex = sex;
                    }

                    merged.SireId = candidate.SireId;
                    merged.DamId  = candidate.DamId;

                    // The haplotype is copied from the dam's merged row.
                    merged.Haplo = db_racing_read.Horse_Mergeds
                                   .Where(x => x.Id == merged.DamId)
                                   .Select(x => x.Haplo)
                                   .FirstOrDefault();
                    merged.MergeBasis = "RP Added";

                    // First submit inserts the row so that its generated Id can be read back.
                    db_racing_update.SubmitChanges();

                    rp_horse.PPId         = merged.Id;
                    rp_horse.PPMatchBasis = "RP Added";

                    db_racing_update.SubmitChanges();
                }

                db_racing_update =
                    new RacingPostRacesDataContext(ConfigurationManager.ConnectionStrings["Racing"].ToString());
            }

            string reset_sql = "UPDATE Horse SET PPMatchBasis = 'Failed' WHERE PPMatchBasis = 'Failed RP_PQ_PP'";

            db_racing_update.ExecuteCommand(reset_sql);
        }
예제 #9
0
        private static void ScrapeDatePage(string page, DateTime date)
        {
            string meeting_head = "";
            string meeting_tail = "";
            string races        = "";

            string re1  = "(<)";                              // Any Single Character 1
            string re2  = "(div)";                            // Word 1
            string re3  = "( )";                              // Any Single Character 2
            string re4  = "(class)";                          // Word 2
            string re5  = "(=)";                              // Any Single Character 3
            string re6  = "(\"rp-resultsWrapper__content\")"; // Double Quote String 1
            string re7  = ".*?";                              // Non-greedy match on filler
            string re8  = "()";                               // Tag 1
            string re9  = ".*?";                              // Non-greedy match on filler
            string re10 = "(<\\/h1>)";                        // Tag 2
            string re11 = "(<\\/div>)";                       // Tag 3

            Regex r = new Regex(re1 + re2 + re3 + re4 + re5 + re6 + re7 + re8 + re9 + re10 + re11, RegexOptions.IgnoreCase | RegexOptions.Singleline);

            //var regex_meeting =
            //    new Regex(@"(<)(div)( )(class)(=)(")(rp)(-)(resultsWrapper__content)(")(>)(<[^>]+>).*?<.*?<.*?(<)(\/)(h1)(>)(<)(\/div)(>)",
            //        RegexOptions.Singleline);
            Match match_meeting = r.Match(page);

            while (match_meeting.Success)
            {
                meeting_head = match_meeting.Groups[1].ToString();
                meeting_tail = match_meeting.Groups[2].ToString();
                races        = match_meeting.Groups[3].ToString();

                var regex_course =
                    new Regex(
                        @"<a href=""http://www.racingpost.com/horses/course_home.sd\?crs_id=(\d+)[^>]+>([^<]+)</a>");
                Match match_course = regex_course.Match(meeting_head);
                if (match_course.Success)
                {
                    int    course_id    = Convert.ToInt32(match_course.Groups[1].ToString());
                    string course_whole = match_course.Groups[2].ToString().Replace("(AW)", ":AW:");

                    string course        = "";
                    string country       = "";
                    var    regex_course2 = new Regex(@"([^(]+)(\([^(]+\)){0,1}");
                    Match  match_course2 = regex_course2.Match(course_whole);
                    if (match_course2.Success)
                    {
                        course  = match_course2.Groups[1].ToString().Replace(":AW:", "(AW)").Trim();
                        country = match_course2.Groups[2].ToString().Replace("(", "").Replace(")", "").Trim();
                    }

                    string going       = "";
                    var    regex_going = new Regex(@"<strong>GOING:</strong>([^<\n]+)", RegexOptions.Singleline);
                    Match  match_going = regex_going.Match(meeting_tail);
                    if (match_going.Success)
                    {
                        going = match_going.Groups[1].ToString().Trim();
                    }

                    string weather       = "";
                    var    regex_weather = new Regex(@"<strong>Weather conditions:</strong>([^<\n]+)",
                                                     RegexOptions.Singleline);
                    Match match_weather = regex_weather.Match(meeting_tail);
                    if (match_weather.Success)
                    {
                        weather = match_weather.Groups[1].ToString().Trim();
                    }

                    string stalls       = "";
                    var    regex_stalls = new Regex(@"<strong>STALLS:</strong>([^<\n]+)", RegexOptions.Singleline);
                    Match  match_stalls = regex_stalls.Match(meeting_tail);
                    if (match_stalls.Success)
                    {
                        stalls = match_stalls.Groups[1].ToString().Trim();
                    }

                    Course course_rec = db_rph.Courses.Where(x => x.Id == course_id).FirstOrDefault();
                    if (course_rec == null)
                    {
                        course_rec    = new Course();
                        course_rec.Id = course_id;
                        db_rph.Courses.InsertOnSubmit(course_rec);
                    }

                    course_rec.Name    = course;
                    course_rec.Country = country;
                    db_rph.SubmitChanges();

                    Meeting meeting_rec =
                        db_rph.Meetings.Where(x => x.CourseId == course_id && x.DateOfMeeting == date).FirstOrDefault();
                    if (meeting_rec != null)
                    {
                        string cmd = String.Format("DELETE FROM Meeting WHERE Id = {0}", meeting_rec.Id);
                        db_rph.ExecuteCommand(cmd);
                        cmd = String.Format("DELETE FROM ScrapeRace WHERE MeetingId = {0}", meeting_rec.Id);
                        db_rph.ExecuteCommand(cmd);
                    }

                    meeting_rec = new Meeting();
                    db_rph.Meetings.InsertOnSubmit(meeting_rec);
                    meeting_rec.CourseId      = course_id;
                    meeting_rec.DateOfMeeting = date;
                    meeting_rec.Going         = going;
                    meeting_rec.Weather       = weather;
                    meeting_rec.Stalls        = stalls;
                    db_rph.SubmitChanges();

                    var   regex_race = new Regex(@"<td(.*?)</td>", RegexOptions.Singleline);
                    Match match_race = regex_race.Match(races);
                    while (match_race.Success)
                    {
                        string race = match_race.Groups[1].ToString();

                        string race_link       = "";
                        int    race_id         = 0;
                        var    regex_race_link = new Regex(@"<a href=""(/horses/result_home\.sd\?race_id=(\d+)[^""]+)""",
                                                           RegexOptions.Singleline);
                        Match match_race_link = regex_race_link.Match(race);
                        if (match_race_link.Success)
                        {
                            race_link = match_race_link.Groups[1].ToString().Trim();
                            race_id   = Convert.ToInt32(match_race_link.Groups[2].ToString().Trim());

                            var race_rec = new ScrapeRace();
                            db_rph.ScrapeRaces.InsertOnSubmit(race_rec);
                            race_rec.MeetingId = meeting_rec.Id;
                            race_rec.Link      = "http://www.racingpost.com" + race_link;
                            race_rec.Scraped   = false;
                            race_rec.RaceId    = race_id;
                            race_rec.RaceDate  = date;

                            db_rph.SubmitChanges();
                        }

                        match_race = match_race.NextMatch();
                    }
                }

                match_meeting = match_meeting.NextMatch();
            }
        }
예제 #10
0
        /// <summary>
        /// Scrapes one calendar day at a time, starting the day after the stored
        /// <c>LastDateScraped</c> value and continuing up to and including <paramref name="to_date"/>.
        /// For each day the date page is fetched via <c>GetDate</c>, parsed via <c>ScrapeDatePage</c>,
        /// then <c>RaceScraper.ScrapeRaces</c> and <c>HorseScraper.ScrapeHorses</c> are run, and the
        /// stored last-scraped date is advanced to that day.
        /// </summary>
        /// <param name="to_date">Last date (inclusive) to scrape.</param>
        /// <remarks>
        /// Terminates the process with exit code -1 when no last-scraped date is stored or when a
        /// date page cannot be retrieved. Any other exception propagates to the caller with its
        /// original stack trace.
        /// </remarks>
        public static void ScrapeDates(DateTime to_date)
        {
            string connection = ConfigurationManager.ConnectionStrings["Racing"].ToString();

            Common.rand = new Random();
            db_rph      = new RacingPostRacesDataContext(connection);

            //todo change LastDateScraped on 240 server
            //todo new scraperace structure on 240 server
            //todo amend ScrapeRaceView
            DateTime date_scrape = db_rph.ScrapeCourses.Select(x => x.LastDateScraped).FirstOrDefault();

            // DateTime is a value type, so a missing row surfaces as default(DateTime),
            // never as null; comparing against null would never detect the missing value.
            if (date_scrape == default(DateTime))
            {
                Logger.WriteLog("Last date scraped missing");
                Environment.Exit(-1);
            }
            else
            {
                date_scrape = date_scrape.AddDays(1);
            }

            while (date_scrape <= to_date)
            {
                Logger.WriteLog("Scraping " + date_scrape.ToShortDateString());

                string status = "";
                string page   = "";
                GetDate(date_scrape, ref status, ref page);

                if (status == "Complete")
                {
                    ScrapeDatePage(page, date_scrape);
                    RaceScraper.ScrapeRaces();
                    HorseScraper.ScrapeHorses();
                    // The matching / pedigree stages below are currently disabled.
                    //Matcher.Match_RP_PP();
                    //PQScraper.ScrapeUnmatched();
                    //PQScraper.ScrapePedigree();
                    //Matcher.Match_RP_PQ();
                    //Matcher.Match_PQ_PP();
                    //Matcher.Merge_RP_PQ_PP();
                    //Matcher.Add_RP_Merge();
                }
                else
                {
                    Logger.WriteLog("Page retrieval failed: " + status);
                    Environment.Exit(-1);
                }

                // Record progress after each day so the next run starts from the following day.
                ScrapeCourse scr_rec = db_rph.ScrapeCourses.FirstOrDefault();
                if (scr_rec == null)
                {
                    scr_rec = new ScrapeCourse();
                    db_rph.ScrapeCourses.InsertOnSubmit(scr_rec);
                }
                scr_rec.LastDateScraped = date_scrape;
                db_rph.SubmitChanges();

                Common.Wait();
                date_scrape = date_scrape.AddDays(1);

                // A fresh data context is created for every day scraped.
                db_rph = new RacingPostRacesDataContext(connection);
            }
        }
예제 #11
0
        /// <summary>
        /// Three-letter month abbreviations in calendar order; index + 1 is the month number.
        /// </summary>
        private static readonly string[] ProcessDetailMonths =
        {
            "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
        };

        /// <summary>
        /// Parses the <c>&lt;li&gt;</c> items of <c>rec.DetailRaw</c>: the first item supplies foal
        /// date, colour and sex; the second item supplies sire, dam and dam-sire links. Any further
        /// items are ignored.
        /// </summary>
        /// <param name="rec">Horse whose raw detail HTML is parsed and whose fields are filled in.</param>
        private static void ProcessDetail(Horse rec)
        {
            var   regex_main = new Regex("<li>(.*?)</li>", RegexOptions.Singleline);
            Match match_main = regex_main.Match(rec.DetailRaw);

            // Position of the current <li>. This counter is owned by this loop only; the
            // pedigree parser keeps its own counter so the item position is never clobbered.
            int item_no = 0;

            while (match_main.Success)
            {
                item_no++;
                string item = match_main.Groups[1].ToString().Trim();

                if (item_no == 1)
                {
                    ProcessDetailFoaling(rec, item);
                }
                else if (item_no == 2)
                {
                    ProcessDetailPedigree(rec, item);
                }

                match_main = match_main.NextMatch();
            }
        }

        /// <summary>
        /// Reads the parenthesised "(ddMmmyy colour sex)" fragment from <paramref name="item"/> and
        /// sets <c>FoalDate</c>/<c>FoalYear</c>, <c>Colour</c> and <c>Sex</c> on <paramref name="rec"/>.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown by the <see cref="DateTime"/> constructor when the parsed day is not valid for the
        /// parsed month and year.
        /// </exception>
        private static void ProcessDetailFoaling(Horse rec, string item)
        {
            var regex_dob_colour_sex =
                new Regex(@"\((?:(\d{2})([A-Za-z]{3})(\d{2})){0,1}(?: ([a-z/]+)){0,1}(?: ([a-z/]+)){0,1}\s*\)");
            Match match_dob_colour_sex = regex_dob_colour_sex.Match(item);
            if (!match_dob_colour_sex.Success)
            {
                return;
            }

            if (match_dob_colour_sex.Groups[1].ToString() != "")
            {
                int dd = Convert.ToInt32(match_dob_colour_sex.Groups[1].ToString());

                // Exact (case-sensitive) lookup; an unrecognised abbreviation yields 0.
                int mm = Array.IndexOf(ProcessDetailMonths, match_dob_colour_sex.Groups[2].ToString()) + 1;

                // Two-digit year: values up to the current two-digit year are taken as 20xx,
                // anything larger as 19xx.
                int yy = Convert.ToInt32(match_dob_colour_sex.Groups[3].ToString());
                int currentyear2Digit = DateTime.Now.Year % 2000;
                if (yy <= currentyear2Digit)
                {
                    yy += 2000;
                }
                else
                {
                    yy += 1900;
                }

                if (mm != 0)
                {
                    rec.FoalDate = new DateTime(yy, mm, dd);
                    rec.FoalYear = yy;
                }
            }

            string field4 = match_dob_colour_sex.Groups[4].ToString().Trim();
            string field5 = match_dob_colour_sex.Groups[5].ToString().Trim();

            // Two words are read as "colour sex"; a single word is read as the sex alone.
            if (field4 != "" && field5 != "")
            {
                rec.Colour = field4;
                rec.Sex    = field5;
            }
            else if (field4 != "")
            {
                rec.Sex = field4;
            }
        }

        /// <summary>
        /// Extracts sire, dam and dam-sire links from <paramref name="item"/>, makes sure each has a
        /// <c>Horse</c> row, and links them to <paramref name="rec"/> where the link is still empty.
        /// </summary>
        private static void ProcessDetailPedigree(Horse rec, string item)
        {
            int?   sire_id          = null;
            string sire_name        = null;
            string sire_country     = null;
            int?   dam_id           = null;
            string dam_name         = null;
            string dam_country      = null;
            int?   dam_sire_id      = null;
            string dam_sire_name    = null;
            string dam_sire_country = null;

            var regex_sire =
                new Regex(
                    @"stallionbook/stallion\.sd\?horse_id=(\d+).*?STALLION"">([^<(]+)(?:\(([A-Z]+)\)){0,1}</a>");

            // The first stallion link is taken as the sire, the second as the dam's sire;
            // any further stallion links are ignored.
            int stallion_no = 0;
            for (Match match_sire = regex_sire.Match(item); match_sire.Success; match_sire = match_sire.NextMatch())
            {
                stallion_no++;
                if (stallion_no == 1)
                {
                    sire_id      = Convert.ToInt32(match_sire.Groups[1].ToString());
                    sire_name    = match_sire.Groups[2].ToString().Trim();
                    sire_country = match_sire.Groups[3].ToString().Trim();
                }
                else if (stallion_no == 2)
                {
                    dam_sire_id      = Convert.ToInt32(match_sire.Groups[1].ToString());
                    dam_sire_name    = match_sire.Groups[2].ToString().Trim();
                    dam_sire_country = match_sire.Groups[3].ToString().Trim();
                }
            }

            var   regex_dam = new Regex(@"dam_home\.sd\?horse_id=(\d+).*?DAM "">([^<(]+)(?:\(([A-Z]+)\)){0,1}</a>");
            Match match_dam = regex_dam.Match(item);
            if (match_dam.Success)
            {
                dam_id      = Convert.ToInt32(match_dam.Groups[1].ToString());
                dam_name    = match_dam.Groups[2].ToString().Trim();
                dam_country = match_dam.Groups[3].ToString().Trim();
            }

            // NOTE(review): the parent-id assignments below happen after the last SubmitChanges
            // call made here, so they are presumably persisted by the caller - confirm.
            if (sire_id.HasValue && sire_id.Value != 0)
            {
                Horse sire_rec = ProcessDetailRelative(sire_id.Value, sire_name, sire_country);
                if (rec.SireId == null)
                {
                    rec.SireId = sire_rec.Id;
                }
            }

            if (dam_id.HasValue && dam_id.Value != 0)
            {
                Horse dam_rec = ProcessDetailRelative(dam_id.Value, dam_name, dam_country);
                if (rec.DamId == null)
                {
                    rec.DamId = dam_rec.Id;
                }

                // The dam's sire is only recorded when the dam herself was found.
                if (dam_sire_id.HasValue && dam_sire_id.Value != 0)
                {
                    Horse dam_sire_rec = ProcessDetailRelative(dam_sire_id.Value, dam_sire_name, dam_sire_country);
                    if (dam_rec.SireId == null)
                    {
                        dam_rec.SireId = dam_sire_rec.Id;
                    }
                }
            }
        }

        /// <summary>
        /// Finds the <c>Horse</c> row with the given <c>RPId</c>, creating it when absent, fills in
        /// name and country only where they are still null, and submits the changes.
        /// </summary>
        /// <returns>The existing or newly inserted row.</returns>
        private static Horse ProcessDetailRelative(int rp_id, string name, string country)
        {
            Horse relative = db_rph.Horses.FirstOrDefault(x => x.RPId == rp_id);
            if (relative == null)
            {
                relative = new Horse();
                db_rph.Horses.InsertOnSubmit(relative);
                relative.RPId              = rp_id;
                relative.PedigreeProcessed = 1;
                relative.DetailProcessed   = 0;
            }

            if (relative.Name == null)
            {
                relative.Name     = Common.ProcessName(name);
                relative.FlatName = Common.FlattenName(name);
            }

            if (relative.Country == null)
            {
                relative.Country = country;
            }

            db_rph.SubmitChanges();
            return relative;
        }