예제 #1
0
        /// <summary>
        /// Loads the Racing Post "time-order" results page for <paramref name="date"/> and stores a
        /// <c>ScrapeRace</c> row for every race link on it whose id is not already in
        /// <paramref name="raceIds"/>. Courses that are missing from the cached course list are
        /// created on the fly.
        /// </summary>
        /// <param name="date">Race day whose results page is read.</param>
        /// <param name="raceIds">Race ids that are already queued; matching links are skipped. Not modified.</param>
        /// <param name="db">Context used to save new courses and queued races.</param>
        public static void DownloadRaceList(DateTime date, List <int?> raceIds, RacingPostRacesEntities db)
        {
            // Invariant culture keeps the URL date Gregorian yyyy-MM-dd regardless of the machine's culture.
            var url = string.Format(@"https://www.racingpost.com/results/{0}/time-order",
                                    date.ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture));

            var web = new HtmlWeb();
            var doc = web.Load(url);

            var nodes = doc.QuerySelectorAll("div .rp-timeView__raceInfo").ToList();

            // Hash lookup instead of scanning the list per race; it also remembers ids queued
            // during this call so a link that appears twice on the page is stored only once.
            var knownRaceIds = new HashSet <int?>(raceIds);

            foreach (var item in nodes)
            {
                // baseUrl and AllCourses are members defined outside this method.
                var courseUrl = baseUrl + item.ChildNodes[1].ChildNodes[1].Attributes["href"].Value;
                var courseId  = Helper.GetIdfromUrl(courseUrl, "https://www.racingpost.com/profile/course/");

                // Entries whose second link carries no href are skipped.
                var raceLink = item.ChildNodes[3].ChildNodes[1];
                if (!raceLink.Attributes.Any(a => a.Name == "href"))
                {
                    continue;
                }
                string raceUrl = baseUrl + raceLink.Attributes["href"].Value;

                // The race id is the last path segment; a non-numeric segment skips this link
                // instead of aborting the whole page with a FormatException.
                int parsedRaceId;
                if (!int.TryParse(raceUrl.Split('/').LastOrDefault(), out parsedRaceId))
                {
                    Console.Write(string.Format("Skipping race link without a numeric id: {0}\n", raceUrl));
                    continue;
                }

                int?raceId = parsedRaceId;
                if (!knownRaceIds.Add(raceId))
                {
                    // Already queued earlier, or already seen on this page.
                    continue;
                }

                var course = AllCourses.FirstOrDefault(c => c.Id == courseId);
                if (course == null)
                {
                    // Unknown course: name it after the last URL segment and cache it.
                    course = new RPCourse {
                        Id = courseId, Name = courseUrl.Split('/').LastOrDefault().ToUpper()
                    };
                    db.RPCourses.Add(course);
                    db.SaveChanges();
                    AllCourses.Add(course);
                }

                //save url to be scraped
                ScrapeRace scrapeRace = new ScrapeRace();
                scrapeRace.Link      = raceUrl;
                scrapeRace.RaceId    = raceId;
                scrapeRace.RaceDate  = date;
                scrapeRace.Scraped   = false;
                scrapeRace.Required  = true;
                scrapeRace.CourseUrl = courseUrl;
                // Courses without a country code default to "GB".
                scrapeRace.Country   = string.IsNullOrEmpty(course.Country) ? "GB" : course.Country;
                db.ScrapeRaces.Add(scrapeRace);
                db.SaveChanges();
            }
        }
예제 #2
0
파일: Program.cs 프로젝트: zubair1599/RP
        /// <summary>
        /// Writes a progress line to the console and saves the horse described by
        /// <paramref name="result"/> as a new <c>RPHorse</c> row.
        /// </summary>
        /// <param name="db">Context the new row is added to and saved through.</param>
        /// <param name="result">Deserialized horse profile; only <c>result.profile</c> is read.</param>
        private static void SaveHorse(RacingPostRacesEntities db, RootObject result)
        {
            Console.Write(string.Format("Downlaod horse {0} name {1} \n", result.profile.horseUid, result.profile.horseName));

            // Convert the date of birth once; the same value was previously converted twice.
            DateTime dateOfBirth = Convert.ToDateTime(result.profile.horseDateOfBirth);

            RPHorse rpHorse = new RPHorse();

            rpHorse.Name     = result.profile.horseName;
            rpHorse.RPId     = result.profile.horseUid;
            rpHorse.Country  = result.profile.horseCountryOriginCode;
            rpHorse.Colour   = result.profile.horseColour;
            rpHorse.Sex      = result.profile.horseSexCode;
            rpHorse.SireId   = result.profile.sireUid;
            rpHorse.DamId    = result.profile.damUid;
            rpHorse.FoalYear = dateOfBirth.Year;

            // A year of 1 is treated as "no date of birth" and replaced by 1 January 1753.
            // The sentinel is built directly rather than parsed from "1/1/1753", so it does not
            // depend on the current culture's date format or calendar.
            // NOTE(review): 1753 is presumably chosen for the database column's minimum - confirm.
            rpHorse.FoalDate = dateOfBirth.Year > 1 ? dateOfBirth : new DateTime(1753, 1, 1);

            rpHorse.PostTemplate = true;
            db.RPHorses.Add(rpHorse);
            db.SaveChanges();
        }
예제 #3
0
파일: Program.cs 프로젝트: zubair1599/RP
        /// <summary>
        /// Saves one <c>RPRunner</c> row for every runner of <paramref name="race"/>, adding jockey
        /// and trainer rows whose ids are not stored yet. Changes are saved after each runner.
        /// </summary>
        /// <param name="race">Race whose <c>Runners</c> are saved; <c>race.Id</c> becomes each row's race id.</param>
        public static void SaveRunner(Race race)
        {
            using (var context = new RacingPostRacesEntities())
            {
                // Position given to the previous runner; non-finishers are numbered after it.
                int?lastPosition = 0;

                foreach (var entry in race.Runners)
                {
                    var row = new RPRunner();
                    row.HorseId = entry.HorseId;
                    row.RaceId  = race.Id;

                    bool nonFinisher = DidNotFinsh.Any(code => code.Equals(entry.PosTemp));
                    if (!nonFinisher)
                    {
                        row.Position = Convert.ToInt32(entry.PosTemp);
                    }
                    else
                    {
                        // Keep the raw code and continue the numbering from the previous runner.
                        row.DidNotFinish = entry.PosTemp;
                        row.Position     = lastPosition + 1;
                    }
                    lastPosition = row.Position;

                    row.Status = "Runner";
                    if (string.IsNullOrEmpty(entry.Draw))
                    {
                        row.Draw = 0;
                    }
                    else
                    {
                        row.Draw = Convert.ToInt32(entry.Draw);
                    }
                    row.Distance  = entry.Distance;
                    row.Price     = entry.SP;
                    row.WeightRaw = entry.WeightRaw;
                    row.Age       = Convert.ToInt32(entry.Age);

                    // Add the jockey and trainer only when no row with that id exists yet.
                    var storedJockey = context.Jockeys.FirstOrDefault(j => j.Id == entry.JockeyId);
                    if (storedJockey == null)
                    {
                        var newJockey = new Jockey {
                            Id = entry.JockeyId, Name = entry.Jockey
                        };
                        context.Jockeys.Add(newJockey);
                    }

                    var storedTrainer = context.Trainers.FirstOrDefault(j => j.Id == entry.TrainerId);
                    if (storedTrainer == null)
                    {
                        var newTrainer = new Trainer {
                            Id = entry.TrainerId, Name = entry.Trainer
                        };
                        context.Trainers.Add(newTrainer);
                    }

                    row.JockeyId     = entry.JockeyId;
                    row.TrainerId    = entry.TrainerId;
                    row.PostTemplate = true;

                    // Calls the SaveHorse overload that takes a Horse (defined outside this block).
                    SaveHorse(new Horse {
                        Id = (int)row.HorseId
                    });

                    context.RPRunners.Add(row);
                    context.SaveChanges();
                }
            }
        }
예제 #4
0
파일: Program.cs 프로젝트: zubair1599/RP
        /// <summary>
        /// Reads a horse export from a fixed JSON file, runs each horse through the
        /// <c>HorseCleaning</c> steps and saves all of them as <c>RPHorse</c> rows in one batch.
        /// </summary>
        /// <param name="args">Not used by this method.</param>
        static void HorseScrape(string[] args)
        {
            using (StreamReader file = File.OpenText(@"C:\Users\MuhammadZubair\Documents\BELData\horse(Feb)(2).json"))
            using (JsonTextReader reader = new JsonTextReader(file))
            {
                JObject o2       = (JObject)JToken.ReadFrom(reader);
                var     rpHorses = o2.ToObject <RPHorses>();

                Cleaner.GetBaseData();

                using (RacingPostRacesEntities db = new RacingPostRacesEntities())
                {
                    foreach (var item in rpHorses.Horses)
                    {
                        // NOTE(review): these calls presumably normalise fields of item in place
                        // before they are copied below - confirm against HorseCleaning.
                        HorseCleaning.PrcessHorse(item);
                        HorseCleaning.PrcessHeader(item);

                        RPHorse rpHorse = new RPHorse {
                            Name         = item.Name,
                            RPId         = item.Id,
                            Country      = item.Country,
                            Colour       = item.Color,
                            Sex          = item.Sex,
                            SireId       = item.SireId,
                            DamId        = item.DamId,
                            FoalDate     = item.DOB,
                            FoalYear     = item.DOB.Year,
                            PostTemplate = true
                        };

                        db.RPHorses.Add(rpHorse);
                    }

                    // One save for the whole file.
                    db.SaveChanges();
                }
            }
        }
예제 #5
0
파일: Program.cs 프로젝트: zubair1599/RP
        /// <summary>
        /// Saves <paramref name="race"/> as an <c>RPRace</c> row together with its runners, unless a
        /// race with the same id is already stored.
        /// </summary>
        /// <param name="race">Race to save; its <c>Id</c> must be non-zero.</param>
        /// <exception cref="ArgumentNullException">Thrown when <c>race.Id</c> is 0.</exception>
        public static void SaveRace(Race race)
        {
            if (race.Id == 0)
            {
                throw new ArgumentNullException();
            }

            using (var context = new RacingPostRacesEntities())
            {
                var stored = context.RPRaces.FirstOrDefault(r => r.Id == race.Id);
                if (stored != null)
                {
                    // Already saved: nothing to do.
                    return;
                }

                // Field-by-field copy of the scraped race onto the entity.
                var entity = new RPRace
                {
                    Id            = race.Id,
                    CourseId      = race.CourseId,
                    StartTime     = race.StartTime,
                    Name          = race.Name,
                    RaceType      = race.RaceType,
                    Handicap      = race.Handicap,
                    Chase         = race.Chase,
                    Fences        = race.Fences,
                    FencesOmitted = race.FencesOmitted,
                    FencesHurdles = race.FencesHurdles,
                    ClassRaw      = race.ClassRaw,
                    Class         = race.Class,
                    GradeGroup    = race.GradeGroup,
                    Rating        = race.Rating,
                    Eligibility   = race.Eligibility,
                    DistanceYards = race.DistanceYards,
                    DistanceStd   = race.DistanceStd,
                    Distance      = race.Distance,
                    Going         = race.Going,
                    PrizeMoney    = race.PrizeMoney,
                    Prize1st      = race.Prize1st,
                    Prize2nd      = race.Prize2nd,
                    Prize3rd      = race.Prize3rd,
                    Prize4th      = race.Prize4th,
                    Prize5th      = race.Prize5th,
                    Prize6th      = race.Prize6th,
                    CurrencyUnit  = race.CurrencyUnit,
                    Runners       = Convert.ToInt32(race.NoOfRunners),
                    Time          = race.WinTime,
                    NonRunners    = race.NonRunners,
                    PostTemplate  = true
                };
                context.RPRaces.Add(entity);

                // Runners are saved by SaveRunner through its own context, before this
                // context commits the race row.
                SaveRunner(race);

                context.SaveChanges();
            }
        }
예제 #6
0
        /// <summary>
        /// Downloads every queued race link that is required and not yet scraped, oldest race date
        /// first. Each link is attempted up to four times, each attempt in its own transaction,
        /// and is marked as scraped only when an attempt commits. A link that keeps failing is
        /// reported on the console and left unscraped.
        /// </summary>
        public static void ScrapeRace()
        {
            using (RacingPostRacesEntities db = new RacingPostRacesEntities())
            {
                // Refresh the cached course list (member defined outside this method).
                Courses = db.RPCourses.ToList();

                var linksToDownload = db.ScrapeRaces.Where(link => link.Required == true && link.Scraped == false).OrderBy(d => d.RaceDate).ToList();

                foreach (var url in linksToDownload)
                {
                    Thread.Sleep(1000);

                    // Snapshot of what the context tracks before this link, so that everything a
                    // failed attempt leaves behind can be removed again.
                    var trackedBefore = new HashSet <object>(db.ChangeTracker.Entries().Select(e => e.Entity));

                    bool downloaded = false;
                    int  retry      = 0;
                    while (retry <= 3)
                    {
                        using (System.Data.Entity.DbContextTransaction dbTran = db.Database.BeginTransaction())
                        {
                            try
                            {
                                Console.Write(string.Format("Downloading race for  {0} - {1}\n", url.RaceDate, url.RaceId));

                                DownloadSingleRace(url.Link, db);
                                url.Scraped = true;
                                db.SaveChanges();
                                dbTran.Commit();
                                downloaded = true;
                                break;
                            }
                            catch (Exception ex)
                            {
                                dbTran.Rollback();
                                retry++;

                                // The rollback undoes the database work but not the context's
                                // change tracker. Without this clean-up, half-built rows from the
                                // failed attempt would be written by the next successful save.
                                foreach (var entry in db.ChangeTracker.Entries().ToList())
                                {
                                    if (!trackedBefore.Contains(entry.Entity))
                                    {
                                        entry.State = System.Data.Entity.EntityState.Detached;
                                    }
                                    else if (entry.State == System.Data.Entity.EntityState.Modified)
                                    {
                                        entry.CurrentValues.SetValues(entry.OriginalValues);
                                        entry.State = System.Data.Entity.EntityState.Unchanged;
                                    }
                                    else if (entry.State == System.Data.Entity.EntityState.Deleted)
                                    {
                                        entry.State = System.Data.Entity.EntityState.Unchanged;
                                    }
                                }

                                Console.Write(string.Format("Attempt {0} failed for race {1}: {2}\n", retry, url.RaceId, ex.Message));
                            }
                        }
                    }

                    if (!downloaded)
                    {
                        // Best effort: keep going with the next link; this one stays unscraped.
                        Console.Write(string.Format("Giving up on race {0} after {1} attempts\n", url.RaceId, retry));
                    }
                }
            }
        }
예제 #7
0
        /// <summary>
        /// Queues race links for every day from the stored last-scraped date up to and including
        /// <paramref name="endDate"/>, saving the progress date after each day.
        /// </summary>
        /// <param name="endDate">Last day (inclusive) to collect race links for.</param>
        /// <exception cref="InvalidOperationException">
        /// Thrown when <c>ScrapeCourses</c> has no row to read the starting date from.
        /// </exception>
        public static void ScrapeRaceList(DateTime endDate)
        {
            using (RacingPostRacesEntities db = new RacingPostRacesEntities())
            {
                // Fetch the progress row once instead of re-querying it on every loop pass,
                // and fail with a clear message when it is missing.
                var progress = db.ScrapeCourses.FirstOrDefault();
                if (progress == null)
                {
                    throw new InvalidOperationException("ScrapeCourses contains no row to read LastDateScraped from.");
                }
                var startDate = progress.LastDateScraped;

                // Ids of races that are already queued; DownloadRaceList skips these.
                var alreadyDownloaded = db.ScrapeRaces.Select(s => s.RaceId).ToList();
                AllCourses = db.RPCourses.ToList();

                while (startDate <= endDate)
                {
                    Thread.Sleep(2000);
                    Console.Write(string.Format("Scrape links for date {0} \n", startDate));

                    DownloadRaceList(startDate, alreadyDownloaded, db);

                    // Record the day as done before moving on.
                    progress.LastDateScraped = startDate;
                    db.SaveChanges();
                    startDate = startDate.AddDays(1);
                }
            }
        }
예제 #8
0
        /// <summary>
        /// Reads the runner rows of a race result page and adds one <c>RPRunner</c> per row to
        /// <paramref name="db"/>. Jockeys and trainers whose ids are not stored yet are added and
        /// saved immediately; the runner rows themselves are only added, not saved, here.
        /// </summary>
        /// <param name="race">Race the runners belong to; only <c>race.Id</c> is read.</param>
        /// <param name="doc">Parsed result page.</param>
        /// <param name="db">Context that receives the new rows.</param>
        public static void ProcessRunner(RPRace race, HtmlDocument doc, RacingPostRacesEntities db)
        {
            // Main rows are the <tr> children of the first <tbody> whose first attribute value is
            // "rp-horseTable__mainRow"; rows without any attribute are skipped instead of throwing.
            var rows = doc.QuerySelectorAll("tbody").FirstOrDefault().ChildNodes
                          .Where(c => c.Name == "tr" && c.Attributes.Count > 0 && c.Attributes[0].Value == "rp-horseTable__mainRow")
                          .ToList();

            foreach (var item in rows)
            {
                var runner = new RPRunner();
                runner.Status       = "Runner";
                runner.PostTemplate = true;
                runner.RaceId       = race.Id;

                // The position cell holds both the finishing position and the draw; look it up once.
                var posCell = item.QuerySelectorAll("div .rp-horseTable__pos").FirstOrDefault().ChildNodes[3].ChildNodes[1];

                var posTemp = posCell.ChildNodes[0].InnerHtml.Replace("\n", "").Trim();
                if (DidNotFinsh.Any(df => df.Equals(posTemp)))
                {
                    runner.DidNotFinish = posTemp;
                }
                else
                {
                    runner.Position = Convert.ToInt32(posTemp);
                }

                string draw = posCell.ChildNodes[2].InnerHtml.Replace("&nbsp;(", "").Replace(")", "").Trim();
                int    drawPos;
                int.TryParse(draw, out drawPos);
                // An unparsable or non-positive draw is stored as null.
                runner.Draw = drawPos > 0 ? drawPos : (int?)null;

                var lengthNode = item.QuerySelectorAll("span .rp-horseTable__pos__length").FirstOrDefault();
                if (lengthNode != null && lengthNode.ChildNodes.Count >= 2)
                {
                    runner.Distance = lengthNode.ChildNodes[1].InnerHtml;
                }

                var horseUrl = item.QuerySelectorAll("a .rp-horseTable__horse__name").FirstOrDefault().Attributes[0].Value;
                runner.HorseId = Helper.GetIdfromUrl(horseUrl, "/profile/horse/");
                runner.Price   = item.QuerySelectorAll("span .rp-horseTable__horse__price").FirstOrDefault().InnerHtml.Replace("\n", "").Trim();
                var persons = item.QuerySelectorAll("span .rp-horseTable__human__wrapper");

                //jockey info
                var jockeyUrl = persons.FirstOrDefault().ChildNodes[1];
                runner.JockeyId = Helper.GetIdfromUrl(jockeyUrl.Attributes[0].Value, "/profile/jockey/");
                Jockey jockey = new Jockey();
                jockey.Id   = Convert.ToInt32(runner.JockeyId);
                jockey.Name = jockeyUrl.InnerHtml.Replace("\n", "").Trim();
                // Cut the name at the first nested tag; a name with no markup is kept whole
                // (previously Substring threw when "<" was absent).
                int jockeyMarkup = jockey.Name.IndexOf("<");
                if (jockeyMarkup >= 0)
                {
                    jockey.Name = jockey.Name.Substring(0, jockeyMarkup);
                }

                if (!db.Jockeys.Where(j => j.Id == jockey.Id).Any())
                {
                    db.Jockeys.Add(jockey);
                    db.SaveChanges();
                }

                //trainer info
                var trainerUrl = persons[1].ChildNodes[1];
                runner.TrainerId = Helper.GetIdfromUrl(trainerUrl.Attributes[0].Value, "/profile/trainer/");
                Trainer trainer = new Trainer();
                trainer.Id   = Convert.ToInt32(runner.TrainerId);
                trainer.Name = trainerUrl.InnerHtml.Replace("\n", "").Trim();
                if (trainer.Name.IndexOf("<") > 0)
                {
                    trainer.Name = trainer.Name.Substring(0, trainer.Name.IndexOf("<"));
                }
                if (!db.Trainers.Where(j => j.Id == trainer.Id).Any())
                {
                    db.Trainers.Add(trainer);
                    db.SaveChanges();
                }

                // Age is the text of the row's child node 7, up to the first nested tag.
                var age = item.ChildNodes[7].InnerHtml.Replace("\n", "").Trim();
                if (age.IndexOf("<") > 0)
                {
                    age = age.Substring(0, age.IndexOf("<"));
                }
                runner.Age = Convert.ToInt32(age);

                // Raw weight markup from child node 9 is handed to ProcessWt (defined elsewhere).
                var wt = item.ChildNodes[9].InnerHtml.Replace("\n", "").Trim();
                runner.WeightRaw = ProcessWt(wt);
                db.RPRunners.Add(runner);
            }
        }
예제 #9
0
        /// <summary>
        /// Downloads one race result page, fills an <c>RPRace</c> from its header, processes its
        /// runners and saves everything through <paramref name="db"/>. Does nothing when a race
        /// with the id taken from the URL is already stored.
        /// </summary>
        /// <param name="url">
        /// Result page URL. The last path segment is read as the race id and the segment at
        /// index 4 (after splitting on '/') as the course id.
        /// </param>
        /// <param name="db">Context used for the existence check and for saving.</param>
        public static void DownloadSingleRace(string url, RacingPostRacesEntities db)
        {
            string[] urlParts = url.Split('/');
            int      raceId   = Convert.ToInt32(urlParts.LastOrDefault());

            if (db.RPRaces.Any(r => r.Id == raceId))
            {
                return;
            }

            var web = new HtmlWeb();
            var doc = web.Load(url);

            //Extract Header
            RPRace race = new RPRace();

            race.Id           = raceId;
            race.StartTime    = Convert.ToDateTime(doc.QuerySelectorAll("span .rp-raceTimeCourseName__date").FirstOrDefault().InnerHtml);
            race.Time         = doc.QuerySelectorAll("span .rp-raceTimeCourseName__time").FirstOrDefault().InnerHtml;
            race.Name         = doc.QuerySelectorAll("h2 .rp-raceTimeCourseName__title").FirstOrDefault().InnerHtml;
            race.CourseId     = Convert.ToInt32(urlParts[4]);
            race.PostTemplate = true;

            // Class and eligibility are optional on the page; each node is looked up once.
            var classNode = doc.QuerySelectorAll("span .rp-raceTimeCourseName_class").FirstOrDefault();
            if (classNode != null)
            {
                string classDesc = classNode.InnerHtml;
                classDesc  = classDesc.Replace("(Class ", "").Replace(")", "").Replace("\n", "").Trim();
                race.Class = Convert.ToInt32(classDesc);
            }

            var agesNode = doc.QuerySelectorAll("span .rp-raceTimeCourseName_ratingBandAndAgesAllowed").FirstOrDefault();
            if (agesNode != null)
            {
                race.Eligibility = agesNode.InnerHtml.Replace("\n", "").Replace(")", "").Replace("(", "").Trim();
            }

            race.Distance = doc.QuerySelectorAll("span .rp-raceTimeCourseName_distance").FirstOrDefault().InnerHtml.Replace("\n", "").Trim();

            // Direct <span> children are joined into ClassRaw ("a | b | "); the children of
            // direct <div> nodes are joined into PrizeMoney.
            var infoContainer = doc.QuerySelectorAll("span .rp-raceTimeCourseName__info_container").FirstOrDefault();
            foreach (var child in infoContainer.ChildNodes)
            {
                if (child.Name.Equals("span"))
                {
                    race.ClassRaw = race.ClassRaw + child.InnerHtml.Replace("\n", "").Trim() + " | ";
                }
                if (child.Name.Equals("div"))
                {
                    foreach (var prizeNode in child.ChildNodes)
                    {
                        race.PrizeMoney = race.PrizeMoney + prizeNode.InnerHtml.Replace("\n", "").Trim().Replace("&pound;", "£") + " ";
                    }
                    // A <div> without children leaves PrizeMoney unset; trimming null used to throw.
                    if (race.PrizeMoney != null)
                    {
                        race.PrizeMoney = race.PrizeMoney.Trim();
                    }
                }
            }

            race.Notes = doc.QuerySelectorAll("div .rp-raceInfo").FirstOrDefault().InnerHtml;

            // Helpers defined outside this block; runners are added to db before the race itself.
            ProcessRaceAttributes(race);
            ProcessFooter(race);
            ProcessRunner(race, doc, db);
            db.RPRaces.Add(race);
            db.SaveChanges();
        }