Пример #1
0
        /// <summary>
        /// Builds the list of horses found in a pedigree page: the query horse first,
        /// followed by whatever <c>ExtractPedigreeHorses</c> appends to the list.
        /// </summary>
        /// <param name="source">Raw page text to parse.</param>
        /// <returns>A new list whose first element is the result of <c>ExtractQueryHorse</c>.</returns>
        public static List<PedigreeHorse> Extract(string source)
        {
            // The query horse always occupies index 0; pedigree horses are appended after it.
            var result = new List<PedigreeHorse> { ExtractQueryHorse(source) };

            ExtractPedigreeHorses(source, result);

            return result;
        }
Пример #2
0
        /// <summary>
        /// Parses a pedigree page and copies the horses it contains into the
        /// <c>PQ_Horses</c> table of the supplied data context.
        /// </summary>
        /// <remarks>
        /// Horses are processed by ascending generation, then by descending pedigree string.
        /// Unknown ids are queued with <c>InsertOnSubmit</c>; this method does not submit
        /// changes itself. Country, foal year, colour, sex, sire and dam are only filled in
        /// when the stored value is null. Racing record, earnings, owner and breeder are
        /// overwritten, but only for the generation-0 horse.
        /// </remarks>
        /// <param name="db_ppdb">Data context whose <c>PQ_Horses</c> table is read and updated.</param>
        /// <param name="page">Raw page text handed to <c>PedigreeExtract.Extract</c>.</param>
        /// <param name="outcome">Neither read nor written by this method.</param>
        /// <param name="pqid">Neither read nor written by this method.</param>
        private static void ScrapePage(RacingPostRacesDataContext db_ppdb, string page, ref string outcome,
                                       ref string pqid)
        {
            List<PedigreeHorse> horses = PedigreeExtract.Extract(page);

            // Ids queued for insert during this call. A set gives constant-time membership
            // checks; the same horse can occur several times in one pedigree.
            // NOTE(review): presumably needed because a queued insert is not returned by the
            // FirstOrDefault query below until changes are submitted - confirm.
            var added = new HashSet<string>();

            foreach (PedigreeHorse horse in horses.OrderBy(x => x.Generation).ThenByDescending(x => x.Pedigree))
            {
                if (added.Contains(horse.PQId))
                {
                    continue;
                }

                PQ_Horse pq_horse = db_ppdb.PQ_Horses.FirstOrDefault(x => x.Id == horse.PQId);
                if (pq_horse == null)
                {
                    pq_horse = new PQ_Horse();
                    db_ppdb.PQ_Horses.InsertOnSubmit(pq_horse);
                    pq_horse.Id       = horse.PQId;
                    pq_horse.Name     = horse.Name;
                    pq_horse.FlatName = FlattenName(horse.Name, false);

                    added.Add(horse.PQId);
                }

                // Fill gaps only: values already stored on the row win over scraped ones.
                if (pq_horse.Country == null)
                {
                    pq_horse.Country = horse.Country;
                }

                if (pq_horse.FoalYear == null)
                {
                    pq_horse.FoalYear = horse.FoalYear;
                }

                if (pq_horse.Colour == null)
                {
                    pq_horse.Colour = horse.Colour;
                }

                if (pq_horse.Sex == null)
                {
                    pq_horse.Sex = horse.Sex;
                }

                // A parent's pedigree string is the child's string plus "S" (sire) or "D" (dam).
                if (pq_horse.SireId == null)
                {
                    PedigreeHorse sire1 = horses.FirstOrDefault(x => x.Pedigree == horse.Pedigree + "S");
                    if (sire1 != null)
                    {
                        pq_horse.SireId = sire1.PQId;
                    }
                }

                if (pq_horse.DamId == null)
                {
                    PedigreeHorse dam1 = horses.FirstOrDefault(x => x.Pedigree == horse.Pedigree + "D");
                    if (dam1 != null)
                    {
                        pq_horse.DamId = dam1.PQId;
                    }
                }

                // Racing record and connections are copied only for the generation-0 horse.
                if (horse.Generation == 0)
                {
                    pq_horse.Starts   = horse.Starts;
                    pq_horse.Wins     = horse.Wins;
                    pq_horse.Places   = horse.Places;
                    pq_horse.Earnings = horse.Earnings;
                    pq_horse.Owner    = horse.Owner;
                    pq_horse.Breeder  = horse.Breeder;
                }

                Logger.WriteLog(horse.Pedigree + " " + horse.PQId + " " + horse.Name + " " + horse.Country + " " +
                                horse.FoalYear + " " + horse.Colour
                                + " " + horse.Starts + " " + horse.Wins + " " + horse.Places + " " + horse.Earnings +
                                " " + horse.Owner + " " + horse.Breeder);
            }
        }
Пример #3
0
        /// <summary>
        /// Walks the rows and cells of the page's <c>pedigreetable</c> table and appends
        /// every horse it can parse to <paramref name="horses"/>.
        /// </summary>
        /// <remarks>
        /// Two cell layouts are handled. A cell whose attributes contain <c>rowspan</c> is
        /// parsed as a complete horse (id, name, country, foal year, colour). A cell without
        /// <c>rowspan</c> that contains a link starts a pending horse; a later
        /// <c>class=m</c>/<c>class=f</c> cell without a link supplies foal year and colour
        /// and completes it. Nothing is appended when the table is not found.
        /// </remarks>
        /// <param name="source">Raw page text to parse.</param>
        /// <param name="horses">List that parsed horses are appended to.</param>
        private static void ExtractPedigreeHorses(string source, List<PedigreeHorse> horses)
        {
            var   regex_table = new Regex("<table.*class=pedigreetable(.*)</table>", RegexOptions.Singleline);
            Match match_table = regex_table.Match(source);

            if (!match_table.Success)
            {
                return;
            }

            // Built once up front instead of once per row / per cell.
            var regex_row = new Regex("<tr>(.*?)</tr", RegexOptions.Singleline);
            // <td  colspan=2 rowspan=16 class=m onmousedown="clickMenu('FOOTSTEPSINTHESAND',5,1,event);"><a href=/footstepsinthes
            var regex_col = new Regex("<td([^>]*?)>(.*?)</td", RegexOptions.Singleline);
            // No trailing \s: rowspan may be the last attribute, in which case nothing follows
            // the digits inside the captured attribute text.
            var regex_rowspan = new Regex(@"rowspan=(\d+)", RegexOptions.Singleline);

            PedigreeHorse pending = null;
            int           row_num = 0;
            int           rowspan = 0;

            string table = match_table.Groups[1].ToString();

            for (Match match_row = regex_row.Match(table); match_row.Success; match_row = match_row.NextMatch())
            {
                row_num++;
                string row = match_row.Groups[1].ToString();

                for (Match match_col = regex_col.Match(row); match_col.Success; match_col = match_col.NextMatch())
                {
                    string col_parms = match_col.Groups[1].ToString();
                    string col       = match_col.Groups[2].ToString();

                    // Fresh scratch values for every cell; the Extract* helpers fill them by ref.
                    string pqid      = "";
                    string name      = "";
                    string remainder = "";
                    string country   = "";
                    int    year      = 0;
                    string colour    = "";

                    if (col_parms.Contains("rowspan"))
                    {
                        // If no number can be read, the previous rowspan value is kept.
                        Match match_rowspan = regex_rowspan.Match(col_parms);
                        if (match_rowspan.Success)
                        {
                            rowspan = Convert.ToInt32(match_rowspan.Groups[1].ToString());
                        }

                        var horse = new PedigreeHorse();

                        if (ExtractHorse(col, ref pqid, ref name, ref remainder))
                        {
                            horse.PQId = pqid;
                            horse.Name = name;

                            if (ExtractCountry(remainder, ref country))
                            {
                                horse.Country = country;
                            }

                            if (ExtractFoalYear(remainder, ref year))
                            {
                                horse.FoalYear = year;
                            }

                            if (ExtractColour(remainder, ref colour))
                            {
                                horse.Colour = colour;
                            }

                            horse.Generation = GetGeneration(rowspan);
                            horse.Pedigree   = GetPedigree(row_num, rowspan);

                            horses.Add(horse);
                        }
                    }
                    else
                    {
                        // Cells without a rowspan attribute are treated as spanning one row.
                        rowspan = 1;

                        if (col.Contains("<a href"))
                        {
                            // Link cell: start a pending horse. It is only added to the list
                            // once a following class=m / class=f cell completes it.
                            pending = new PedigreeHorse();
                            if (ExtractHorse(col, ref pqid, ref name, ref remainder))
                            {
                                pending.PQId = pqid;
                                pending.Name = name;

                                if (ExtractCountry(remainder, ref country))
                                {
                                    pending.Country = country;
                                }
                            }
                        }
                        else if (col_parms.Contains("class=m") || col_parms.Contains("class=f"))
                        {
                            // A pending horse without an id means its link cell failed to parse.
                            if (pending != null && pending.PQId != null)
                            {
                                if (ExtractFoalYear(col, ref year))
                                {
                                    pending.FoalYear = year;
                                }

                                if (ExtractColour(col, ref colour))
                                {
                                    pending.Colour = colour;
                                }

                                pending.Generation = GetGeneration(rowspan);
                                pending.Pedigree   = GetPedigree(row_num, rowspan);

                                horses.Add(pending);
                                pending = null;
                            }
                        }
                    }
                }
            }
        }
Пример #4
0
        /// <summary>
        /// Parses the header of a pedigree page into a <c>PedigreeHorse</c> describing the
        /// horse the page is about.
        /// </summary>
        /// <remarks>
        /// Every field is optional: a pattern that does not match simply leaves the
        /// corresponding property untouched. The returned horse always has
        /// <c>Generation</c> 0 and an empty <c>Pedigree</c> string.
        /// </remarks>
        /// <param name="source">Raw page text to parse.</param>
        /// <returns>A new <c>PedigreeHorse</c> populated with whatever could be extracted.</returns>
        private static PedigreeHorse ExtractQueryHorse(string source)
        {
            const RegexOptions opts = RegexOptions.Singleline;

            var result = new PedigreeHorse();

            // Id: taken from the target of the "Pedigree" navigation link.
            Match idMatch = Regex.Match(source, @"<li><a href=""/(\S+)""[^>]*>Pedigree</a>", opts);
            if (idMatch.Success)
            {
                result.PQId = idMatch.Groups[1].Value.Trim();
            }

            // The "top line" block carries name, description, race record and earnings.
            Match headerMatch = Regex.Match(source, @"<font size='-1' class=normal>(.*?)</font>", opts);
            string header = headerMatch.Success ? headerMatch.Groups[1].Value.Trim() : "";

            Match nameMatch = Regex.Match(
                header,
                @"<a href=""javascript:nothing\(\);"" class=""nounderline""[^>]*>([^<]*)</a></b>",
                opts);
            if (nameMatch.Success)
            {
                result.Name = nameMatch.Groups[1].Value.Trim();
            }

            // Text between a closing anchor tag and "DP =" is what the country, colour,
            // sex and foal-year helpers are run against.
            Match detailMatch = Regex.Match(header, @".*</a>(.*?)DP =", opts);
            string details = detailMatch.Success ? detailMatch.Groups[1].Value.Trim() : "";

            string parsedCountry = "";
            if (ExtractCountry(details, ref parsedCountry))
            {
                result.Country = parsedCountry;
            }

            string parsedColour = "";
            if (ExtractColour(details, ref parsedColour))
            {
                result.Colour = parsedColour;
            }

            string parsedSex = "";
            if (ExtractSex(details, ref parsedSex))
            {
                result.Sex = parsedSex;
            }

            int parsedYear = 0;
            if (ExtractFoalYear(details, ref parsedYear))
            {
                result.FoalYear = parsedYear;
            }

            Match recordMatch = Regex.Match(header, @"(\d+) Starts, (\d+|M) Wins, (\d+) Places", opts);
            if (recordMatch.Success)
            {
                result.Starts = Convert.ToInt32(recordMatch.Groups[1].Value);

                // A win count of "M" is stored as zero wins.
                string winsText = recordMatch.Groups[2].Value;
                result.Wins = winsText == "M" ? 0 : Convert.ToInt32(winsText);

                result.Places = Convert.ToInt32(recordMatch.Groups[3].Value);
            }

            Match earningsMatch = Regex.Match(header, @"Career Earnings:</b>(.*)$", opts);
            if (earningsMatch.Success)
            {
                result.Earnings = earningsMatch.Groups[1].Value.Trim();
            }

            // Owner and breeder are read from the separate "subjectinfo" div.
            Match infoMatch = Regex.Match(source, @"<div id=""subjectinfo""(.*?)</div>", opts);
            if (infoMatch.Success)
            {
                string subjectInfo = infoMatch.Groups[1].Value.Trim();

                Match ownerMatch = Regex.Match(subjectInfo, @"Owner</b>:([^<]*)<", opts);
                if (ownerMatch.Success)
                {
                    result.Owner = ownerMatch.Groups[1].Value.Trim();
                }

                Match breederMatch = Regex.Match(subjectInfo, @"Breeder</b>:([^<]*)<", opts);
                if (breederMatch.Success)
                {
                    result.Breeder = breederMatch.Groups[1].Value.Trim();
                }
            }

            result.Generation = 0;
            result.Pedigree   = "";
            return result;
        }