Exemple #1
0
        /// <summary>
        /// Downloads the box score page for one game, extracts the two team codes and the two
        /// per-team score tables from it, writes the game row and parses both score tables.
        /// </summary>
        /// <param name="gid">
        /// Game id. It must parse as an integer and be at least 8 characters long; its first
        /// 8 characters are used as the game date.
        /// </param>
        /// <remarks>
        /// Any game that cannot be processed (malformed id, missing page content, missing team
        /// codes or score tables) is reported to <c>ErrorLog</c> and skipped instead of throwing.
        /// </remarks>
        private static void CrawlGame(string gid)
        {
            // Validate the id before doing any network work: a null, short or non-numeric id
            // would otherwise throw from Parse/Substring, in the short-id case only after the
            // page had already been downloaded.
            long gameNumber;
            if (gid == null || gid.Length < 8 || !long.TryParse(gid, out gameNumber))
            {
                ErrorLog.WriteLine("Could not parse " + gid);
                ErrorLog.Flush();
                return;
            }

            string url = @"http://rivals.yahoo.com/ncaa/basketball/boxscore?gid=" + gid;

            // ids above this threshold get the extra old_bs=1 query flag
            // NOTE(review): presumably this requests the older page layout that the XPaths
            // below were written against - confirm.
            if (gameNumber > 201103100000)
            {
                url += "&old_bs=1";
            }

            string html = DownloadAndCache(new Uri(url));
            if (string.IsNullOrEmpty(html))
            {
                // nothing to parse; an empty document would end up in the same log branch below
                ErrorLog.WriteLine("Could not parse " + gid);
                ErrorLog.Flush();
                return;
            }

            HtmlDocument document = new HtmlDocument();
            document.LoadHtml(html);

            // the date is the first 8 characters of the game id
            string date = gid.Substring(0, 8);

            // container that holds the summary table (table[3]) and the score tables (table[5], table[7])
            const string contentRoot = @"/html[1]/head[1]/body[1]/div[1]/table[1]/tr[1]/td[1]/table[2]/tr[1]/td[1]/div[1]";

            // innermost table of the summary; row 4 carries the away team link, row 6 the home team link
            const string summaryRows = contentRoot + @"/table[3]/tr[1]/td[2]/table[1]/tr[1]/td[1]/table[1]/tr[1]/td[1]/table[1]";

            HtmlNode awayUrlNode = document.DocumentNode.SelectSingleNode(summaryRows + @"/tr[4]/td[2]");
            HtmlNode homeUrlNode = document.DocumentNode.SelectSingleNode(summaryRows + @"/tr[6]/td[2]");

            // get the team code from each node
            string awayTeamCode = GetTeamCodeFromNode(awayUrlNode);
            string homeTeamCode = GetTeamCodeFromNode(homeUrlNode);

            HtmlNode awayTable = document.DocumentNode.SelectSingleNode(contentRoot + @"/table[5]");
            HtmlNode homeTable = document.DocumentNode.SelectSingleNode(contentRoot + @"/table[7]");

            // we have both tables and both teams set
            if (!string.IsNullOrEmpty(awayTeamCode) && !string.IsNullOrEmpty(homeTeamCode) && awayTable != null && homeTable != null)
            {
                // create the game object and write to disk
                GameRow row = new GameRow();
                row.date     = date;
                row.gid      = gid;
                row.awayTeam = awayTeamCode;
                row.homeTeam = homeTeamCode;
                WriteGameRow(row);

                // parse the top (away) table
                ParseScoreTable(awayTable, gid, awayTeamCode);

                // parse the bottom (home) table
                ParseScoreTable(homeTable, gid, homeTeamCode);
            }
            else
            {
                // could not parse this game, log it
                ErrorLog.WriteLine("Could not parse " + gid);
                ErrorLog.Flush();
            }
        }
Exemple #2
0
        /// <summary>
        /// Writes one tab-separated line describing a game to the Games.tsv output,
        /// opening the shared writer the first time it is needed.
        /// </summary>
        /// <param name="row">Game whose gid, date, homeTeam and awayTeam are written, in that column order.</param>
        private static void WriteGameRow(GameRow row)
        {
            const string rowFormat = "{0}\t{1}\t{2}\t{3}";

            // lazily create the output writer on first use
            if (GameStream == null)
            {
                string gamesPath = Path.Combine(OUTPUT_PATH, "Games.tsv");
                GameStream = new StreamWriter(gamesPath);
            }

            GameStream.WriteLine(rowFormat, row.gid, row.date, row.homeTeam, row.awayTeam);
        }
Exemple #3
0
        /// <summary>
        /// Emits a single game as a tab-delimited record (gid, date, home team, away team)
        /// to Games.tsv under OUTPUT_PATH.
        /// </summary>
        /// <param name="row">The game to record.</param>
        private static void WriteGameRow(GameRow row)
        {
            // reuse the already-open writer, or create and remember it on the first call
            var output = GameStream ?? (GameStream = new StreamWriter(Path.Combine(OUTPUT_PATH, "Games.tsv")));

            // column order in the file: gid, date, home team, away team
            object[] columns = { row.gid, row.date, row.homeTeam, row.awayTeam };

            output.WriteLine("{0}\t{1}\t{2}\t{3}", columns);
        }
Exemple #4
0
        /// <summary>
        /// Fetches the box score page for a game, reads the away/home team codes and the
        /// away/home score tables out of it, then writes the game row and parses both tables.
        /// </summary>
        /// <param name="gid">
        /// Game id: an integer string of at least 8 characters, of which the first 8 are
        /// taken as the game date.
        /// </param>
        /// <remarks>
        /// Games that cannot be processed, including ones with a null, too short or
        /// non-numeric id, are written to <c>ErrorLog</c> and skipped rather than raising.
        /// </remarks>
        private static void CrawlGame(string gid)
        {
            // Reject unusable ids before downloading anything. Previously a null id, a
            // non-numeric id or an id shorter than 8 characters raised an exception here
            // instead of going through the "could not parse" log used for every other failure.
            long numericId;
            bool idIsUsable = gid != null && gid.Length >= 8 && long.TryParse(gid, out numericId);
            if (!idIsUsable)
            {
                ErrorLog.WriteLine("Could not parse " + gid);
                ErrorLog.Flush();
                return;
            }

            // re-read the value: numericId is not definitely assigned after the && chain above
            numericId = long.Parse(gid);

            string url = @"http://rivals.yahoo.com/ncaa/basketball/boxscore?gid=" + gid;
            if (numericId > 201103100000)
            {
                // ids above the threshold are requested with the old_bs=1 flag
                url += "&old_bs=1";
            }

            Uri boxscoreUrl = new Uri(url);
            string html = DownloadAndCache(boxscoreUrl);
            if (string.IsNullOrEmpty(html))
            {
                // no content to load into the document
                ErrorLog.WriteLine("Could not parse " + gid);
                ErrorLog.Flush();
                return;
            }

            HtmlDocument document = new HtmlDocument();
            document.LoadHtml(html);
            HtmlNode root = document.DocumentNode;

            // the date is the first 8 characters of the game id
            string date = gid.Substring(0, 8);

            // shared prefix of every lookup: the div holding the summary and score tables
            string layoutRoot = @"/html[1]/head[1]/body[1]/div[1]/table[1]/tr[1]/td[1]/table[2]/tr[1]/td[1]/div[1]";

            // innermost summary table; its 4th row links the away team, its 6th row the home team
            string summaryTable = layoutRoot + @"/table[3]/tr[1]/td[2]/table[1]/tr[1]/td[1]/table[1]/tr[1]/td[1]/table[1]";

            // get the team codes from the summary rows
            string awayTeamCode = GetTeamCodeFromNode(root.SelectSingleNode(summaryTable + @"/tr[4]/td[2]"));
            string homeTeamCode = GetTeamCodeFromNode(root.SelectSingleNode(summaryTable + @"/tr[6]/td[2]"));

            // top (away) and bottom (home) score tables
            HtmlNode awayTable = root.SelectSingleNode(layoutRoot + @"/table[5]");
            HtmlNode homeTable = root.SelectSingleNode(layoutRoot + @"/table[7]");

            bool missingData = string.IsNullOrEmpty(awayTeamCode)
                               || string.IsNullOrEmpty(homeTeamCode)
                               || awayTable == null
                               || homeTable == null;
            if (missingData)
            {
                // could not parse this game, log it
                ErrorLog.WriteLine("Could not parse " + gid);
                ErrorLog.Flush();
                return;
            }

            // create the game object and write to disk
            GameRow row = new GameRow();
            row.date = date;
            row.gid = gid;
            row.awayTeam = awayTeamCode;
            row.homeTeam = homeTeamCode;
            WriteGameRow(row);

            // parse the top (away) table, then the bottom (home) table
            ParseScoreTable(awayTable, gid, awayTeamCode);
            ParseScoreTable(homeTable, gid, homeTeamCode);
        }