Exemple #1
0
        /// <summary>
        /// Fetches the content node for <c>linkDetails.DestinationUrl</c> and builds a
        /// <see cref="CrawlerLinkDetails"/> entry for every anchor whose <c>href</c> is
        /// accepted by <c>ShouldFollowLink</c>.
        /// </summary>
        /// <param name="linkDetails">
        /// The link to crawl. Its destination URL and page type are recorded as the source
        /// URL and source page type of each returned entry.
        /// </param>
        /// <returns>
        /// The followable links found on the page; an empty list when the content node could
        /// not be obtained or when it contains no anchor elements.
        /// </returns>
        public static List<CrawlerLinkDetails> CrawlLinksPage(CrawlerLinkDetails linkDetails)
        {
            if (Logger.IsDebugEnabled)
            {
                Logger.DebugFormat("Crawling {0} ...", linkDetails.DestinationUrl);
            }

            HtmlNode contentDiv = WebClient.GetWebContentNode(linkDetails.DestinationUrl);
            if (contentDiv == null)
            {
                return new List<CrawlerLinkDetails>();
            }

            // SelectNodes may return null instead of an empty collection (HtmlAgilityPack does
            // so when the XPath matches nothing); querying null would throw.
            var anchors = contentDiv.SelectNodes(".//a");
            if (anchors == null)
            {
                return new List<CrawlerLinkDetails>();
            }

            var links = from a in anchors
                        // Read the attribute once and reuse it for the filter and the result.
                        let href = a.GetAttributeValue("href", null)
                        where ShouldFollowLink(href)
                        select new CrawlerLinkDetails
                        {
                            DestinationPageType = GetNextPageType(linkDetails.DestinationPageType),
                            DestinationUrl = href,
                            LinkText = a.InnerText,
                            SourcePageType = linkDetails.DestinationPageType,
                            SourcePageUrl = linkDetails.DestinationUrl
                        };

            return links.ToList();
        }
Exemple #2
0
        /// <summary>
        /// Fetches the content node for <c>linkDetails.DestinationUrl</c> and converts each
        /// table row that has exactly seven <c>td</c> cells into a
        /// <see cref="ScorecardDetails"/> via <c>GetDetails</c>.
        /// </summary>
        /// <param name="linkDetails">The link whose destination URL is crawled.</param>
        /// <param name="season">Season value passed through unchanged to <c>GetDetails</c>.</param>
        /// <returns>
        /// The non-null results of <c>GetDetails</c>; an empty list when the content node could
        /// not be obtained or when no table rows are found beneath it.
        /// </returns>
        public static List<ScorecardDetails> CrawlMatchListPage(CrawlerLinkDetails linkDetails, string season)
        {
            if (Logger.IsDebugEnabled)
            {
                Logger.DebugFormat("Crawling {0} ...", linkDetails.DestinationUrl);
            }

            HtmlNode contentDiv = WebClient.GetWebContentNode(linkDetails.DestinationUrl);
            if (contentDiv == null)
            {
                return new List<ScorecardDetails>();
            }

            // SelectNodes can yield null rather than an empty collection (the per-row cell
            // lookup below is null-checked for the same reason); guard the row lookup too.
            var rows = contentDiv.SelectNodes("./table//tr");
            if (rows == null)
            {
                return new List<ScorecardDetails>();
            }

            var scorecards = from row in rows
                             let cells = row.SelectNodes("td")
                             // Only rows with exactly seven cells are handed to GetDetails.
                             where cells != null && cells.Count == 7
                             select GetDetails(cells, season);

            // GetDetails may return null for a row; those entries are dropped.
            return scorecards.Where(x => x != null).ToList();
        }
Exemple #3
0
        /// <summary>
        /// Crawls the page at <c>linkDetails.DestinationUrl</c> and produces one
        /// <see cref="ScorecardDetails"/> per table row containing exactly seven <c>td</c>
        /// cells, using <c>GetDetails</c> to build each entry.
        /// </summary>
        /// <param name="linkDetails">The link whose destination URL is crawled.</param>
        /// <param name="season">Season value forwarded to <c>GetDetails</c> for every row.</param>
        /// <returns>
        /// All non-null scorecard entries; an empty list when the content node is unavailable
        /// or the XPath <c>./table//tr</c> matches no rows.
        /// </returns>
        public static List<ScorecardDetails> CrawlMatchListPage(CrawlerLinkDetails linkDetails, string season)
        {
            if (Logger.IsDebugEnabled)
            {
                Logger.DebugFormat("Crawling {0} ...", linkDetails.DestinationUrl);
            }

            HtmlNode contentDiv = WebClient.GetWebContentNode(linkDetails.DestinationUrl);
            if (contentDiv == null)
            {
                return new List<ScorecardDetails>();
            }

            // The row lookup may come back null when nothing matches, just like the
            // per-row cell lookup that is already null-checked below.
            var tableRows = contentDiv.SelectNodes("./table//tr");
            if (tableRows == null)
            {
                return new List<ScorecardDetails>();
            }

            return tableRows
                .Select(row => row.SelectNodes("td"))
                // Keep only rows that have exactly seven cells.
                .Where(cells => cells != null && cells.Count == 7)
                .Select(cells => GetDetails(cells, season))
                // Discard rows for which GetDetails produced nothing.
                .Where(details => details != null)
                .ToList();
        }
Exemple #4
0
        /// <summary>
        /// Crawls the page at <c>linkDetails.DestinationUrl</c> and returns a
        /// <see cref="CrawlerLinkDetails"/> for each anchor whose <c>href</c> passes
        /// <c>ShouldFollowLink</c>.
        /// </summary>
        /// <param name="linkDetails">
        /// The link being crawled. Its destination URL and page type become the source URL
        /// and source page type of every returned entry.
        /// </param>
        /// <returns>
        /// The links to follow next; an empty list when the content node is unavailable or
        /// the page has no anchor elements.
        /// </returns>
        public static List<CrawlerLinkDetails> CrawlLinksPage(CrawlerLinkDetails linkDetails)
        {
            if (Logger.IsDebugEnabled)
            {
                Logger.DebugFormat("Crawling {0} ...", linkDetails.DestinationUrl);
            }

            HtmlNode contentDiv = WebClient.GetWebContentNode(linkDetails.DestinationUrl);
            if (contentDiv == null)
            {
                return new List<CrawlerLinkDetails>();
            }

            // SelectNodes may return null when the XPath matches nothing (HtmlAgilityPack
            // behaviour); treat that the same as "no links" instead of letting LINQ throw.
            var anchorNodes = contentDiv.SelectNodes(".//a");
            if (anchorNodes == null)
            {
                return new List<CrawlerLinkDetails>();
            }

            var results = new List<CrawlerLinkDetails>();
            foreach (var anchor in anchorNodes)
            {
                // Fetch href once; it drives both the filter and the stored destination.
                var href = anchor.GetAttributeValue("href", null);
                if (!ShouldFollowLink(href))
                {
                    continue;
                }

                results.Add(new CrawlerLinkDetails
                {
                    DestinationPageType = GetNextPageType(linkDetails.DestinationPageType),
                    DestinationUrl = href,
                    LinkText = anchor.InnerText,
                    SourcePageType = linkDetails.DestinationPageType,
                    SourcePageUrl = linkDetails.DestinationUrl
                });
            }

            return results;
        }