/// <summary>
/// Fetches the content node for <c>linkDetails.DestinationUrl</c> and builds a
/// <see cref="CrawlerLinkDetails"/> entry for every anchor whose <c>href</c> is accepted
/// by <c>ShouldFollowLink</c>.
/// </summary>
/// <param name="linkDetails">
/// The link to crawl. Its <c>DestinationUrl</c> and <c>DestinationPageType</c> become the
/// <c>SourcePageUrl</c> and <c>SourcePageType</c> of every returned entry.
/// </param>
/// <returns>
/// The links to follow next. The list is empty when no content node is available or when
/// the content contains no anchors.
/// </returns>
public static List<CrawlerLinkDetails> CrawlLinksPage(CrawlerLinkDetails linkDetails)
{
    if (Logger.IsDebugEnabled)
    {
        Logger.DebugFormat("Crawling {0} ...", linkDetails.DestinationUrl);
    }

    HtmlNode contentDiv = WebClient.GetWebContentNode(linkDetails.DestinationUrl);
    if (contentDiv == null)
    {
        return new List<CrawlerLinkDetails>();
    }

    // HtmlAgilityPack's SelectNodes returns null (not an empty collection) when the XPath
    // matches nothing, so guard before feeding the result into LINQ.
    var anchors = contentDiv.SelectNodes(".//a");
    if (anchors == null)
    {
        return new List<CrawlerLinkDetails>();
    }

    var links = from a in anchors
                let href = a.GetAttributeValue("href", null) // read the attribute once per anchor
                where ShouldFollowLink(href)
                select new CrawlerLinkDetails
                {
                    DestinationPageType = GetNextPageType(linkDetails.DestinationPageType),
                    DestinationUrl = href,
                    LinkText = a.InnerText,
                    SourcePageType = linkDetails.DestinationPageType,
                    SourcePageUrl = linkDetails.DestinationUrl
                };

    return links.ToList();
}
/// <summary>
/// Fetches the content node for <c>linkDetails.DestinationUrl</c> and converts its table
/// rows into scorecard details via <c>GetDetails</c>.
/// </summary>
/// <param name="linkDetails">The link to crawl; only its <c>DestinationUrl</c> is read here.</param>
/// <param name="season">Passed through unchanged to <c>GetDetails</c> for every row.</param>
/// <returns>
/// The non-null results of <c>GetDetails</c> for every row with exactly seven <c>td</c> cells.
/// The list is empty when no content node is available or when no table rows are found.
/// </returns>
public static List<ScorecardDetails> CrawlMatchListPage(CrawlerLinkDetails linkDetails, string season)
{
    if (Logger.IsDebugEnabled)
    {
        Logger.DebugFormat("Crawling {0} ...", linkDetails.DestinationUrl);
    }

    HtmlNode contentDiv = WebClient.GetWebContentNode(linkDetails.DestinationUrl);
    if (contentDiv == null)
    {
        return new List<ScorecardDetails>();
    }

    // HtmlAgilityPack's SelectNodes returns null (not an empty collection) when the XPath
    // matches nothing, so a page without a table must not reach the LINQ query.
    var rows = contentDiv.SelectNodes("./table//tr");
    if (rows == null)
    {
        return new List<ScorecardDetails>();
    }

    var scorecards = from row in rows
                     let cells = row.SelectNodes("td")
                     // Rows without td cells yield null; only seven-cell rows are parsed.
                     where cells != null && cells.Count == 7
                     select GetDetails(cells, season);

    // GetDetails may return null for a row; those results are dropped.
    return scorecards.Where(x => x != null).ToList();
}
/// <summary>
/// Crawls a match-list page: obtains the content node for <c>linkDetails.DestinationUrl</c>
/// and runs <c>GetDetails</c> over each qualifying table row.
/// </summary>
/// <param name="linkDetails">The link to crawl; only its <c>DestinationUrl</c> is read here.</param>
/// <param name="season">Forwarded to <c>GetDetails</c> together with each row's cells.</param>
/// <returns>
/// A list of the non-null <c>GetDetails</c> results for rows that have exactly seven
/// <c>td</c> cells. The list is empty when the content node is missing or contains no rows.
/// </returns>
public static List<ScorecardDetails> CrawlMatchListPage(CrawlerLinkDetails linkDetails, string season)
{
    if (Logger.IsDebugEnabled)
    {
        Logger.DebugFormat("Crawling {0} ...", linkDetails.DestinationUrl);
    }

    HtmlNode contentDiv = WebClient.GetWebContentNode(linkDetails.DestinationUrl);
    if (contentDiv == null)
    {
        return new List<ScorecardDetails>();
    }

    // SelectNodes (HtmlAgilityPack) hands back null instead of an empty collection when
    // there is no match; without this guard the query below would throw.
    var tableRows = contentDiv.SelectNodes("./table//tr");
    if (tableRows == null)
    {
        return new List<ScorecardDetails>();
    }

    return tableRows
        .Select(row => row.SelectNodes("td"))
        // Null means the row has no td cells; only seven-cell rows are parsed.
        .Where(cells => cells != null && cells.Count == 7)
        .Select(cells => GetDetails(cells, season))
        // Drop rows for which GetDetails produced nothing.
        .Where(details => details != null)
        .ToList();
}
/// <summary>
/// Crawls a links page: obtains the content node for <c>linkDetails.DestinationUrl</c> and
/// returns a <see cref="CrawlerLinkDetails"/> for each anchor whose <c>href</c> passes
/// <c>ShouldFollowLink</c>.
/// </summary>
/// <param name="linkDetails">
/// The link to crawl. Its destination URL and page type are recorded as the source URL and
/// source page type of each returned entry.
/// </param>
/// <returns>
/// The links to follow next. The list is empty when the content node is missing or
/// contains no anchors.
/// </returns>
public static List<CrawlerLinkDetails> CrawlLinksPage(CrawlerLinkDetails linkDetails)
{
    if (Logger.IsDebugEnabled)
    {
        Logger.DebugFormat("Crawling {0} ...", linkDetails.DestinationUrl);
    }

    HtmlNode contentDiv = WebClient.GetWebContentNode(linkDetails.DestinationUrl);
    if (contentDiv == null)
    {
        return new List<CrawlerLinkDetails>();
    }

    // SelectNodes (HtmlAgilityPack) hands back null instead of an empty collection when
    // there is no match; a page without anchors must yield an empty result, not an exception.
    var anchorNodes = contentDiv.SelectNodes(".//a");
    if (anchorNodes == null)
    {
        return new List<CrawlerLinkDetails>();
    }

    return anchorNodes
        // Pair each anchor with its href so the attribute is read only once.
        .Select(anchor => new { Anchor = anchor, Href = anchor.GetAttributeValue("href", null) })
        .Where(candidate => ShouldFollowLink(candidate.Href))
        .Select(candidate => new CrawlerLinkDetails
        {
            DestinationPageType = GetNextPageType(linkDetails.DestinationPageType),
            DestinationUrl = candidate.Href,
            LinkText = candidate.Anchor.InnerText,
            SourcePageType = linkDetails.DestinationPageType,
            SourcePageUrl = linkDetails.DestinationUrl
        })
        .ToList();
}