/// <summary>
/// Downloads the given page and then recursively scrapes every page returned
/// by its <c>NextWebpages()</c> call (depth-first), until
/// <c>CurrentCrawledPages</c> reaches <c>MaxPagesToCrawl</c>.
/// </summary>
/// <param name="webpage">The page to download and follow onward from.</param>
private void ScrapeWebpage(IWebpage webpage)
{
    // Guard: once the crawl budget is used up, log it and do no further work for this page.
    if (CurrentCrawledPages >= MaxPagesToCrawl)
    {
        Console.WriteLine("Crawled page count reached the maximum");
        return;
    }

    Console.WriteLine("{0, 50} : Starting to scrape", webpage.URL);
    webpage.Download();
    CurrentCrawledPages++;
    Console.WriteLine("{0, 50} : SourceCode downloaded", webpage.URL);

    // Iterate as IWebpage, the only contract this method needs. Declaring the
    // loop variable as a concrete class makes foreach insert a hidden explicit
    // cast, which throws InvalidCastException for any other IWebpage implementation.
    foreach (IWebpage nextWebpage in webpage.NextWebpages())
    {
        ScrapeWebpage(nextWebpage);
    }
}
/// <summary>
/// Wraps the page in a <c>WebpageReader</c> and returns the element value it
/// finds for the <c>_webpageTitleSearch</c> search (presumably the page title,
/// judging by the field name).
/// </summary>
/// <param name="webpage">The page to search.</param>
/// <returns>The value returned by <c>FindPageElementValue</c> for that search.</returns>
private string FindWebpageTitle(IWebpage webpage)
{
    WebpageReader pageReader = new WebpageReader(webpage);
    return pageReader.FindPageElementValue(_webpageTitleSearch);
}
/// <summary>
/// Builds a <c>PageSearcher</c> for the given page, supplying it with a new
/// <c>WebpageReader</c> over that page and a new
/// <c>ContentExtractionStrategyFactory</c>.
/// </summary>
/// <param name="webpage">The page the reader is created over.</param>
/// <returns>A newly constructed <c>PageSearcher</c>.</returns>
private static PageSearcher CreatePageSearcher(IWebpage webpage)
{
    // Collaborators are created in the same order as the original single expression.
    WebpageReader pageReader = new WebpageReader(webpage);
    ContentExtractionStrategyFactory strategyFactory = new ContentExtractionStrategyFactory();

    return new PageSearcher(pageReader, strategyFactory);
}