/// <summary>
/// Creates a manager from the supplied response provider and HTML crawler.
/// </summary>
/// <param name="httpResponseProvider">Stored in <c>_httpResponseProvider</c>; must not be null.</param>
/// <param name="htmlCrawler">Stored in <c>_htmlCrawler</c>; must not be null.</param>
/// <param name="turnVerboseOn">Stored in <c>_verboseOn</c>; defaults to <c>true</c>.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="httpResponseProvider"/> or <paramref name="htmlCrawler"/> is null.
/// </exception>
public SiteCopyingManager(IHttpResponseProvider httpResponseProvider, IHtmlCrawler htmlCrawler, bool turnVerboseOn = true)
{
    _httpResponseProvider = httpResponseProvider ?? throw new ArgumentNullException(nameof(httpResponseProvider));

    // Report the parameter name (not the backing field name) so the exception
    // points callers at the argument they actually passed.
    _htmlCrawler = htmlCrawler ?? throw new ArgumentNullException(nameof(htmlCrawler));

    _verboseOn = turnVerboseOn;
}
/// <summary>
/// Collects the links found in a node's HTML, keeps those accepted by
/// <c>UriHelper.IsValidLink</c>, filters them through
/// <c>FilterLinksAccordingToTransitionToOtherDomainsLimits</c>, and logs how many remain.
/// </summary>
/// <param name="rootUri">Root URI forwarded to the domain-transition filter.</param>
/// <param name="node">Node whose <c>Html</c> is scanned and whose <c>Uri</c> appears in the log message.</param>
/// <param name="transactionLimits">Limits forwarded to the domain-transition filter.</param>
/// <param name="htmlCrawler">Crawler used to extract links from the node's HTML.</param>
/// <returns>The filtered links, already materialized.</returns>
public static IEnumerable<string> GetSiteNodeLinks(string rootUri, SiteNode node, TransitionToOtherDomainsLimits transactionLimits, IHtmlCrawler htmlCrawler)
{
    var links = htmlCrawler.FindHtmlPageLinks(node.Html).Where(UriHelper.IsValidLink);

    // Materialize once: the count is needed for logging, and returning the lazy
    // query afterwards would make the caller re-run the crawl/filter pipeline.
    var result = FilterLinksAccordingToTransitionToOtherDomainsLimits(rootUri, links, transactionLimits).ToList();

    NLogger.Logger.Info($"{result.Count} links found for uri {node.Uri}");
    return result;
}
/// <summary>
/// Extracts the links from a node's HTML, drops those rejected by
/// <c>UriHelper.IsValidLink</c>, and returns whatever
/// <c>FilterLinksAccordingToTransitionToOtherDomainsLimits</c> yields for them.
/// </summary>
/// <param name="rootUri">Root URI handed to the domain-transition filter.</param>
/// <param name="node">Node whose <c>Html</c> is scanned for links.</param>
/// <param name="transactionLimits">Limits handed to the domain-transition filter.</param>
/// <param name="htmlCrawler">Crawler that finds the links in the HTML.</param>
/// <returns>The links that remain after the domain-transition filter.</returns>
public static IEnumerable<string> GetSiteNodeLinks(string rootUri, SiteNode node, TransitionToOtherDomainsLimits transactionLimits, IHtmlCrawler htmlCrawler)
{
    // Step 1: every link the crawler can find in the page.
    var pageLinks = htmlCrawler.FindHtmlPageLinks(node.Html);

    // Step 2: keep only the links the URI helper accepts.
    var validLinks = pageLinks.Where(UriHelper.IsValidLink);

    // Step 3: apply the domain-transition limits relative to the root URI.
    return FilterLinksAccordingToTransitionToOtherDomainsLimits(rootUri, validLinks, transactionLimits);
}