/// <summary>
/// Creates a page requester and fills its proxy queue with the proxies listed in
/// <paramref name="config"/>.
/// </summary>
/// <param name="config">
/// Crawl configuration. Its <c>Proxies</c> must be non-null and non-empty, and its
/// <c>ProxyQueueTimeoutMs</c> must be greater than zero.
/// </param>
/// <param name="contentExtractor">Content extractor passed through unchanged to the base constructor.</param>
/// <exception cref="ArgumentNullException"><c>config.Proxies</c> is null or has no elements.</exception>
/// <exception cref="ArgumentException"><c>config.ProxyQueueTimeoutMs</c> is zero or negative.</exception>
public MultiProxyPageRequester(MultiProxyCrawlConfiguration config, IWebContentExtractor contentExtractor)
    : base(config, contentExtractor)
{
    // An empty proxy list is reported the same way as a missing one.
    bool hasProxies = config.Proxies != null && config.Proxies.Length != 0;
    if (!hasProxies)
    {
        throw new ArgumentNullException(nameof(config.Proxies));
    }

    if (config.ProxyQueueTimeoutMs <= 0)
    {
        throw new ArgumentException(
            "Value cannot be less than or equal 0",
            nameof(config.ProxyQueueTimeoutMs));
    }

    _proxyQueue = new ConcurrentQueue<WebProxy>(config.Proxies);
}
/// <summary>
/// Creates a crawler from explicitly supplied collaborators, substituting
/// multi-proxy defaults for the page requester, the domain rate limiter and the
/// robots.txt finder where required.
/// </summary>
/// <param name="crawlConfiguration">Crawl configuration; forwarded to the base constructor and used to build the default page requester.</param>
/// <param name="crawlDecisionMaker">Forwarded unchanged to the base constructor.</param>
/// <param name="threadManager">Forwarded unchanged to the base constructor.</param>
/// <param name="scheduler">Forwarded unchanged to the base constructor.</param>
/// <param name="pageRequester">
/// Forwarded to the base constructor. If it is not a <see cref="MultiProxyPageRequester"/>
/// (which includes null), a new <see cref="MultiProxyPageRequester"/> is used instead.
/// </param>
/// <param name="hyperLinkParser">Forwarded unchanged to the base constructor.</param>
/// <param name="memoryManager">Forwarded unchanged to the base constructor.</param>
/// <param name="domainRateLimiter">Rate limiter to use; when null a <see cref="MultiProxyDomainRateLimiter"/> is created.</param>
/// <param name="robotsDotTextFinder">Robots.txt finder to use; when null a <see cref="RobotsDotTextFinder"/> is created.</param>
public MultiProxyPoliteWebCrawler(
    MultiProxyCrawlConfiguration crawlConfiguration,
    ICrawlDecisionMaker crawlDecisionMaker,
    IThreadManager threadManager,
    IScheduler scheduler,
    IPageRequester pageRequester,
    IHyperLinkParser hyperLinkParser,
    IMemoryManager memoryManager,
    IMultiProxyDomainRateLimiter domainRateLimiter,
    IRobotsDotTextFinder robotsDotTextFinder)
    : base(
        crawlConfiguration,
        crawlDecisionMaker,
        threadManager,
        scheduler,
        pageRequester,
        hyperLinkParser,
        memoryManager)
{
    // Anything other than a MultiProxyPageRequester (null included) is replaced.
    bool isMultiProxyRequester = pageRequester is MultiProxyPageRequester;
    if (!isMultiProxyRequester)
    {
        _pageRequester = new MultiProxyPageRequester(crawlConfiguration);
    }

    if (domainRateLimiter != null)
    {
        _domainRateLimiter = domainRateLimiter;
    }
    else
    {
        _domainRateLimiter = new MultiProxyDomainRateLimiter(
            _crawlContext.CrawlConfiguration.MinCrawlDelayPerDomainMilliSeconds);
    }

    // Must run after the requester check above: the default finder is built on
    // the _pageRequester field, which may just have been replaced.
    if (robotsDotTextFinder != null)
    {
        _robotsDotTextFinder = robotsDotTextFinder;
    }
    else
    {
        _robotsDotTextFinder = new RobotsDotTextFinder(_pageRequester);
    }
}
/// <summary>
/// Creates a crawler from configuration alone by delegating to the full
/// constructor with null passed for every collaborator.
/// </summary>
/// <param name="crawlConfiguration">Crawl configuration forwarded to the full constructor.</param>
public MultiProxyPoliteWebCrawler(MultiProxyCrawlConfiguration crawlConfiguration)
    : this(
        crawlConfiguration,
        crawlDecisionMaker: null,
        threadManager: null,
        scheduler: null,
        pageRequester: null,
        hyperLinkParser: null,
        memoryManager: null,
        domainRateLimiter: null,
        robotsDotTextFinder: null)
{
}
/// <summary>
/// Creates a page requester from configuration alone by delegating to the
/// two-argument constructor with a null content extractor.
/// </summary>
/// <param name="config">Crawl configuration forwarded to the two-argument constructor.</param>
public MultiProxyPageRequester(MultiProxyCrawlConfiguration config)
    : this(config, contentExtractor: null)
{
}