/// <summary>
/// Creates a requester whose proxy queue is seeded from the proxies listed in the configuration.
/// </summary>
/// <param name="config">Crawl configuration; its <c>Proxies</c> and <c>ProxyQueueTimeoutMs</c> values are validated here.</param>
/// <param name="contentExtractor">Content extractor forwarded to the base constructor.</param>
/// <exception cref="ArgumentNullException"><c>config.Proxies</c> is null or has no elements.</exception>
/// <exception cref="ArgumentException"><c>config.ProxyQueueTimeoutMs</c> is zero or negative.</exception>
public MultiProxyPageRequester(MultiProxyCrawlConfiguration config, IWebContentExtractor contentExtractor)
    : base(config, contentExtractor)
{
    var proxies = config.Proxies;
    bool hasProxies = proxies != null && proxies.Length != 0;
    if (!hasProxies)
    {
        throw new ArgumentNullException(nameof(config.Proxies));
    }

    if (config.ProxyQueueTimeoutMs <= 0)
    {
        throw new ArgumentException("Value cannot be less than or equal 0", nameof(config.ProxyQueueTimeoutMs));
    }

    // Every configured proxy starts out available in the queue.
    _proxyQueue = new ConcurrentQueue<WebProxy>(proxies);
}
/// <summary>
/// Creates a requester that keeps the supplied HTTP client handler alongside the usual dependencies.
/// </summary>
/// <param name="torHandler">Handler stored in <c>_torHandler</c> (presumably routes traffic through Tor, judging by the name; confirm against usage).</param>
/// <param name="config">Crawl configuration; passed to the base constructor and also stored locally.</param>
/// <param name="contentExtractor">Optional content extractor; passed to the base constructor and also stored locally.</param>
/// <param name="httpClient">Optional HTTP client passed to the base constructor.</param>
public ProxyPageRequester(
    HttpClientHandler torHandler,
    CrawlConfiguration config,
    IWebContentExtractor contentExtractor = null,
    HttpClient httpClient = null)
    : base(config, contentExtractor, httpClient)
{
    _torHandler = torHandler;
    _contentExtractor = contentExtractor;
    _config = config;
}
/// <summary>
/// Creates a requester from injected services and obtains its HTTP client from the factory.
/// </summary>
/// <param name="logger">Logger stored for later use.</param>
/// <param name="httpClientFactory">Factory used to create the HTTP client held by this instance.</param>
/// <param name="crawlConfiguration">Crawl configuration stored for later use.</param>
/// <param name="webContentExtractor">Content extractor stored for later use.</param>
public PageRequester(
    ILogger<PageRequester> logger,
    IHttpClientFactory httpClientFactory,
    CrawlConfiguration crawlConfiguration,
    IWebContentExtractor webContentExtractor)
{
    _logger = logger;
    _httpFactory = httpClientFactory;

    // Configure the client before storing it; requests time out after ten minutes.
    var client = _httpFactory.CreateClient();
    client.Timeout = TimeSpan.FromMinutes(10);
    _client = client;

    _config = crawlConfiguration;
    _webContentExtractor = webContentExtractor;
}
/// <summary>
/// Creates a requester from a crawl configuration and an optional content extractor.
/// </summary>
/// <param name="config">Crawl configuration; must not be null.</param>
/// <param name="contentExtractor">Content extractor; a new <c>WebContentExtractor</c> is used when null.</param>
/// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
public PageRequester(CrawlConfiguration config, IWebContentExtractor contentExtractor)
{
    if (config == null)
    {
        throw new ArgumentNullException("config");
    }

    _config = config;

    // A positive limit overrides the static ServicePointManager default.
    var connectionLimit = _config.HttpServicePointConnectionLimit;
    if (connectionLimit > 0)
    {
        ServicePointManager.DefaultConnectionLimit = connectionLimit;
    }

    if (contentExtractor != null)
    {
        _extractor = contentExtractor;
    }
    else
    {
        _extractor = new WebContentExtractor();
    }
}
/// <summary>
/// Creates a downloader with a fresh cookie container and a lazily resolved system proxy.
/// </summary>
/// <param name="settings">Crawler settings stored for later use.</param>
/// <param name="contentExtractor">Content extractor stored for later use.</param>
public WebDownloader(CrawlerSettings settings, IWebContentExtractor contentExtractor)
{
    _settings = settings;
    _contentExtractor = contentExtractor;
    _cookieContainer = new CookieContainer();

    // The system proxy is looked up on first access and uses the default network credentials.
    _proxy = new Lazy<IWebProxy>(() =>
    {
        IWebProxy systemProxy = WebRequest.GetSystemWebProxy();
        systemProxy.Credentials = CredentialCache.DefaultNetworkCredentials;
        return systemProxy;
    });
}
/// <summary>
/// Creates a requester and resolves the version placeholder in the configured user agent string.
/// </summary>
/// <param name="config">Crawl configuration; must not be null.</param>
/// <param name="contentExtractor">Content extractor; a new <c>WebContentExtractor</c> is used when null.</param>
/// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
public PageRequester(CrawlConfiguration config, IWebContentExtractor contentExtractor)
{
    if (config == null)
    {
        throw new ArgumentNullException("config");
    }

    // The placeholder is replaced by the version of the assembly that declares the runtime type.
    var userAgentTemplate = config.UserAgentString;
    var assemblyVersion = Assembly.GetAssembly(this.GetType()).GetName().Version.ToString();
    _userAgentString = userAgentTemplate.Replace("@ABOTASSEMBLYVERSION@", assemblyVersion);

    _config = config;

    var connectionLimit = _config.HttpServicePointConnectionLimit;
    if (connectionLimit > 0)
    {
        ServicePointManager.DefaultConnectionLimit = connectionLimit;
    }

    _extractor = contentExtractor ?? new WebContentExtractor();
}
/// <summary>
/// Creates a requester from a configuration, a content extractor and an optional HTTP client.
/// </summary>
/// <param name="config">Crawl configuration; must not be null.</param>
/// <param name="contentExtractor">Content extractor; must not be null.</param>
/// <param name="httpClient">Optional HTTP client; stored as given, including null.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="config"/> or <paramref name="contentExtractor"/> is null.
/// </exception>
public PageRequester(CrawlConfiguration config, IWebContentExtractor contentExtractor, HttpClient httpClient = null)
{
    if (config == null)
    {
        throw new ArgumentNullException(nameof(config));
    }

    if (contentExtractor == null)
    {
        throw new ArgumentNullException(nameof(contentExtractor));
    }

    _config = config;
    _contentExtractor = contentExtractor;

    // A positive limit overrides the static ServicePointManager default.
    var connectionLimit = _config.HttpServicePointConnectionLimit;
    if (connectionLimit > 0)
    {
        ServicePointManager.DefaultConnectionLimit = connectionLimit;
    }

    _httpClient = httpClient;
}
/// <summary>
/// Creates a requester and builds the HTTP handler and client it will use.
/// </summary>
/// <param name="config">Crawl configuration; must not be null.</param>
/// <param name="contentExtractor">Content extractor; a new <c>WebContentExtractor</c> is used when null.</param>
/// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
public PageRequester(CrawlConfiguration config, IWebContentExtractor contentExtractor)
{
    if (config == null)
        throw new ArgumentNullException(nameof(config));

    _config = config;

    if (contentExtractor != null)
        _extractor = contentExtractor;
    else
        _extractor = new WebContentExtractor();

    // The handler is created first because the client is built on top of it.
    _httpClientHandler = BuildHttpClientHandler();
    _httpClient = BuildHttpClient(_httpClientHandler);
}
/// <summary>
/// Creates a requester from a crawl configuration and an optional content extractor.
/// </summary>
/// <param name="config">Crawl configuration; must not be null.</param>
/// <param name="contentExtractor">Content extractor; a new <c>WebContentExtractor</c> is used when null.</param>
/// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
public PageRequester(CrawlConfiguration config, IWebContentExtractor contentExtractor)
{
    if (config == null)
        throw new ArgumentNullException("config");

    _config = config;

    // Only a positive value replaces the static ServicePointManager connection limit.
    var requestedLimit = _config.HttpServicePointConnectionLimit;
    if (requestedLimit > 0)
        ServicePointManager.DefaultConnectionLimit = requestedLimit;

    _extractor = contentExtractor != null
        ? contentExtractor
        : new WebContentExtractor();
}
/// <summary>
/// Certificate validation handler that accepts every server certificate.
/// It is registered only when the crawl configuration disables SSL certificate validation.
/// </summary>
private static bool AcceptAnyServerCertificate(
    object sender,
    System.Security.Cryptography.X509Certificates.X509Certificate certificate,
    System.Security.Cryptography.X509Certificates.X509Chain chain,
    System.Net.Security.SslPolicyErrors sslPolicyErrors)
{
    return true;
}

/// <summary>
/// Creates a requester and applies the configuration's static <see cref="ServicePointManager"/> settings.
/// </summary>
/// <param name="config">Crawl configuration; must not be null.</param>
/// <param name="contentExtractor">Content extractor; a new <c>WebContentExtractor</c> is used when null.</param>
/// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
public PageRequester(CrawlConfiguration config, IWebContentExtractor contentExtractor)
{
    if (config == null)
    {
        throw new ArgumentNullException("config");
    }

    _config = config;

    if (_config.HttpServicePointConnectionLimit > 0)
    {
        ServicePointManager.DefaultConnectionLimit = _config.HttpServicePointConnectionLimit;
    }

    if (!_config.IsSslCertificateValidationEnabled)
    {
        // The callback is a static, process-wide delegate. Appending a fresh lambda on every
        // construction made its invocation list grow by one entry per requester. Work on a
        // local copy: drop a previous registration of the handler, append it again so it still
        // runs last, and publish the result with a single assignment so the handler is never
        // momentarily missing.
        var callbacks = ServicePointManager.ServerCertificateValidationCallback;
        callbacks -= AcceptAnyServerCertificate;
        callbacks += AcceptAnyServerCertificate;
        ServicePointManager.ServerCertificateValidationCallback = callbacks;
    }

    _extractor = contentExtractor ?? new WebContentExtractor();
}
/// <summary>
/// Certificate validation handler that accepts every server certificate.
/// It is registered only when the crawl configuration disables SSL certificate validation.
/// </summary>
private static bool AcceptAnyServerCertificate(
    object sender,
    System.Security.Cryptography.X509Certificates.X509Certificate certificate,
    System.Security.Cryptography.X509Certificates.X509Chain chain,
    System.Net.Security.SslPolicyErrors sslPolicyErrors)
{
    return true;
}

/// <summary>
/// Creates a requester and applies the configuration's static <see cref="ServicePointManager"/> settings.
/// </summary>
/// <param name="config">Crawl configuration; must not be null.</param>
/// <param name="contentExtractor">Content extractor; a new <c>WebContentExtractor</c> is used when null.</param>
/// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
public PageRequester(CrawlConfiguration config, IWebContentExtractor contentExtractor)
{
    _config = config ?? throw new ArgumentNullException(nameof(config));

    if (_config.HttpServicePointConnectionLimit > 0)
    {
        ServicePointManager.DefaultConnectionLimit = _config.HttpServicePointConnectionLimit;
    }

    if (!_config.IsSslCertificateValidationEnabled)
    {
        // The callback is a static, process-wide delegate. Appending a fresh lambda on every
        // construction made its invocation list grow by one entry per requester. Work on a
        // local copy: drop a previous registration of the handler, append it again so it still
        // runs last, and publish the result with a single assignment so the handler is never
        // momentarily missing.
        var callbacks = ServicePointManager.ServerCertificateValidationCallback;
        callbacks -= AcceptAnyServerCertificate;
        callbacks += AcceptAnyServerCertificate;
        ServicePointManager.ServerCertificateValidationCallback = callbacks;
    }

    _extractor = contentExtractor ?? new WebContentExtractor();
}
/// <summary>
/// Certificate validation handler that accepts every server certificate.
/// It is registered only when the crawl configuration disables SSL certificate validation.
/// </summary>
private static bool AcceptAnyServerCertificate(
    object sender,
    System.Security.Cryptography.X509Certificates.X509Certificate certificate,
    System.Security.Cryptography.X509Certificates.X509Chain chain,
    System.Net.Security.SslPolicyErrors sslPolicyErrors)
{
    return true;
}

/// <summary>
/// Stores the received configuration and extractor, then applies the configuration's static
/// <see cref="ServicePointManager"/> settings.
/// </summary>
/// <param name="config">Crawl configuration; must not be null.</param>
/// <param name="contentExtractor">Content extractor; a new <c>WebContentExtractor</c> is used when null.</param>
/// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
public PageRequester(CrawlConfiguration config, IWebContentExtractor contentExtractor)
{
    Config = config ?? throw new ArgumentNullException(nameof(config));
    Extractor = contentExtractor ?? new WebContentExtractor();

    // Apply the ServicePointManager settings requested by the configuration.
    if (Config.HttpServicePointConnectionLimit > 0)
    {
        ServicePointManager.DefaultConnectionLimit = Config.HttpServicePointConnectionLimit;
    }

    if (!Config.IsSslCertificateValidationEnabled)
    {
        // The callback is a static, process-wide delegate. Appending a fresh lambda on every
        // construction made its invocation list grow by one entry per requester. Work on a
        // local copy: drop a previous registration of the handler, append it again so it still
        // runs last, and publish the result with a single assignment so the handler is never
        // momentarily missing.
        var callbacks = ServicePointManager.ServerCertificateValidationCallback;
        callbacks -= AcceptAnyServerCertificate;
        callbacks += AcceptAnyServerCertificate;
        ServicePointManager.ServerCertificateValidationCallback = callbacks;
    }
}
/// <summary>
/// Creates a requester and fills the version placeholder in the configured user agent string.
/// </summary>
/// <param name="config">Crawl configuration; must not be null.</param>
/// <param name="contentExtractor">Content extractor; a new <c>WebContentExtractor</c> is used when null.</param>
/// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
public PageRequester(CrawlConfiguration config, IWebContentExtractor contentExtractor)
{
    if (config == null)
        throw new ArgumentNullException("config");

    // Substitute the version of the assembly that declares the runtime type of this instance.
    string rawUserAgent = config.UserAgentString;
    string versionText = Assembly.GetAssembly(this.GetType()).GetName().Version.ToString();
    _userAgentString = rawUserAgent.Replace("@ABOTASSEMBLYVERSION@", versionText);

    _config = config;

    if (_config.HttpServicePointConnectionLimit > 0)
        ServicePointManager.DefaultConnectionLimit = _config.HttpServicePointConnectionLimit;

    if (contentExtractor != null)
        _extractor = contentExtractor;
    else
        _extractor = new WebContentExtractor();
}
/// <summary>
/// Creates a requester and builds the HTTP handler and client it will use.
/// </summary>
/// <param name="config">Crawl configuration; must not be null.</param>
/// <param name="contentExtractor">Content extractor; a new <c>WebContentExtractor</c> is used when null.</param>
/// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
public PageRequester(CrawlConfiguration config, IWebContentExtractor contentExtractor)
{
    if (config == null)
        throw new ArgumentNullException(nameof(config));

    _config = config;

    // TODO: find the .NET Core equivalent of the ServicePointManager settings; see
    // http://stackoverflow.com/questions/36398474/servicepointmanager-defaultconnectionlimit-in-net-core
    // Until then, HttpServicePointConnectionLimit and IsSslCertificateValidationEnabled
    // are not applied by this constructor.

    if (contentExtractor != null)
        _extractor = contentExtractor;
    else
        _extractor = new WebContentExtractor();

    // The handler is created first because the client is built on top of it.
    _httpClientHandler = BuildHttpClientHandler();
    _httpClient = BuildHttpClient(_httpClientHandler);
}
/// <summary>
/// Creates the requester; both arguments are handed unchanged to the base constructor.
/// </summary>
/// <param name="config">Crawl configuration forwarded to the base class.</param>
/// <param name="contentExtractor">Content extractor forwarded to the base class.</param>
public AmazonPageRequester(CrawlConfiguration config, IWebContentExtractor contentExtractor)
    : base(config, contentExtractor)
{
}
/// <summary>
/// Creates the requester without supplying a cookie container; both arguments go to the base constructor.
/// </summary>
/// <param name="config">Crawl configuration forwarded to the base class.</param>
/// <param name="contentExtractor">Content extractor forwarded to the base class.</param>
public CookieLoadedPageRequester(CrawlConfiguration config, IWebContentExtractor contentExtractor)
    : base(config, contentExtractor)
{
}
/// <summary>
/// Creates the requester and keeps the supplied cookie container.
/// </summary>
/// <param name="config">Crawl configuration forwarded to the base class.</param>
/// <param name="contentExtractor">Content extractor forwarded to the base class.</param>
/// <param name="cookieContainer">Cookie container stored in <c>_cookieContainer</c>.</param>
public CookieLoadedPageRequester(
    CrawlConfiguration config,
    IWebContentExtractor contentExtractor,
    CookieContainer cookieContainer)
    : base(config, contentExtractor)
{
    _cookieContainer = cookieContainer;
}
/// <summary>
/// Creates the requester; both arguments are handed unchanged to the base constructor.
/// </summary>
/// <param name="config">Crawl configuration forwarded to the base class.</param>
/// <param name="contentExtractor">Content extractor forwarded to the base class.</param>
public PageRequesterWithCookies(CrawlConfiguration config, IWebContentExtractor contentExtractor)
    : base(config, contentExtractor)
{
}