/// <summary>
		/// Validates the proxy settings on <paramref name="config"/> and seeds the proxy queue
		/// with <c>config.Proxies</c>.
		/// </summary>
		/// <param name="config">Crawl configuration carrying the proxy list and the proxy queue timeout.</param>
		/// <param name="contentExtractor">Content extractor forwarded unchanged to the base constructor.</param>
		/// <exception cref="ArgumentNullException"><c>config.Proxies</c> is null or has no elements.</exception>
		/// <exception cref="ArgumentException"><c>config.ProxyQueueTimeoutMs</c> is zero or negative.</exception>
		public MultiProxyPageRequester(MultiProxyCrawlConfiguration config, IWebContentExtractor contentExtractor)
			: base(config, contentExtractor)
		{
			// A missing list and an empty list are rejected the same way.
			var hasProxies = config.Proxies != null && config.Proxies.Length != 0;
			if (!hasProxies)
			{
				throw new ArgumentNullException(nameof(config.Proxies));
			}

			if (config.ProxyQueueTimeoutMs <= 0)
			{
				throw new ArgumentException("Value cannot be less than or equal 0", nameof(config.ProxyQueueTimeoutMs));
			}

			_proxyQueue = new ConcurrentQueue<WebProxy>(config.Proxies);
		}
Exemplo n.º 2
0
        /// <summary>
        /// Creates the requester and keeps local references to the handler, configuration and
        /// content extractor it was given.
        /// </summary>
        /// <param name="torHandler">HTTP client handler stored in <c>_torHandler</c>.</param>
        /// <param name="config">Crawl configuration; also forwarded to the base constructor.</param>
        /// <param name="contentExtractor">Optional content extractor; also forwarded to the base constructor.</param>
        /// <param name="httpClient">Optional HTTP client, forwarded to the base constructor only.</param>
        public ProxyPageRequester(
            HttpClientHandler torHandler,
            CrawlConfiguration config,
            IWebContentExtractor contentExtractor = null,
            HttpClient httpClient = null)
            : base(config, contentExtractor, httpClient)
        {
            _torHandler = torHandler;

            // These are stored on this class in addition to being passed to the base constructor.
            _contentExtractor = contentExtractor;
            _config = config;
        }
Exemplo n.º 3
0
 /// <summary>
 /// Stores the injected dependencies and creates the HTTP client used by this requester,
 /// with its timeout set to ten minutes.
 /// </summary>
 /// <param name="logger">Logger stored in <c>_logger</c>.</param>
 /// <param name="httpClientFactory">Factory used to create the client stored in <c>_client</c>.</param>
 /// <param name="crawlConfiguration">Crawl configuration stored in <c>_config</c>.</param>
 /// <param name="webContentExtractor">Content extractor stored in <c>_webContentExtractor</c>.</param>
 public PageRequester(
     ILogger<PageRequester> logger,
     IHttpClientFactory httpClientFactory,
     CrawlConfiguration crawlConfiguration,
     IWebContentExtractor webContentExtractor)
 {
     _logger = logger;
     _httpFactory = httpClientFactory;

     // Configure the client before publishing it to the field.
     var client = httpClientFactory.CreateClient();
     client.Timeout = TimeSpan.FromMinutes(10);
     _client = client;

     _config = crawlConfiguration;
     _webContentExtractor = webContentExtractor;
 }
Exemplo n.º 4
0
        /// <summary>
        /// Stores the crawl configuration, applies its connection limit to
        /// <see cref="ServicePointManager"/> when one is set, and selects the content extractor.
        /// </summary>
        /// <param name="config">Crawl configuration; required.</param>
        /// <param name="contentExtractor">Content extractor; a new <c>WebContentExtractor</c> is used when null.</param>
        /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
        public PageRequester(CrawlConfiguration config, IWebContentExtractor contentExtractor)
        {
            if (config == null)
            {
                throw new ArgumentNullException("config");
            }

            _config = config;

            // Only a positive limit overrides the process-wide default.
            if (config.HttpServicePointConnectionLimit > 0)
            {
                ServicePointManager.DefaultConnectionLimit = config.HttpServicePointConnectionLimit;
            }

            if (contentExtractor != null)
            {
                _extractor = contentExtractor;
            }
            else
            {
                _extractor = new WebContentExtractor();
            }
        }
Exemplo n.º 5
0
 /// <summary>
 /// Stores the settings and content extractor, creates an empty cookie container, and sets up
 /// a lazily evaluated system web proxy that uses the default network credentials.
 /// </summary>
 /// <param name="settings">Crawler settings stored in <c>_settings</c>.</param>
 /// <param name="contentExtractor">Content extractor stored in <c>_contentExtractor</c>.</param>
 public WebDownloader(CrawlerSettings settings, IWebContentExtractor contentExtractor)
 {
     _settings = settings;
     _contentExtractor = contentExtractor;
     _cookieContainer = new CookieContainer();

     // The factory below runs only when the Lazy value is first read.
     _proxy = new Lazy<IWebProxy>(() =>
     {
         IWebProxy systemProxy = WebRequest.GetSystemWebProxy();
         systemProxy.Credentials = CredentialCache.DefaultNetworkCredentials;
         return systemProxy;
     });
 }
Exemplo n.º 6
0
        /// <summary>
        /// Stores the crawl configuration, builds the user agent string by substituting the
        /// assembly version for the <c>@ABOTASSEMBLYVERSION@</c> token, applies the configured
        /// connection limit when one is set, and selects the content extractor.
        /// </summary>
        /// <param name="config">Crawl configuration; required.</param>
        /// <param name="contentExtractor">Content extractor; a new <c>WebContentExtractor</c> is used when null.</param>
        /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
        public PageRequester(CrawlConfiguration config, IWebContentExtractor contentExtractor)
        {
            if (config == null)
            {
                throw new ArgumentNullException("config");
            }

            // Version of the assembly that defines the runtime type of this instance.
            var assemblyVersion = Assembly.GetAssembly(GetType()).GetName().Version.ToString();
            _userAgentString = config.UserAgentString.Replace("@ABOTASSEMBLYVERSION@", assemblyVersion);
            _config = config;

            // Only a positive limit overrides the process-wide default.
            if (config.HttpServicePointConnectionLimit > 0)
            {
                ServicePointManager.DefaultConnectionLimit = config.HttpServicePointConnectionLimit;
            }

            if (contentExtractor != null)
            {
                _extractor = contentExtractor;
            }
            else
            {
                _extractor = new WebContentExtractor();
            }
        }
Exemplo n.º 7
0
        /// <summary>
        /// Stores the required configuration and content extractor, applies the configured
        /// connection limit when one is set, and keeps the optional HTTP client as given.
        /// </summary>
        /// <param name="config">Crawl configuration; required.</param>
        /// <param name="contentExtractor">Content extractor; required.</param>
        /// <param name="httpClient">Optional HTTP client; stored as-is, including when null.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="config"/> or <paramref name="contentExtractor"/> is null.
        /// </exception>
        public PageRequester(CrawlConfiguration config, IWebContentExtractor contentExtractor, HttpClient httpClient = null)
        {
            if (config is null)
            {
                throw new ArgumentNullException(nameof(config));
            }

            if (contentExtractor is null)
            {
                throw new ArgumentNullException(nameof(contentExtractor));
            }

            _config = config;
            _contentExtractor = contentExtractor;
            _httpClient = httpClient;

            // Only a positive limit overrides the process-wide default.
            if (config.HttpServicePointConnectionLimit > 0)
            {
                ServicePointManager.DefaultConnectionLimit = config.HttpServicePointConnectionLimit;
            }
        }
Exemplo n.º 8
0
        /// <summary>
        /// Stores the crawl configuration, selects the content extractor, then builds the HTTP
        /// client handler and the HTTP client that uses it.
        /// </summary>
        /// <param name="config">Crawl configuration; required.</param>
        /// <param name="contentExtractor">Content extractor; a new <c>WebContentExtractor</c> is used when null.</param>
        /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
        public PageRequester(CrawlConfiguration config, IWebContentExtractor contentExtractor)
        {
            if (config == null)
                throw new ArgumentNullException(nameof(config));

            _config = config;

            if (contentExtractor != null)
                _extractor = contentExtractor;
            else
                _extractor = new WebContentExtractor();

            // Built last, after the fields above are populated; the handler is created first
            // because the client is built from it.
            _httpClientHandler = BuildHttpClientHandler();
            _httpClient = BuildHttpClient(_httpClientHandler);
        }
Exemplo n.º 9
0
        /// <summary>
        /// Stores the crawl configuration, applies its connection limit to
        /// <see cref="ServicePointManager"/> when one is set, and selects the content extractor.
        /// </summary>
        /// <param name="config">Crawl configuration; required.</param>
        /// <param name="contentExtractor">Content extractor; a new <c>WebContentExtractor</c> is used when null.</param>
        /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
        public PageRequester(CrawlConfiguration config, IWebContentExtractor contentExtractor)
        {
            if (config == null)
                throw new ArgumentNullException("config");

            _config = config;

            // Only a positive limit overrides the process-wide default.
            if (config.HttpServicePointConnectionLimit > 0)
                ServicePointManager.DefaultConnectionLimit = config.HttpServicePointConnectionLimit;

            if (contentExtractor != null)
                _extractor = contentExtractor;
            else
                _extractor = new WebContentExtractor();
        }
Exemplo n.º 10
0
        /// <summary>
        /// Stores the crawl configuration, applies the process-wide <see cref="ServicePointManager"/>
        /// settings it requests, and selects the content extractor.
        /// </summary>
        /// <param name="config">Crawl configuration; required.</param>
        /// <param name="contentExtractor">Content extractor; a new <c>WebContentExtractor</c> is used when null.</param>
        /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
        public PageRequester(CrawlConfiguration config, IWebContentExtractor contentExtractor)
        {
            if (config == null)
            {
                throw new ArgumentNullException("config");
            }

            _config = config;

            // Only a positive limit overrides the process-wide default.
            if (_config.HttpServicePointConnectionLimit > 0)
            {
                ServicePointManager.DefaultConnectionLimit = _config.HttpServicePointConnectionLimit;
            }

            if (!_config.IsSslCertificateValidationEnabled)
            {
                // SECURITY: this turns certificate validation off through a static, process-wide callback.
                // Remove-then-add keeps exactly one copy of the handler registered (and keeps it last),
                // so constructing many requesters no longer grows the callback's invocation list.
                ServicePointManager.ServerCertificateValidationCallback -= AcceptAnyServerCertificate;
                ServicePointManager.ServerCertificateValidationCallback += AcceptAnyServerCertificate;
            }

            _extractor = contentExtractor ?? new WebContentExtractor();
        }

        /// <summary>
        /// Certificate validation callback that accepts every certificate. Declared as a named static
        /// method so repeated registrations compare equal and can be removed.
        /// </summary>
        private static bool AcceptAnyServerCertificate(
            object sender,
            System.Security.Cryptography.X509Certificates.X509Certificate certificate,
            System.Security.Cryptography.X509Certificates.X509Chain chain,
            System.Net.Security.SslPolicyErrors sslPolicyErrors)
        {
            return true;
        }
Exemplo n.º 11
0
        /// <summary>
        /// Stores the crawl configuration, applies the process-wide <see cref="ServicePointManager"/>
        /// settings it requests, and selects the content extractor.
        /// </summary>
        /// <param name="config">Crawl configuration; required.</param>
        /// <param name="contentExtractor">Content extractor; a new <c>WebContentExtractor</c> is used when null.</param>
        /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
        public PageRequester(CrawlConfiguration config, IWebContentExtractor contentExtractor)
        {
            _config = config ?? throw new ArgumentNullException("config");

            // Only a positive limit overrides the process-wide default.
            if (_config.HttpServicePointConnectionLimit > 0)
            {
                ServicePointManager.DefaultConnectionLimit = _config.HttpServicePointConnectionLimit;
            }

            if (!_config.IsSslCertificateValidationEnabled)
            {
                // SECURITY: this turns certificate validation off through a static, process-wide callback.
                // Remove-then-add keeps exactly one copy of the handler registered (and keeps it last),
                // so constructing many requesters no longer grows the callback's invocation list.
                ServicePointManager.ServerCertificateValidationCallback -= AcceptAnyServerCertificate;
                ServicePointManager.ServerCertificateValidationCallback += AcceptAnyServerCertificate;
            }

            _extractor = contentExtractor ?? new WebContentExtractor();
        }

        /// <summary>
        /// Certificate validation callback that accepts every certificate. Declared as a named static
        /// method so repeated registrations compare equal and can be removed.
        /// </summary>
        private static bool AcceptAnyServerCertificate(
            object sender,
            System.Security.Cryptography.X509Certificates.X509Certificate certificate,
            System.Security.Cryptography.X509Certificates.X509Chain chain,
            System.Net.Security.SslPolicyErrors sslPolicyErrors)
        {
            return true;
        }
Exemplo n.º 12
0
        /// <summary>
        /// Stores the received configuration and content extractor, then applies the process-wide
        /// <see cref="ServicePointManager"/> settings the configuration requests.
        /// </summary>
        /// <param name="config">Crawl configuration; required.</param>
        /// <param name="contentExtractor">Content extractor; a new <c>WebContentExtractor</c> is used when null.</param>
        /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
        public PageRequester(CrawlConfiguration config, IWebContentExtractor contentExtractor)
        {
            Config    = config ?? throw new ArgumentNullException(nameof(config));
            Extractor = contentExtractor ?? new WebContentExtractor();

            // Set the ServicePointManager connection limit; only a positive value overrides the default.
            if (Config.HttpServicePointConnectionLimit > 0)
            {
                ServicePointManager.DefaultConnectionLimit = Config.HttpServicePointConnectionLimit;
            }

            if (!Config.IsSslCertificateValidationEnabled)
            {
                // SECURITY: this turns certificate validation off through a static, process-wide callback.
                // Remove-then-add keeps exactly one copy of the handler registered (and keeps it last),
                // so constructing many requesters no longer grows the callback's invocation list.
                ServicePointManager.ServerCertificateValidationCallback -= AcceptAnyServerCertificate;
                ServicePointManager.ServerCertificateValidationCallback += AcceptAnyServerCertificate;
            }
        }

        /// <summary>
        /// Certificate validation callback that accepts every certificate. Declared as a named static
        /// method so repeated registrations compare equal and can be removed.
        /// </summary>
        private static bool AcceptAnyServerCertificate(
            object sender,
            System.Security.Cryptography.X509Certificates.X509Certificate certificate,
            System.Security.Cryptography.X509Certificates.X509Chain chain,
            System.Net.Security.SslPolicyErrors sslPolicyErrors)
        {
            return true;
        }
Exemplo n.º 13
0
        /// <summary>
        /// Stores the crawl configuration, builds the user agent string by substituting the
        /// assembly version for the <c>@ABOTASSEMBLYVERSION@</c> token, applies the configured
        /// connection limit when one is set, and selects the content extractor.
        /// </summary>
        /// <param name="config">Crawl configuration; required.</param>
        /// <param name="contentExtractor">Content extractor; a new <c>WebContentExtractor</c> is used when null.</param>
        /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
        public PageRequester(CrawlConfiguration config, IWebContentExtractor contentExtractor)
        {
            if (config == null)
                throw new ArgumentNullException("config");

            // Version of the assembly that defines the runtime type of this instance.
            var assemblyVersion = Assembly.GetAssembly(GetType()).GetName().Version.ToString();
            _userAgentString = config.UserAgentString.Replace("@ABOTASSEMBLYVERSION@", assemblyVersion);
            _config = config;

            // Only a positive limit overrides the process-wide default.
            if (config.HttpServicePointConnectionLimit > 0)
                ServicePointManager.DefaultConnectionLimit = config.HttpServicePointConnectionLimit;

            if (contentExtractor != null)
                _extractor = contentExtractor;
            else
                _extractor = new WebContentExtractor();
        }
Exemplo n.º 14
0
        /// <summary>
        /// Stores the crawl configuration, selects the content extractor, then builds the HTTP
        /// client handler and the HTTP client that uses it.
        /// </summary>
        /// <param name="config">Crawl configuration; required.</param>
        /// <param name="contentExtractor">Content extractor; a new <c>WebContentExtractor</c> is used when null.</param>
        /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
        public PageRequester(CrawlConfiguration config, IWebContentExtractor contentExtractor)
        {
            if (config == null)
                throw new ArgumentNullException(nameof(config));

            _config = config;

            // TODO: find the .NET Core equivalent of the ServicePointManager settings. Until then this
            // constructor applies neither HttpServicePointConnectionLimit (DefaultConnectionLimit) nor
            // IsSslCertificateValidationEnabled (ServerCertificateValidationCallback).
            // http://stackoverflow.com/questions/36398474/servicepointmanager-defaultconnectionlimit-in-net-core

            if (contentExtractor != null)
                _extractor = contentExtractor;
            else
                _extractor = new WebContentExtractor();

            // Built last, after the fields above are populated; the handler is created first
            // because the client is built from it.
            _httpClientHandler = BuildHttpClientHandler();
            _httpClient = BuildHttpClient(_httpClientHandler);
        }
 /// <summary>
 /// Creates the requester by passing both arguments straight to the base constructor.
 /// </summary>
 /// <param name="config">Crawl configuration forwarded to the base class.</param>
 /// <param name="contentExtractor">Content extractor forwarded to the base class.</param>
 public AmazonPageRequester(
     CrawlConfiguration config,
     IWebContentExtractor contentExtractor)
     : base(config, contentExtractor)
 {
     // Nothing to initialise beyond what the base constructor does.
 }
Exemplo n.º 16
0
 /// <summary>
 /// Creates the requester by passing both arguments straight to the base constructor.
 /// </summary>
 /// <param name="config">Crawl configuration forwarded to the base class.</param>
 /// <param name="contentExtractor">Content extractor forwarded to the base class.</param>
 public CookieLoadedPageRequester(
     CrawlConfiguration config,
     IWebContentExtractor contentExtractor) : base(config, contentExtractor)
 {
     // Nothing to initialise beyond what the base constructor does.
 }
Exemplo n.º 17
0
 /// <summary>
 /// Creates the requester via the base constructor and keeps the supplied cookie container.
 /// </summary>
 /// <param name="config">Crawl configuration forwarded to the base class.</param>
 /// <param name="contentExtractor">Content extractor forwarded to the base class.</param>
 /// <param name="cookieContainer">Cookie container stored in <c>_cookieContainer</c>.</param>
 public CookieLoadedPageRequester(
     CrawlConfiguration config,
     IWebContentExtractor contentExtractor,
     CookieContainer cookieContainer) : base(config, contentExtractor)
 {
     _cookieContainer = cookieContainer;
 }
Exemplo n.º 18
0
 /// <summary>
 /// Creates the requester by passing both arguments straight to the base constructor.
 /// </summary>
 /// <param name="config">Crawl configuration forwarded to the base class.</param>
 /// <param name="contentExtractor">Content extractor forwarded to the base class.</param>
 public PageRequesterWithCookies(
     CrawlConfiguration config,
     IWebContentExtractor contentExtractor) : base(config, contentExtractor)
 {
     // Nothing to initialise beyond what the base constructor does.
 }