/// <summary>
/// Sends a test request through <paramref name="proxy"/> and, on success, records the
/// measured response time, protocol and anonymity type on it and puts it into the proxy pool.
/// </summary>
/// <param name="proxy">The proxy to check; its <c>Ip</c> and <c>Port</c> are used to build the web proxy.</param>
/// <returns>A task that completes when the check has finished.</returns>
/// <remarks>
/// Any exception (timeout, connection failure, unparsable response, ...) is logged at
/// debug level and swallowed; the proxy is then not put into the pool.
/// </remarks>
private async Task CheckProxyAsync(Proxy proxy)
{
    var httpClientHandler = new HttpClientHandler
    {
        AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate,
        UseCookies = false,
        Proxy = new WebProxy(proxy.Ip, proxy.Port)
    };

    // A dedicated client is required because the proxy is configured on the handler.
    // disposeHandler = true: the handler is disposed together with the client.
    using (var httpClient = new HttpClient(httpClientHandler, true))
    {
        httpClient.Timeout = TimeSpan.FromSeconds(5);

        try
        {
            var stopwatch = Stopwatch.StartNew();
            var responseStr = await httpClient.GetStringAsync("http://hb.opencpu.org/get?show_env");
            var responseTime = stopwatch.ElapsedMilliseconds;

            var response = JsonConvert.DeserializeObject<HttpBinResponse>(responseStr);

            proxy.Checked = true;
            proxy.ResponseTime = responseTime;
            proxy.Protocol = ProxyProtocol.HTTP;

            // Header names are case-insensitive. Compare them ordinally instead of
            // lower-casing with the current culture (which breaks e.g. "VIA" under
            // Turkish casing rules) and instead of building a dictionary (which throws
            // when two header names differ only by case).
            var revealsProxy = response.Headers.Any(header =>
                string.Equals(header.Key, "x-forwarded-for", StringComparison.OrdinalIgnoreCase) ||
                string.Equals(header.Key, "via", StringComparison.OrdinalIgnoreCase));

            // TODO: Need to check for real ip.
            proxy.Type = revealsProxy ? ProxyType.Transparent : ProxyType.Elite;

            _proxyPool.Put(proxy);

            _logger.LogDebug("[{0}] Works, took {1}ms", proxy, proxy.ResponseTime);
        }
        catch (Exception e)
        {
            // Best effort: a failing proxy is simply not added to the pool.
            _logger.LogDebug("[{0}] Exception: {1}", proxy, e.GetType().Name);
        }
    }
}
/// <summary>
/// Background loop that scrapes every outdated provider, puts the scraped proxies
/// into the proxy pool, and then waits 5 seconds before the next pass.
/// </summary>
/// <param name="stoppingToken">Signals that the loop should stop.</param>
/// <returns>A task that completes once cancellation has been requested.</returns>
/// <remarks>
/// At most <c>MaxConcurrency</c> providers are scraped at the same time. Errors raised
/// by a single provider are logged and do not stop the loop.
/// </remarks>
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{
    while (!stoppingToken.IsCancellationRequested)
    {
        try
        {
            // Fetch outdated providers: never scraped, or last scraped longer ago than
            // their interval. Materialized once so the predicate is not evaluated
            // lazily while the scrape tasks below update Previous.
            var time = DateTimeOffset.UtcNow;
            var providerConfigs = _providers
                .Where(x => x.Previous.HasValue == false || time - x.Previous > x.Interval)
                .ToList();

            // NOTE(review): if WaitAsync is cancelled while scrape tasks are still in
            // flight, the semaphore and scope are disposed underneath them — confirm
            // this is acceptable during shutdown.
            using (var concurrencyLock = new SemaphoreSlim(MaxConcurrency))
            using (var scope = _scopeFactory.CreateScope())
            {
                var httpClientFactory = scope.ServiceProvider.GetRequiredService<IHttpClientFactory>();
                var scrapeTasks = new List<Task>();

                // Fetch from every provider.
                foreach (var providerConfig in providerConfigs)
                {
                    // Wait until there is room.
                    await concurrencyLock.WaitAsync(stoppingToken);

                    var task = Task.Run(async () =>
                    {
                        var name = providerConfig.Provider.GetName();
                        var client = httpClientFactory.CreateClient("Scraper");

                        try
                        {
                            var proxies = await providerConfig.Provider.GetProxiesAsync(client);

                            foreach (var proxy in proxies)
                            {
                                _proxyPool.Put(proxy);
                            }

                            _logger.LogDebug("Scraped {0} from {1}.", proxies.Count, name);
                        }
                        catch (Exception e)
                        {
                            _logger.LogError(e, "Error in {0}.", name);
                        }
                        finally
                        {
                            // Recorded on failure too, so a broken provider is retried
                            // only after its interval has elapsed.
                            providerConfig.Previous = time;

                            // Allow another provider to be scraped.
                            concurrencyLock.Release();
                        }
                    }, stoppingToken);

                    scrapeTasks.Add(task);
                }

                await Task.WhenAll(scrapeTasks);
            }

            // Wait 5 seconds.
            await Task.Delay(5000, stoppingToken);
        }
        catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
        {
            // Graceful shutdown. OperationCanceledException is the base type of
            // TaskCanceledException, so both cancellation flavours end the loop;
            // cancellations unrelated to stoppingToken still propagate.
            break;
        }
    }
}