/// <summary>
/// Returns the <see cref="HttpClientItem"/> associated with <paramref name="hashCode"/>,
/// creating one when none is available yet.
/// </summary>
/// <param name="hashCode">Pool key. When null, the shared default client is returned (and lazily created).</param>
/// <param name="cookies">Cookies handed to <c>CreateDefaultHttpClient</c> whenever a new client has to be built.</param>
/// <returns>The default client, the pooled client stored under <paramref name="hashCode"/>, or a newly created client.</returns>
/// <remarks>
/// NOTE(review): <paramref name="cookies"/> defaults to null but is dereferenced every time a client is created,
/// so callers must pass a non-null value on those paths - confirm whether null should be supported.
/// </remarks>
public HttpClientItem GetHttpClient(int? hashCode = null, Cookies cookies = null)
{
    if (hashCode == null)
    {
        if (_defaultHttpClientItem == null)
        {
            _defaultHttpClientItem = CreateDefaultHttpClient(cookies.GetCookies());
        }

        return _defaultHttpClientItem;
    }

    // Every 100th keyed lookup invokes ClearHttpClient() (presumably pool housekeeping - defined elsewhere).
    _getHttpClientCount++;
    if (_getHttpClientCount % 100 == 0)
    {
        ClearHttpClient();
    }

    var key = hashCode.Value;

    // Single lookup instead of ContainsKey + indexer: the entry cannot vanish between the
    // existence check and the read, which would otherwise surface as a KeyNotFoundException.
    HttpClientItem pooled;
    if (_pool.TryGetValue(key, out pooled))
    {
        pooled.LastUsedTime = DateTime.Now;
        return pooled;
    }

    // No pooled client for this key yet: build one and try to register it.
    var item = CreateDefaultHttpClient(cookies.GetCookies());
    _pool.TryAdd(key, item);
    return item;
}
/// <summary>
/// Returns the <see cref="HttpClientItem"/> associated with <paramref name="hashCode"/>,
/// creating one backed by a cookie container produced by <c>GenerateNewCookieContainer</c> when needed.
/// </summary>
/// <param name="spider">Spider forwarded to <c>GenerateNewCookieContainer</c>.</param>
/// <param name="downloader">Downloader forwarded to <c>GenerateNewCookieContainer</c>; must not be null.</param>
/// <param name="cookieContainer">Source cookie container; must not be null.</param>
/// <param name="hashCode">Pool key. When null, the shared default client is returned (and lazily created).</param>
/// <param name="cookieInjector">Optional injector forwarded to <c>GenerateNewCookieContainer</c>.</param>
/// <returns>The default client, the pooled client stored under <paramref name="hashCode"/>, or a newly created client.</returns>
/// <exception cref="SpiderException">
/// Thrown when <paramref name="cookieContainer"/> or <paramref name="downloader"/> is null.
/// </exception>
public HttpClientItem GetHttpClient(ISpider spider, IDownloader downloader, CookieContainer cookieContainer, int? hashCode = null, ICookieInjector cookieInjector = null)
{
    if (cookieContainer == null)
    {
        throw new SpiderException($"{nameof(cookieContainer)} should not be null.");
    }

    if (downloader == null)
    {
        throw new SpiderException($"{nameof(downloader)} should not be null.");
    }

    // Built up front on every call, even if an existing client ends up being returned.
    var newCookieContainer = GenerateNewCookieContainer(spider, downloader, cookieContainer, cookieInjector);

    if (hashCode == null)
    {
        if (_defaultHttpClientItem == null)
        {
            _defaultHttpClientItem = CreateDefaultHttpClient(newCookieContainer);
        }

        return _defaultHttpClientItem;
    }

    // Every 100th keyed lookup invokes ClearHttpClient() (presumably pool housekeeping - defined elsewhere).
    _getHttpClientCount++;
    if (_getHttpClientCount % 100 == 0)
    {
        ClearHttpClient();
    }

    var key = hashCode.Value;

    // Single lookup instead of ContainsKey + indexer: the entry cannot vanish between the
    // existence check and the read, which would otherwise surface as a KeyNotFoundException.
    HttpClientItem pooled;
    if (_pool.TryGetValue(key, out pooled))
    {
        pooled.LastUsedTime = DateTime.Now;
        return pooled;
    }

    // No pooled client for this key yet: build one and try to register it.
    var item = CreateDefaultHttpClient(newCookieContainer);
    _pool.TryAdd(key, item);
    return item;
}
/// <summary>
/// HTTP download implementation: sends the request through a pooled HTTP client and
/// converts the response into a <see cref="Page"/>.
/// </summary>
/// <param name="request">Request information.</param>
/// <param name="spider">The spider that issued the request.</param>
/// <returns>
/// Page data. On failure the returned page carries the exception; it can be null when
/// <c>Spider.AddToCycleRetry</c> returns null.
/// </returns>
protected override Page DowloadContent(Request request, ISpider spider)
{
    HttpResponseMessage response = null;
    try
    {
        var httpMessage = GenerateHttpRequestMessage(request, spider.Site);

        HttpClientItem httpClientItem;
        if (spider.Site.HttpProxyPool == null)
        {
            httpClientItem = HttpClientPool.GetHttpClient(request.DownloaderGroup, spider.Cookies);
        }
        else
        {
            // TODO: in proxy mode, request.DownloaderGroup still needs to be taken into account.
            var proxy = spider.Site.HttpProxyPool.GetProxy();
            request.Proxy = proxy;
            httpClientItem = HttpClientPool.GetHttpClient(proxy?.GetHashCode(), spider.Cookies);

            // Only write the proxy when the handler has none and there is one to set.
            // NOTE(review): if Handler is a System.Net.Http.HttpClientHandler, its setters throw
            // once requests have started, so a reused pooled client must not be re-assigned - confirm.
            if (httpClientItem.Handler.Proxy == null && proxy != null)
            {
                httpClientItem.Handler.Proxy = proxy;
            }
        }

        // Touch the timeout only when it actually differs from the configured value.
        if (httpClientItem.Client.Timeout.TotalSeconds != _timeout)
        {
            httpClientItem.Client.Timeout = new TimeSpan(0, 0, _timeout);
        }

        // GetAwaiter().GetResult() rethrows the original exception instead of wrapping it in an
        // AggregateException (as .Result does), so the typed catch blocks below can match send failures.
        response = NetworkCenter.Current.Execute(
            "http",
            () => httpClientItem.Client.SendAsync(httpMessage).GetAwaiter().GetResult());
        request.StatusCode = response.StatusCode;
        response.EnsureSuccessStatusCode();

        Page page;
        var contentType = response.Content.Headers.ContentType;
        if (contentType != null && !ExcludeMediaTypes.Contains(contentType.MediaType))
        {
            // Media type is not listed in ExcludeMediaTypes: handle the response as a file.
            if (!spider.Site.DownloadFiles)
            {
                Logger.AllLog(spider.Identity, $"Ignore: {request.Url} because media type is not allowed to download.", LogLevel.Warn);
                return new Page(request) { Skip = true };
            }

            page = SaveFile(request, response, spider);
        }
        else
        {
            page = HandleResponse(request, response, spider.Site);
            if (string.IsNullOrEmpty(page.Content))
            {
                Logger.AllLog(spider.Identity, $"Content is empty: {request.Url}.", LogLevel.Warn);
            }
        }

        // Record the URI of the request message attached to the response.
        page.TargetUrl = response.RequestMessage.RequestUri.AbsoluteUri;
        return page;
    }
    catch (DownloadException de)
    {
        // Re-queue through the cycle-retry mechanism when it is enabled for this site.
        Page page = spider.Site.CycleRetryTimes > 0
            ? Spider.AddToCycleRetry(request, spider.Site)
            : new Page(request);
        if (page != null)
        {
            page.Exception = de;
        }

        Logger.AllLog(spider.Identity, $"Download {request.Url} failed: {de.Message}", LogLevel.Warn);
        return page;
    }
    catch (HttpRequestException he)
    {
        // Re-queue through the cycle-retry mechanism when it is enabled for this site.
        Page page = spider.Site.CycleRetryTimes > 0
            ? Spider.AddToCycleRetry(request, spider.Site)
            : new Page(request);
        if (page != null)
        {
            page.Exception = he;
        }

        Logger.AllLog(spider.Identity, $"Download {request.Url} failed: {he.Message}.", LogLevel.Warn);
        return page;
    }
    catch (Exception e)
    {
        // Any other failure: the page is marked as skipped and the error is logged.
        Page page = new Page(request) { Exception = e, Skip = true };
        Logger.AllLog(spider.Identity, $"Download {request.Url} failed: {e.Message}.", LogLevel.Error, e);
        return page;
    }
    finally
    {
        try
        {
            response?.Dispose();
        }
        catch (Exception e)
        {
            // A failure while releasing the response is logged and not propagated.
            Logger.AllLog(spider.Identity, $"Close response fail: {e}", LogLevel.Error, e);
        }
    }
}