Ejemplo n.º 1
0
        /// <summary>
        /// Returns the shared default client when <paramref name="hashCode"/> is null; otherwise
        /// returns the pooled client stored under that hash code, creating and pooling it on first use.
        /// </summary>
        /// <param name="hashCode">Pool key. Null selects the default client.</param>
        /// <param name="cookies">
        /// Cookie source passed to <c>CreateDefaultHttpClient</c>. Only read when a new client
        /// has to be created, so it may be null when the requested client already exists.
        /// </param>
        /// <returns>The default client item or the pooled item for <paramref name="hashCode"/>.</returns>
        /// <exception cref="ArgumentNullException">
        /// A new client has to be created and <paramref name="cookies"/> is null.
        /// </exception>
        public HttpClientItem GetHttpClient(int? hashCode = null, Cookies cookies = null)
        {
            if (hashCode == null)
            {
                if (_defaultHttpClientItem == null)
                {
                    // Fail with a descriptive error instead of a bare NullReferenceException.
                    if (cookies == null)
                    {
                        throw new ArgumentNullException(nameof(cookies), "Cookies are required to create a new HttpClient.");
                    }
                    _defaultHttpClientItem = CreateDefaultHttpClient(cookies.GetCookies());
                }
                return _defaultHttpClientItem;
            }

            _getHttpClientCount++;

            // Every 100th keyed request invokes ClearHttpClient() (defined elsewhere;
            // presumably evicts stale pooled clients — confirm against its implementation).
            if (_getHttpClientCount % 100 == 0)
            {
                ClearHttpClient();
            }

            int key = hashCode.Value;

            // Single lookup: ContainsKey followed by the indexer could throw
            // KeyNotFoundException if the entry were removed between the two calls.
            HttpClientItem pooled;
            if (_pool.TryGetValue(key, out pooled))
            {
                pooled.LastUsedTime = DateTime.Now;
                return pooled;
            }

            if (cookies == null)
            {
                throw new ArgumentNullException(nameof(cookies), "Cookies are required to create a new HttpClient.");
            }

            var item = CreateDefaultHttpClient(cookies.GetCookies());
            _pool.TryAdd(key, item);
            return item;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Returns the shared default client when <paramref name="hashCode"/> is null; otherwise
        /// returns the pooled client stored under that hash code, creating and pooling it on first use.
        /// </summary>
        /// <param name="spider">Spider forwarded to <c>GenerateNewCookieContainer</c>.</param>
        /// <param name="downloader">Downloader forwarded to <c>GenerateNewCookieContainer</c>; must not be null.</param>
        /// <param name="cookieContainer">Source cookie container; must not be null.</param>
        /// <param name="hashCode">Pool key. Null selects the default client.</param>
        /// <param name="cookieInjector">Optional injector forwarded to <c>GenerateNewCookieContainer</c>.</param>
        /// <returns>The default client item or the pooled item for <paramref name="hashCode"/>.</returns>
        /// <exception cref="SpiderException">
        /// <paramref name="cookieContainer"/> or <paramref name="downloader"/> is null.
        /// </exception>
        public HttpClientItem GetHttpClient(ISpider spider, IDownloader downloader, CookieContainer cookieContainer, int? hashCode = null, ICookieInjector cookieInjector = null)
        {
            if (cookieContainer == null)
            {
                throw new SpiderException($"{nameof(cookieContainer)} should not be null.");
            }
            if (downloader == null)
            {
                throw new SpiderException($"{nameof(downloader)} should not be null.");
            }

            // Built on every call, as in the original, even when an existing client is returned.
            var newCookieContainer = GenerateNewCookieContainer(spider, downloader, cookieContainer, cookieInjector);

            if (hashCode == null)
            {
                if (_defaultHttpClientItem == null)
                {
                    _defaultHttpClientItem = CreateDefaultHttpClient(newCookieContainer);
                }
                return _defaultHttpClientItem;
            }

            _getHttpClientCount++;

            // Every 100th keyed request invokes ClearHttpClient() (defined elsewhere;
            // presumably evicts stale pooled clients — confirm against its implementation).
            if (_getHttpClientCount % 100 == 0)
            {
                ClearHttpClient();
            }

            int key = hashCode.Value;

            // Single lookup: ContainsKey followed by the indexer could throw
            // KeyNotFoundException if the entry were removed between the two calls.
            HttpClientItem pooled;
            if (_pool.TryGetValue(key, out pooled))
            {
                pooled.LastUsedTime = DateTime.Now;
                return pooled;
            }

            var item = CreateDefaultHttpClient(newCookieContainer);
            _pool.TryAdd(key, item);
            return item;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// HTTP implementation of the download step: sends the request through a pooled
        /// <c>HttpClient</c> and converts the response into a <see cref="Page"/>.
        /// </summary>
        /// <param name="request">The request to download.</param>
        /// <param name="spider">The spider that owns the request; supplies site settings and cookies.</param>
        /// <returns>
        /// The downloaded page. On failure, a page carrying the exception (flagged <c>Skip</c> for
        /// unexpected errors), or whatever <c>Spider.AddToCycleRetry</c> returns (possibly null)
        /// when cycle retry is enabled for the site.
        /// </returns>
        protected override Page DowloadContent(Request request, ISpider spider)
        {
            HttpResponseMessage response = null;

            try
            {
                var httpMessage = GenerateHttpRequestMessage(request, spider.Site);

                HttpClientItem httpClientItem;
                if (spider.Site.HttpProxyPool == null)
                {
                    httpClientItem = HttpClientPool.GetHttpClient(request.DownloaderGroup, spider.Cookies);
                }
                else
                {
                    // TODO: in proxy mode, request.DownloaderGroup still needs to be taken into account.
                    var proxy = spider.Site.HttpProxyPool.GetProxy();
                    request.Proxy  = proxy;
                    httpClientItem = HttpClientPool.GetHttpClient(proxy?.GetHashCode(), spider.Cookies);

                    // Assign only when no proxy is set yet: re-assigning the existing value on a
                    // reused handler is redundant, and handlers may reject changes after first use.
                    if (httpClientItem.Handler.Proxy == null)
                    {
                        httpClientItem.Handler.Proxy = proxy;
                    }
                }

                if (httpClientItem.Client.Timeout.TotalSeconds != _timeout)
                {
                    httpClientItem.Client.Timeout = new TimeSpan(0, 0, _timeout);
                }

                // GetAwaiter().GetResult() rethrows the original exception, whereas .Result wraps
                // it in AggregateException, which would bypass the HttpRequestException handler
                // below and turn retryable network failures into skipped pages.
                response           = NetworkCenter.Current.Execute("http", () => httpClientItem.Client.SendAsync(httpMessage).GetAwaiter().GetResult());
                request.StatusCode = response.StatusCode;
                response.EnsureSuccessStatusCode();

                Page page;

                var contentType = response.Content.Headers.ContentType;
                if (contentType != null && !ExcludeMediaTypes.Contains(contentType.MediaType))
                {
                    // Media types outside ExcludeMediaTypes are treated as files.
                    if (!spider.Site.DownloadFiles)
                    {
                        Logger.AllLog(spider.Identity, $"Ignore: {request.Url} because media type is not allowed to download.", LogLevel.Warn);
                        return new Page(request)
                        {
                            Skip = true
                        };
                    }

                    page = SaveFile(request, response, spider);
                }
                else
                {
                    page = HandleResponse(request, response, spider.Site);

                    if (string.IsNullOrEmpty(page.Content))
                    {
                        Logger.AllLog(spider.Identity, $"Content is empty: {request.Url}.", LogLevel.Warn);
                    }
                }

                // Record the final URI of the response's request message.
                page.TargetUrl = response.RequestMessage.RequestUri.AbsoluteUri;

                return page;
            }
            catch (DownloadException de)
            {
                Page page = spider.Site.CycleRetryTimes > 0 ? Spider.AddToCycleRetry(request, spider.Site) : new Page(request);

                // AddToCycleRetry may return null, hence the guard.
                if (page != null)
                {
                    page.Exception = de;
                }
                Logger.AllLog(spider.Identity, $"Download {request.Url} failed: {de.Message}", LogLevel.Warn);

                return page;
            }
            catch (HttpRequestException he)
            {
                Page page = spider.Site.CycleRetryTimes > 0 ? Spider.AddToCycleRetry(request, spider.Site) : new Page(request);

                // AddToCycleRetry may return null, hence the guard.
                if (page != null)
                {
                    page.Exception = he;
                }

                Logger.AllLog(spider.Identity, $"Download {request.Url} failed: {he.Message}.", LogLevel.Warn);
                return page;
            }
            catch (Exception e)
            {
                // Unexpected failures are not retried; the page is flagged to be skipped.
                Page page = new Page(request)
                {
                    Exception = e,
                    Skip      = true
                };

                Logger.AllLog(spider.Identity, $"Download {request.Url} failed: {e.Message}.", LogLevel.Error, e);
                return page;
            }
            finally
            {
                // Always release the response; a failure while disposing is logged, not propagated.
                try
                {
                    response?.Dispose();
                }
                catch (Exception e)
                {
                    Logger.AllLog(spider.Identity, $"Close response fail: {e}", LogLevel.Error, e);
                }
            }
        }