/// <summary>
        /// Gets a <see cref="HttpClientObject"/> from the pool, creating and caching a new one
        /// when no entry exists yet for <paramref name="hash"/>.
        /// Calls that pass the same <paramref name="hash"/> receive the same instance, so the
        /// requests of one group go through the same handler and cookie container.
        /// </summary>
        /// <remarks>
        /// Design intent: some sites check part of the cookie for continuity between consecutive
        /// requests, so e.g. www.a.com/keyword=xxxx&amp;page=1 and www.a.com/keyword=xxxx&amp;page=2
        /// must be fetched through the same client.
        /// Every 100th call also invokes <c>CleanupPool()</c> before the lookup.
        /// </remarks>
        /// <param name="hash">Key identifying the group; null or whitespace is treated as the empty string.</param>
        /// <param name="allowAutoRedirect">
        /// Redirect setting forwarded to the <see cref="HttpClientObject"/> constructor. Only used when a new
        /// client is created; the underlying handler itself is always created with automatic redirects disabled.
        /// </param>
        /// <param name="proxy">Proxy assigned to the handler of a newly created client; not applied to an already pooled client.</param>
        /// <returns>The pooled or newly created <see cref="HttpClientObject"/>.</returns>
        private HttpClientObject GetHttpClient(string hash, bool allowAutoRedirect, IWebProxy proxy)
        {
            if (string.IsNullOrWhiteSpace(hash))
            {
                hash = string.Empty;
            }

            // Use the value returned by Interlocked.Increment: re-reading the field afterwards is a
            // separate, non-atomic read, so concurrent callers could skip the 100th call or see it twice.
            var callCount = Interlocked.Increment(ref _getHttpClientCount);
            if (callCount % 100 == 0)
            {
                CleanupPool();
            }

            // NOTE(review): _pool is read and written here without any visible synchronization;
            // confirm that callers serialize access or that the pool type is safe for concurrent use.
            HttpClientObject pooled;
            if (_pool.TryGetValue(hash, out pooled))
            {
                pooled.LastUseTime = DateTime.Now;
                return pooled;
            }

            var handler = new HttpClientHandler
            {
                AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip,
                UseProxy = true,
                UseCookies = true,
                // Redirects are disabled on the handler; allowAutoRedirect is handed to HttpClientObject instead.
                AllowAutoRedirect = false,
                MaxAutomaticRedirections = 10,
                Proxy = proxy,
                CookieContainer = CopyCookieContainer()
            };
            var item = new HttpClientObject(handler, allowAutoRedirect);
            _pool.Add(hash, item);
            return item;
        }
        // Example no. 2
        // 0
        /// <summary>
        /// Sends <paramref name="request"/> over HTTP and converts the outcome into a <see cref="Response"/>.
        /// </summary>
        /// <remarks>
        /// When <c>IfFileExists</c> reports true the method logs and returns a response that only carries the request.
        /// Otherwise the proxy comes from <c>FiddlerProxy</c> (when <c>UseFiddlerProxy</c> is set) or from
        /// <c>HttpProxyPool.Instance</c> (when a pool exists); with neither, the "DEFAULT" pooled client is used
        /// without an explicit proxy.
        /// If the response media type matches one of <c>request.Site.ExcludeMediaTypes</c> the body is decoded as
        /// text into <c>Response.Content</c>; otherwise it is treated as a file, which is stored only when
        /// <c>request.Site.DownloadFiles</c> is set.
        /// </remarks>
        /// <param name="request">The request to download.</param>
        /// <returns>The response for <paramref name="request"/>.</returns>
        /// <exception cref="DownloaderException">
        /// The Fiddler proxy is missing, the proxy pool returned no proxy, or any other failure occurred while downloading.
        /// </exception>
        /// <exception cref="BypassedDownloaderException">Disposing the HTTP response failed.</exception>
        protected override Response DowloadContent(Request request)
        {
            Response response = new Response();
            response.Request = request;

            if (IfFileExists(request))
            {
                Logger.Information($"File {request.Url} already exists.");
                return response;
            }

            var httpRequestMessage = GenerateHttpRequestMessage(request);
            HttpResponseMessage httpResponseMessage = null;
            WebProxy proxy = null;

            try
            {
                if (UseFiddlerProxy)
                {
                    if (FiddlerProxy == null)
                    {
                        throw new DownloaderException("Fiddler proxy is null.");
                    }

                    proxy = FiddlerProxy;
                }
                else if (HttpProxyPool.Instance != null)
                {
                    proxy = HttpProxyPool.Instance.GetProxy();
                    if (proxy == null)
                    {
                        throw new DownloaderException("No avaliable proxy.");
                    }
                }

                // Single pool lookup: without a proxy the "DEFAULT" client is used, otherwise one client per proxy address.
                // The client is kept in a local so the send below cannot pick up a client that a concurrent
                // download stored in the shared field in the meantime; the field is still updated as before.
                var clientObject = GetHttpClient(proxy == null ? "DEFAULT" : proxy.Address.ToString(), AllowAutoRedirect, proxy);
                _clientObject = clientObject;

                httpResponseMessage =
                    NetworkCenter.Current.Execute("downloader", () => Task.Run(async () =>
                    {
                        return await clientObject.Client.SendAsync(httpRequestMessage);
                    })
                    .GetAwaiter()
                    .GetResult());

                response.StatusCode = httpResponseMessage.StatusCode;
                EnsureSuccessStatusCode(response.StatusCode);
                response.TargetUrl = httpResponseMessage.RequestMessage.RequestUri.AbsoluteUri;

                // GetAwaiter().GetResult() surfaces the original exception instead of an AggregateException wrapper.
                var bytes = httpResponseMessage.Content.ReadAsByteArrayAsync().GetAwaiter().GetResult();
                var contentHeaders = httpResponseMessage.Content.Headers;

                // NOTE(review): ContentType is null when the server sends no Content-Type header; the dereference
                // below then throws and is reported as a DownloaderException by the generic catch. Confirm whether
                // such responses should be handled as text or as files instead.
                bool isExcludedMediaType = request.Site.ExcludeMediaTypes.Any(t => contentHeaders.ContentType.MediaType.Contains(t));
                if (isExcludedMediaType)
                {
                    string content = ReadContent(bytes, contentHeaders.ContentType.CharSet, request.Site);

                    if (_decodeHtml)
                    {
#if NETFRAMEWORK
                        content =
                            System.Web.HttpUtility.UrlDecode(System.Web.HttpUtility.HtmlDecode(content), string.IsNullOrEmpty(request.Site.EncodingName) ? Encoding.Default : Encoding.GetEncoding(request.Site.EncodingName));
#else
                        content = WebUtility.UrlDecode(WebUtility.HtmlDecode(content));
#endif
                    }

                    response.Content = content;

                    DetectContentType(response, contentHeaders.ContentType.MediaType);
                }
                else if (!request.Site.DownloadFiles)
                {
                    Logger.Warning($"Ignore {request.Url} because media type is not allowed to download.");
                }
                else
                {
                    StorageFile(request, bytes);
                }
            }
            catch (DownloaderException)
            {
                throw;
            }
            catch (Exception e)
            {
                throw new DownloaderException($"Unexpected exception when download request: {request.Url}: {e}.");
            }
            finally
            {
                // Hand the proxy back together with the observed status; no response at all is reported as ServiceUnavailable.
                if (HttpProxyPool.Instance != null && proxy != null)
                {
                    HttpProxyPool.Instance.ReturnProxy(proxy, httpResponseMessage == null ? HttpStatusCode.ServiceUnavailable : httpResponseMessage.StatusCode);
                }

                try
                {
                    httpResponseMessage?.Dispose();
                }
                catch (Exception e)
                {
                    throw new BypassedDownloaderException($"Close response {request.Url} failed: {e.Message}");
                }
            }

            return response;
        }