Exemple #1
0
        /// <summary>
        /// Sends <paramref name="request"/> over HTTP and fills a response object,
        /// making up to <c>RetryTime</c> attempts.
        /// </summary>
        /// <remarks>
        /// Each attempt optionally takes a proxy from <c>HttpProxyPool</c>, sends the request
        /// (through <c>Framework.NetworkCenter</c> when one is configured) and either hands the
        /// body to <c>StorageFile</c> or decodes it into <c>RawText</c>. When the text matches
        /// <c>request.ChangeIpPattern</c> the attempt is treated as an IP ban: the proxy is
        /// dropped, or the network center is asked to redial, and the request is retried.
        /// Exceptions are not propagated; they are logged and recorded in <c>Exception</c>
        /// on the returned response.
        /// </remarks>
        /// <param name="request">The request to download.</param>
        /// <returns>
        /// The response for <paramref name="request"/>; <c>Success</c> is true only when an
        /// attempt completed without an exception and without matching the ban pattern.
        /// </returns>
        protected override async Task <Response> ImplDownloadAsync(Request request)
        {
            var response = new Response
            {
                Request = request
            };

            for (int i = 0; i < RetryTime; ++i)
            {
                HttpResponseMessage httpResponseMessage = null;
                WebProxy            proxy = null;
                try
                {
                    var httpRequestMessage = GenerateHttpRequestMessage(request);

                    if (UseProxy)
                    {
                        // Proxy misconfiguration is not retried: fail the whole download at once.
                        if (HttpProxyPool == null)
                        {
                            response.Exception = "HttpProxyPool is null";
                            response.Success   = false;
                            Logger?.LogError(
                                $"{request.OwnerId} download {request.Url} failed [{i}]: {response.Exception}");
                            return response;
                        }

                        proxy = HttpProxyPool.GetProxy();
                        if (proxy == null)
                        {
                            response.Exception = "There is no available proxy";
                            response.Success   = false;
                            Logger?.LogError(
                                $"{request.OwnerId} download {request.Url} failed [{i}]: {response.Exception}");
                            return response;
                        }
                    }

                    // Clients are looked up by proxy address, or "DEFAULT" when no proxy is used.
                    var httpClientEntry = GetHttpClientEntry(proxy == null ? "DEFAULT" : $"{proxy.Address}", proxy);

                    httpResponseMessage = Framework.NetworkCenter == null
                                                ? await httpClientEntry.HttpClient.SendAsync(httpRequestMessage)
                                                : await Framework.NetworkCenter.Execute(async() =>
                                                                                        await httpClientEntry.HttpClient.SendAsync(httpRequestMessage));

                    httpResponseMessage.EnsureSuccessStatusCode();
                    response.TargetUrl = httpResponseMessage.RequestMessage.RequestUri.AbsoluteUri;

                    // Awaited instead of blocking on .Result so the calling thread is released.
                    var bytes = await httpResponseMessage.Content.ReadAsByteArrayAsync();
                    if (!ExcludeMediaTypes.Any(t =>
                                               httpResponseMessage.Content.Headers.ContentType.MediaType.Contains(t)))
                    {
                        // NOTE(review): the body is stored only when DownloadFile is false, which
                        // looks inverted -- confirm the intended meaning of DownloadFile.
                        if (!DownloadFile)
                        {
                            StorageFile(request, bytes);
                        }
                    }
                    else
                    {
                        var content = ReadContent(request, bytes,
                                                  httpResponseMessage.Content.Headers.ContentType.CharSet);

                        if (DecodeHtml)
                        {
#if NETFRAMEWORK
                            content = System.Web.HttpUtility.UrlDecode(
                                System.Web.HttpUtility.HtmlDecode(content),
                                string.IsNullOrEmpty(request.Encoding)
                                    ? Encoding.UTF8
                                    : Encoding.GetEncoding(request.Encoding));
#else
                            content = WebUtility.UrlDecode(WebUtility.HtmlDecode(content));
#endif
                        }

                        response.RawText = content;
                    }

                    // RawText stays null when the body took the file branch above. Regex.IsMatch
                    // throws ArgumentNullException on null input, which would turn a successful
                    // download into a failed, retried one, so only text responses are checked.
                    var ipBanned = !string.IsNullOrWhiteSpace(request.ChangeIpPattern) &&
                                   response.RawText != null &&
                                   Regex.IsMatch(response.RawText, request.ChangeIpPattern);

                    if (!ipBanned)
                    {
                        response.Success = true;
                        Logger?.LogInformation(
                            $"{request.OwnerId} download {request.Url} success");
                        return response;
                    }

                    if (UseProxy)
                    {
                        response.TargetUrl = null;
                        response.RawText   = null;
                        response.Success   = false;
                        // Clearing the proxy makes the finally block skip ReturnProxy,
                        // which effectively removes this proxy from the pool.
                        proxy = null;
                    }
                    else if (Framework.NetworkCenter == null ||
                             !Framework.NetworkCenter.SupportAdsl)
                    {
                        // No way to switch IP: give up instead of retrying.
                        response.Success   = false;
                        response.Exception = "IP Banded";
                        Logger?.LogError(
                            $"{request.OwnerId} download {request.Url} failed [{i}]: {response.Exception}");
                        return response;
                    }
                    else
                    {
                        Framework.NetworkCenter.Redial();
                    }
                }
                catch (Exception e)
                {
                    response.Exception = e.Message;
                    response.Success   = false;
                    Logger?.LogError($"{request.OwnerId} download {request.Url} failed [{i}]: {e}");
                }
                finally
                {
                    // Hand the proxy back together with the observed status; when no response
                    // was received, ServiceUnavailable is reported instead.
                    if (HttpProxyPool != null && proxy != null)
                    {
                        HttpProxyPool.ReturnProxy(proxy,
                                                  httpResponseMessage?.StatusCode ?? HttpStatusCode.ServiceUnavailable);
                    }

                    try
                    {
                        httpResponseMessage?.Dispose();
                    }
                    catch (Exception e)
                    {
                        Logger?.LogWarning($"{request.OwnerId} dispose response {request.Url} failed [{i}]: {e}");
                    }
                }

                // Wait one second after a failed attempt to keep the request rate down.
                // TODO: make the delay configurable.
                // Task.Delay instead of Thread.Sleep so the thread is not blocked while waiting.
                await Task.Delay(1000);
            }

            return response;
        }
Exemple #2
0
        /// <summary>
        /// Sends <paramref name="request"/> over HTTP once and fills a response object.
        /// </summary>
        /// <remarks>
        /// When <c>UseProxy</c> is set, a proxy is taken from <c>HttpProxyPool</c> and returned
        /// to it afterwards together with the observed status code. Exceptions (including the
        /// <c>SpiderException</c> thrown for proxy misconfiguration) are not propagated; they
        /// are logged and recorded in <c>Exception</c> on the returned response.
        /// </remarks>
        /// <param name="request">The request to download.</param>
        /// <returns>
        /// The response for <paramref name="request"/>; <c>Success</c> is true when the
        /// download completed without an exception.
        /// </returns>
        protected override async Task <Response> ImplDownloadAsync(Request request)
        {
            var response = new Response
            {
                Request = request
            };

            HttpResponseMessage httpResponseMessage = null;
            WebProxy            proxy = null;

            try
            {
                var httpRequestMessage = GenerateHttpRequestMessage(request);

                if (UseProxy)
                {
                    if (HttpProxyPool == null)
                    {
                        throw new SpiderException("未正确配置代理池");
                    }

                    proxy = HttpProxyPool.GetProxy();
                    if (proxy == null)
                    {
                        throw new SpiderException("没有可用的代理");
                    }
                }

                // Clients are looked up by proxy address, or "DEFAULT" when no proxy is used.
                var httpClientEntry = GetHttpClientEntry(proxy == null ? "DEFAULT" : $"{proxy.Address}", proxy);
                httpResponseMessage = await httpClientEntry.HttpClient.SendAsync(httpRequestMessage);

                httpResponseMessage.EnsureSuccessStatusCode();
                response.TargetUrl = httpResponseMessage.RequestMessage.RequestUri.AbsoluteUri;

                // Awaited instead of blocking on .Result so the calling thread is released.
                var bytes = await httpResponseMessage.Content.ReadAsByteArrayAsync();
                if (!ExcludeMediaTypes.Any(t => httpResponseMessage.Content.Headers.ContentType.MediaType.Contains(t)))
                {
                    // NOTE(review): the body is stored only when DownloadFile is false, which
                    // looks inverted -- confirm the intended meaning of DownloadFile.
                    if (!DownloadFile)
                    {
                        StorageFile(request, bytes);
                    }
                }
                else
                {
                    var content = ReadContent(request, bytes,
                                              httpResponseMessage.Content.Headers.ContentType.CharSet);

                    if (DecodeHtml)
                    {
#if NETFRAMEWORK
                        content =
                            System.Web.HttpUtility.UrlDecode(System.Web.HttpUtility.HtmlDecode(content.ToString()), string.IsNullOrEmpty(request.EncodingName) ? Encoding.UTF8 : Encoding.GetEncoding(request.EncodingName));
#else
                        content = WebUtility.UrlDecode(WebUtility.HtmlDecode(content));
#endif
                    }

                    response.RawText = content;
                }

                response.Success = true;
            }
            catch (Exception e)
            {
                response.Exception = e.Message;
                response.Success   = false;
                Logger.LogError($"任务 {request.OwnerId} 下载 {request.Url} 失败: {e}");
            }
            finally
            {
                // Hand the proxy back together with the observed status; when no response
                // was received, ServiceUnavailable is reported instead.
                if (HttpProxyPool != null && proxy != null)
                {
                    HttpProxyPool.ReturnProxy(proxy,
                                              httpResponseMessage?.StatusCode ?? HttpStatusCode.ServiceUnavailable);
                }

                try
                {
                    httpResponseMessage?.Dispose();
                }
                catch (Exception e)
                {
                    Logger.LogWarning($"任务 {request.OwnerId} 释放 {request.Url} 失败: {e}");
                }
            }

            return response;
        }
Exemple #3
0
        /// <summary>
        /// Sends the request over HTTP and fills a response object, making up to
        /// <c>RetryTime</c> attempts.
        /// </summary>
        /// <remarks>
        /// Each attempt optionally takes a proxy from <c>HttpProxyPool</c>, sends the request
        /// and either hands the body to <c>StorageFile</c> or decodes it into <c>RawText</c>.
        /// When the text matches <c>request.ChangeIpPattern</c> the attempt is not counted as
        /// a success and the next attempt is made. Exceptions are not propagated; they are
        /// logged and recorded in <c>Exception</c> on the returned response.
        /// </remarks>
        /// <param name="request">The request to download.</param>
        /// <returns>
        /// The response for <paramref name="request"/>; <c>Success</c> is set to true only when
        /// an attempt completed without an exception and without matching the ban pattern.
        /// </returns>
        protected override async Task <Response> ImplDownloadAsync(Request request)
        {
            var response = new Response
            {
                Request = request
            };

            for (int i = 0; i < RetryTime; ++i)
            {
                HttpResponseMessage httpResponseMessage = null;
                WebProxy            proxy = null;
                try
                {
                    var httpRequestMessage = GenerateHttpRequestMessage(request);

                    if (UseProxy)
                    {
                        // Proxy misconfiguration is not retried: fail the whole download at once.
                        if (HttpProxyPool == null)
                        {
                            response.Exception = "未正确配置代理池";
                            response.Success   = false;
                            Logger?.LogError(
                                $"HttpClientDownloaded 任务 {request.OwnerId} 下载 {request.Url} 失败 [{i}]: {response.Exception}");
                            return response;
                        }

                        proxy = HttpProxyPool.GetProxy();
                        if (proxy == null)
                        {
                            response.Exception = "没有可用的代理";
                            response.Success   = false;
                            Logger?.LogError(
                                $"HttpClientDownloaded 任务 {request.OwnerId} 下载 {request.Url} 失败 [{i}]: {response.Exception}");
                            return response;
                        }
                    }

                    // Clients are looked up by proxy address, or "DEFAULT" when no proxy is used.
                    var httpClientEntry = GetHttpClientEntry(proxy == null ? "DEFAULT" : $"{proxy.Address}", proxy);

                    // A send failure is deliberately left to the outer catch. Swallowing it here
                    // left httpResponseMessage null, so the next line failed with a
                    // NullReferenceException that hid the real network error.
                    httpResponseMessage = await httpClientEntry.HttpClient.SendAsync(httpRequestMessage);

                    httpResponseMessage.EnsureSuccessStatusCode();

                    response.TargetUrl = httpResponseMessage.RequestMessage.RequestUri.AbsoluteUri;

                    // Awaited instead of blocking on .Result so the calling thread is released.
                    var bytes = await httpResponseMessage.Content.ReadAsByteArrayAsync();
                    if (!ExcludeMediaTypes.Any(t => httpResponseMessage.Content.Headers.ContentType.MediaType.Contains(t)))
                    {
                        // Store the body as a file.
                        // NOTE(review): this happens only when DownloadFile is false, which
                        // looks inverted -- confirm the intended meaning of DownloadFile.
                        if (!DownloadFile)
                        {
                            StorageFile(request, bytes);
                        }
                    }
                    else
                    {
                        var content = ReadContent(request, bytes, httpResponseMessage.Content.Headers.ContentType.CharSet);

                        if (DecodeHtml)
                        {
#if NETFRAMEWORK
                            content = System.Web.HttpUtility.UrlDecode(
                                System.Web.HttpUtility.HtmlDecode(content),
                                string.IsNullOrEmpty(request.Encoding)
                                    ? Encoding.UTF8
                                    : Encoding.GetEncoding(request.Encoding));
#else
                            content = WebUtility.UrlDecode(WebUtility.HtmlDecode(content));
#endif
                        }

                        response.RawText = content;
                    }

                    // RawText stays null when the body took the file branch above. Regex.IsMatch
                    // throws ArgumentNullException on null input, which would turn a successful
                    // download into a failed, retried one, so only text responses are checked.
                    var ipBanned = !string.IsNullOrWhiteSpace(request.ChangeIpPattern) &&
                                   response.RawText != null &&
                                   Regex.IsMatch(response.RawText, request.ChangeIpPattern);

                    if (!ipBanned)
                    {
                        response.Success = true;
                        Logger?.LogInformation(
                            $"任务 {request.OwnerId} 下载 {request.Url} 成功");
                        return response;
                    }

                    // Without a proxy nothing is changed here; the loop simply makes the next attempt.
                    if (UseProxy)
                    {
                        response.TargetUrl = null;
                        response.RawText   = null;
                        response.Success   = false;
                        // Clearing the proxy makes the finally block skip ReturnProxy,
                        // which effectively removes this proxy from the pool.
                        proxy = null;
                    }
                }
                catch (Exception e)
                {
                    response.Exception = e.Message;
                    response.Success   = false;
                    Logger?.LogError($"任务 {request.OwnerId} 下载 {request.Url} 失败 [{i}]: {e}");
                }
                finally
                {
                    // Hand the proxy back together with the observed status; when no response
                    // was received, ServiceUnavailable is reported instead.
                    if (HttpProxyPool != null && proxy != null)
                    {
                        HttpProxyPool.ReturnProxy(proxy,
                                                  httpResponseMessage?.StatusCode ?? HttpStatusCode.ServiceUnavailable);
                    }

                    try
                    {
                        httpResponseMessage?.Dispose();
                    }
                    catch (Exception e)
                    {
                        Logger?.LogWarning($"任务 {request.OwnerId} 释放 {request.Url} 失败 [{i}]: {e}");
                    }
                }
            }

            return response;
        }
        /// <summary>
        /// Synchronously downloads <paramref name="request"/> with an <c>HttpClient</c> and
        /// returns the populated response.
        /// </summary>
        /// <remarks>
        /// The request goes through the Fiddler proxy when <c>UseFiddlerProxy</c> is set,
        /// otherwise through a proxy taken from <c>HttpProxyPool.Instance</c> when a pool
        /// exists, otherwise directly. Non-excluded media types are handed to
        /// <c>StorageFile</c> (when <c>DownloadFiles</c> is set); everything else is decoded
        /// into <c>Content</c>.
        /// </remarks>
        /// <param name="request">The request to download.</param>
        /// <returns>The response for <paramref name="request"/>.</returns>
        /// <exception cref="DownloaderException">
        /// The proxy is unavailable, or any other failure occurred while downloading.
        /// </exception>
        /// <exception cref="BypassedDownloaderException">Disposing the HTTP response failed.</exception>
        protected override Response DowloadContent(Request request)
        {
            var response = new Response(request);

            if (IfFileExists(request))
            {
                Logger?.LogInformation($"File {request.Url} already exists.");
                return response;
            }

            var httpRequestMessage = GenerateHttpRequestMessage(request);
            HttpResponseMessage httpResponseMessage = null;
            WebProxy            proxy = null;
            // Only a proxy that was taken from the pool may be handed back to it; the Fiddler
            // proxy never belonged to the pool and must not be returned there.
            var proxyFromPool = false;

            try
            {
                if (UseFiddlerProxy)
                {
                    if (FiddlerProxy == null)
                    {
                        throw new DownloaderException("Fiddler proxy is null.");
                    }

                    proxy = FiddlerProxy;
                }
                else if (HttpProxyPool.Instance != null)
                {
                    proxy = HttpProxyPool.Instance.GetProxy();
                    if (proxy == null)
                    {
                        throw new DownloaderException("No available proxy.");
                    }

                    proxyFromPool = true;
                }

                // A single lookup covers both cases: keyed by proxy address, or "DEFAULT"
                // when no proxy is in use.
                _clientObject = GetHttpClient(proxy == null ? "DEFAULT" : $"{proxy.Address}",
                                              AllowAutoRedirect, proxy);

                httpResponseMessage =
                    NetworkCenter.Current.Execute("downloader", () => Task
                                                  .Run(async() => await _clientObject.Client.SendAsync(httpRequestMessage))
                                                  .GetAwaiter()
                                                  .GetResult());

                response.StatusCode = httpResponseMessage.StatusCode;
                EnsureSuccessStatusCode(response.StatusCode);
                response.TargetUrl = httpResponseMessage.RequestMessage.RequestUri.AbsoluteUri;

                // GetAwaiter().GetResult() rethrows the original exception, whereas .Result
                // would wrap it in an AggregateException.
                var bytes = httpResponseMessage.Content.ReadAsByteArrayAsync().GetAwaiter().GetResult();
                if (!ExcludeMediaTypes.Any(t => httpResponseMessage.Content.Headers.ContentType.MediaType.Contains(t)))
                {
                    if (!DownloadFiles)
                    {
                        Logger?.LogWarning($"Ignore {request.Url} because media type is not allowed to download.");
                    }
                    else
                    {
                        StorageFile(request, bytes);
                    }
                }
                else
                {
                    var content = ReadContent(request, bytes,
                                              httpResponseMessage.Content.Headers.ContentType.CharSet);

                    if (_decodeHtml && content is string)
                    {
#if NETFRAMEWORK
                        content =
                            System.Web.HttpUtility.UrlDecode(System.Web.HttpUtility.HtmlDecode(content.ToString()), string.IsNullOrEmpty(request.EncodingName) ? Encoding.UTF8 : Encoding.GetEncoding(request.EncodingName));
#else
                        content = WebUtility.UrlDecode(WebUtility.HtmlDecode(content.ToString()));
#endif
                    }

                    response.Content = content;

                    DetectContentType(response, httpResponseMessage.Content.Headers.ContentType.MediaType);
                }
            }
            catch (DownloaderException)
            {
                throw;
            }
            catch (Exception e)
            {
                throw new DownloaderException($"Unexpected exception when download request: {request.Url}: {e}.");
            }
            finally
            {
                // Hand the pooled proxy back together with the observed status; when no
                // response was received, ServiceUnavailable is reported instead.
                if (proxyFromPool && HttpProxyPool.Instance != null)
                {
                    HttpProxyPool.Instance.ReturnProxy(proxy,
                                                       httpResponseMessage == null
                                                        ? HttpStatusCode.ServiceUnavailable
                                                        : httpResponseMessage.StatusCode);
                }

                try
                {
                    httpResponseMessage?.Dispose();
                }
                catch (Exception e)
                {
                    throw new BypassedDownloaderException($"Close response {request.Url} failed: {e.Message}");
                }
            }

            return response;
        }
        /// <summary>
        /// Synchronously downloads <paramref name="request"/> with an <c>HttpWebRequest</c>
        /// and returns the populated response.
        /// </summary>
        /// <remarks>
        /// Non-excluded media types are handed to <c>StorageFile</c> (when
        /// <c>DownloadFiles</c> is set); everything else is decoded into <c>Content</c>.
        /// A proxy attached to the web request is returned to <c>HttpProxyPool.Instance</c>
        /// afterwards unless it is the Fiddler proxy.
        /// </remarks>
        /// <param name="request">The request to download.</param>
        /// <returns>The response for <paramref name="request"/>.</returns>
        /// <exception cref="DownloaderException">Any failure occurred while downloading.</exception>
        /// <exception cref="BypassedDownloaderException">Closing the HTTP response failed.</exception>
        protected override Response DowloadContent(Request request)
        {
            Response response = new Response();

            response.Request = request;

            if (IfFileExists(request))
            {
                Logger.Information($"File {request.Url} already exists.");
                return response;
            }

            HttpWebRequest  httpWebRequest  = null;
            HttpWebResponse httpWebResponse = null;

            try
            {
                httpWebRequest  = GenerateHttpWebRequest(request);
                httpWebResponse =
                    NetworkCenter.Current.Execute("downloader", () => (HttpWebResponse)httpWebRequest.GetResponse());
                response.StatusCode = httpWebResponse.StatusCode;
                EnsureSuccessStatusCode(response.StatusCode);
                response.TargetUrl = httpWebResponse.ResponseUri.ToString();
                var bytes = ReadResponseStream(httpWebResponse);
                if (!ExcludeMediaTypes.Any(t => httpWebResponse.ContentType.Contains(t)))
                {
                    if (!DownloadFiles)
                    {
                        Logger.Warning($"Ignore {request.Url} because media type is not allowed to download.");
                    }
                    else
                    {
                        StorageFile(request, bytes);
                    }
                }
                else
                {
                    string content = ReadContent(request, bytes, httpWebResponse.ContentType);

                    if (_decodeHtml)
                    {
#if NETFRAMEWORK
                        content =
                            System.Web.HttpUtility.UrlDecode(System.Web.HttpUtility.HtmlDecode(content), string.IsNullOrEmpty(request.EncodingName) ? Encoding.UTF8 : Encoding.GetEncoding(request.EncodingName));
#else
                        content = WebUtility.UrlDecode(WebUtility.HtmlDecode(content));
#endif
                    }

                    response.Content = content;

                    DetectContentType(response, httpWebResponse.ContentType);
                }
            }
            catch (DownloaderException)
            {
                throw;
            }
            catch (Exception e)
            {
                throw new DownloaderException($"Unexpected exception when download request: {request.Url}: {e}.");
            }
            finally
            {
                if (httpWebRequest != null)
                {
                    // Hand the proxy back together with the observed status; when no response
                    // was received, ServiceUnavailable is reported instead. The Fiddler proxy
                    // is never returned to the pool.
                    var proxy = httpWebRequest.Proxy as WebProxy;
                    if (HttpProxyPool.Instance != null && proxy != null && httpWebRequest.Proxy != FiddlerProxy)
                    {
                        HttpProxyPool.Instance.ReturnProxy(proxy, httpWebResponse == null ? HttpStatusCode.ServiceUnavailable : httpWebResponse.StatusCode);
                    }
                    try
                    {
                        // The response is null whenever GetResponse failed. Closing it
                        // unconditionally raised a NullReferenceException here, and the
                        // resulting BypassedDownloaderException replaced the real error.
                        httpWebResponse?.Close();
                    }
                    catch (Exception e)
                    {
                        throw new BypassedDownloaderException($"Close response {request.Url} failed: {e.Message}");
                    }
                }
            }

            return response;
        }