コード例 #1
0
        /// <summary>
        /// Performs a single HTTP download of <paramref name="request"/>, optionally through a proxy
        /// taken from <c>HttpProxyPool</c>, and records the outcome on a new response object.
        /// </summary>
        /// <param name="request">The request to download.</param>
        /// <returns>
        /// The response for <paramref name="request"/>. <c>Success</c> is true when the download and
        /// post-processing completed; when any exception is raised it is logged, <c>Success</c> is set
        /// to false and <c>Exception</c> holds the exception message. Exceptions are not rethrown.
        /// </returns>
        protected override async Task<Response> ImplDownloadAsync(Request request)
        {
            var response = new Response
            {
                Request = request
            };

            HttpResponseMessage httpResponseMessage = null;
            WebProxy proxy = null;

            try
            {
                var httpRequestMessage = GenerateHttpRequestMessage(request);

                if (UseProxy)
                {
                    // These exceptions are caught by the handler below and turned into a failed response.
                    if (HttpProxyPool == null)
                    {
                        throw new SpiderException("未正确配置代理池");
                    }

                    proxy = HttpProxyPool.GetProxy();
                    if (proxy == null)
                    {
                        throw new SpiderException("没有可用的代理");
                    }
                }

                // The client entry is looked up by proxy address, or "DEFAULT" when no proxy is used.
                var httpClientEntry = GetHttpClientEntry(proxy == null ? "DEFAULT" : $"{proxy.Address}", proxy);
                httpResponseMessage = await httpClientEntry.HttpClient.SendAsync(httpRequestMessage);

                httpResponseMessage.EnsureSuccessStatusCode();
                response.TargetUrl = httpResponseMessage.RequestMessage.RequestUri.AbsoluteUri;

                // Await the body instead of blocking on .Result: no thread is tied up while the content
                // is read, and failures surface as the real exception rather than an AggregateException.
                var bytes = await httpResponseMessage.Content.ReadAsByteArrayAsync();

                // NOTE(review): ContentType is dereferenced without a null check; a response without a
                // Content-Type header ends up in the catch block below as a failed download.
                if (!ExcludeMediaTypes.Any(t => httpResponseMessage.Content.Headers.ContentType.MediaType.Contains(t)))
                {
                    // The media type matches none of the ExcludeMediaTypes entries.
                    // NOTE(review): storing only when DownloadFile is false looks inverted - confirm intent.
                    if (!DownloadFile)
                    {
                        StorageFile(request, bytes);
                    }
                }
                else
                {
                    var content = ReadContent(request, bytes,
                                              httpResponseMessage.Content.Headers.ContentType.CharSet);

                    if (DecodeHtml)
                    {
#if NETFRAMEWORK
                        content =
                            System.Web.HttpUtility.UrlDecode(System.Web.HttpUtility.HtmlDecode(content.ToString()), string.IsNullOrEmpty(request.EncodingName) ? Encoding.UTF8 : Encoding.GetEncoding(request.EncodingName));
#else
                        content = WebUtility.UrlDecode(WebUtility.HtmlDecode(content));
#endif
                    }

                    response.RawText = content;
                }

                response.Success = true;
            }
            catch (Exception e)
            {
                response.Exception = e.Message;
                response.Success = false;
                Logger.LogError($"任务 {request.OwnerId} 下载 {request.Url} 失败: {e}");
            }
            finally
            {
                // Hand the proxy back together with the observed status code; when no response was
                // received at all, ServiceUnavailable is reported instead.
                if (HttpProxyPool != null && proxy != null)
                {
                    HttpProxyPool.ReturnProxy(proxy,
                                              httpResponseMessage?.StatusCode ?? HttpStatusCode.ServiceUnavailable);
                }

                try
                {
                    httpResponseMessage?.Dispose();
                }
                catch (Exception e)
                {
                    // A failure while disposing the response must not override the download result.
                    Logger.LogWarning($"任务 {request.OwnerId} 释放 {request.Url} 失败: {e}");
                }
            }

            return response;
        }
コード例 #2
0
        /// <summary>
        /// Downloads <paramref name="request"/> over HTTP, making up to <c>RetryTime</c> attempts, and
        /// stores the raw response bytes, media type and charset on the returned response.
        /// </summary>
        /// <param name="request">The request to download.</param>
        /// <returns>
        /// The response for <paramref name="request"/>. <c>Success</c> is set to true on the first
        /// attempt that completes without the body matching <c>request.ChangeIpPattern</c>. The method
        /// returns early with <c>Success</c> false when a proxy is required but unavailable, or when
        /// the pattern matches and the IP cannot be switched. Exceptions raised during an attempt are
        /// logged and recorded in <c>Exception</c>, and the next attempt is made; they are not rethrown.
        /// </returns>
        protected override async Task<Response> ImplDownloadAsync(Request request)
        {
            var response = new Response
            {
                Request = request
            };

            for (var i = 0; i < RetryTime; ++i)
            {
                HttpResponseMessage httpResponseMessage = null;
                WebProxy proxy = null;
                try
                {
                    var httpRequestMessage = GenerateHttpRequestMessage(request);

                    if (UseProxy)
                    {
                        if (HttpProxyPool == null)
                        {
                            response.Exception = "HttpProxyPool is null";
                            response.Success = false;
                            Logger?.LogError(
                                $"{request.OwnerId} download {request.Url} failed [{i}]: {response.Exception}");
                            return response;
                        }

                        proxy = HttpProxyPool.GetProxy();
                        if (proxy == null)
                        {
                            response.Exception = "There is no available proxy";
                            response.Success = false;
                            Logger?.LogError(
                                $"{request.OwnerId} download {request.Url} failed [{i}]: {response.Exception}");
                            return response;
                        }
                    }

                    // The client entry is looked up by proxy address, or "DEFAULT" when no proxy is used.
                    var httpClientEntry = GetHttpClientEntry(proxy == null ? "DEFAULT" : $"{proxy.Address}", proxy);

                    // When a network center is configured, the send is routed through its Execute wrapper.
                    httpResponseMessage = Framework.NetworkCenter == null
                        ? await httpClientEntry.HttpClient.SendAsync(httpRequestMessage)
                        : await Framework.NetworkCenter.Execute(async () =>
                            await httpClientEntry.HttpClient.SendAsync(httpRequestMessage));

                    httpResponseMessage.EnsureSuccessStatusCode();
                    response.TargetUrl = httpResponseMessage.RequestMessage.RequestUri.AbsoluteUri;

                    // NOTE(review): ContentType is dereferenced without a null check; a response without
                    // a Content-Type header ends up in the catch block below and is retried.
                    response.MediaType = httpResponseMessage.Content.Headers.ContentType.MediaType;
                    response.CharSet = httpResponseMessage.Content.Headers.ContentType.CharSet;

                    // Only the raw bytes are kept here; no media-type filtering or HTML decoding is done.
                    response.Content = await httpResponseMessage.Content.ReadAsByteArrayAsync();

                    // The body is decoded and tested only when a change-IP pattern is configured.
                    var ipBanned = false;
                    if (!string.IsNullOrWhiteSpace(request.ChangeIpPattern))
                    {
                        var rawtext = ResponseExtensions.ReadContent(request, response.Content, response.CharSet);
                        ipBanned = Regex.IsMatch(rawtext, request.ChangeIpPattern);
                    }

                    if (!ipBanned)
                    {
                        // Covers both "no pattern configured" and "pattern configured but not matched".
                        // Previously the second case never set Success and was retried needlessly.
                        response.Success = true;
                        Logger?.LogInformation(
                            $"{request.OwnerId} download {request.Url} success");
                        return response;
                    }

                    if (UseProxy)
                    {
                        response.TargetUrl = null;
                        response.Content = null;
                        response.Success = false;
                        // Clearing the proxy makes the finally block skip ReturnProxy, which effectively
                        // removes this proxy from the pool.
                        proxy = null;
                    }
                    else if (Framework.NetworkCenter == null ||
                             !Framework.NetworkCenter.SupportAdsl)
                    {
                        // Switching IP is not supported, so give up.
                        response.Success = false;
                        response.Exception = "IP Banded";
                        Logger?.LogError(
                            $"{request.OwnerId} download {request.Url} failed [{i}]: {response.Exception}");
                        return response;
                    }
                    else
                    {
                        Framework.NetworkCenter.Redial();
                    }
                }
                catch (Exception e)
                {
                    response.Exception = e.Message;
                    response.Success = false;
                    Logger?.LogError($"{request.OwnerId} download {request.Url} failed [{i}]: {e}");
                }
                finally
                {
                    // Hand the proxy back together with the observed status code; when no response was
                    // received at all, ServiceUnavailable is reported instead.
                    if (HttpProxyPool != null && proxy != null)
                    {
                        HttpProxyPool.ReturnProxy(proxy,
                                                  httpResponseMessage?.StatusCode ?? HttpStatusCode.ServiceUnavailable);
                    }

                    try
                    {
                        httpResponseMessage?.Dispose();
                    }
                    catch (Exception e)
                    {
                        Logger?.LogWarning($"{request.OwnerId} dispose response {request.Url} failed [{i}]: {e}");
                    }
                }

                // A failed attempt waits one second before the next one to keep the request rate down.
                // Task.Delay is awaited so the thread is not blocked while waiting.
                // TODO: make the delay configurable.
                await Task.Delay(1000);
            }

            return response;
        }
コード例 #3
0
        /// <summary>
        /// Downloads <paramref name="request"/> over HTTP, making up to <c>RetryTime</c> attempts.
        /// Depending on the response media type the body is either passed to <c>StorageFile</c> or
        /// decoded into <c>RawText</c> on the returned response.
        /// </summary>
        /// <param name="request">The request to download.</param>
        /// <returns>
        /// The response for <paramref name="request"/>. <c>Success</c> is set to true on the first
        /// attempt that completes without the text matching <c>request.ChangeIpPattern</c>. The method
        /// returns early with <c>Success</c> false when a proxy is required but unavailable, or when
        /// the pattern matches and the IP cannot be switched. Exceptions raised during an attempt are
        /// logged and recorded in <c>Exception</c>, and the next attempt is made; they are not rethrown.
        /// </returns>
        protected override async Task<Response> ImplDownloadAsync(Request request)
        {
            var response = new Response
            {
                Request = request
            };

            for (int i = 0; i < RetryTime; ++i)
            {
                HttpResponseMessage httpResponseMessage = null;
                WebProxy proxy = null;
                try
                {
                    var httpRequestMessage = GenerateHttpRequestMessage(request);

                    if (UseProxy)
                    {
                        if (HttpProxyPool == null)
                        {
                            response.Exception = "未正确配置代理池";
                            response.Success = false;
                            Logger?.LogError(
                                $"任务 {request.OwnerId} 下载 {request.Url} 失败 [{i}]: {response.Exception}");
                            return response;
                        }

                        proxy = HttpProxyPool.GetProxy();
                        if (proxy == null)
                        {
                            response.Exception = "没有可用的代理";
                            response.Success = false;
                            Logger?.LogError(
                                $"任务 {request.OwnerId} 下载 {request.Url} 失败 [{i}]: {response.Exception}");
                            return response;
                        }
                    }

                    // The client entry is looked up by proxy address, or "DEFAULT" when no proxy is used.
                    var httpClientEntry = GetHttpClientEntry(proxy == null ? "DEFAULT" : $"{proxy.Address}", proxy);

                    // When a network center is configured, the send is routed through its Execute wrapper.
                    httpResponseMessage = Framework.NetworkCenter == null
                        ? await httpClientEntry.HttpClient.SendAsync(httpRequestMessage)
                        : await Framework.NetworkCenter.Execute(async () =>
                            await httpClientEntry.HttpClient.SendAsync(httpRequestMessage));

                    httpResponseMessage.EnsureSuccessStatusCode();
                    response.TargetUrl = httpResponseMessage.RequestMessage.RequestUri.AbsoluteUri;

                    // Await the body instead of blocking on .Result: no thread is tied up while the
                    // content is read, and failures surface as the real exception rather than an
                    // AggregateException.
                    var bytes = await httpResponseMessage.Content.ReadAsByteArrayAsync();

                    // NOTE(review): ContentType is dereferenced without a null check; a response without
                    // a Content-Type header ends up in the catch block below and is retried.
                    if (!ExcludeMediaTypes.Any(t =>
                            httpResponseMessage.Content.Headers.ContentType.MediaType.Contains(t)))
                    {
                        // The media type matches none of the ExcludeMediaTypes entries.
                        // NOTE(review): storing only when DownloadFile is false looks inverted - confirm intent.
                        if (!DownloadFile)
                        {
                            StorageFile(request, bytes);
                        }
                    }
                    else
                    {
                        var content = ReadContent(request, bytes,
                                                  httpResponseMessage.Content.Headers.ContentType.CharSet);

                        if (DecodeHtml)
                        {
#if NETFRAMEWORK
                            content = System.Web.HttpUtility.UrlDecode(
                                System.Web.HttpUtility.HtmlDecode(content),
                                string.IsNullOrEmpty(request.Encoding)
                                    ? Encoding.UTF8
                                    : Encoding.GetEncoding(request.Encoding));
#else
                            content = WebUtility.UrlDecode(WebUtility.HtmlDecode(content));
#endif
                        }

                        response.RawText = content;
                    }

                    // RawText is not assigned on the file branch above. Regex.IsMatch throws on a null
                    // input, which used to turn a successful file download into a failed attempt
                    // whenever a change-IP pattern was configured, so null text is treated as "no match".
                    var ipBanned = !string.IsNullOrWhiteSpace(request.ChangeIpPattern) &&
                                   response.RawText != null &&
                                   Regex.IsMatch(response.RawText, request.ChangeIpPattern);

                    if (!ipBanned)
                    {
                        response.Success = true;
                        Logger?.LogInformation(
                            $"任务 {request.OwnerId} 下载 {request.Url} 成功");
                        return response;
                    }

                    if (UseProxy)
                    {
                        response.TargetUrl = null;
                        response.RawText = null;
                        response.Success = false;
                        // Clearing the proxy makes the finally block skip ReturnProxy, which effectively
                        // removes this proxy from the pool.
                        proxy = null;
                    }
                    else if (Framework.NetworkCenter == null ||
                             !Framework.NetworkCenter.SupportAdsl)
                    {
                        // Switching IP is not supported, so give up.
                        response.Success = false;
                        response.Exception = "IP Banded";
                        Logger?.LogError(
                            $"任务 {request.OwnerId} 下载 {request.Url} 失败 [{i}]: {response.Exception}");
                        return response;
                    }
                    else
                    {
                        Framework.NetworkCenter.Redial();
                    }
                }
                catch (Exception e)
                {
                    response.Exception = e.Message;
                    response.Success = false;
                    Logger?.LogError($"任务 {request.OwnerId} 下载 {request.Url} 失败 [{i}]: {e}");
                }
                finally
                {
                    // Hand the proxy back together with the observed status code; when no response was
                    // received at all, ServiceUnavailable is reported instead.
                    if (HttpProxyPool != null && proxy != null)
                    {
                        HttpProxyPool.ReturnProxy(proxy,
                                                  httpResponseMessage?.StatusCode ?? HttpStatusCode.ServiceUnavailable);
                    }

                    try
                    {
                        httpResponseMessage?.Dispose();
                    }
                    catch (Exception e)
                    {
                        // A failure while disposing the response must not override the download result.
                        Logger?.LogWarning($"任务 {request.OwnerId} 释放 {request.Url} 失败 [{i}]: {e}");
                    }
                }
            }

            return response;
        }