/// <summary>
/// Downloads <paramref name="request"/> over HTTP, making up to <c>RetryTime</c> attempts.
/// </summary>
/// <remarks>
/// Each attempt optionally takes a proxy from <c>HttpProxyPool</c>, sends the request
/// (through <c>Framework.NetworkCenter</c> when one is configured), then either stores the
/// payload as a file or decodes it into <c>Response.RawText</c>. When the text matches
/// <c>request.ChangeIpPattern</c> the attempt is treated as banned: with proxies the proxy is
/// dropped and the loop retries; without proxies a redial is requested when supported,
/// otherwise the method gives up immediately. Exceptions thrown during an attempt are
/// caught, logged and recorded on the response rather than propagated.
/// </remarks>
/// <param name="request">The request to download.</param>
/// <returns>
/// The response for <paramref name="request"/>; <c>Success</c> is <c>true</c> only when an
/// attempt completed without matching the ban pattern.
/// </returns>
protected override async Task<Response> ImplDownloadAsync(Request request)
{
    var response = new Response { Request = request };

    for (int i = 0; i < RetryTime; ++i)
    {
        HttpResponseMessage httpResponseMessage = null;
        WebProxy proxy = null;
        try
        {
            var httpRequestMessage = GenerateHttpRequestMessage(request);

            if (UseProxy)
            {
                if (HttpProxyPool == null)
                {
                    response.Exception = "HttpProxyPool is null";
                    response.Success = false;
                    Logger?.LogError(
                        $"{request.OwnerId} download {request.Url} failed [{i}]: {response.Exception}");
                    return response;
                }

                proxy = HttpProxyPool.GetProxy();
                if (proxy == null)
                {
                    response.Exception = "There is no available proxy";
                    response.Success = false;
                    Logger?.LogError(
                        $"{request.OwnerId} download {request.Url} failed [{i}]: {response.Exception}");
                    return response;
                }
            }

            // Client entries are keyed by proxy address; "DEFAULT" is the direct connection.
            var httpClientEntry = GetHttpClientEntry(proxy == null ? "DEFAULT" : $"{proxy.Address}", proxy);

            httpResponseMessage = Framework.NetworkCenter == null
                ? await httpClientEntry.HttpClient.SendAsync(httpRequestMessage)
                : await Framework.NetworkCenter.Execute(
                    async () => await httpClientEntry.HttpClient.SendAsync(httpRequestMessage));

            httpResponseMessage.EnsureSuccessStatusCode();
            response.TargetUrl = httpResponseMessage.RequestMessage.RequestUri.AbsoluteUri;

            // Await instead of blocking on .Result: no thread is tied up and failures are
            // not wrapped in an AggregateException.
            var bytes = await httpResponseMessage.Content.ReadAsByteArrayAsync();

            var contentType = httpResponseMessage.Content.Headers.ContentType;
            if (!ExcludeMediaTypes.Any(t => contentType.MediaType.Contains(t)))
            {
                // NOTE(review): the payload is stored only when DownloadFile is false, which
                // looks inverted relative to the flag's name - confirm the intended meaning.
                if (!DownloadFile)
                {
                    StorageFile(request, bytes);
                }
            }
            else
            {
                var content = ReadContent(request, bytes, contentType.CharSet);
                if (DecodeHtml)
                {
#if NETFRAMEWORK
                    content = System.Web.HttpUtility.UrlDecode(
                        System.Web.HttpUtility.HtmlDecode(content),
                        string.IsNullOrEmpty(request.Encoding)
                            ? Encoding.UTF8
                            : Encoding.GetEncoding(request.Encoding));
#else
                    content = WebUtility.UrlDecode(WebUtility.HtmlDecode(content));
#endif
                }

                response.RawText = content;
            }

            // RawText is null when the payload went down the file branch; Regex.IsMatch
            // throws on null input, so the ban check only applies when there is text.
            var banned = !string.IsNullOrWhiteSpace(request.ChangeIpPattern)
                         && response.RawText != null
                         && Regex.IsMatch(response.RawText, request.ChangeIpPattern);

            if (!banned)
            {
                response.Success = true;
                Logger?.LogInformation($"{request.OwnerId} download {request.Url} success");
                return response;
            }

            if (UseProxy)
            {
                response.TargetUrl = null;
                response.RawText = null;
                response.Success = false;
                // Null out the proxy so the finally block does not return it to the pool,
                // which effectively discards this proxy.
                proxy = null;
            }
            else if (Framework.NetworkCenter == null || !Framework.NetworkCenter.SupportAdsl)
            {
                // IP switching is not supported, so retrying cannot help.
                response.Success = false;
                response.Exception = "IP Banded";
                Logger?.LogError(
                    $"{request.OwnerId} download {request.Url} failed [{i}]: {response.Exception}");
                return response;
            }
            else
            {
                Framework.NetworkCenter.Redial();
            }
        }
        catch (Exception e)
        {
            response.Exception = e.Message;
            response.Success = false;
            Logger?.LogError($"{request.OwnerId} download {request.Url} failed [{i}]: {e}");
        }
        finally
        {
            if (HttpProxyPool != null && proxy != null)
            {
                HttpProxyPool.ReturnProxy(
                    proxy,
                    httpResponseMessage?.StatusCode ?? HttpStatusCode.ServiceUnavailable);
            }

            try
            {
                httpResponseMessage?.Dispose();
            }
            catch (Exception e)
            {
                Logger?.LogWarning($"{request.OwnerId} dispose response {request.Url} failed [{i}]: {e}");
            }
        }

        // Wait one second after a failed attempt to avoid hammering the target.
        // Task.Delay is used so the wait does not block a thread.
        // TODO: make the delay configurable.
        await Task.Delay(1000);
    }

    return response;
}
/// <summary>
/// Downloads <paramref name="request"/> over HTTP in a single attempt.
/// </summary>
/// <remarks>
/// When <c>UseProxy</c> is set a proxy is taken from <c>HttpProxyPool</c> and handed back in
/// the <c>finally</c> block together with the observed status code. Any exception raised
/// during the attempt (including the proxy configuration errors thrown here) is caught,
/// logged and recorded on the response instead of being propagated.
/// </remarks>
/// <param name="request">The request to download.</param>
/// <returns>
/// The response for <paramref name="request"/>; <c>Success</c> is <c>false</c> and
/// <c>Exception</c> holds the message when the attempt failed.
/// </returns>
protected override async Task<Response> ImplDownloadAsync(Request request)
{
    var response = new Response { Request = request };
    HttpResponseMessage httpResponseMessage = null;
    WebProxy proxy = null;

    try
    {
        var httpRequestMessage = GenerateHttpRequestMessage(request);

        if (UseProxy)
        {
            if (HttpProxyPool == null)
            {
                throw new SpiderException("未正确配置代理池");
            }

            proxy = HttpProxyPool.GetProxy();
            if (proxy == null)
            {
                throw new SpiderException("没有可用的代理");
            }
        }

        // Client entries are keyed by proxy address; "DEFAULT" is the direct connection.
        var httpClientEntry = GetHttpClientEntry(proxy == null ? "DEFAULT" : $"{proxy.Address}", proxy);

        httpResponseMessage = await httpClientEntry.HttpClient.SendAsync(httpRequestMessage);
        httpResponseMessage.EnsureSuccessStatusCode();
        response.TargetUrl = httpResponseMessage.RequestMessage.RequestUri.AbsoluteUri;

        // Await instead of blocking on .Result: no thread is tied up and failures are
        // not wrapped in an AggregateException.
        var bytes = await httpResponseMessage.Content.ReadAsByteArrayAsync();

        var contentType = httpResponseMessage.Content.Headers.ContentType;
        if (!ExcludeMediaTypes.Any(t => contentType.MediaType.Contains(t)))
        {
            // NOTE(review): the payload is stored only when DownloadFile is false, which
            // looks inverted relative to the flag's name - confirm the intended meaning.
            if (!DownloadFile)
            {
                StorageFile(request, bytes);
            }
        }
        else
        {
            var content = ReadContent(request, bytes, contentType.CharSet);
            if (DecodeHtml)
            {
#if NETFRAMEWORK
                content = System.Web.HttpUtility.UrlDecode(
                    System.Web.HttpUtility.HtmlDecode(content.ToString()),
                    string.IsNullOrEmpty(request.EncodingName)
                        ? Encoding.UTF8
                        : Encoding.GetEncoding(request.EncodingName));
#else
                content = WebUtility.UrlDecode(WebUtility.HtmlDecode(content));
#endif
            }

            response.RawText = content;
        }

        response.Success = true;
    }
    catch (Exception e)
    {
        response.Exception = e.Message;
        response.Success = false;
        Logger.LogError($"任务 {request.OwnerId} 下载 {request.Url} 失败: {e}");
    }
    finally
    {
        if (HttpProxyPool != null && proxy != null)
        {
            HttpProxyPool.ReturnProxy(
                proxy,
                httpResponseMessage?.StatusCode ?? HttpStatusCode.ServiceUnavailable);
        }

        try
        {
            httpResponseMessage?.Dispose();
        }
        catch (Exception e)
        {
            Logger.LogWarning($"任务 {request.OwnerId} 释放 {request.Url} 失败: {e}");
        }
    }

    return response;
}
/// <summary>
/// Downloads <paramref name="request"/> over HTTP, making up to <c>RetryTime</c> attempts.
/// </summary>
/// <remarks>
/// Each attempt optionally takes a proxy from <c>HttpProxyPool</c>, sends the request, then
/// either stores the payload as a file or decodes it into <c>Response.RawText</c>. When the
/// text matches <c>request.ChangeIpPattern</c> and proxies are in use, the proxy is dropped
/// and the loop retries. Exceptions thrown during an attempt are caught, logged and
/// recorded on the response rather than propagated.
/// </remarks>
/// <param name="request">The request to download.</param>
/// <returns>
/// The response for <paramref name="request"/>; <c>Success</c> is set to <c>true</c> only
/// when an attempt completed without matching the ban pattern.
/// </returns>
protected override async Task<Response> ImplDownloadAsync(Request request)
{
    var response = new Response { Request = request };

    for (int i = 0; i < RetryTime; ++i)
    {
        HttpResponseMessage httpResponseMessage = null;
        WebProxy proxy = null;
        try
        {
            var httpRequestMessage = GenerateHttpRequestMessage(request);

            if (UseProxy)
            {
                if (HttpProxyPool == null)
                {
                    response.Exception = "未正确配置代理池";
                    response.Success = false;
                    Logger?.LogError(
                        $"HttpClientDownloaded 任务 {request.OwnerId} 下载 {request.Url} 失败 [{i}]: {response.Exception}");
                    return response;
                }

                proxy = HttpProxyPool.GetProxy();
                if (proxy == null)
                {
                    response.Exception = "没有可用的代理";
                    response.Success = false;
                    Logger?.LogError(
                        $"HttpClientDownloaded 任务 {request.OwnerId} 下载 {request.Url} 失败 [{i}]: {response.Exception}");
                    return response;
                }
            }

            // Client entries are keyed by proxy address; "DEFAULT" is the direct connection.
            var httpClientEntry = GetHttpClientEntry(proxy == null ? "DEFAULT" : $"{proxy.Address}", proxy);

            // A send failure is deliberately left to the outer catch. Swallowing it here
            // would leave httpResponseMessage null, and the next line would replace the
            // real error with a NullReferenceException.
            httpResponseMessage = await httpClientEntry.HttpClient.SendAsync(httpRequestMessage);

            httpResponseMessage.EnsureSuccessStatusCode();
            response.TargetUrl = httpResponseMessage.RequestMessage.RequestUri.AbsoluteUri;

            // Await instead of blocking on .Result: no thread is tied up and failures are
            // not wrapped in an AggregateException.
            var bytes = await httpResponseMessage.Content.ReadAsByteArrayAsync();

            var contentType = httpResponseMessage.Content.Headers.ContentType;
            if (!ExcludeMediaTypes.Any(t => contentType.MediaType.Contains(t)))
            {
                // Store the payload as a file.
                // NOTE(review): this runs only when DownloadFile is false, which looks
                // inverted relative to the flag's name - confirm the intended meaning.
                if (!DownloadFile)
                {
                    StorageFile(request, bytes);
                }
            }
            else
            {
                var content = ReadContent(request, bytes, contentType.CharSet);
                if (DecodeHtml)
                {
#if NETFRAMEWORK
                    content = System.Web.HttpUtility.UrlDecode(
                        System.Web.HttpUtility.HtmlDecode(content),
                        string.IsNullOrEmpty(request.Encoding)
                            ? Encoding.UTF8
                            : Encoding.GetEncoding(request.Encoding));
#else
                    content = WebUtility.UrlDecode(WebUtility.HtmlDecode(content));
#endif
                }

                response.RawText = content;
            }

            // RawText is null when the payload went down the file branch; Regex.IsMatch
            // throws on null input, so the ban check only applies when there is text.
            var banned = !string.IsNullOrWhiteSpace(request.ChangeIpPattern)
                         && response.RawText != null
                         && Regex.IsMatch(response.RawText, request.ChangeIpPattern);

            if (!banned)
            {
                response.Success = true;
                Logger?.LogInformation($"任务 {request.OwnerId} 下载 {request.Url} 成功");
                return response;
            }

            if (UseProxy)
            {
                response.TargetUrl = null;
                response.RawText = null;
                response.Success = false;
                // Null out the proxy so the finally block does not return it to the pool,
                // which effectively discards this proxy.
                proxy = null;
            }

            // Without a proxy nothing is changed here; the loop simply tries again.
        }
        catch (Exception e)
        {
            response.Exception = e.Message;
            response.Success = false;
            Logger?.LogError($"任务 {request.OwnerId} 下载 {request.Url} 失败 [{i}]: {e}");
        }
        finally
        {
            if (HttpProxyPool != null && proxy != null)
            {
                HttpProxyPool.ReturnProxy(
                    proxy,
                    httpResponseMessage?.StatusCode ?? HttpStatusCode.ServiceUnavailable);
            }

            try
            {
                httpResponseMessage?.Dispose();
            }
            catch (Exception e)
            {
                Logger?.LogWarning($"任务 {request.OwnerId} 释放 {request.Url} 失败 [{i}]: {e}");
            }
        }
    }

    return response;
}
/// <summary>
/// Synchronously downloads <paramref name="request"/> with an <c>HttpClient</c>.
/// </summary>
/// <remarks>
/// Returns early when <c>IfFileExists</c> reports the target is already present. The
/// connection goes through the Fiddler proxy when <c>UseFiddlerProxy</c> is set, otherwise
/// through a proxy borrowed from <c>HttpProxyPool.Instance</c> when a pool exists, otherwise
/// directly. Only a proxy that was borrowed from the pool is returned to it.
/// </remarks>
/// <param name="request">The request to download.</param>
/// <returns>The response populated with status code, target URL and content.</returns>
/// <exception cref="DownloaderException">
/// The proxy configuration is unusable, or any other exception occurred while downloading
/// (the original exception is embedded in the message).
/// </exception>
/// <exception cref="BypassedDownloaderException">Disposing the HTTP response failed.</exception>
protected override Response DowloadContent(Request request)
{
    var response = new Response(request);

    if (IfFileExists(request))
    {
        Logger?.LogInformation($"File {request.Url} already exists.");
        return response;
    }

    var httpRequestMessage = GenerateHttpRequestMessage(request);
    HttpResponseMessage httpResponseMessage = null;
    WebProxy proxy = null;
    // Tracks whether the proxy came from the pool, so the Fiddler proxy is never handed
    // to ReturnProxy.
    var borrowedFromPool = false;

    try
    {
        if (UseFiddlerProxy)
        {
            if (FiddlerProxy == null)
            {
                throw new DownloaderException("Fiddler proxy is null.");
            }

            proxy = FiddlerProxy;
        }
        else if (HttpProxyPool.Instance != null)
        {
            proxy = HttpProxyPool.Instance.GetProxy();
            if (proxy == null)
            {
                throw new DownloaderException("No available proxy.");
            }

            borrowedFromPool = true;
        }

        // Clients are keyed by proxy address; "DEFAULT" is the direct connection.
        _clientObject = GetHttpClient(proxy == null ? "DEFAULT" : $"{proxy.Address}", AllowAutoRedirect, proxy);

        // This override is synchronous, so the asynchronous send is run on the thread pool
        // and waited for.
        httpResponseMessage = NetworkCenter.Current.Execute(
            "downloader",
            () => Task
                .Run(async () => await _clientObject.Client.SendAsync(httpRequestMessage))
                .GetAwaiter()
                .GetResult());

        response.StatusCode = httpResponseMessage.StatusCode;
        EnsureSuccessStatusCode(response.StatusCode);
        response.TargetUrl = httpResponseMessage.RequestMessage.RequestUri.AbsoluteUri;

        var bytes = httpResponseMessage.Content.ReadAsByteArrayAsync().Result;

        var contentType = httpResponseMessage.Content.Headers.ContentType;
        if (!ExcludeMediaTypes.Any(t => contentType.MediaType.Contains(t)))
        {
            if (!DownloadFiles)
            {
                Logger?.LogWarning($"Ignore {request.Url} because media type is not allowed to download.");
            }
            else
            {
                StorageFile(request, bytes);
            }
        }
        else
        {
            var content = ReadContent(request, bytes, contentType.CharSet);
            if (_decodeHtml && content is string)
            {
#if NETFRAMEWORK
                content = System.Web.HttpUtility.UrlDecode(
                    System.Web.HttpUtility.HtmlDecode(content.ToString()),
                    string.IsNullOrEmpty(request.EncodingName)
                        ? Encoding.UTF8
                        : Encoding.GetEncoding(request.EncodingName));
#else
                content = WebUtility.UrlDecode(WebUtility.HtmlDecode(content.ToString()));
#endif
            }

            response.Content = content;
            DetectContentType(response, contentType.MediaType);
        }
    }
    catch (DownloaderException)
    {
        throw;
    }
    catch (Exception e)
    {
        throw new DownloaderException($"Unexpected exception when download request: {request.Url}: {e}.");
    }
    finally
    {
        if (borrowedFromPool && HttpProxyPool.Instance != null && proxy != null)
        {
            HttpProxyPool.Instance.ReturnProxy(
                proxy,
                httpResponseMessage == null
                    ? HttpStatusCode.ServiceUnavailable
                    : httpResponseMessage.StatusCode);
        }

        try
        {
            httpResponseMessage?.Dispose();
        }
        catch (Exception e)
        {
            throw new BypassedDownloaderException($"Close response {request.Url} failed: {e.Message}");
        }
    }

    return response;
}
/// <summary>
/// Synchronously downloads <paramref name="request"/> with an <c>HttpWebRequest</c>.
/// </summary>
/// <remarks>
/// Returns early when <c>IfFileExists</c> reports the target is already present. After the
/// attempt, the request's proxy is returned to <c>HttpProxyPool.Instance</c> unless it is
/// the Fiddler proxy, and the response is closed.
/// </remarks>
/// <param name="request">The request to download.</param>
/// <returns>The response populated with status code, target URL and content.</returns>
/// <exception cref="DownloaderException">
/// Any exception occurred while downloading (the original exception is embedded in the
/// message).
/// </exception>
/// <exception cref="BypassedDownloaderException">Closing the HTTP response failed.</exception>
protected override Response DowloadContent(Request request)
{
    Response response = new Response();
    response.Request = request;

    if (IfFileExists(request))
    {
        Logger.Information($"File {request.Url} already exists.");
        return response;
    }

    HttpWebRequest httpWebRequest = null;
    HttpWebResponse httpWebResponse = null;

    try
    {
        httpWebRequest = GenerateHttpWebRequest(request);
        httpWebResponse = NetworkCenter.Current.Execute(
            "downloader",
            () => (HttpWebResponse)httpWebRequest.GetResponse());

        response.StatusCode = httpWebResponse.StatusCode;
        EnsureSuccessStatusCode(response.StatusCode);
        response.TargetUrl = httpWebResponse.ResponseUri.ToString();

        var bytes = ReadResponseStream(httpWebResponse);

        if (!ExcludeMediaTypes.Any(t => httpWebResponse.ContentType.Contains(t)))
        {
            if (!DownloadFiles)
            {
                Logger.Warning($"Ignore {request.Url} because media type is not allowed to download.");
            }
            else
            {
                StorageFile(request, bytes);
            }
        }
        else
        {
            string content = ReadContent(request, bytes, httpWebResponse.ContentType);
            if (_decodeHtml)
            {
#if NETFRAMEWORK
                content = System.Web.HttpUtility.UrlDecode(
                    System.Web.HttpUtility.HtmlDecode(content),
                    string.IsNullOrEmpty(request.EncodingName)
                        ? Encoding.UTF8
                        : Encoding.GetEncoding(request.EncodingName));
#else
                content = WebUtility.UrlDecode(WebUtility.HtmlDecode(content));
#endif
            }

            response.Content = content;
            DetectContentType(response, httpWebResponse.ContentType);
        }
    }
    catch (DownloaderException)
    {
        throw;
    }
    catch (Exception e)
    {
        throw new DownloaderException($"Unexpected exception when download request: {request.Url}: {e}.");
    }
    finally
    {
        if (httpWebRequest != null)
        {
            var proxy = httpWebRequest.Proxy as WebProxy;
            // The Fiddler proxy is excluded from the hand-back to the pool.
            if (HttpProxyPool.Instance != null && proxy != null && httpWebRequest.Proxy != FiddlerProxy)
            {
                HttpProxyPool.Instance.ReturnProxy(
                    proxy,
                    httpWebResponse == null
                        ? HttpStatusCode.ServiceUnavailable
                        : httpWebResponse.StatusCode);
            }

            try
            {
                // The response is null when GetResponse failed. Closing it unconditionally
                // raised a NullReferenceException that was rethrown as a
                // BypassedDownloaderException, hiding the real download error.
                httpWebResponse?.Close();
            }
            catch (Exception e)
            {
                throw new BypassedDownloaderException($"Close response {request.Url} failed: {e.Message}");
            }
        }
    }

    return response;
}