/// Marks a proxy as unavailable.
/// Looks up the first entry of <c>IPProxyList</c> that has the same IP and port as the
/// argument and is not already flagged unavailable. When one is found it is flagged,
/// the <c>curIPProxy</c> member is cleared and <c>GetIPProxy()</c> is called
/// (presumably to pick a replacement; that method is defined elsewhere).
/// </summary>
/// <param name="_curIPProxy">Proxy whose IP and port identify the entry to flag; nothing happens when it is null.</param>
public void SetUnviableIP(IPProxy _curIPProxy)
{
    if (_curIPProxy == null)
    {
        return;
    }

    // Only entries that are still considered usable are candidates.
    var match = IPProxyList.FirstOrDefault(
        candidate => candidate.IP == _curIPProxy.IP
            && candidate.Port == _curIPProxy.Port
            && candidate.Unavaiable == false);

    if (match == null)
    {
        return;
    }

    match.Unavaiable = true;

    // Drop the current selection before asking for another proxy.
    curIPProxy = null;
    GetIPProxy();
}
/// <summary>
/// Attaches a <see cref="WebProxy"/> built from <paramref name="ipProxy"/> to the request.
/// Three address forms are handled:
/// 1. the IP field contains ':' - it is split into host and port, credentials are always set;
/// 2. a separate port is supplied - credentials are set only when a user name is present;
/// 3. neither - the IP field alone is used as the proxy address, credentials are always set.
/// </summary>
/// <param name="request">Request that receives the proxy.</param>
/// <param name="ipProxy">Proxy description (IP, port, user name, password); when null the request is left untouched.</param>
private void SetProxy(HttpWebRequest request, IPProxy ipProxy)
{
    if (ipProxy == null)
    {
        return;
    }

    string host = ipProxy.IP;
    string port = ipProxy.Port;
    string userName = ipProxy.UserName;
    string password = ipProxy.PassWord;

    WebProxy proxy;
    bool attachCredentials;

    // NOTE(review): a null IP would throw on the Contains call below - confirm callers never pass one.
    if (host.Contains(":"))
    {
        // "host:port" packed into the IP field.
        string[] parts = host.Split(':');
        proxy = new WebProxy(parts[0].Trim(), Convert.ToInt32(parts[1].Trim()));
        attachCredentials = true;
    }
    else if (string.IsNullOrEmpty(port))
    {
        // No port anywhere: use the address as-is, without bypassing the proxy for local addresses.
        proxy = new WebProxy(host, false);
        attachCredentials = true;
    }
    else
    {
        proxy = new WebProxy(host, Convert.ToInt32(port));
        attachCredentials = !string.IsNullOrEmpty(userName);
    }

    if (attachCredentials)
    {
        // Credentials used to authenticate against the proxy server.
        proxy.Credentials = new NetworkCredential(userName, password);
    }

    // Hand the configured proxy to the current request.
    request.Proxy = proxy;
}
/// <summary>
/// Worker loop for one crawl thread. Repeatedly takes a URL from <c>UrlQueue</c>,
/// downloads it, extracts links through <c>ParseLinks</c> and raises
/// <c>DataReceivedEvent</c> with the page HTML. Failures raise <c>CrawlErrorEvent</c>
/// with <c>needTryAgain</c> set. The request is aborted and the response closed after
/// every attempt. The loop ends once the queue is empty and every entry of
/// <c>threadStatus</c> is flagged idle.
/// </summary>
/// <param name="threadIndex">
/// Boxed <see cref="int"/>: index of this worker's slot in <c>threadStatus</c>.
/// </param>
private void CrawlProcess_Abort(object threadIndex)
{
    var currentThreadIndex = (int)threadIndex;

    while (true)
    {
        // Decide between sleeping and exiting from the queue size and the idle flags.
        if (UrlQueue.Instance.Count == 0)
        {
            this.threadStatus[currentThreadIndex] = true;

            // No worker is flagged busy any more: stop this worker.
            if (!this.threadStatus.Any(t => t == false))
            {
                break;
            }

            Thread.Sleep(2000);
            continue;
        }

        this.threadStatus[currentThreadIndex] = false;

        // Re-check after flagging this worker busy; the queue may have been drained meanwhile.
        if (UrlQueue.Instance.Count == 0)
        {
            continue;
        }

        UrlInfo urlInfo = UrlQueue.Instance.DeQueue();
        HttpWebRequest request = null;
        HttpWebResponse response = null;
        IPProxy curIPProxy = null;

        try
        {
            // Nothing was dequeued; the finally block still runs before the next iteration.
            if (urlInfo == null)
            {
                continue;
            }

            // Automatic throttling: random pause of 1000-4999 ms.
            if (this.Settings.AutoSpeedLimit)
            {
                int span = this.random.Next(1000, 5000);
                Thread.Sleep(span);
            }

            // Create and configure the web request; ConfigRequest returns the proxy in use.
            // NOTE(review): request is null for non-HTTP URLs and is passed to ConfigRequest
            // before the null check below - confirm ConfigRequest tolerates null.
            request = WebRequest.Create(urlInfo.UrlString) as HttpWebRequest;
            curIPProxy = this.ConfigRequest(request);

            if (request != null)
            {
                response = request.GetResponse() as HttpWebResponse;
            }

            if (response != null)
            {
                this.PersistenceCookie(response);

                Stream stream = null;

                // Decompress gzip-encoded bodies. Content-coding tokens are case-insensitive
                // in HTTP, so "GZIP"/"Gzip" must be recognised as well.
                if (string.Equals(response.ContentEncoding, "gzip", StringComparison.OrdinalIgnoreCase))
                {
                    Stream responseStream = response.GetResponseStream();
                    if (responseStream != null)
                    {
                        stream = new GZipStream(responseStream, CompressionMode.Decompress);
                    }
                }
                else
                {
                    stream = response.GetResponseStream();
                }

                // The using statement disposes the stream (a null stream is allowed).
                using (stream)
                {
                    string html = this.ParseContent(stream, response.CharacterSet);
                    this.ParseLinks(urlInfo, html);

                    // Snapshot the handler so an unsubscribe on another thread between the
                    // null check and the call cannot cause a NullReferenceException.
                    var dataReceived = this.DataReceivedEvent;
                    if (dataReceived != null)
                    {
                        dataReceived(
                            new DataReceivedEventArgs
                            {
                                Url = urlInfo.UrlString,
                                Depth = urlInfo.Depth,
                                Html = html,
                                IpProx = curIPProxy
                            });
                    }
                }
            }
        }
        catch (WebException webEx)
        {
            // urlInfo is never null here: the null case leaves the try block before anything can throw.
            var ev = new CrawlErrorEventArgs
            {
                Url = urlInfo.UrlString,
                Depth = urlInfo.Depth,
                Exception = webEx,
                IpProx = curIPProxy
            };

            if (webEx.Status == WebExceptionStatus.Timeout
                || webEx.Status == WebExceptionStatus.ProtocolError
                || webEx.Message.Contains("远程服务器返回错误")
                || webEx.Message.Contains("网关"))
            {
                // Settings.SetUnviableIP(curIPProxy) is disabled here; the flag is passed to
                // subscribers instead (presumably so they switch proxy themselves).
                ev.needChangeIp = true;
            }

            ev.needTryAgain = true;

            var crawlError = this.CrawlErrorEvent;
            if (crawlError != null)
            {
                crawlError(ev);
            }
        }
        catch (Exception exception)
        {
            var errorEV = new CrawlErrorEventArgs
            {
                Url = urlInfo.UrlString,
                Depth = urlInfo.Depth,
                Exception = exception,
                IpProx = curIPProxy
            };

            if (exception.Message.Contains("超时") || exception.Message.Contains("远程服务器返回错误"))
            {
                // Settings.SetUnviableIP(curIPProxy) is disabled here as well; only the flag is set.
                errorEV.needChangeIp = true;
            }

            errorEV.needTryAgain = true;

            var crawlError = this.CrawlErrorEvent;
            if (crawlError != null)
            {
                crawlError(errorEV);
            }
        }
        finally
        {
            // Always release the connection, whether the attempt succeeded or failed.
            if (request != null)
            {
                request.Abort();
            }

            if (response != null)
            {
                response.Close();
            }
        }
    }
}