Пример #1
0
 /// <summary>
 /// Marks a proxy as unavailable: finds the entry in <c>IPProxyList</c> with the same IP and
 /// port that is not yet flagged, flags it, clears the cached <c>curIPProxy</c> and calls
 /// <c>GetIPProxy()</c>.
 /// </summary>
 /// <param name="_curIPProxy">The proxy to retire. A null value is ignored.</param>
 public void SetUnviableIP(IPProxy _curIPProxy)
 {
     if (_curIPProxy == null)
     {
         return;
     }

     // Entries already flagged as unavailable are skipped, so a repeated call for the same
     // proxy finds nothing and changes nothing.
     var hitIpObj = IPProxyList.FirstOrDefault(
         c => c.IP == _curIPProxy.IP && c.Port == _curIPProxy.Port && c.Unavaiable == false);
     if (hitIpObj == null)
     {
         return;
     }

     hitIpObj.Unavaiable = true;

     // Drop the cached proxy before asking for the next one.
     // NOTE(review): GetIPProxy() presumably selects a replacement - confirm against its definition.
     curIPProxy = null;
     GetIPProxy();
 }
Пример #2
0
        /// <summary>
        /// Routes <paramref name="request"/> through the proxy described by <paramref name="ipProxy"/>.
        /// </summary>
        /// <param name="request">The request whose <c>Proxy</c> property is assigned.</param>
        /// <param name="ipProxy">
        /// Proxy description. When it is null, or its IP is null or empty, the request is left untouched.
        /// </param>
        /// <remarks>
        /// The proxy endpoint is resolved in this order:
        /// an IP of the form "host:port" (the separate Port value is then ignored),
        /// otherwise IP plus Port, otherwise IP alone as the proxy address.
        /// Credentials are attached only when a user name is present.
        /// </remarks>
        private void SetProxy(HttpWebRequest request, IPProxy ipProxy)
        {
            if (ipProxy == null || string.IsNullOrEmpty(ipProxy.IP))
            {
                // Nothing usable to configure; previously a null IP caused a NullReferenceException.
                return;
            }

            string proxyIp   = ipProxy.IP;
            string proxyPort = ipProxy.Port;
            WebProxy myProxy;

            if (proxyIp.Contains(":"))
            {
                // The IP field carries "host:port"; only the first two segments are used.
                string[] plist = proxyIp.Split(':');
                myProxy = new WebProxy(plist[0].Trim(), Convert.ToInt32(plist[1].Trim()));
            }
            else if (!string.IsNullOrEmpty(proxyPort))
            {
                myProxy = new WebProxy(proxyIp, Convert.ToInt32(proxyPort));
            }
            else
            {
                // No port anywhere: treat the IP as a complete proxy address, without local bypass.
                myProxy = new WebProxy(proxyIp, false);
            }

            // Attach credentials uniformly: only when a user name was actually supplied.
            if (!string.IsNullOrEmpty(ipProxy.UserName))
            {
                myProxy.Credentials = new NetworkCredential(ipProxy.UserName, ipProxy.PassWord);
            }

            // Hand the configured proxy to the request.
            request.Proxy = myProxy;
        }
Пример #3
0
        /// <summary>
        /// Worker loop of one crawler thread. Repeatedly takes a URL from <c>UrlQueue</c>, downloads
        /// it, parses its content and links, and raises <c>DataReceivedEvent</c> on success or
        /// <c>CrawlErrorEvent</c> on failure. The loop ends when the queue is empty and every
        /// entry in <c>threadStatus</c> is flagged idle.
        /// </summary>
        /// <param name="threadIndex">
        /// The boxed <see cref="int"/> index of this worker's slot in <c>threadStatus</c>.
        /// </param>
        private void CrawlProcess_Abort(object threadIndex)
        {
            var currentThreadIndex = (int)threadIndex;

            while (true)
            {
                // Use the queue length and the idle flags of all workers to decide
                // whether this thread sleeps or exits.
                if (UrlQueue.Instance.Count == 0)
                {
                    this.threadStatus[currentThreadIndex] = true;
                    if (!this.threadStatus.Any(t => t == false))
                    {
                        // Queue is empty and every worker is idle: nothing can add more work.
                        break;
                    }

                    Thread.Sleep(2000);
                    continue;
                }

                this.threadStatus[currentThreadIndex] = false;

                // Another worker may have drained the queue since the first check.
                if (UrlQueue.Instance.Count == 0)
                {
                    continue;
                }

                UrlInfo urlInfo = UrlQueue.Instance.DeQueue();
                if (urlInfo == null)
                {
                    continue;
                }

                HttpWebRequest  request    = null;
                HttpWebResponse response   = null;
                IPProxy         curIPProxy = null;
                try
                {
                    // Automatic throttling: random pause of 1000-4999 ms.
                    if (this.Settings.AutoSpeedLimit)
                    {
                        Thread.Sleep(this.random.Next(1000, 5000));
                    }

                    // Create and configure the web request; ConfigRequest returns the proxy in use.
                    request    = WebRequest.Create(urlInfo.UrlString) as HttpWebRequest;
                    curIPProxy = this.ConfigRequest(request);

                    if (request != null)
                    {
                        response = request.GetResponse() as HttpWebResponse;
                    }

                    if (response != null)
                    {
                        this.PersistenceCookie(response);

                        Stream stream = null;

                        // Decompress gzip-encoded bodies. Content codings are case-insensitive,
                        // so "GZIP" must be accepted as well.
                        if (string.Equals(response.ContentEncoding, "gzip", StringComparison.OrdinalIgnoreCase))
                        {
                            Stream responseStream = response.GetResponseStream();
                            if (responseStream != null)
                            {
                                stream = new GZipStream(responseStream, CompressionMode.Decompress);
                            }
                        }
                        else
                        {
                            stream = response.GetResponseStream();
                        }

                        // The using statement closes the stream (a null stream is tolerated).
                        using (stream)
                        {
                            string html = this.ParseContent(stream, response.CharacterSet);

                            this.ParseLinks(urlInfo, html);

                            // Copy the delegate first so an unsubscribe on another thread cannot
                            // turn the invocation into a NullReferenceException.
                            var dataReceived = this.DataReceivedEvent;
                            if (dataReceived != null)
                            {
                                dataReceived(
                                    new DataReceivedEventArgs
                                    {
                                        Url    = urlInfo.UrlString,
                                        Depth  = urlInfo.Depth,
                                        Html   = html,
                                        IpProx = curIPProxy
                                    });
                            }
                        }
                    }
                }
                catch (WebException webEx)
                {
                    // Timeouts, protocol errors and "remote server returned an error" / "gateway"
                    // messages request a proxy change.
                    bool needChangeIp = webEx.Status == WebExceptionStatus.Timeout
                                        || webEx.Status == WebExceptionStatus.ProtocolError
                                        || webEx.Message.Contains("远程服务器返回错误")
                                        || webEx.Message.Contains("网关");
                    this.RaiseCrawlError(urlInfo, webEx, curIPProxy, needChangeIp);
                }
                catch (Exception exception)
                {
                    // "Timeout" / "remote server returned an error" messages request a proxy change.
                    bool needChangeIp = exception.Message.Contains("超时")
                                        || exception.Message.Contains("远程服务器返回错误");
                    this.RaiseCrawlError(urlInfo, exception, curIPProxy, needChangeIp);
                }
                finally
                {
                    if (request != null)
                    {
                        request.Abort();
                    }

                    if (response != null)
                    {
                        response.Close();
                    }
                }
            }
        }

        /// <summary>
        /// Builds the error arguments for a failed download and passes them to the
        /// <c>CrawlErrorEvent</c> subscribers, if there are any. A retry is always requested.
        /// </summary>
        /// <param name="urlInfo">The URL that failed; must not be null.</param>
        /// <param name="exception">The exception that was caught.</param>
        /// <param name="ipProxy">The proxy used for the request, or null if none was obtained.</param>
        /// <param name="needChangeIp">Whether the failure should request a proxy change.</param>
        private void RaiseCrawlError(UrlInfo urlInfo, Exception exception, IPProxy ipProxy, bool needChangeIp)
        {
            var errorArgs = new CrawlErrorEventArgs
            {
                Url       = urlInfo.UrlString,
                Depth     = urlInfo.Depth,
                Exception = exception,
                IpProx    = ipProxy
            };

            // Only ever set the flag to true so the type's own default is kept otherwise.
            if (needChangeIp)
            {
                errorArgs.needChangeIp = true;
            }

            errorArgs.needTryAgain = true;

            // Local copy guards against a concurrent unsubscribe between check and invocation.
            var crawlError = this.CrawlErrorEvent;
            if (crawlError != null)
            {
                crawlError(errorArgs);
            }
        }