예제 #1
0
        /// <summary>
        /// Looks up enterprise information on qichacha.com by enterprise name.
        /// </summary>
        /// <param name="name">
        /// Enterprise name (search keyword). It is sent as the <c>key</c> field of both the
        /// query string and the POST body; <c>null</c> is treated as an empty keyword.
        /// </param>
        /// <returns>The result produced by <c>GetPostDataKeyWordEnhence</c> for the search request.</returns>
        public HttpResult GetEnterpriseInfoByName(string name)
        {
            // Percent-encode the keyword so that characters such as '&', '=', '+' or spaces
            // cannot break the query string or the form-encoded POST body.
            var encodedName = System.Uri.EscapeDataString(name ?? string.Empty);

            var searchUrl = string.Format("https://www.qichacha.com/gongsi_getList?key={0}", encodedName);
            var urlInfo   = new UrlInfo(searchUrl)
            {
                Depth    = 1,
                PostData = string.Format("key={0}&type=undefined", encodedName)
            };

            return GetPostDataKeyWordEnhence(urlInfo);
        }
예제 #2
0
        /// <summary>
        /// Applies crawler-specific rewrites (user id, request token, authorization code,
        /// re-signed URL) to a URL descriptor before it is requested.
        /// </summary>
        /// <param name="urlInfo">The URL descriptor to adjust; it is modified in place.</param>
        /// <returns>The same <paramref name="urlInfo"/> instance after the rewrites.</returns>
        private UrlInfo UrlInfoFix(UrlInfo urlInfo)
        {
            #region URL rewriting
            if (Settings.LandFangIUserId != 0)
            {
                var landFangHelper = new LandFangAppHelper();
                urlInfo.UrlString = landFangHelper.FixIUserIdUrl(urlInfo.UrlString, Settings.LandFangIUserId.ToString());
            }

            switch (Settings.CrawlerClassName)
            {
            case "WenShuAPPCrawler":
                // Without a POST body there is nothing to patch (and Replace would fail on null).
                if (string.IsNullOrEmpty(urlInfo.PostData))
                {
                    break;
                }

                var reqToken = Toolslib.Str.Sub(urlInfo.PostData, "reqtoken\": \"", "\",");
                if (string.IsNullOrEmpty(reqToken))
                {
                    // No token found in the body: fall back to the configured access token.
                    reqToken = Settings.AccessToken;
                }

                // string.Replace throws when the search value is null or empty, so only
                // substitute when there is an actual token to look for.
                if (!string.IsNullOrEmpty(reqToken))
                {
                    urlInfo.PostData = urlInfo.PostData.Replace(reqToken, WenShuAppHelper.GetRequestToken());
                }
                break;

            case "HuiCongMaterial":
                var huiCongAppHelper  = new HuiCongAppHelper();
                var authorizationCode = huiCongAppHelper.GetHuiCongAuthorizationCode(urlInfo.UrlString);
                if (authorizationCode != urlInfo.Authorization)
                {
                    urlInfo.Authorization = authorizationCode;
                }
                break;

            case "JGJApp":
                var jgjAppHelper = new JGJAppHelper();
                urlInfo.UrlString = jgjAppHelper.FixJGJUrl(urlInfo.UrlString);
                break;
            }

            return urlInfo;

            #endregion
        }
예제 #3
0
        /// <summary>
        /// Builds a new URL descriptor whose <c>timestamp</c> and <c>sign</c> query values are
        /// replaced by freshly computed ones.
        /// </summary>
        /// <param name="urlInfo">Descriptor whose <c>UrlString</c> and <c>Depth</c> are read.</param>
        /// <returns>A new <c>UrlInfo</c> carrying the rewritten URL and the original depth.</returns>
        public UrlInfo FixJGJUrl(UrlInfo urlInfo)
        {
            // The timestamp is taken 320 seconds ahead of the local clock and the
            // signature is derived from KEY followed by that timestamp.
            var freshTimestamp = ConvertDateTimeInt(DateTime.Now.AddSeconds(320)).ToString();
            var freshSign      = SHA1_Encrypt(KEY + freshTimestamp);

            var rewrittenUrl = urlInfo.UrlString;
            var oldTimestamp = GetUrlParam(rewrittenUrl, "timestamp");
            var oldSign      = GetUrlParam(rewrittenUrl, "sign");

            // Replace substitutes every occurrence of the old value found in the URL.
            bool timestampIsStale = !string.IsNullOrEmpty(oldTimestamp) && oldTimestamp != freshTimestamp;
            if (timestampIsStale)
            {
                rewrittenUrl = rewrittenUrl.Replace(oldTimestamp, freshTimestamp);
            }

            bool signIsStale = !string.IsNullOrEmpty(oldSign) && oldSign != freshSign;
            if (signIsStale)
            {
                rewrittenUrl = rewrittenUrl.Replace(oldSign, freshSign);
            }

            var fixedInfo = new UrlInfo(rewrittenUrl);
            fixedInfo.UrlString = rewrittenUrl;
            fixedInfo.Depth     = urlInfo.Depth;
            return fixedInfo;
        }
예제 #4
0
        /// <summary>
        /// Sends a form-encoded POST to the URL described by <paramref name="curUrlObj"/>, using the
        /// shared cookie and a desktop Chrome user agent, and returns the raw result.
        /// </summary>
        /// <param name="curUrlObj">Supplies the target URL (<c>UrlString</c>) and the POST body (<c>PostData</c>).</param>
        /// <param name="refer">
        /// Optional Referer header value. When null or empty, <c>https://www.qichacha.com/</c> is sent.
        /// </param>
        /// <param name="useProxy">
        /// Currently unused: this method does not assign a proxy to the request.
        /// </param>
        /// <returns>The <c>HttpResult</c> returned by <c>http.GetHtml</c>.</returns>
        public HttpResult GetPostDataKeyWordEnhence(UrlInfo curUrlObj, string refer = "", bool useProxy = true)
        {
            // Honour the caller-supplied referer; fall back to the site home page otherwise.
            string referer = string.IsNullOrEmpty(refer) ? "https://www.qichacha.com/" : refer;

            HttpItem item = new HttpItem()
            {
                URL         = curUrlObj.UrlString,
                ContentType = "application/x-www-form-urlencoded; charset=UTF-8",
                Timeout     = 1500, // NOTE(review): presumably milliseconds - confirm against HttpItem
                Accept      = "*/*",
                Encoding    = null, // per the original note, null leaves response-encoding detection to the helper
                Method      = "post",
                UserAgent   = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36",
                Referer     = referer,
                Postdata    = curUrlObj.PostData,
                Allowautoredirect = true,
                Cookie      = globalCookie,
                KeepAlive   = true,
            };

            // The POST body is encoded as UTF-8.
            item.PostEncoding = System.Text.Encoding.GetEncoding("utf-8");

            return http.GetHtml(item);
        }
예제 #5
0
        /// <summary>
        /// Extracts the anchor links from a page and enqueues the ones that pass the
        /// configured filters, one level deeper than the page they were found on.
        /// </summary>
        /// <param name="urlInfo">
        /// The page the HTML was fetched from; its URL is the base for relative links and its
        /// depth determines the depth of the enqueued links.
        /// </param>
        /// <param name="html">
        /// The page markup. A null or empty value results in no links being enqueued.
        /// </param>
        private void ParseLinks(UrlInfo urlInfo, string html)
        {
            // Stop once the configured maximum depth has been reached (0 or less = unlimited).
            if (this.Settings.Depth > 0 && urlInfo.Depth >= this.Settings.Depth)
            {
                return;
            }

            // Regex.Match throws on null input; an empty page has no links anyway.
            if (string.IsNullOrEmpty(html))
            {
                return;
            }

            // href -> link text; a later anchor with the same href overwrites the earlier text.
            var urlDictionary = new Dictionary<string, string>();

            Match match = Regex.Match(html, "(?i)<a .*?href=\"([^\"]+)\"[^>]*>(.*?)</a>");
            while (match.Success)
            {
                string urlKey   = match.Groups[1].Value;
                string urlValue = Regex.Replace(match.Groups[2].Value, "(?i)<.*?>", string.Empty);

                urlDictionary[urlKey] = urlValue;
                match = match.NextMatch();
            }

            // Host name with its first label removed (mail.pzcast.com -> pzcast.com).
            // A single-label host such as "localhost" is returned unchanged instead of failing.
            Func<string, string> parentDomain = host =>
            {
                string[] labels = host.Split('.');
                return labels.Length > 1 ? string.Join(".", labels, 1, labels.Length - 1) : host;
            };

            // Created on first use so an invalid page URL only fails when a link needs resolving.
            Uri baseUri = null;

            foreach (var item in urlDictionary)
            {
                string href = item.Key;
                string text = item.Value;

                if (string.IsNullOrEmpty(href))
                {
                    continue;
                }

                // Skip links ending with an excluded suffix.
                if (this.Settings.EscapeLinks != null && this.Settings.EscapeLinks.Count > 0 &&
                    this.Settings.EscapeLinks.Any(suffix => href.EndsWith(suffix, StringComparison.OrdinalIgnoreCase)))
                {
                    continue;
                }

                // When keywords are configured, the href must contain at least one of them.
                if (this.Settings.HrefKeywords != null && this.Settings.HrefKeywords.Count > 0 &&
                    !this.Settings.HrefKeywords.Any(href.Contains))
                {
                    continue;
                }

                string url = href.Replace("%3f", "?")
                             .Replace("%3d", "=")
                             .Replace("%2f", "/")
                             .Replace("&amp;", "&");

                if (string.IsNullOrEmpty(url) || url.StartsWith("#", StringComparison.Ordinal) ||
                    url.StartsWith("mailto:", StringComparison.OrdinalIgnoreCase) ||
                    url.StartsWith("javascript:", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                if (baseUri == null)
                {
                    baseUri = new Uri(urlInfo.UrlString);
                }

                // A malformed href must not abort processing of the remaining links.
                Uri  currentUri;
                bool resolved = url.StartsWith("http", StringComparison.OrdinalIgnoreCase)
                                    ? Uri.TryCreate(url, UriKind.Absolute, out currentUri)
                                    : Uri.TryCreate(baseUri, url, out currentUri);
                if (!resolved)
                {
                    continue;
                }

                url = currentUri.AbsoluteUri;

                // With LockHost, hosts are considered the same site when they are equal
                // after dropping the first label, e.g. mail.pzcast.com and www.pzcast.com.
                if (this.Settings.LockHost && parentDomain(baseUri.Host) != parentDomain(currentUri.Host))
                {
                    continue;
                }

                if (!this.IsMatchRegular(url))
                {
                    continue;
                }

                // Give subscribers the chance to veto the link (read the delegate once).
                var addUrlHandler = this.AddUrlEvent;
                if (addUrlHandler != null)
                {
                    var addUrlEventArgs = new AddUrlEventArgs
                    {
                        Title = text, Depth = urlInfo.Depth + 1, Url = url
                    };
                    if (!addUrlHandler(addUrlEventArgs))
                    {
                        continue;
                    }
                }

                UrlQueue.Instance.EnQueue(new UrlInfo(url)
                {
                    Depth = urlInfo.Depth + 1
                });
            }
        }
예제 #6
0
        /// <summary>
        /// Worker loop: takes URLs from the shared queue, downloads each page, extracts its
        /// links and raises <c>DataReceivedEvent</c>; failures are reported through
        /// <c>CrawlErrorEvent</c>. The loop ends when the queue is empty and no worker is
        /// flagged as busy.
        /// </summary>
        /// <param name="threadIndex">
        /// Boxed <c>int</c> index of this worker's slot in <c>threadStatus</c>.
        /// </param>
        private void CrawlProcess(object threadIndex)
        {
            var currentThreadIndex = (int)threadIndex;

            while (true)
            {
                // Empty queue: flag this worker as idle, then either exit (all workers idle)
                // or sleep and look again.
                if (UrlQueue.Instance.Count == 0)
                {
                    this.threadStatus[currentThreadIndex] = true;
                    if (!this.threadStatus.Any(t => t == false))
                    {
                        break;
                    }

                    Thread.Sleep(2000);
                    continue;
                }

                this.threadStatus[currentThreadIndex] = false;

                // The queue may have been drained since the first check.
                if (UrlQueue.Instance.Count == 0)
                {
                    continue;
                }

                UrlInfo urlInfo = UrlQueue.Instance.DeQueue();

                HttpWebRequest  request  = null;
                HttpWebResponse response = null;

                try
                {
                    if (urlInfo == null)
                    {
                        continue;
                    }

                    // Automatic throttling: random pause of 1000-4999 ms.
                    if (this.Settings.AutoSpeedLimit)
                    {
                        Thread.Sleep(this.random.Next(1000, 5000));
                    }

                    // Create and configure the web request.
                    request = WebRequest.Create(urlInfo.UrlString) as HttpWebRequest;
                    this.ConfigRequest(request);

                    if (request != null)
                    {
                        response = request.GetResponse() as HttpWebResponse;
                    }

                    if (response != null)
                    {
                        this.PersistenceCookie(response);

                        // Content-Encoding tokens are case-insensitive, so "GZIP" must also be decompressed.
                        bool isGzip = string.Equals(response.ContentEncoding, "gzip", StringComparison.OrdinalIgnoreCase);

                        Stream stream = response.GetResponseStream();
                        if (isGzip && stream != null)
                        {
                            stream = new GZipStream(stream, CompressionMode.Decompress);
                        }

                        // The using block disposes the stream; no explicit Close is needed.
                        using (stream)
                        {
                            string html = this.ParseContent(stream, response.CharacterSet);

                            this.ParseLinks(urlInfo, html);

                            // Read the delegate once so it cannot become null between check and call.
                            var dataHandler = this.DataReceivedEvent;
                            if (dataHandler != null)
                            {
                                dataHandler(
                                    new DataReceivedEventArgs
                                    {
                                        Url   = urlInfo.UrlString,
                                        Depth = urlInfo.Depth,
                                        Html  = html
                                    });
                            }
                        }
                    }
                }
                catch (Exception exception)
                {
                    var errorHandler = this.CrawlErrorEvent;
                    if (errorHandler != null && urlInfo != null)
                    {
                        errorHandler(
                            new CrawlErrorEventArgs
                            {
                                Url = urlInfo.UrlString, Exception = exception
                            });
                    }
                }
                finally
                {
                    if (request != null)
                    {
                        request.Abort();
                    }

                    if (response != null)
                    {
                        response.Close();
                    }
                }
            }
        }
예제 #7
0
        /// <summary>
        /// Extracts the anchor links from a page and enqueues the ones that pass the
        /// configured filters, one level deeper than the page they were found on.
        /// </summary>
        /// <param name="urlInfo">
        /// The page the HTML was fetched from; its URL is the base for relative links and its
        /// depth determines the depth of the enqueued links.
        /// </param>
        /// <param name="html">
        /// The page markup. A null or empty value results in no links being enqueued.
        /// </param>
        private void ParseLinks(UrlInfo urlInfo, string html)
        {
            // Stop once the configured maximum depth has been reached (0 or less = unlimited).
            if (this.Settings.Depth > 0 && urlInfo.Depth >= this.Settings.Depth)
            {
                return;
            }

            // html.Replace would fail on null; an empty page has no links anyway.
            if (string.IsNullOrEmpty(html))
            {
                return;
            }

            // href -> link text; a later anchor with the same href overwrites the earlier text.
            var urlDictionary = new Dictionary<string, string>();

            // Single quotes are normalised to double quotes before matching. The anchor body is
            // matched with ".*?" so that nested markup such as <a href="x"><span>123</span></a>
            // is captured too (fix dated 2016-05-24 in the original notes).
            Match match = Regex.Match(html.Replace("'", "\""), "(?i)<a .*?href=[\",']([^\"]+)[\",'][^>]*>" + @".*?</a>");

            while (match.Success)
            {
                string urlKey = match.Groups[1].Value;

                // Link text = the whole matched anchor with every tag stripped.
                string urlValue = Regex.Replace(match.Groups[0].Value, "(?i)<.*?>", string.Empty);

                urlDictionary[urlKey] = urlValue;
                match = match.NextMatch();
            }

            // Host name with its first label removed (mail.pzcast.com -> pzcast.com).
            // A single-label host such as "localhost" is returned unchanged instead of failing.
            Func<string, string> parentDomain = host =>
            {
                string[] labels = host.Split('.');
                return labels.Length > 1 ? string.Join(".", labels, 1, labels.Length - 1) : host;
            };

            foreach (var item in urlDictionary)
            {
                string href = item.Key;
                string text = item.Value;

                if (string.IsNullOrEmpty(href))
                {
                    continue;
                }

                // Skip links ending with an excluded suffix.
                if (this.Settings.EscapeLinks != null && this.Settings.EscapeLinks.Count > 0 &&
                    this.Settings.EscapeLinks.Any(suffix => href.EndsWith(suffix, StringComparison.OrdinalIgnoreCase)))
                {
                    continue;
                }

                // When keywords are configured, the href must contain at least one of them.
                if (this.Settings.HrefKeywords != null && this.Settings.HrefKeywords.Count > 0 &&
                    !this.Settings.HrefKeywords.Any(href.Contains))
                {
                    continue;
                }

                string url = href.Replace("%3f", "?")
                             .Replace("%3d", "=")
                             .Replace("%2f", "/")
                             .Replace("&amp;", "&");

                if (string.IsNullOrEmpty(url) || url.StartsWith("#", StringComparison.Ordinal) ||
                    url.StartsWith("mailto:", StringComparison.OrdinalIgnoreCase) ||
                    url.StartsWith("javascript:", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                // Links (or a page URL) that cannot be parsed are skipped without throwing.
                Uri baseUri;
                Uri currentUri;
                if (!Uri.TryCreate(urlInfo.UrlString, UriKind.Absolute, out baseUri))
                {
                    continue;
                }

                bool resolved = url.StartsWith("http", StringComparison.OrdinalIgnoreCase)
                                    ? Uri.TryCreate(url, UriKind.Absolute, out currentUri)
                                    : Uri.TryCreate(baseUri, url, out currentUri);
                if (!resolved)
                {
                    continue;
                }

                url = currentUri.AbsoluteUri;

                // With LockHost, hosts are considered the same site when they are equal
                // after dropping the first label, e.g. mail.pzcast.com and www.pzcast.com.
                if (this.Settings.LockHost && parentDomain(baseUri.Host) != parentDomain(currentUri.Host))
                {
                    continue;
                }

                if (!this.IsMatchRegular(url))
                {
                    continue;
                }

                // Give subscribers the chance to veto the link (read the delegate once).
                var addUrlHandler = this.AddUrlEvent;
                if (addUrlHandler != null)
                {
                    var addUrlEventArgs = new AddUrlEventArgs
                    {
                        Title = text, Depth = urlInfo.Depth + 1, Url = url
                    };
                    if (!addUrlHandler(addUrlEventArgs))
                    {
                        continue;
                    }
                }

                UrlQueue.Instance.EnQueue(new UrlInfo(url)
                {
                    Depth = urlInfo.Depth + 1
                });
            }
        }
예제 #8
0
        /// <summary>
        /// Downloads the page described by <paramref name="urlInfo"/> through <c>HttpHelper</c>,
        /// applying the crawler settings (proxy, cookies, headers, encodings) to the request.
        /// </summary>
        /// <param name="urlInfo">
        /// The URL to fetch. It is first passed through <c>UrlInfoFix</c>; a non-empty
        /// <c>PostData</c> switches the request from GET to POST.
        /// </param>
        /// <returns>The <c>Html</c> of the result returned by <c>HttpHelper.GetHtml</c>.</returns>
        private string GetHttpResult(UrlInfo urlInfo)
        {
            urlInfo = UrlInfoFix(urlInfo);

            HttpHelper http = new HttpHelper();
            HttpItem   item = new HttpItem()
            {
                URL         = urlInfo.UrlString,
                Method      = "get",
                ContentType = "text/html",
                Timeout     = this.Settings.Timeout,
                UserAgent   = this.Settings.UserAgent,
            };

            // A request body turns the call into a POST.
            if (!string.IsNullOrEmpty(urlInfo.PostData))
            {
                item.Method   = "post";
                item.Postdata = urlInfo.PostData;
            }

            // An explicitly configured proxy wins; otherwise use one supplied by the settings, if any.
            if (Settings.CurWebProxy != null)
            {
                item.WebProxy = Settings.CurWebProxy;
            }
            else
            {
                var curIPProxy = Settings.GetIPProxy();
                if (curIPProxy != null)
                {
                    item.ProxyIp = curIPProxy.IP;
                }
            }

            // Optional overrides taken from the crawler settings.
            if (!string.IsNullOrEmpty(this.Settings.SimulateCookies))
            {
                item.Cookie = this.Settings.SimulateCookies;
            }
            if (!string.IsNullOrEmpty(this.Settings.ContentType))
            {
                item.ContentType = this.Settings.ContentType;
            }
            if (!string.IsNullOrEmpty(this.Settings.Referer))
            {
                item.Referer = this.Settings.Referer;
            }
            if (this.Settings.PostEncoding != null)
            {
                item.PostEncoding = this.Settings.PostEncoding;
            }
            if (!string.IsNullOrEmpty(this.Settings.Accept))
            {
                item.Accept = this.Settings.Accept;
            }
            if (!string.IsNullOrEmpty(urlInfo.Authorization))
            {
                item.Header.Add("Authorization", urlInfo.Authorization);
            }

            // Extra headers are best effort: a failure is logged and the request is still sent.
            try
            {
                if (Settings.HeadSetDic != null)
                {
                    foreach (var key in Settings.HeadSetDic.Keys)
                    {
                        item.Header.Add(key, Settings.HeadSetDic[key]);
                    }
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine("GetHttpResult:" + ex.Message);
            }

            var result = http.GetHtml(item);

            return result.Html;
        }
예제 #9
0
        /// <summary>
        /// Worker loop: takes URLs from the shared queue, downloads each page with the fetcher
        /// selected by <c>Settings.CrawlMode</c>, extracts its links and raises
        /// <c>DataReceivedEvent</c>; failures are reported through <c>CrawlErrorEvent</c> with
        /// retry / change-IP hints. The loop ends when the queue is empty and no worker is
        /// flagged as busy.
        /// </summary>
        /// <param name="threadIndex">
        /// Boxed <c>int</c> index of this worker's slot in <c>threadStatus</c>.
        /// </param>
        private void CrawlProcess(object threadIndex)
        {
            var currentThreadIndex = (int)threadIndex;

            while (true)
            {
                // Empty queue: flag this worker as idle, then either exit (all workers idle)
                // or sleep and look again.
                if (UrlQueue.Instance.Count == 0)
                {
                    this.threadStatus[currentThreadIndex] = true;
                    if (!this.threadStatus.Any(t => t == false))
                    {
                        break;
                    }

                    Thread.Sleep(2000);
                    continue;
                }

                this.threadStatus[currentThreadIndex] = false;

                // The queue may have been drained since the first check.
                if (UrlQueue.Instance.Count == 0)
                {
                    continue;
                }

                UrlInfo urlInfo = UrlQueue.Instance.DeQueue();

                var curIPProxy = Settings.GetIPProxy();
                try
                {
                    if (urlInfo == null)
                    {
                        continue;
                    }

                    // Automatic throttling: random pause, 1000-5000 ms unless the settings ask for more.
                    if (this.Settings.AutoSpeedLimit)
                    {
                        try
                        {
                            var minDelay = 1000;
                            var maxDelay = 5000;
                            if (this.Settings.AutoSpeedLimitMinMSecond >= minDelay)
                            {
                                minDelay = this.Settings.AutoSpeedLimitMinMSecond;
                            }
                            if (this.Settings.AutoSpeedLimitMaxMSecond >= maxDelay)
                            {
                                maxDelay = this.Settings.AutoSpeedLimitMaxMSecond;
                            }

                            // Random.Next throws when min > max (e.g. a configured minimum above
                            // the effective maximum), so clamp the upper bound.
                            if (maxDelay < minDelay)
                            {
                                maxDelay = minDelay;
                            }

                            Thread.Sleep(this.random.Next(minDelay, maxDelay));
                        }
                        catch (Exception ex)
                        {
                            // Keep the original exception as InnerException so its stack trace is not lost.
                            throw new Exception("AutoSpeedLimit" + ex.Message, ex);
                        }
                    }

                    string html = string.Empty;
                    switch (Settings.CrawlMode)
                    {
                    case EnumCrawlMode.PhantomJsViaSelenium:
                        html = GetPhantomJsResult(urlInfo);
                        break;

                    case EnumCrawlMode.HttpHelper:
                    case EnumCrawlMode.SuperWebClient:
                        html = Settings.UseSuperWebClient
                                   ? GetSupperHttpResult(urlInfo)
                                   : GetHttpResult(urlInfo);
                        break;
                    }

                    if (!string.IsNullOrEmpty(html))
                    {
                        this.ParseLinks(urlInfo, html);
                    }

                    // Subscribers are notified even when the page came back empty.
                    var dataHandler = this.DataReceivedEvent;
                    if (dataHandler != null)
                    {
                        dataHandler(
                            new DataReceivedEventArgs
                            {
                                Url     = urlInfo.UrlString,
                                Depth   = urlInfo.Depth,
                                Html    = html,
                                IpProx  = curIPProxy,
                                urlInfo = urlInfo
                            });
                    }
                }
                catch (WebException webEx)
                {
                    var errorHandler = this.CrawlErrorEvent;
                    if (errorHandler != null && urlInfo != null)
                    {
                        var ev = new CrawlErrorEventArgs
                        {
                            Url       = urlInfo.UrlString,
                            Depth     = urlInfo.Depth,
                            Exception = webEx,
                            IpProx    = curIPProxy,
                            urlInfo   = urlInfo
                        };

                        // Timeouts, protocol errors and gateway / remote-server errors suggest switching proxy.
                        if (webEx.Status == WebExceptionStatus.Timeout || webEx.Status == WebExceptionStatus.ProtocolError || webEx.Message.Contains("远程服务器返回错误") || webEx.Message.Contains("网关"))
                        {
                            ev.needChangeIp = true;
                        }
                        ev.needTryAgain = true;

                        errorHandler(ev);
                    }
                }
                catch (Exception exception)
                {
                    var errorHandler = this.CrawlErrorEvent;
                    if (errorHandler != null && urlInfo != null)
                    {
                        var errorEV = new CrawlErrorEventArgs
                        {
                            Url       = urlInfo.UrlString,
                            Depth     = urlInfo.Depth,
                            Exception = exception,
                            IpProx    = curIPProxy,
                            urlInfo   = urlInfo
                        };

                        // Timeout / remote-server error messages suggest switching proxy.
                        if (exception.Message.Contains("超时") || exception.Message.Contains("远程服务器返回错误"))
                        {
                            errorEV.needChangeIp = true;
                        }
                        errorEV.needTryAgain = true;

                        errorHandler(errorEV);
                    }
                }
            }
        }
예제 #10
0
        /// <summary>
        /// Worker loop based on <c>HttpWebRequest</c>: takes URLs from the shared queue, downloads
        /// each page, extracts its links and raises <c>DataReceivedEvent</c>; failures are reported
        /// through <c>CrawlErrorEvent</c> with retry / change-IP hints. The loop ends when the
        /// queue is empty and no worker is flagged as busy.
        /// </summary>
        /// <param name="threadIndex">
        /// Boxed <c>int</c> index of this worker's slot in <c>threadStatus</c>.
        /// </param>
        private void CrawlProcess_Abort(object threadIndex)
        {
            var currentThreadIndex = (int)threadIndex;

            while (true)
            {
                // Empty queue: flag this worker as idle, then either exit (all workers idle)
                // or sleep and look again.
                if (UrlQueue.Instance.Count == 0)
                {
                    this.threadStatus[currentThreadIndex] = true;
                    if (!this.threadStatus.Any(t => t == false))
                    {
                        break;
                    }

                    Thread.Sleep(2000);
                    continue;
                }

                this.threadStatus[currentThreadIndex] = false;

                // The queue may have been drained since the first check.
                if (UrlQueue.Instance.Count == 0)
                {
                    continue;
                }

                UrlInfo urlInfo = UrlQueue.Instance.DeQueue();

                HttpWebRequest  request    = null;
                HttpWebResponse response   = null;
                IPProxy         curIPProxy = null;
                try
                {
                    if (urlInfo == null)
                    {
                        continue;
                    }

                    // Automatic throttling: random pause of 1000-4999 ms.
                    if (this.Settings.AutoSpeedLimit)
                    {
                        Thread.Sleep(this.random.Next(1000, 5000));
                    }

                    // Create and configure the web request; ConfigRequest returns the proxy in use.
                    request    = WebRequest.Create(urlInfo.UrlString) as HttpWebRequest;
                    curIPProxy = this.ConfigRequest(request);

                    if (request != null)
                    {
                        response = request.GetResponse() as HttpWebResponse;
                    }

                    if (response != null)
                    {
                        this.PersistenceCookie(response);

                        // Content-Encoding tokens are case-insensitive, so "GZIP" must also be decompressed.
                        bool isGzip = string.Equals(response.ContentEncoding, "gzip", StringComparison.OrdinalIgnoreCase);

                        Stream stream = response.GetResponseStream();
                        if (isGzip && stream != null)
                        {
                            stream = new GZipStream(stream, CompressionMode.Decompress);
                        }

                        // The using block disposes the stream; no explicit Close is needed.
                        using (stream)
                        {
                            string html = this.ParseContent(stream, response.CharacterSet);

                            this.ParseLinks(urlInfo, html);

                            // Read the delegate once so it cannot become null between check and call.
                            var dataHandler = this.DataReceivedEvent;
                            if (dataHandler != null)
                            {
                                dataHandler(
                                    new DataReceivedEventArgs
                                    {
                                        Url    = urlInfo.UrlString,
                                        Depth  = urlInfo.Depth,
                                        Html   = html,
                                        IpProx = curIPProxy
                                    });
                            }
                        }
                    }
                }
                catch (WebException webEx)
                {
                    var errorHandler = this.CrawlErrorEvent;
                    if (errorHandler != null && urlInfo != null)
                    {
                        var ev = new CrawlErrorEventArgs
                        {
                            Url       = urlInfo.UrlString,
                            Depth     = urlInfo.Depth,
                            Exception = webEx,
                            IpProx    = curIPProxy
                        };

                        // Timeouts, protocol errors and gateway / remote-server errors suggest switching proxy.
                        if (webEx.Status == WebExceptionStatus.Timeout || webEx.Status == WebExceptionStatus.ProtocolError || webEx.Message.Contains("远程服务器返回错误") || webEx.Message.Contains("网关"))
                        {
                            ev.needChangeIp = true;
                        }
                        ev.needTryAgain = true;

                        errorHandler(ev);
                    }
                }
                catch (Exception exception)
                {
                    var errorHandler = this.CrawlErrorEvent;
                    if (errorHandler != null && urlInfo != null)
                    {
                        var errorEV = new CrawlErrorEventArgs
                        {
                            Url       = urlInfo.UrlString,
                            Depth     = urlInfo.Depth,
                            Exception = exception,
                            IpProx    = curIPProxy
                        };

                        // Timeout / remote-server error messages suggest switching proxy.
                        if (exception.Message.Contains("超时") || exception.Message.Contains("远程服务器返回错误"))
                        {
                            errorEV.needChangeIp = true;
                        }
                        errorEV.needTryAgain = true;

                        errorHandler(errorEV);
                    }
                }
                finally
                {
                    if (request != null)
                    {
                        request.Abort();
                    }

                    if (response != null)
                    {
                        response.Close();
                    }
                }
            }
        }