コード例 #1
0
        /// <summary>
        /// Downloads a page through a proxy, retrying when an attempt throws.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <returns>
        /// The value returned by <c>HttpHelper.DownloadHtml</c> for the first attempt that
        /// does not throw, or an empty string when every attempt throws.
        /// </returns>
        private string DownloadWithProxy(string url)
        {
            const int maxAttempts = 3;

            for (var attempt = 0; attempt < maxAttempts; attempt++)
            {
                // The try/catch lives inside the loop so that a failed attempt
                // is followed by the next one instead of ending the method.
                try
                {
                    WebProxy webProxy = null;

                    // A proxy is requested for every attempt; when the service
                    // returns none, webProxy stays null and is passed on as such.
                    var   service = new ProxyService();
                    Proxy proxy   = service.GetProxy();
                    if (proxy != null)
                    {
                        webProxy = new WebProxy(proxy.Adress + ":" + proxy.Port);
                    }

                    return HttpHelper.DownloadHtml(url, webProxy, TimeOut);
                }
                catch (Exception)
                {
                    // Best effort: ignore this failure and fall through to the next attempt.
                }
            }

            // Every attempt threw; callers receive an empty string, as before.
            return "";
        }
コード例 #2
0
ファイル: CrawlXiciDaili.cs プロジェクト: male110/ProxyPool
        /// <summary>
        /// Crawls the xicidaili proxy list pages, extracts one <c>Proxy</c> per table row
        /// and hands each page's proxies to <c>VerifyAndSave</c>.
        /// </summary>
        /// <remarks>
        /// Any exception raised while crawling is logged and not rethrown.
        /// </remarks>
        public override void Start()
        {
            LogHelper.LogMsg("》》》开始抓取西刺代理");
            try
            {
                List <string> list = new List <string>()
                {
                    "http://www.xicidaili.com/nt/",
                    "http://www.xicidaili.com/nn/",
                    "http://www.xicidaili.com/wn/",
                    "http://www.xicidaili.com/wt/"
                };

                foreach (var url in list)
                {
                    string html = DownloadProxyPage(url);
                    if (!CheckHtml(html, url, "西刺代理"))
                    {
                        continue;
                    }
                    HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
                    doc.LoadHtml(html);
                    HtmlNode node = doc.DocumentNode;

                    // Get the total number of pages for this list.
                    var totalPage = GetTotalPage(node);
                    for (var i = 1; i <= totalPage; i++)
                    {
                        var pageUrl = url + i;
                        // The first page was already downloaded above, so only fetch pages 2 and up.
                        if (i != 1)
                        {
                            html = DownloadProxyPage(pageUrl);
                            if (!CheckHtml(html, pageUrl, "西刺代理"))
                            {
                                // NOTE(review): this leaves the method entirely, skipping the
                                // remaining list URLs and the completion log below. Kept as in
                                // the original; confirm whether `break` was intended.
                                return;
                            }
                            doc.LoadHtml(html);
                            node = doc.DocumentNode;
                        }
                        string             xpathstring = "//table[@id='ip_list']/tr[@class]";
                        HtmlNodeCollection collection  = node.SelectNodes(xpathstring);
                        if (collection == null)
                        {
                            LogHelper.LogError("无获取西刺代理ip,请检查网站结构是否有改动");
                            continue;
                        }
                        var proxyList = new List <Proxy>();
                        // List<T> is not safe for concurrent writers, so every Add from
                        // the parallel loop below is serialized through this gate.
                        var proxyListGate = new object();
                        // Extract the proxies from the table rows.
                        Parallel.ForEach(collection, item =>
                        {
                            HtmlNode addressCell = item.SelectSingleNode("td[2]");
                            if (addressCell == null)
                            {
                                // Skip a row without an address cell instead of throwing,
                                // which would abort the whole crawl via the outer catch.
                                LogHelper.LogError("西刺代理,取IP地址时出错:" + item.InnerHtml);
                                return;
                            }

                            HtmlNode portCell = item.SelectSingleNode("td[3]");
                            int port          = 0;
                            if (portCell == null || !int.TryParse(portCell.InnerHtml.Trim(), out port))
                            {
                                LogHelper.LogError("西刺代理,取端口号时出错:" + item.InnerHtml);
                                return;
                            }

                            Proxy proxy  = new Proxy();
                            proxy.Adress = addressCell.InnerHtml.Trim();
                            proxy.Port   = port;
                            proxy.Source = pageUrl;
                            lock (proxyListGate)
                            {
                                proxyList.Add(proxy);
                            }
                        });
                        VerifyAndSave(proxyList);
                    }
                }
            }
            catch (Exception ex)
            {
                LogHelper.LogError("抓取西刺代理时出错:" + ex);
            }
            LogHelper.LogMsg("《《《西刺代理抓取完成");
        }