Beispiel #1
0
        /// <summary>
        /// Downloads and parses every listing page in the inclusive page range
        /// <c>start</c>..<c>end</c> and appends one <c>IPProxy</c> per accepted table row
        /// to the result list supplied by the caller.
        /// </summary>
        /// <param name="param">
        /// A <c>Hashtable</c> describing the work item. Keys read here: <c>"start"</c> and
        /// <c>"end"</c> (page numbers, converted with <c>Convert.ToInt32</c>), <c>"list"</c>
        /// (the <c>List&lt;IPProxy&gt;</c> that receives the results) and <c>"param"</c>
        /// (a <c>ProxyParam</c> providing <c>IPUrl</c>, <c>ProxyIp</c> and <c>IsPingIp</c>).
        /// </param>
        /// <exception cref="ArgumentException">
        /// <paramref name="param"/> is not a <c>Hashtable</c>, or its <c>"list"</c> /
        /// <c>"param"</c> entries are missing or of the wrong type.
        /// </exception>
        private static void DoWork(object param)
        {
            // Unpack the work item and fail fast with a descriptive error instead of a
            // NullReferenceException somewhere in the middle of the run.
            Hashtable table = param as Hashtable;
            if (table == null)
            {
                throw new ArgumentException(
                    "DoWork expects a Hashtable with the keys \"start\", \"end\", \"list\" and \"param\".",
                    "param");
            }

            int start = Convert.ToInt32(table["start"]);
            int end = Convert.ToInt32(table["end"]);
            List<IPProxy> list = table["list"] as List<IPProxy>;
            ProxyParam proxyParam = table["param"] as ProxyParam;
            if (list == null || proxyParam == null)
            {
                throw new ArgumentException(
                    "The work item must contain a List<IPProxy> under \"list\" and a ProxyParam under \"param\".",
                    "param");
            }

            for (int page = start; page <= end; page++)
            {
                LogHelper.WriteLog(string.Format("开始解析,页码{0}~{1},当前页码{2}", start, end, page));

                // Page address: "<base url>/<page number>".
                string url = string.Format("{0}/{1}", proxyParam.IPUrl, page);
                string html = GetHTML(url, proxyParam.ProxyIp);

                // GetHTML is defined elsewhere; if it reports a failed download as null,
                // LoadHtml would throw and abort the remaining pages. Treat null like a
                // page without a result table instead.
                HtmlNodeCollection rows = null;
                if (html != null)
                {
                    var doc = new HtmlDocument();
                    doc.LoadHtml(html);
                    // All <tr> rows of the table with id "ip_list".
                    rows = doc.DocumentNode.SelectNodes(@"//table[@id='ip_list']/tr");
                }

                if (rows != null && rows.Count > 1)
                {
                    LogHelper.WriteLog(string.Format("当前页码{0},请求地址{1},共{2}条数据", page, url, rows.Count));

                    // Row 0 is skipped (presumably the table header - confirm against the page markup).
                    for (int rowIndex = 1; rowIndex < rows.Count; rowIndex++)
                    {
                        IPProxy item = ParseProxyRow(rows[rowIndex], proxyParam.IsPingIp);
                        if (item == null)
                        {
                            continue;
                        }

                        // NOTE(review): the object-typed parameter and the start/end partition
                        // suggest this runs as a thread / thread-pool callback with several
                        // workers sharing one list - confirm at the call site. List<T> does not
                        // support concurrent writers, so appends are serialized on its SyncRoot.
                        lock (((ICollection)list).SyncRoot)
                        {
                            list.Add(item);
                        }
                    }

                    LogHelper.WriteLog(string.Format("当前页码{0},共{1}条数据", page, rows.Count));
                }

                LogHelper.WriteLog(string.Format("结束解析,页码{0}~{1},当前页码{2}", start, end, page));
            }
        }

        /// <summary>
        /// Converts one table row into an <c>IPProxy</c>.
        /// </summary>
        /// <param name="row">A <c>tr</c> node of the listing table.</param>
        /// <param name="pingFirst">When true, the row is rejected unless <c>Ping</c> succeeds for its IP.</param>
        /// <returns>
        /// The populated item, or <c>null</c> when the row has fewer than ten <c>td</c> cells
        /// or the ping check fails.
        /// </returns>
        private static IPProxy ParseProxyRow(HtmlNode row, bool pingFirst)
        {
            HtmlNodeCollection cells = row.SelectNodes("td");
            if (cells == null || cells.Count <= 9)
            {
                return null;
            }

            string ip = cells[2].InnerText.Trim();
            if (pingFirst && !Ping(ip))
            {
                return null;
            }

            // Only rows whose IP passed the (optional) ping check get this far.
            IPProxy item = new IPProxy();

            // Country comes from the "alt" attribute of the first child of cell 1.
            string country = ReadTrimmedAttribute(cells[1].FirstChild, "alt");
            if (country != null)
            {
                item.Country = country;
            }

            item.IP = ip;
            item.Port = cells[3].InnerText.Trim();
            item.ProxyIp = GetIP(item.IP, item.Port);
            item.Position = cells[4].InnerText.Trim();
            item.Anonymity = cells[5].InnerText.Trim();
            item.Type = cells[6].InnerText.Trim();

            // Speed and connect time are read from the "title" attribute of the
            // <div class="bar"> element inside cells 7 and 8.
            string speed = ReadTrimmedAttribute(cells[7].SelectSingleNode("div[@class='bar']"), "title");
            if (speed != null)
            {
                item.Speed = speed;
            }

            string connectTime = ReadTrimmedAttribute(cells[8].SelectSingleNode("div[@class='bar']"), "title");
            if (connectTime != null)
            {
                item.ConnectTime = connectTime;
            }

            item.VerifyTime = cells[9].InnerText.Trim();
            return item;
        }

        /// <summary>
        /// Returns the trimmed value of attribute <paramref name="name"/> on
        /// <paramref name="node"/>, or <c>null</c> when the node or the attribute is absent.
        /// </summary>
        private static string ReadTrimmedAttribute(HtmlNode node, string name)
        {
            if (node == null)
            {
                return null;
            }

            HtmlAttribute attribute = node.Attributes[name];
            return attribute == null ? null : attribute.Value.Trim();
        }
Beispiel #2
0
        /// <summary>
        /// Downloads and parses every listing page in the inclusive page range
        /// <c>start</c>..<c>end</c> and appends one <c>IPProxy</c> per accepted table row
        /// to the result list supplied by the caller. Progress is written through
        /// <c>TaskLog.IpProxyLogInfo.WriteLogE</c>.
        /// </summary>
        /// <param name="param">
        /// A <c>Hashtable</c> describing the work item. Keys read here: <c>"start"</c> and
        /// <c>"end"</c> (page numbers, converted with <c>Convert.ToInt32</c>), <c>"list"</c>
        /// (the <c>List&lt;IPProxy&gt;</c> that receives the results) and <c>"param"</c>
        /// (a <c>ProxyParam</c> providing <c>IPUrl</c>, <c>ProxyIp</c> and <c>IsPingIp</c>).
        /// </param>
        /// <exception cref="ArgumentException">
        /// <paramref name="param"/> is not a <c>Hashtable</c>, or its <c>"list"</c> /
        /// <c>"param"</c> entries are missing or of the wrong type.
        /// </exception>
        private static void DoWork(object param)
        {
            // Unpack the work item and fail fast with a descriptive error instead of a
            // NullReferenceException somewhere in the middle of the run.
            Hashtable table = param as Hashtable;
            if (table == null)
            {
                throw new ArgumentException(
                    "DoWork expects a Hashtable with the keys \"start\", \"end\", \"list\" and \"param\".",
                    "param");
            }

            int start = Convert.ToInt32(table["start"]);
            int end = Convert.ToInt32(table["end"]);
            List<IPProxy> list = table["list"] as List<IPProxy>;
            ProxyParam proxyParam = table["param"] as ProxyParam;
            if (list == null || proxyParam == null)
            {
                throw new ArgumentException(
                    "The work item must contain a List<IPProxy> under \"list\" and a ProxyParam under \"param\".",
                    "param");
            }

            for (int page = start; page <= end; page++)
            {
                TaskLog.IpProxyLogInfo.WriteLogE(string.Format("开始解析,页码{0}~{1},当前页码{2}", start, end, page));

                // Page address: "<base url>/<page number>".
                string url = string.Format("{0}/{1}", proxyParam.IPUrl, page);
                string html = GetHTML(url, proxyParam.ProxyIp);

                // GetHTML is defined elsewhere; if it reports a failed download as null,
                // LoadHtml would throw and abort the remaining pages. Treat null like a
                // page without a result table instead.
                HtmlNodeCollection rows = null;
                if (html != null)
                {
                    var doc = new HtmlDocument();
                    doc.LoadHtml(html);
                    // All <tr> rows of the table with id "ip_list".
                    rows = doc.DocumentNode.SelectNodes(@"//table[@id='ip_list']/tr");
                }

                if (rows != null && rows.Count > 1)
                {
                    TaskLog.IpProxyLogInfo.WriteLogE(string.Format("当前页码{0},请求地址{1},共{2}条数据", page, url, rows.Count));

                    // Row 0 is skipped (presumably the table header - confirm against the page markup).
                    for (int rowIndex = 1; rowIndex < rows.Count; rowIndex++)
                    {
                        IPProxy item = ParseProxyRow(rows[rowIndex], proxyParam.IsPingIp);
                        if (item == null)
                        {
                            continue;
                        }

                        // NOTE(review): the object-typed parameter and the start/end partition
                        // suggest this runs as a thread / thread-pool callback with several
                        // workers sharing one list - confirm at the call site. List<T> does not
                        // support concurrent writers, so appends are serialized on its SyncRoot.
                        lock (((ICollection)list).SyncRoot)
                        {
                            list.Add(item);
                        }
                    }

                    TaskLog.IpProxyLogInfo.WriteLogE(string.Format("当前页码{0},共{1}条数据", page, rows.Count));
                }

                TaskLog.IpProxyLogInfo.WriteLogE(string.Format("结束解析,页码{0}~{1},当前页码{2}", start, end, page));
            }
        }

        /// <summary>
        /// Converts one table row into an <c>IPProxy</c>.
        /// </summary>
        /// <param name="row">A <c>tr</c> node of the listing table.</param>
        /// <param name="pingFirst">When true, the row is rejected unless <c>Ping</c> succeeds for its IP.</param>
        /// <returns>
        /// The populated item, or <c>null</c> when the row has fewer than ten <c>td</c> cells
        /// or the ping check fails.
        /// </returns>
        private static IPProxy ParseProxyRow(HtmlNode row, bool pingFirst)
        {
            HtmlNodeCollection cells = row.SelectNodes("td");
            if (cells == null || cells.Count <= 9)
            {
                return null;
            }

            string ip = cells[2].InnerText.Trim();
            if (pingFirst && !Ping(ip))
            {
                return null;
            }

            // Only rows whose IP passed the (optional) ping check get this far.
            IPProxy item = new IPProxy();

            // Country comes from the "alt" attribute of the first child of cell 1.
            string country = ReadTrimmedAttribute(cells[1].FirstChild, "alt");
            if (country != null)
            {
                item.Country = country;
            }

            item.IP = ip;
            item.Port = cells[3].InnerText.Trim();
            item.ProxyIp = GetIP(item.IP, item.Port);
            item.Position = cells[4].InnerText.Trim();
            item.Anonymity = cells[5].InnerText.Trim();
            item.Type = cells[6].InnerText.Trim();

            // Speed and connect time are read from the "title" attribute of the
            // <div class="bar"> element inside cells 7 and 8.
            string speed = ReadTrimmedAttribute(cells[7].SelectSingleNode("div[@class='bar']"), "title");
            if (speed != null)
            {
                item.Speed = speed;
            }

            string connectTime = ReadTrimmedAttribute(cells[8].SelectSingleNode("div[@class='bar']"), "title");
            if (connectTime != null)
            {
                item.ConnectTime = connectTime;
            }

            item.VerifyTime = cells[9].InnerText.Trim();
            return item;
        }

        /// <summary>
        /// Returns the trimmed value of attribute <paramref name="name"/> on
        /// <paramref name="node"/>, or <c>null</c> when the node or the attribute is absent.
        /// </summary>
        private static string ReadTrimmedAttribute(HtmlNode node, string name)
        {
            if (node == null)
            {
                return null;
            }

            HtmlAttribute attribute = node.Attributes[name];
            return attribute == null ? null : attribute.Value.Trim();
        }