예제 #1
0
        /// <summary>
        /// Imports proxy IPs for every order id listed in the Taobao proxy config file.
        /// </summary>
        /// <remarks>
        /// One order id is read per line of <c>IP代理/Config/TaobaoProxyIpConfig.txt</c>, resolved against
        /// the application base directory. For each order the vendor URL is requested repeatedly and
        /// every response is passed to <c>ProxyIpHelp.ImportProxyIp</c> until a response contains the
        /// "order quota exhausted" marker. A request that throws, or that yields a null body, is
        /// retried after a 3 second pause; there is no upper bound on the number of retries.
        /// </remarks>
        public void Run()
        {
            string configPath = AppDomain.CurrentDomain.BaseDirectory + "IP代理/Config/TaobaoProxyIpConfig.txt";
            List<string> orderList = new List<string>();

            // "using" releases the file handle even when ReadLine throws part-way through.
            using (StreamReader sr = new StreamReader(configPath))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    orderList.Add(line);
                }
            }

            string          url       = "http://121.199.38.28/ip/?tid={0}&num=1&ports=80,808,3128&filter=on";
            string          error     = "ERROR|订单剩余数量不足"; // marker the vendor returns when the order has no IPs left
            SysData_ProxyIp existsObj = new SysData_ProxyIp();
            string          message   = "";

            foreach (string orderStr in orderList)
            {
                string requestUrl = string.Format(url, orderStr);

                // Keep pulling IPs for this order until the vendor reports it is exhausted.
                while (true)
                {
                    string ipHtml;
                    try
                    {
                        ipHtml = SpiderHelp.GetHtml(requestUrl, "utf-8");
                    }
                    catch (Exception ex)
                    {
                        // Record the failure instead of swallowing it, then back off and retry.
                        log.Error("系统异常", ex);
                        System.Threading.Thread.Sleep(3000);
                        continue;
                    }

                    // NOTE(review): whether GetHtml can return null is not visible here; a null body
                    // is treated like a failed request so Contains below cannot throw.
                    if (ipHtml == null)
                    {
                        System.Threading.Thread.Sleep(3000);
                        continue;
                    }

                    if (ipHtml.Contains(error))
                    {
                        break;
                    }

                    int result = ProxyIpHelp.ImportProxyIp(ipHtml, "匿名", out existsObj, out message);
                    if (result != 1)
                    {
                        log.Debug(string.Format("{0},订单:{1},url:{2},ip:{3}", message, orderStr, url, ipHtml));
                    }
                    else
                    {
                        log.Debug(string.Format("ip插入成功,订单:{0},url:{1},ip:{2}", orderStr, url, ipHtml));
                    }
                }
                log.Debug(string.Format("订单:{0}导入完成,url:{1}", orderStr, url));
            }

            // The original message contained a "{1}" placeholder that was never formatted.
            log.Debug(string.Format("所有订单:导入完成,url:{0}", url));
        }
예제 #2
0
        /// <summary>
        /// Walks the paged proxy list behind <paramref name="url"/> and imports every IP found on each page.
        /// </summary>
        /// <param name="url">Base list URL; <c>&amp;pageid=N</c> is appended for each page request.</param>
        /// <remarks>
        /// The loop has no exit condition of its own: when a page yields no IP entries after three
        /// requests, crawling restarts from page 1. The method only returns when an exception is
        /// thrown, which is logged and swallowed.
        /// </remarks>
        public void RunPageList(object url)
        {
            string pageTemplate = url + "&pageid={0}";

            try
            {
                SysData_ProxyIp duplicate     = new SysData_ProxyIp();
                string          importMessage = "";
                int             page          = 1;

                while (true)
                {
                    string pageUrl = string.Format(pageTemplate, page);

                    // Request the page up to three times before giving up on it.
                    List<string> entries = null;
                    for (int attempt = 1; attempt <= 3; attempt++)
                    {
                        Dictionary<string, List<string>> pageMatches = SpiderHelp.GetHtmlByRegexNotProxyIp(pageUrl, "gb2312", regexDic);
                        entries = pageMatches["regex_ipInfo"]; // every IP entry matched on the page
                        if (entries != null && entries.Count >= 1)
                        {
                            break;
                        }
                    }

                    if (entries == null || entries.Count < 1)
                    {
                        // Nothing on this page: start over from the first page.
                        page = 1;
                        log.Debug(string.Format("未获取到IP列表,url:{0}", url));
                        continue;
                    }

                    foreach (string entry in entries)
                    {
                        Dictionary<string, List<string>> fields = SpiderHelp.GetStrByRegex(entry, regexDic);
                        List<string> ipMatches = fields["regex_ip"];

                        string ip = ipMatches.Count < 1 ? "" : ipMatches[0];
                        ip = ip.TrimBlank();

                        int outcome = ProxyIpHelp.ImportProxyIp(ip, "匿名", out duplicate, out importMessage);
                        if (outcome == 1)
                        {
                            log.Debug(string.Format("ip插入成功,url:{0},ip:{1}", url, ip ?? "null"));
                        }
                        else
                        {
                            log.Debug(string.Format("{0},url:{1},ip:{2}", importMessage, url, ip ?? "null"));
                        }
                    }

                    page++;
                }
            }
            catch (Exception ex)
            {
                log.Error("系统异常", ex);
            }
        }
예제 #3
0
        /// <summary>
        /// Crawls the httpip.net proxy forum and imports every proxy IP it finds.
        /// </summary>
        /// <remarks>
        /// IP list page URLs come from two sources: the lines of
        /// <c>IP代理/Config/Lastro网IP代理ProxyIpConfig.txt</c> (format <c>url</c> or <c>url$host</c>) and
        /// the links scraped from each forum index page. Index pages are walked from page 1 up to
        /// the page count reported by the site. Any exception raised while crawling is logged and
        /// ends the run.
        /// </remarks>
        public void Run()
        {
            // One IP entry on an IP list page.
            RegexInfo regex_ipInfo = new RegexInfo("([^<>]+\\@[^<>]+</font>)", "$1");
            // The IP part of an entry.
            RegexInfo regex_ip = new RegexInfo("([^<>]+)\\@[^<>]+</font>", "$1");
            // The area part of an entry.
            RegexInfo regex_area = new RegexInfo("[^<>]+\\#([^<>]+)</font>", "$1");

            Dictionary<string, RegexInfo> regexDic1 = new Dictionary<string, RegexInfo>();
            regexDic1.Add("regex_ipInfo", regex_ipInfo);

            Dictionary<string, RegexInfo> regexDic2 = new Dictionary<string, RegexInfo>();
            regexDic2.Add("regex_ip", regex_ip);
            regexDic2.Add("regex_area", regex_area);

            // Seed the crawl list with the URLs from the config file.
            List<string> urlList    = new List<string>();
            string       configPath = AppDomain.CurrentDomain.BaseDirectory + "IP代理/Config/Lastro网IP代理ProxyIpConfig.txt";

            // "using" releases the file handle even when ReadLine throws part-way through.
            using (StreamReader sr = new StreamReader(configPath))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    urlList.Add(line);
                }
            }

            // Patterns for the forum index page: links to IP list pages and the total page count.
            RegexInfo regex_urllist           = new RegexInfo("<a href=\"([^\"]+)\"[^<>]*>【国内代理】[^<>]+</a>", "$1");
            RegexInfo regex_urllist_pagecount = new RegexInfo("<span title=\"共[^\"]+页\"> /([^<>]+)页", "$1");

            Dictionary<string, RegexInfo> regexDic3 = new Dictionary<string, RegexInfo>();
            regexDic3.Add("regex_urllist", regex_urllist);
            regexDic3.Add("regex_urllist_pagecount", regex_urllist_pagecount);

            int    urllist_index         = 1;
            int    urllist_max_index     = 1;
            string urllist_nextPage_para = "http://www.httpip.net/forum-36-{0}.html";           // index page URL template
            string urllist_nextPage      = string.Format(urllist_nextPage_para, urllist_index); // index page to fetch next

            try
            {
                // One iteration per forum index page.
                while (true)
                {
                    Dictionary<string, List<string>> dicValueList3 = SpiderHelp.GetHtmlByRegexNotProxyIp(urllist_nextPage, "gb2312", regexDic3);
                    List<string> urlList2 = dicValueList3["regex_urllist"]; // IP list page links found on this index page
                    urlList.AddRange(urlList2);

                    List<string> pageCountMatches = dicValueList3["regex_urllist_pagecount"];
                    urllist_max_index = pageCountMatches.Count < 1 ? 0 : Convert.ToInt32(pageCountMatches[0].TrimBlank());

                    if (urllist_index < urllist_max_index)
                    {
                        urllist_index    = urllist_index + 1;
                        urllist_nextPage = string.Format(urllist_nextPage_para, urllist_index);
                    }
                    else
                    {
                        urllist_nextPage = ""; // no further index pages
                    }

                    // Crawl every IP list page collected so far.
                    SysData_ProxyIp existsObj = new SysData_ProxyIp();
                    string          message   = "";
                    foreach (string urlInfo in urlList)
                    {
                        string[] parts   = urlInfo.Split('$');
                        string   url     = parts[0];
                        string   urlHost = parts.Length < 2 ? "http://www.httpip.net/" : parts[1];

                        // Relative links are resolved against the host.
                        if (!url.ToLower().Contains("http://"))
                        {
                            url = urlHost + url;
                        }

                        // No next-page pattern is configured for this site, so each list URL is
                        // fetched exactly once (the original next-page branch was unreachable).
                        Dictionary<string, List<string>> dicValueList = SpiderHelp.GetHtmlByRegexNotProxyIp(url, "gb2312", regexDic1);
                        List<string> ipInfoList = dicValueList["regex_ipInfo"]; // all IP entries on the page
                        if (ipInfoList == null || ipInfoList.Count < 1)
                        {
                            log.Debug(string.Format("未获取到IP列表,url:{0}", url));
                            continue;
                        }

                        foreach (string ipInfo in ipInfoList)
                        {
                            Dictionary<string, List<string>> infoListDic = SpiderHelp.GetStrByRegex(ipInfo, regexDic2);
                            string ip     = infoListDic["regex_ip"].Count < 1 ? "" : infoListDic["regex_ip"][0];
                            string ipArea = infoListDic["regex_area"].Count < 1 ? "" : infoListDic["regex_area"][0];
                            ip = ip.TrimBlank();

                            int result = ProxyIpHelp.ImportProxyIp(ip, ipArea, out existsObj, out message);
                            if (result != 1)
                            {
                                log.Debug(string.Format("{0},url:{1},ip:{2}", message, url, ip == null ? "null" : ip));
                            }
                            else
                            {
                                log.Debug(string.Format("ip插入成功,url:{0},ip:{1}", url, ip == null ? "null" : ip));
                            }
                        }
                    }

                    if (string.IsNullOrEmpty(urllist_nextPage))
                    {
                        break;
                    }

                    // Start the next index page with an empty crawl list.
                    urlList = new List<string>();
                }
            }
            catch (Exception ex)
            {
                log.Error("系统异常", ex);
            }
        }
예제 #4
0
        /// <summary>
        /// Crawls the itmop.com proxy catalog and imports every proxy IP it finds.
        /// </summary>
        /// <remarks>
        /// IP list page URLs come from two sources: the lines of
        /// <c>IP代理/Config/猫扑网IP代理ProxyIpConfig.txt</c> (format <c>url</c> or <c>url$host</c>) and the
        /// links scraped from each catalog page. Catalog pages are walked from page 1 up to the
        /// page number found in the site's "last page" link. Any exception raised while crawling is
        /// logged and ends the run.
        /// </remarks>
        public void Run()
        {
            // One IP entry on an IP list page.
            RegexInfo regex_ipInfo = new RegexInfo("(\\&nbsp\\;[^<>]+<br />)", "$1");
            // The first two whitespace-separated fields of an entry, joined as "$1:$2".
            RegexInfo regex_ip = new RegexInfo("\\&nbsp\\;([^<>\\s]+) ([^<>\\s]+) [^<>]+<br />", "$1:$2");
            // The third whitespace-separated field of an entry, used as the area.
            RegexInfo regex_area = new RegexInfo("\\&nbsp\\;[^<>\\s]+ [^<>\\s]+ ([^<>\\s]+) [^<>]+<br />", "$1");

            Dictionary<string, RegexInfo> regexDic1 = new Dictionary<string, RegexInfo>();
            regexDic1.Add("regex_ipInfo", regex_ipInfo);

            Dictionary<string, RegexInfo> regexDic2 = new Dictionary<string, RegexInfo>();
            regexDic2.Add("regex_ip", regex_ip);
            regexDic2.Add("regex_area", regex_area);

            // Seed the crawl list with the URLs from the config file.
            List<string> urlList    = new List<string>();
            string       configPath = AppDomain.CurrentDomain.BaseDirectory + "IP代理/Config/猫扑网IP代理ProxyIpConfig.txt";

            // "using" releases the file handle even when ReadLine throws part-way through.
            using (StreamReader sr = new StreamReader(configPath))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    urlList.Add(line);
                }
            }

            // Patterns for the catalog page: links to IP list pages and the last page number.
            RegexInfo regex_urllist           = new RegexInfo("<DT><a href=\"([^\"]+)\" target=\"_blank\">[^<>]+</a></DT>", "$1");
            RegexInfo regex_urllist_pagecount = new RegexInfo("<a href=\"http://www.itmop.com/proxy/catalog.asp\\?page=(\\d+)\">\\&raquo\\;</a></DIV>", "$1");

            Dictionary<string, RegexInfo> regexDic3 = new Dictionary<string, RegexInfo>();
            regexDic3.Add("regex_urllist", regex_urllist);
            regexDic3.Add("regex_urllist_pagecount", regex_urllist_pagecount);

            int    urllist_index         = 1;
            int    urllist_max_index     = 1;
            string urllist_nextPage_para = "http://www.itmop.com/proxy/catalog.asp?page={0}";   // catalog page URL template
            string urllist_nextPage      = string.Format(urllist_nextPage_para, urllist_index); // catalog page to fetch next

            try
            {
                // One iteration per catalog page.
                while (true)
                {
                    // Collect the IP list page URLs linked from this catalog page.
                    Dictionary<string, List<string>> dicValueList3 = SpiderHelp.GetHtmlByRegexNotProxyIp(urllist_nextPage, "utf-8", regexDic3);
                    List<string> urlList2 = dicValueList3["regex_urllist"];
                    urlList.AddRange(urlList2);

                    // Read the highest catalog page number.
                    List<string> pageCountMatches = dicValueList3["regex_urllist_pagecount"];
                    urllist_max_index = pageCountMatches.Count < 1 ? 0 : Convert.ToInt32(pageCountMatches[0].TrimBlank());

                    if (urllist_index < urllist_max_index)
                    {
                        urllist_index    = urllist_index + 1;
                        urllist_nextPage = string.Format(urllist_nextPage_para, urllist_index);
                    }
                    else
                    {
                        urllist_nextPage = ""; // no further catalog pages
                    }

                    // Crawl every IP list page collected so far.
                    SysData_ProxyIp existsObj = new SysData_ProxyIp();
                    string          message   = "";
                    foreach (string rawUrlInfo in urlList)
                    {
                        // Scraped hrefs are HTML-escaped; restore the literal ampersands.
                        string   urlInfo = rawUrlInfo.Replace("&amp;", "&");
                        string[] parts   = urlInfo.Split('$');
                        string   url     = parts[0];
                        string   urlHost = parts.Length < 2 ? "http://www.itmop.com" : parts[1];

                        // Relative links are resolved against the host.
                        if (!url.ToLower().Contains("http://"))
                        {
                            url = urlHost + url;
                        }

                        // No next-page pattern is configured for this site, so each list URL is
                        // fetched exactly once (the original next-page branch was unreachable).
                        Dictionary<string, List<string>> dicValueList = SpiderHelp.GetHtmlByRegexNotProxyIp(url, "utf-8", regexDic1);
                        List<string> ipInfoList = dicValueList["regex_ipInfo"]; // all IP entries on the page
                        if (ipInfoList == null || ipInfoList.Count < 1)
                        {
                            log.Debug(string.Format("未获取到IP列表,url:{0}", url));
                            continue;
                        }

                        foreach (string ipInfo in ipInfoList)
                        {
                            Dictionary<string, List<string>> infoListDic = SpiderHelp.GetStrByRegex(ipInfo, regexDic2);
                            string ip     = infoListDic["regex_ip"].Count < 1 ? "" : infoListDic["regex_ip"][0];
                            string ipArea = infoListDic["regex_area"].Count < 1 ? "" : infoListDic["regex_area"][0];
                            ipArea = ipArea.RemoveHeml();
                            ip     = ip.TrimBlank();

                            int result = ProxyIpHelp.ImportProxyIp(ip, ipArea, out existsObj, out message);
                            if (result != 1)
                            {
                                log.Debug(string.Format("{0},url:{1},ip:{2}", message, url, ip == null ? "null" : ip));
                            }
                            else
                            {
                                log.Debug(string.Format("ip插入成功,url:{0},ip:{1}", url, ip == null ? "null" : ip));
                            }
                        }
                    }

                    if (string.IsNullOrEmpty(urllist_nextPage))
                    {
                        break;
                    }

                    // Start the next catalog page with an empty crawl list.
                    urlList = new List<string>();
                }
            }
            catch (Exception ex)
            {
                log.Error("系统异常", ex);
            }
        }
예제 #5
0
        /// <summary>
        /// Imports proxy IPs from the first worksheet of an Excel workbook, updating the progress
        /// bar as rows are processed and reporting the number of successful imports at the end.
        /// </summary>
        /// <param name="param">Path of the workbook to open; converted with <c>Convert.ToString</c>.</param>
        /// <remarks>
        /// Column A is read as the IP and column B as its area. The form controls are disabled for
        /// the duration of the import and are always restored, and Excel is always shut down, even
        /// when the import throws.
        /// </remarks>
        public void ImportIp(object param)
        {
            string fileName = Convert.ToString(param);
            int    count    = 0; // rows imported successfully

            // Lock the rest of the form while the import runs.
            panel查询条件.Enabled   = false;
            btnExcel2.Enabled   = false;
            btnImportIp.Visible = false;
            // Show the progress panel in place of the import button.
            panelImport.Visible       = true;
            panelImport.Top           = btnImportIp.Top;
            panelImport.Left          = btnImportIp.Left;
            labelImportBar.Text       = "0%";
            progressBarImportIp.Value = 0;

            SysData_ProxyIp existsObj = new SysData_ProxyIp();
            string          message   = "";
            object          missing   = System.Reflection.Missing.Value;

            try
            {
                Excel.Application excel = null;
                try
                {
                    excel = new Excel.Application();
                    if (excel == null)
                    {
                        // The original fell through to excel.Quit() here and crashed on the null reference.
                        MessageBox.Show("Can't access excel");
                        return;
                    }

                    excel.Visible     = false;
                    excel.UserControl = true;

                    // Open the workbook read-only.
                    Excel.Workbook wb = excel.Application.Workbooks.Open(fileName, missing, true, missing, missing, missing,
                                                                         missing, missing, missing, true, missing, missing, missing, missing, missing);
                    // First worksheet.
                    Excel.Worksheet ws = (Excel.Worksheet)wb.Worksheets.get_Item(1);
                    // Number of rows in the used range.
                    int rowsint = ws.UsedRange.Cells.Rows.Count;

                    progressBarImportIp.Maximum = rowsint;
                    progressBarImportIp.Minimum = 0;

                    // Read columns A and B for every used row in one call.
                    Excel.Range rng1 = ws.Cells.get_Range("A1", "B" + rowsint);
                    object[,] arryItem = (object[, ])rng1.Value2;

                    // NOTE(review): the bound stops one row short of rowsint, so the last used row
                    // is never imported. Kept as in the original; confirm whether that is intended.
                    for (int i = 1; i <= rowsint - 1; i++)
                    {
                        // Convert.ToString maps an empty (null) cell to "" instead of throwing.
                        string ip     = Convert.ToString(arryItem[i, 1]);
                        string ipArea = Convert.ToString(arryItem[i, 2]);
                        int    result = ProxyIpHelp.ImportProxyIp(ip, ipArea, out existsObj, out message);
                        if (result == 1)
                        {
                            count = count + 1;
                        }

                        progressBarImportIp.Value = progressBarImportIp.Value + 1;
                        labelImportBar.Text       = 计算百分比(progressBarImportIp.Value, progressBarImportIp.Maximum);
                    }
                    progressBarImportIp.Value = progressBarImportIp.Maximum;
                    labelImportBar.Text       = 计算百分比(progressBarImportIp.Value, progressBarImportIp.Maximum);
                }
                finally
                {
                    if (excel != null)
                    {
                        excel.Quit();
                        excel = null;

                        // NOTE(review): this kills EVERY process named "excel", including workbooks
                        // the user has open elsewhere. Kept from the original, whose comment said
                        // no better way was known; consider releasing the COM objects instead.
                        Process[] procs = Process.GetProcessesByName("excel");
                        foreach (Process pro in procs)
                        {
                            pro.Kill();
                        }
                        GC.Collect();
                    }
                }

                MessageBox.Show(string.Format("导入完成,成功导入可用的代理IP{0}个", count));
            }
            finally
            {
                // Restore the form whether the import succeeded, was skipped, or threw.
                panelImport.Visible = false;
                btnExcel2.Enabled   = true;
                btnImportIp.Visible = true;
                panel查询条件.Enabled   = true;
                labelImportBar.Text = "";
            }
        }
예제 #6
0
        /// <summary>
        /// Crawls the proxy list pages configured in the xici config file and imports every proxy IP found.
        /// </summary>
        /// <remarks>
        /// Each line of <c>IP代理/Config/西刺ProxyIpConfig.txt</c> has the form
        /// <c>url$host$cutoffDate</c>, for example
        /// <c>http://www.xici.net.co/nt/$http://www.xici.net.co/$2014-5-13</c>. Starting at
        /// <c>url</c>, pages are followed through their "next page" link until a page has no
        /// entries, no next link, or an imported entry is dated before the cutoff. Lines with fewer
        /// than three fields are skipped. Any exception raised while crawling is logged and ends
        /// the run.
        /// </remarks>
        public void Run()
        {
            // One table row (a single proxy entry) on a list page.
            RegexInfo regex_ipInfo   = new RegexInfo("(<tr class=\"[^\"]*\">(?:(?!</tr>).)*</tr>)", "$1");
            // Link to the next list page.
            RegexInfo regex_nextPage = new RegexInfo("<a class=\"next_page\" rel=\"next\" href=\"([^\"]+)\">下一页[^<>]*</a>", "$1");
            // Second and third cells of a row, joined as "$1:$2".
            RegexInfo regex_ip       = new RegexInfo("<tr class=\"[^\"]*\"><td>(?:(?!</td>).)*</td><td>([^<>]+)</td><td>([^<>]+)</td><td><a href=\"[^\"]*\">[^<>]+</a></td>" +
                                                     "<td>[^<>]*</td><td>[^<>]*</td><td>(?:(?!</td>).)*</td><td>(?:(?!</td>).)*</td><td>[^<>]+</td>", "$1:$2");
            // Link text in the fourth cell, used as the area.
            RegexInfo regex_area = new RegexInfo("<tr class=\"[^\"]*\"><td>(?:(?!</td>).)*</td><td>[^<>]+</td><td>[^<>]+</td><td><a href=\"[^\"]*\">([^<>]+)</a></td>" +
                                                 "<td>[^<>]*</td><td>[^<>]*</td><td>(?:(?!</td>).)*</td><td>(?:(?!</td>).)*</td><td>[^<>]+</td>", "$1");
            // Last cell of the row, parsed as the entry date.
            RegexInfo regex_date = new RegexInfo("<tr class=\"[^\"]*\"><td>(?:(?!</td>).)*</td><td>[^<>]+</td><td>[^<>]+</td><td><a href=\"[^\"]*\">[^<>]+</a></td>" +
                                                 "<td>[^<>]*</td><td>[^<>]*</td><td>(?:(?!</td>).)*</td><td>(?:(?!</td>).)*</td><td>([^<>]+)</td>", "$1");

            Dictionary<string, RegexInfo> regexDic1 = new Dictionary<string, RegexInfo>();
            regexDic1.Add("regex_ipInfo", regex_ipInfo);
            regexDic1.Add("regex_nextPage", regex_nextPage);

            Dictionary<string, RegexInfo> regexDic2 = new Dictionary<string, RegexInfo>();
            regexDic2.Add("regex_ip", regex_ip);
            regexDic2.Add("regex_area", regex_area);
            regexDic2.Add("regex_date", regex_date);

            // Load the crawl configuration from the text file.
            List<string> urlList    = new List<string>();
            string       configPath = AppDomain.CurrentDomain.BaseDirectory + "IP代理/Config/西刺ProxyIpConfig.txt";

            // "using" releases the file handle even when ReadLine throws part-way through.
            using (StreamReader sr = new StreamReader(configPath))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    urlList.Add(line);
                }
            }

            try
            {
                SysData_ProxyIp existsObj = new SysData_ProxyIp();
                string          message   = "";
                foreach (string urlInfo in urlList)
                {
                    string[] parts = urlInfo.Split('$');
                    if (parts.Length < 3)
                    {
                        // A blank or malformed line used to throw IndexOutOfRangeException and
                        // abort the whole crawl; skip just that line instead.
                        log.Debug(string.Format("配置行格式不正确,已跳过:{0}", urlInfo));
                        continue;
                    }

                    string   url     = parts[0];
                    string   urlHost = parts[1];
                    DateTime maxDate = Convert.ToDateTime(parts[2]);

                    // Follow "next page" links starting from the configured URL.
                    while (true)
                    {
                        Dictionary<string, List<string>> dicValueList = SpiderHelp.GetHtmlByRegexNotProxyIp(url, "utf-8", regexDic1);
                        List<string> ipInfoList = dicValueList["regex_ipInfo"];                                                      // all entries on the page
                        string       nextPage   = dicValueList["regex_nextPage"].Count < 1 ? "" : dicValueList["regex_nextPage"][0]; // next page link
                        if (ipInfoList == null || ipInfoList.Count < 1)
                        {
                            log.Debug(string.Format("未获取到IP列表,url:{0}", url));
                            break; // move on to the next configured URL
                        }

                        foreach (string ipInfo in ipInfoList)
                        {
                            Dictionary<string, List<string>> infoListDic = SpiderHelp.GetStrByRegex(ipInfo, regexDic2);
                            string   ip     = infoListDic["regex_ip"].Count < 1 ? "" : infoListDic["regex_ip"][0];
                            string   ipArea = infoListDic["regex_area"].Count < 1 ? "" : infoListDic["regex_area"][0];
                            DateTime ipDate = infoListDic["regex_date"].Count < 1 ? DateTime.Now : Convert.ToDateTime(infoListDic["regex_date"][0]);

                            ip = ip.TrimBlank();

                            int result = ProxyIpHelp.ImportProxyIp(ip, ipArea, out existsObj, out message);
                            if (result != 1)
                            {
                                log.Debug(string.Format("{0},url:{1},ip:{2}", message, url, ip == null ? "null" : ip));
                                // The cutoff check below is only reached after a successful import.
                                continue;
                            }

                            log.Debug(string.Format("ip插入成功,url:{0},ip:{1}", url, ip == null ? "null" : ip));

                            // An entry older than the cutoff ends paging for this configured URL.
                            if (ipDate < maxDate)
                            {
                                nextPage = null;
                                break;
                            }
                        }

                        if (string.IsNullOrEmpty(nextPage))
                        {
                            break;
                        }
                        url = urlHost + nextPage;
                    }
                }
            }
            catch (Exception ex)
            {
                log.Error("系统异常", ex);
            }
        }