/// <summary>
        /// Imports proxy IPs for every order id listed in the Taobao proxy config file.
        /// </summary>
        /// <remarks>
        /// The config file holds one order id per line. For each order the vendor endpoint is polled
        /// and every response is handed to <c>ProxyIpHelp.ImportProxyIp</c>; polling for that order
        /// stops when the response contains the "order quota exhausted" marker.
        /// A request that throws, or that returns no content, is retried after 3 seconds with no
        /// retry limit.
        /// </remarks>
        public void Run()
        {
            List<string> orderList  = new List<string>();
            string       configPath = AppDomain.CurrentDomain.BaseDirectory + "IP代理/Config/TaobaoProxyIpConfig.txt";

            // using releases the file handle even when reading throws.
            using (StreamReader sr = new StreamReader(configPath))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    // A blank line is not an order id; skip it.
                    if (line.Trim().Length > 0)
                    {
                        orderList.Add(line);
                    }
                }
            }

            const string    url   = "http://121.199.38.28/ip/?tid={0}&num=1&ports=80,808,3128&filter=on";
            // Marker the endpoint returns when the order has no IPs left.
            const string    error = "ERROR|订单剩余数量不足";
            SysData_ProxyIp existsObj;
            string          message;

            foreach (string orderStr in orderList)
            {
                string requestUrl = string.Format(url, orderStr);
                while (true)
                {
                    string ipHtml;
                    try
                    {
                        ipHtml = SpiderHelp.GetHtml(requestUrl, "utf-8");
                    }
                    catch (Exception ex)
                    {
                        // Previously swallowed silently; record the failure before retrying.
                        log.Error(string.Format("请求代理IP失败,3秒后重试,url:{0}", requestUrl), ex);
                        System.Threading.Thread.Sleep(3000);
                        continue;
                    }

                    if (string.IsNullOrEmpty(ipHtml))
                    {
                        // Nothing to import; back off instead of dereferencing null or spinning.
                        System.Threading.Thread.Sleep(3000);
                        continue;
                    }

                    if (ipHtml.Contains(error))
                    {
                        break;
                    }

                    int result = ProxyIpHelp.ImportProxyIp(ipHtml, "匿名", out existsObj, out message);
                    if (result != 1)
                    {
                        log.Debug(string.Format("{0},订单:{1},url:{2},ip:{3}", message, orderStr, requestUrl, ipHtml));
                    }
                    else
                    {
                        log.Debug(string.Format("ip插入成功,订单:{0},url:{1},ip:{2}", orderStr, requestUrl, ipHtml));
                    }
                }
                log.Debug(string.Format("订单:{0}导入完成,url:{1}", orderStr, requestUrl));
            }
            // The original literal carried an unformatted "{1}" placeholder.
            log.Debug("所有订单:导入完成");
        }
Beispiel #2
0
        /// <summary>
        /// Marks the given proxy IP as unusable for the given website.
        /// </summary>
        /// <remarks>
        /// If a SysData_WebJoinProxyIp row already links the website and the IP, its status is set to
        /// <c>WebProxyIpStatus2</c>. Otherwise, when the IP exists in SysData_ProxyIp, a new link row
        /// is inserted with that status. Nothing happens for a null/empty <paramref name="ip"/> or
        /// for an IP that is not in SysData_ProxyIp.
        /// </remarks>
        /// <param name="webId">Id of the website.</param>
        /// <param name="ip">Proxy IP text, matched against SysData_ProxyIp.Ip.</param>
        /// <param name="_dc">Optional existing data-access object to build on.</param>
        public static void SetNotEffectiveProxyIp(int webId, string ip, DataClass _dc = null)
        {
            if (string.IsNullOrEmpty(ip))
            {
                return;
            }

            DataClass dc = new DataClass(_dc);
            try
            {
                // {0}/{1} are bound as SQL parameters by ExecuteQuery, so the ip text can no longer
                // alter the statement (the original concatenated it into the SQL).
                const string sql = "select top 1 * from SysData_WebJoinProxyIp with(nolock) where WebId={0}"
                                   + " and ProxyIp in (select ID from SysData_ProxyIp with(nolock) where Ip={1})";
                SysData_WebJoinProxyIp obj = dc.DB.ExecuteQuery<SysData_WebJoinProxyIp>(sql, webId, ip).FirstOrDefault();

                if (obj != null)
                {
                    obj.Status = WebProxyIpStatus2;
                    WebJoinProxyIpManager.Update(obj, dc);
                    return;
                }

                SysData_ProxyIp ipObj = GetProxyIpByIp(ip, dc);
                if (ipObj != null)
                {
                    obj = new SysData_WebJoinProxyIp {
                        WebId = webId, ProxyIp = ipObj.ID, CreateTime = DateTime.Now, Status = WebProxyIpStatus2
                    };
                    WebJoinProxyIpManager.Insert(obj, dc);
                }
            }
            finally
            {
                // Release the data-access object even when a query or update throws.
                dc.Connection_Close();
                dc.Dispose();
            }
        }
        /// <summary>
        /// Walks a paged proxy-IP listing and hands every IP found to <c>ProxyIpHelp.ImportProxyIp</c>.
        /// </summary>
        /// <remarks>
        /// Pages are requested as <c>url + "&amp;pageid=N"</c>, starting at 1. A page is fetched up to
        /// three times; if it still yields no entries the walk restarts from page 1. The loop has no
        /// exit condition of its own: it ends only when an exception is thrown, which is logged.
        /// </remarks>
        /// <param name="url">Base listing URL (converted to text).</param>
        public void RunPageList(object url)
        {
            string urlText      = Convert.ToString(url);
            string pageTemplate = url + "&pageid={0}";

            try
            {
                SysData_ProxyIp importedObj   = new SysData_ProxyIp();
                string          importMessage = "";
                int             page          = 1;

                for (;;)
                {
                    string        pageUrl = string.Format(pageTemplate, page);
                    List <string> entries = null;

                    // Up to three fetches of the same page before giving up on it.
                    for (int attempt = 1; attempt <= 3; attempt++)
                    {
                        Dictionary <string, List <string> > pageValues = SpiderHelp.GetHtmlByRegexNotProxyIp(pageUrl, "gb2312", regexDic);
                        entries = pageValues["regex_ipInfo"];
                        if (entries != null && entries.Count >= 1)
                        {
                            break;
                        }
                    }

                    if (entries == null || entries.Count < 1)
                    {
                        // Nothing on this page: start over from the first page.
                        page = 1;
                        log.Debug(string.Format("未获取到IP列表,url:{0}", url));
                        continue;
                    }

                    foreach (string entry in entries)
                    {
                        Dictionary <string, List <string> > fields = SpiderHelp.GetStrByRegex(entry, regexDic);
                        string ip = fields["regex_ip"].Count < 1 ? "" : fields["regex_ip"][0];
                        ip = ip.TrimBlank();

                        if (ProxyIpHelp.ImportProxyIp(ip, "匿名", out importedObj, out importMessage) == 1)
                        {
                            log.Debug(string.Format("ip插入成功,url:{0},ip:{1}", url, ip ?? "null"));
                        }
                        else
                        {
                            log.Debug(string.Format("{0},url:{1},ip:{2}", importMessage, url, ip ?? "null"));
                        }
                    }

                    page = page + 1;
                }
            }
            catch (Exception ex)
            {
                log.Error("系统异常", ex);
            }
        }
Beispiel #4
0
        /// <summary>
        /// Looks up a proxy IP record by its IP text.
        /// </summary>
        /// <param name="ip">IP text to match against SysData_ProxyIp.Ip; null is treated as an empty string.</param>
        /// <param name="_dc">Optional existing data-access object to build on.</param>
        /// <returns>The first matching record, or null when none exists.</returns>
        public static SysData_ProxyIp GetProxyIpByIp(string ip, DataClass _dc = null)
        {
            DataClass dc = new DataClass(_dc);
            try
            {
                // {0} is bound as a SQL parameter by ExecuteQuery instead of being spliced into the
                // statement text. Null becomes "" to keep the original string.Format behaviour.
                return dc.DB.ExecuteQuery<SysData_ProxyIp>(
                    "select top 1 * from SysData_ProxyIp with(nolock) where Ip={0}", ip ?? string.Empty).FirstOrDefault();
            }
            finally
            {
                // Release the data-access object even when the query throws.
                dc.Connection_Close();
                dc.Dispose();
            }
        }
Beispiel #5
0
        /// <summary>
        /// Inserts a proxy IP record through the SysData_ProxyIp_Insert procedure and stores the
        /// generated id back on the object.
        /// </summary>
        /// <param name="obj">Record to insert; null is returned unchanged without touching the database.</param>
        /// <param name="_db">Optional existing data-access object to build on.</param>
        /// <returns>The same <paramref name="obj"/>, with ID set when it was inserted.</returns>
        public static SysData_ProxyIp Insert(SysData_ProxyIp obj, DataClass _db = null)
        {
            if (obj == null)
            {
                return obj;
            }

            DataClass db = new DataClass(_db);
            try
            {
                long nowID = 0;
                db.DB.SysData_ProxyIp_Insert(obj.Ip, obj.CreateTime, obj.IpArea, out nowID);
                obj.ID = nowID;
            }
            finally
            {
                // The original never released this wrapper. Closing it here follows the pattern the
                // other methods in this file use, including for wrappers built on a caller-supplied object.
                db.Connection_Close();
                db.Dispose();
            }
            return(obj);
        }
Beispiel #6
0
        /// <summary>
        /// Normalizes a proxy IP (trims it, lower-cases it, strips "http://") and inserts it into SysData_ProxyIp.
        /// </summary>
        /// <param name="ip">Proxy IP text; may carry an "http://" prefix.</param>
        /// <param name="ipArea">Region of the IP, e.g. province/city/carrier. Stored as given.</param>
        /// <param name="existsObj">The record that already existed, or the record just inserted; null on failure.</param>
        /// <param name="message">Failure reason; empty on success.</param>
        /// <param name="checkExists">When true, an already stored IP is reported instead of inserted again.</param>
        /// <param name="_dc">Optional existing data-access object to build on.</param>
        /// <returns>1: inserted; 2: not inserted because the IP already exists; 0: other failure.</returns>
        public static int InsertProxyIp(string ip, string ipArea, out SysData_ProxyIp existsObj, out string message, bool checkExists = true, DataClass _dc = null)
        {
            existsObj = null;
            message   = "";

            // Check for null before calling TrimBlank so a null ip is reported, not dereferenced.
            if (string.IsNullOrEmpty(ip))
            {
                message = "ip不能为空";
                return(0);
            }

            ip = ip.TrimBlank();
            // Normalize once, then validate the value that will actually be stored.
            ip = string.IsNullOrEmpty(ip) ? ip : ip.Trim().ToLower().Replace("http://", "");
            if (string.IsNullOrEmpty(ip))
            {
                message = "ip不能为空";
                return(0);
            }

            // NOTE(review): the original had a second check here that re-tested ip (already known to
            // be non-empty) while reporting "ip所在地区不能为空". It could never fire, so ipArea has
            // never been validated. Enforcing it now would reject callers that pass an empty area;
            // confirm the intended contract before adding it.

            DataClass dc = new DataClass(_dc);
            try
            {
                if (checkExists)
                {
                    existsObj = GetProxyIpByIp(ip, dc);
                    if (existsObj != null)
                    {
                        message = "ip已存在";
                        return(2);
                    }
                }

                SysData_ProxyIp ipObj = new SysData_ProxyIp {
                    CreateTime = DateTime.Now, Ip = ip, IpArea = ipArea
                };

                existsObj = Insert(ipObj, dc);
                return(1);
            }
            finally
            {
                // Single release point, also reached when the lookup or insert throws.
                dc.Connection_Close();
                dc.Dispose();
            }
        }
Beispiel #7
0
        /// <summary>
        /// Picks a proxy IP that has not been marked with <c>WebProxyIpStatus2</c> for the given website.
        /// </summary>
        /// <remarks>
        /// The candidate is chosen by the database in random order (<c>order by newId()</c>) from
        /// SysData_ProxyIp rows that have no SysData_WebJoinProxyIp row for this website with that status.
        /// </remarks>
        /// <param name="webId">Id of the website.</param>
        /// <param name="_dc">Optional existing data-access object to build on.</param>
        /// <returns>The IP text, or null when no candidate exists.</returns>
        public static string GetEffectiveProxyIp(int webId, DataClass _dc = null)
        {
            DataClass dc = new DataClass(_dc);
            try
            {
                string sql = new StringBuilder().Append("select top 1 * from SysData_ProxyIp as tbl1 with(nolock) where  ")
                             .Append("not exists (select * from SysData_WebJoinProxyIp with(nolock) where tbl1.ID=ProxyIp and WebId=").Append(webId)
                             .Append(" and [Status]=").Append(WebProxyIpStatus2)
                             .Append(") order by newId()").ToString();
                SysData_ProxyIp obj = dc.DB.ExecuteQuery<SysData_ProxyIp>(sql).FirstOrDefault();

                return(obj == null ? null : obj.Ip);
            }
            finally
            {
                // Release the data-access object even when the query throws.
                dc.Connection_Close();
                dc.Dispose();
            }
        }
Beispiel #8
0
        /// <summary>
        /// Imports one proxy IP: waits until the spider is allowed to run, then rejects IPs that are
        /// already stored or fail <c>CheckProxyIp</c>, and inserts the rest.
        /// </summary>
        /// <param name="ip">Proxy IP text.</param>
        /// <param name="ipArea">Region of the IP, passed through to the insert.</param>
        /// <param name="existsObj">The already stored record, or the record just inserted; may be null on failure.</param>
        /// <param name="message">Outcome text describing why the import failed or that it succeeded.</param>
        /// <returns>1 when the IP was inserted, 0 in every other case.</returns>
        public static int ImportProxyIp(string ip, string ipArea, out SysData_ProxyIp existsObj, out string message)
        {
            existsObj = null;
            message   = "";

            if (string.IsNullOrEmpty(ip))
            {
                message = "ip不能为空";
                return(0);
            }

            // Block, re-checking once a minute, while CheckPassSpider says the spider must not run
            // (per the original comment: a database maintenance job is executing).
            while (!WorkItemManager.CheckPassSpider())
            {
                System.Threading.Thread.Sleep(60000);
            }

            existsObj = ProxyIpManager.GetProxyIpByIp(ip);
            if (existsObj != null)
            {
                message = "ip已存在";
                return(0);
            }

            bool usable = CheckProxyIp(ip);
            if (!usable)
            {
                message = "ip不可用";
                return(0);
            }

            // Existence was checked above, so the insert skips its own check.
            int insertResult = ProxyIpManager.InsertProxyIp(ip, ipArea, out existsObj, out message, checkExists: false);
            if (insertResult == 1)
            {
                message = "ip插入成功";
                return(1);
            }
            return(0);
        }
Beispiel #9
0
        /// <summary>
        /// Crawls the httpip.net forum index and imports every proxy IP found on the list pages it links to.
        /// </summary>
        /// <remarks>
        /// Seed list-page URLs are read from the config file, one per line, optionally as "url$host".
        /// Further list-page URLs are scraped from each forum index page; index pages are walked from
        /// page 1 up to the page count reported by the page itself. Each list page is fetched once.
        /// Any exception stops the crawl and is logged.
        /// </remarks>
        public void Run()
        {
            // One IP entry on a list page.
            RegexInfo regex_ipInfo = new RegexInfo("([^<>]+\\@[^<>]+</font>)", "$1");
            // IP part of an entry.
            RegexInfo regex_ip = new RegexInfo("([^<>]+)\\@[^<>]+</font>", "$1");
            // Region part of an entry.
            RegexInfo regex_area = new RegexInfo("[^<>]+\\#([^<>]+)</font>", "$1");

            Dictionary<string, RegexInfo> regexDic1 = new Dictionary<string, RegexInfo>();
            regexDic1.Add("regex_ipInfo", regex_ipInfo);

            Dictionary<string, RegexInfo> regexDic2 = new Dictionary<string, RegexInfo>();
            regexDic2.Add("regex_ip", regex_ip);
            regexDic2.Add("regex_area", regex_area);

            // Seed list-page URLs from the config file.
            List<string> urlList    = new List<string>();
            string       configPath = AppDomain.CurrentDomain.BaseDirectory + "IP代理/Config/Lastro网IP代理ProxyIpConfig.txt";

            // using releases the file handle even when reading throws.
            using (StreamReader sr = new StreamReader(configPath))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    // Blank lines carry no URL.
                    if (line.Trim().Length > 0)
                    {
                        urlList.Add(line);
                    }
                }
            }

            // Links to list pages on a forum index page, and the index page count.
            RegexInfo regex_urllist           = new RegexInfo("<a href=\"([^\"]+)\"[^<>]*>【国内代理】[^<>]+</a>", "$1");
            RegexInfo regex_urllist_pagecount = new RegexInfo("<span title=\"共[^\"]+页\"> /([^<>]+)页", "$1");

            Dictionary<string, RegexInfo> regexDic3 = new Dictionary<string, RegexInfo>();
            regexDic3.Add("regex_urllist", regex_urllist);
            regexDic3.Add("regex_urllist_pagecount", regex_urllist_pagecount);

            int    urllist_index         = 1;
            string urllist_nextPage_para = "http://www.httpip.net/forum-36-{0}.html";
            string urllist_nextPage      = string.Format(urllist_nextPage_para, urllist_index);

            try
            {
                SysData_ProxyIp existsObj;
                string          message;

                // One iteration per forum index page.
                while (!string.IsNullOrEmpty(urllist_nextPage))
                {
                    Dictionary<string, List<string> > dicValueList3 = SpiderHelp.GetHtmlByRegexNotProxyIp(urllist_nextPage, "gb2312", regexDic3);
                    urlList.AddRange(dicValueList3["regex_urllist"]);

                    // A missing or non-numeric page count means "no further index pages" rather
                    // than an exception that aborts the crawl.
                    int           urllist_max_index = 0;
                    List<string>  pageCountMatches  = dicValueList3["regex_urllist_pagecount"];
                    if (pageCountMatches.Count >= 1)
                    {
                        int.TryParse(pageCountMatches[0].TrimBlank(), out urllist_max_index);
                    }

                    if (urllist_index < urllist_max_index)
                    {
                        urllist_index    = urllist_index + 1;
                        urllist_nextPage = string.Format(urllist_nextPage_para, urllist_index);
                    }
                    else
                    {
                        urllist_nextPage = "";
                    }

                    // Crawl every list page collected for this index page.
                    foreach (string urlInfo in urlList)
                    {
                        string[] parts   = urlInfo.Split('$');
                        string   url     = parts[0];
                        string   urlHost = parts.Length < 2 ? "http://www.httpip.net/" : parts[1];

                        // Relative links are resolved against the host.
                        if (!url.ToLower().Contains("http://"))
                        {
                            url = urlHost + url;
                        }

                        Dictionary<string, List<string> > dicValueList = SpiderHelp.GetHtmlByRegexNotProxyIp(url, "gb2312", regexDic1);
                        List<string> ipInfoList = dicValueList["regex_ipInfo"];
                        if (ipInfoList == null || ipInfoList.Count < 1)
                        {
                            log.Debug(string.Format("未获取到IP列表,url:{0}", url));
                            continue;
                        }

                        foreach (string ipInfo in ipInfoList)
                        {
                            Dictionary<string, List<string> > infoListDic = SpiderHelp.GetStrByRegex(ipInfo, regexDic2);
                            string ip     = infoListDic["regex_ip"].Count < 1 ? "" : infoListDic["regex_ip"][0];
                            string ipArea = infoListDic["regex_area"].Count < 1 ? "" : infoListDic["regex_area"][0];
                            ip = ip.TrimBlank();

                            int result = ProxyIpHelp.ImportProxyIp(ip, ipArea, out existsObj, out message);
                            if (result != 1)
                            {
                                log.Debug(string.Format("{0},url:{1},ip:{2}", message, url, ip ?? "null"));
                            }
                            else
                            {
                                log.Debug(string.Format("ip插入成功,url:{0},ip:{1}", url, ip ?? "null"));
                            }
                        }
                        // The original had a "next page" branch here, but its variable was always
                        // empty, so it never ran; it has been removed.
                    }

                    // URLs for the next index page start from an empty list.
                    urlList = new List<string>();
                }
            }
            catch (Exception ex)
            {
                log.Error("系统异常", ex);
            }
        }
Beispiel #10
0
        /// <summary>
        /// Links a proxy IP to one or more websites. The IP is read from SysData_ProxyIp when it
        /// already exists there and inserted otherwise.
        /// </summary>
        /// <param name="ip">Proxy IP text.</param>
        /// <param name="ipArea">Region of the IP, e.g. province/city/carrier.</param>
        /// <param name="webIds">Ids of the websites to link the IP to.</param>
        /// <param name="addList">Link rows built for the websites that were not yet linked.</param>
        /// <param name="message">Failure reason when 0 is returned.</param>
        /// <param name="_dc">Optional existing data-access object to build on.</param>
        /// <returns>
        /// 1 on success; 0 when the arguments are empty, the IP could not be resolved, every
        /// website is already linked, or an exception occurred (which is logged).
        /// </returns>
        public static int InsertWebJoinProxyIp(string ip, string ipArea, int[] webIds, out List <SysData_WebJoinProxyIp> addList, out string message, DataClass _dc = null)
        {
            message = "";
            addList = new List<SysData_WebJoinProxyIp>();
            if (string.IsNullOrEmpty(ip) || webIds == null || webIds.Length < 1)
            {
                message = "请输入ip和网站";
                return(0);
            }

            DataClass dc = new DataClass(_dc);
            try
            {
                // Only ipObj matters here: it is set both when the IP was inserted and when it
                // already existed, and stays null when InsertProxyIp rejected the input.
                SysData_ProxyIp ipObj;
                ProxyIpManager.InsertProxyIp(ip, ipArea, out ipObj, out message, _dc: dc);
                if (ipObj == null)
                {
                    message = "ip不能为空";
                    return(0);
                }

                List<SysData_WebJoinProxyIp> list = dc.DB.SysData_WebJoinProxyIp.Where(tbl => tbl.ProxyIp == ipObj.ID && webIds.Contains(tbl.WebId)).ToList();
                if (list.Count == webIds.Length)
                {
                    message = "此网站代理IP已存在";
                    return(0);
                }

                foreach (int webId in webIds)
                {
                    if (!list.Any(p => p.WebId == webId))
                    {
                        addList.Add(new SysData_WebJoinProxyIp {
                            ProxyIp    = ipObj.ID,
                            WebId      = webId,
                            Status     = ProxyIpManager.WebProxyIpStatus1,
                            CreateTime = DateTime.Now
                        });
                    }
                }

                if (addList.Count > 0)
                {
                    dc.DB.SysData_WebJoinProxyIp.InsertAllOnSubmit<SysData_WebJoinProxyIp>(addList);
                    dc.DB.SubmitChanges();
                }
                return(1);
            }
            catch (Exception ex)
            {
                message = "系统异常";
                // The original passed null for a non-null ip, so the IP never reached the log.
                log.Error(string.Format("(插入网站代理ip失败)InsertWebJoinProxyIp(string ip={0}, int[] webIds={1},out string message, DataClass _dc = null)"
                                        , ip ?? "null", webIds == null ? 0 : webIds.Length),
                          ex
                          );
                return(0);
            }
            finally
            {
                // Single release point for every exit path.
                dc.Connection_Close();
                dc.Dispose();
            }
        }
Beispiel #11
0
        /// <summary>
        /// Crawls the itmop.com proxy catalog and imports every proxy IP found on the list pages it links to.
        /// </summary>
        /// <remarks>
        /// Seed list-page URLs are read from the config file, one per line, optionally as "url$host".
        /// Further list-page URLs are scraped from each catalog page; catalog pages are walked from
        /// page 1 up to the page number found in the catalog's last-page link. Each list page is
        /// fetched once. Any exception stops the crawl and is logged.
        /// </remarks>
        public void Run()
        {
            // One IP entry on a list page.
            RegexInfo regex_ipInfo = new RegexInfo("(\\&nbsp\\;[^<>]+<br />)", "$1");
            // "ip:port" built from the first two fields of an entry.
            RegexInfo regex_ip = new RegexInfo("\\&nbsp\\;([^<>\\s]+) ([^<>\\s]+) [^<>]+<br />", "$1:$2");
            // Third field of an entry, used as the region.
            RegexInfo regex_area = new RegexInfo("\\&nbsp\\;[^<>\\s]+ [^<>\\s]+ ([^<>\\s]+) [^<>]+<br />", "$1");

            Dictionary<string, RegexInfo> regexDic1 = new Dictionary<string, RegexInfo>();
            regexDic1.Add("regex_ipInfo", regex_ipInfo);

            Dictionary<string, RegexInfo> regexDic2 = new Dictionary<string, RegexInfo>();
            regexDic2.Add("regex_ip", regex_ip);
            regexDic2.Add("regex_area", regex_area);

            // Seed list-page URLs from the config file.
            List<string> urlList    = new List<string>();
            string       configPath = AppDomain.CurrentDomain.BaseDirectory + "IP代理/Config/猫扑网IP代理ProxyIpConfig.txt";

            // using releases the file handle even when reading throws.
            using (StreamReader sr = new StreamReader(configPath))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    // Blank lines carry no URL.
                    if (line.Trim().Length > 0)
                    {
                        urlList.Add(line);
                    }
                }
            }

            // Links to list pages on a catalog page, and the catalog's last page number.
            RegexInfo regex_urllist           = new RegexInfo("<DT><a href=\"([^\"]+)\" target=\"_blank\">[^<>]+</a></DT>", "$1");
            RegexInfo regex_urllist_pagecount = new RegexInfo("<a href=\"http://www.itmop.com/proxy/catalog.asp\\?page=(\\d+)\">\\&raquo\\;</a></DIV>", "$1");

            Dictionary<string, RegexInfo> regexDic3 = new Dictionary<string, RegexInfo>();
            regexDic3.Add("regex_urllist", regex_urllist);
            regexDic3.Add("regex_urllist_pagecount", regex_urllist_pagecount);

            int    urllist_index         = 1;
            string urllist_nextPage_para = "http://www.itmop.com/proxy/catalog.asp?page={0}";
            string urllist_nextPage      = string.Format(urllist_nextPage_para, urllist_index);

            try
            {
                SysData_ProxyIp existsObj;
                string          message;

                // One iteration per catalog page.
                while (!string.IsNullOrEmpty(urllist_nextPage))
                {
                    Dictionary<string, List<string> > dicValueList3 = SpiderHelp.GetHtmlByRegexNotProxyIp(urllist_nextPage, "utf-8", regexDic3);
                    urlList.AddRange(dicValueList3["regex_urllist"]);

                    // A missing or non-numeric page count means "no further catalog pages" rather
                    // than an exception that aborts the crawl.
                    int           urllist_max_index = 0;
                    List<string>  pageCountMatches  = dicValueList3["regex_urllist_pagecount"];
                    if (pageCountMatches.Count >= 1)
                    {
                        int.TryParse(pageCountMatches[0].TrimBlank(), out urllist_max_index);
                    }

                    if (urllist_index < urllist_max_index)
                    {
                        urllist_index    = urllist_index + 1;
                        urllist_nextPage = string.Format(urllist_nextPage_para, urllist_index);
                    }
                    else
                    {
                        urllist_nextPage = "";
                    }

                    // Crawl every list page collected for this catalog page.
                    foreach (string rawUrlInfo in urlList)
                    {
                        // Scraped hrefs are HTML-escaped.
                        string   urlInfo = rawUrlInfo.Replace("&amp;", "&");
                        string[] parts   = urlInfo.Split('$');
                        string   url     = parts[0];
                        string   urlHost = parts.Length < 2 ? "http://www.itmop.com" : parts[1];

                        // Relative links are resolved against the host.
                        if (!url.ToLower().Contains("http://"))
                        {
                            url = urlHost + url;
                        }

                        Dictionary<string, List<string> > dicValueList = SpiderHelp.GetHtmlByRegexNotProxyIp(url, "utf-8", regexDic1);
                        List<string> ipInfoList = dicValueList["regex_ipInfo"];
                        if (ipInfoList == null || ipInfoList.Count < 1)
                        {
                            log.Debug(string.Format("未获取到IP列表,url:{0}", url));
                            continue;
                        }

                        foreach (string ipInfo in ipInfoList)
                        {
                            Dictionary<string, List<string> > infoListDic = SpiderHelp.GetStrByRegex(ipInfo, regexDic2);
                            string ip     = infoListDic["regex_ip"].Count < 1 ? "" : infoListDic["regex_ip"][0];
                            string ipArea = infoListDic["regex_area"].Count < 1 ? "" : infoListDic["regex_area"][0];
                            ipArea = ipArea.RemoveHeml();
                            ip     = ip.TrimBlank();

                            int result = ProxyIpHelp.ImportProxyIp(ip, ipArea, out existsObj, out message);
                            if (result != 1)
                            {
                                log.Debug(string.Format("{0},url:{1},ip:{2}", message, url, ip ?? "null"));
                            }
                            else
                            {
                                log.Debug(string.Format("ip插入成功,url:{0},ip:{1}", url, ip ?? "null"));
                            }
                        }
                        // The original had a "next page" branch here, but its variable was always
                        // empty, so it never ran; it has been removed.
                    }

                    // URLs for the next catalog page start from an empty list.
                    urlList = new List<string>();
                }
            }
            catch (Exception ex)
            {
                log.Error("系统异常", ex);
            }
        }
Beispiel #12
0
        /// <summary>
        /// Imports proxy IPs from the first worksheet of an Excel file (column A: IP, column B: region),
        /// showing progress on the form and reporting the number of successful imports when done.
        /// </summary>
        /// <remarks>
        /// The form's query panel and buttons are disabled for the duration and restored afterwards,
        /// also when the import throws. The workbook is opened read-only. Only the Excel instance
        /// started here is shut down; other running Excel processes are left alone.
        /// </remarks>
        /// <param name="param">Path of the Excel file (converted to text).</param>
        public void ImportIp(object param)
        {
            string fileName = Convert.ToString(param);
            int    count    = 0; // rows imported successfully

            // Lock the form while the import runs.
            panel查询条件.Enabled   = false;
            btnExcel2.Enabled   = false;
            btnImportIp.Visible = false;
            // Show the progress panel where the import button was.
            panelImport.Visible       = true;
            panelImport.Top           = btnImportIp.Top;
            panelImport.Left          = btnImportIp.Left;
            labelImportBar.Text       = "0%";
            progressBarImportIp.Value = 0;

            SysData_ProxyIp existsObj;
            string          message;
            object          missing = System.Reflection.Missing.Value;

            try
            {
                Excel.Application excel = null;
                Excel.Workbook    wb    = null;
                Excel.Worksheet   ws    = null;
                Excel.Range       rng1  = null;
                try
                {
                    excel = new Excel.Application();
                    if (excel == null)
                    {
                        MessageBox.Show("Can't access excel");
                    }
                    else
                    {
                        excel.Visible     = false;
                        excel.UserControl = true;
                        // Open the workbook read-only.
                        wb = excel.Application.Workbooks.Open(fileName, missing, true, missing, missing, missing,
                                                              missing, missing, missing, true, missing, missing, missing, missing, missing);
                        ws = (Excel.Worksheet)wb.Worksheets.get_Item(1);

                        int rowsint = ws.UsedRange.Cells.Rows.Count;
                        progressBarImportIp.Maximum = rowsint;
                        progressBarImportIp.Minimum = 0;

                        // Columns A and B of every used row.
                        rng1 = ws.Cells.get_Range("A1", "B" + rowsint);
                        object[,] arryItem = (object[, ])rng1.Value2;

                        // NOTE(review): the loop stops one row short of the used range, so the last
                        // row is never imported. Kept as in the original; confirm whether that is intended.
                        for (int i = 1; i <= rowsint - 1; i++)
                        {
                            // Convert.ToString maps an empty cell (null) to "" instead of throwing.
                            string ip     = Convert.ToString(arryItem[i, 1]);
                            string ipArea = Convert.ToString(arryItem[i, 2]);
                            int    result = ProxyIpHelp.ImportProxyIp(ip, ipArea, out existsObj, out message);
                            if (result == 1)
                            {
                                count = count + 1;
                            }

                            progressBarImportIp.Value = progressBarImportIp.Value + 1;
                            labelImportBar.Text       = 计算百分比(progressBarImportIp.Value, progressBarImportIp.Maximum);
                        }
                        progressBarImportIp.Value = progressBarImportIp.Maximum;
                        labelImportBar.Text       = 计算百分比(progressBarImportIp.Value, progressBarImportIp.Maximum);
                    }
                }
                finally
                {
                    // Shut down only the Excel instance created above. The original killed every
                    // process named "excel", which also closed workbooks the user had open.
                    if (wb != null)
                    {
                        wb.Close(false, missing, missing);
                    }
                    if (excel != null)
                    {
                        excel.Quit();
                    }
                    ReleaseExcelComObject(rng1);
                    ReleaseExcelComObject(ws);
                    ReleaseExcelComObject(wb);
                    ReleaseExcelComObject(excel);
                    // Collect the wrappers of intermediate COM objects (Workbooks, UsedRange, ...)
                    // so the Excel process can exit.
                    GC.Collect();
                    GC.WaitForPendingFinalizers();
                }

                MessageBox.Show(string.Format("导入完成,成功导入可用的代理IP{0}个", count));
            }
            finally
            {
                // Restore the form whether the import finished or threw.
                panelImport.Visible = false;
                btnExcel2.Enabled   = true;
                btnImportIp.Visible = true;
                panel查询条件.Enabled   = true;
                labelImportBar.Text = "";
            }
        }

        /// <summary>Releases the runtime wrapper of a COM object; does nothing for null.</summary>
        private static void ReleaseExcelComObject(object comObject)
        {
            if (comObject != null)
            {
                System.Runtime.InteropServices.Marshal.ReleaseComObject(comObject);
            }
        }
Beispiel #13
0
        /// <summary>
        /// Crawls the paged proxy tables configured for the xici site and imports every proxy IP found.
        /// </summary>
        /// <remarks>
        /// Each config line has the form "url$host$date". Starting at url, pages are followed through
        /// their "next page" link (resolved against host) until a page has no entries, there is no
        /// next link, or a successfully imported entry is dated before the configured date.
        /// Malformed config lines are logged and skipped. Any other exception stops the crawl and is logged.
        /// </remarks>
        public void Run()
        {
            // One table row per proxy entry.
            RegexInfo regex_ipInfo   = new RegexInfo("(<tr class=\"[^\"]*\">(?:(?!</tr>).)*</tr>)", "$1");
            // Link to the next page of the table.
            RegexInfo regex_nextPage = new RegexInfo("<a class=\"next_page\" rel=\"next\" href=\"([^\"]+)\">下一页[^<>]*</a>", "$1");
            // "ip:port" from the 2nd and 3rd cells of a row.
            RegexInfo regex_ip       = new RegexInfo("<tr class=\"[^\"]*\"><td>(?:(?!</td>).)*</td><td>([^<>]+)</td><td>([^<>]+)</td><td><a href=\"[^\"]*\">[^<>]+</a></td>" +
                                                     "<td>[^<>]*</td><td>[^<>]*</td><td>(?:(?!</td>).)*</td><td>(?:(?!</td>).)*</td><td>[^<>]+</td>", "$1:$2");
            // Link text of the 4th cell, used as the region.
            RegexInfo regex_area = new RegexInfo("<tr class=\"[^\"]*\"><td>(?:(?!</td>).)*</td><td>[^<>]+</td><td>[^<>]+</td><td><a href=\"[^\"]*\">([^<>]+)</a></td>" +
                                                 "<td>[^<>]*</td><td>[^<>]*</td><td>(?:(?!</td>).)*</td><td>(?:(?!</td>).)*</td><td>[^<>]+</td>", "$1");
            // Text of the last cell, parsed as the entry date.
            RegexInfo regex_date = new RegexInfo("<tr class=\"[^\"]*\"><td>(?:(?!</td>).)*</td><td>[^<>]+</td><td>[^<>]+</td><td><a href=\"[^\"]*\">[^<>]+</a></td>" +
                                                 "<td>[^<>]*</td><td>[^<>]*</td><td>(?:(?!</td>).)*</td><td>(?:(?!</td>).)*</td><td>([^<>]+)</td>", "$1");

            Dictionary<string, RegexInfo> regexDic1 = new Dictionary<string, RegexInfo>();
            regexDic1.Add("regex_ipInfo", regex_ipInfo);
            regexDic1.Add("regex_nextPage", regex_nextPage);

            Dictionary<string, RegexInfo> regexDic2 = new Dictionary<string, RegexInfo>();
            regexDic2.Add("regex_ip", regex_ip);
            regexDic2.Add("regex_area", regex_area);
            regexDic2.Add("regex_date", regex_date);

            // Crawl targets from the config file, e.g. "http://www.xici.net.co/nt/$http://www.xici.net.co/$2014-5-13".
            List<string> urlList    = new List<string>();
            string       configPath = AppDomain.CurrentDomain.BaseDirectory + "IP代理/Config/西刺ProxyIpConfig.txt";

            // using releases the file handle even when reading throws.
            using (StreamReader sr = new StreamReader(configPath))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    urlList.Add(line);
                }
            }

            try
            {
                SysData_ProxyIp existsObj;
                string          message;

                foreach (string urlInfo in urlList)
                {
                    string[] parts = urlInfo.Split('$');
                    DateTime maxDate;
                    // A blank or malformed line used to throw and end the whole crawl; skip it instead.
                    if (parts.Length < 3 || !DateTime.TryParse(parts[2], out maxDate))
                    {
                        log.Debug(string.Format("配置行格式错误,已跳过:{0}", urlInfo));
                        continue;
                    }
                    string url     = parts[0];
                    string urlHost = parts[1];

                    // Follow the "next page" links of this target.
                    while (!string.IsNullOrEmpty(url))
                    {
                        Dictionary<string, List<string> > dicValueList = SpiderHelp.GetHtmlByRegexNotProxyIp(url, "utf-8", regexDic1);
                        List<string> ipInfoList = dicValueList["regex_ipInfo"];
                        string       nextPage   = dicValueList["regex_nextPage"].Count < 1 ? "" : dicValueList["regex_nextPage"][0];
                        if (ipInfoList == null || ipInfoList.Count < 1)
                        {
                            log.Debug(string.Format("未获取到IP列表,url:{0}", url));
                            break;
                        }

                        foreach (string ipInfo in ipInfoList)
                        {
                            Dictionary<string, List<string> > infoListDic = SpiderHelp.GetStrByRegex(ipInfo, regexDic2);
                            string ip     = infoListDic["regex_ip"].Count < 1 ? "" : infoListDic["regex_ip"][0];
                            string ipArea = infoListDic["regex_area"].Count < 1 ? "" : infoListDic["regex_area"][0];

                            // A missing or unparseable date falls back to "now", so it never triggers the cut-off.
                            DateTime ipDate;
                            if (infoListDic["regex_date"].Count < 1 || !DateTime.TryParse(infoListDic["regex_date"][0], out ipDate))
                            {
                                ipDate = DateTime.Now;
                            }

                            ip = ip.TrimBlank();

                            int result = ProxyIpHelp.ImportProxyIp(ip, ipArea, out existsObj, out message);
                            if (result != 1)
                            {
                                log.Debug(string.Format("{0},url:{1},ip:{2}", message, url, ip ?? "null"));
                                // NOTE(review): a failed import (e.g. IP already stored) skips the date
                                // cut-off below, as in the original. Confirm that is intended.
                                continue;
                            }
                            log.Debug(string.Format("ip插入成功,url:{0},ip:{1}", url, ip ?? "null"));

                            // Entries older than the configured date end paging for this target.
                            if (ipDate < maxDate)
                            {
                                nextPage = null;
                                break;
                            }
                        }

                        url = string.IsNullOrEmpty(nextPage) ? null : urlHost + nextPage;
                    }
                }
            }
            catch (Exception ex)
            {
                log.Error("系统异常", ex);
            }
        }