Пример #1
0
        /// <summary>
        /// Deletes the proxy IPs that are no longer effective, after waiting until
        /// <c>WorkItemManager.CheckPassSpider()</c> reports that work may proceed.
        /// </summary>
        public void Run()
        {
            log.Debug("所有ip正在删除中...........");

            // Poll once a minute while the pass check fails (the check looks for a
            // maintenance program running against the database).
            while (!WorkItemManager.CheckPassSpider())
            {
                System.Threading.Thread.Sleep(60000);
            }

            ProxyIpManager.DeleteNotEffectiveProxyIp();
            log.Debug("所有ip删除完成...........");
        }
Пример #2
0
        /// <summary>
        /// Imports a proxy IP: rejects empty input, IPs that are already stored and IPs that
        /// fail <c>CheckProxyIp</c>, then inserts the IP through <c>ProxyIpManager.InsertProxyIp</c>.
        /// </summary>
        /// <param name="ip">The proxy IP to import; null or empty is rejected.</param>
        /// <param name="ipArea">Area value passed through to <c>ProxyIpManager.InsertProxyIp</c>.</param>
        /// <param name="existsObj">
        /// The stored record when the IP already exists; otherwise whatever
        /// <c>ProxyIpManager.InsertProxyIp</c> outputs, or null when the IP was rejected earlier.
        /// </param>
        /// <param name="message">Text describing the outcome.</param>
        /// <returns>1 when the insert reported success; 0 in every other case.</returns>
        public static int ImportProxyIp(string ip, string ipArea, out SysData_ProxyIp existsObj, out string message)
        {
            existsObj = null;
            message   = "";

            if (string.IsNullOrEmpty(ip))
            {
                message = "ip不能为空";
                return 0;
            }

            // Poll once a minute while the pass check fails (the check looks for a
            // maintenance program running against the database).
            while (!WorkItemManager.CheckPassSpider())
            {
                System.Threading.Thread.Sleep(60000);
            }

            existsObj = ProxyIpManager.GetProxyIpByIp(ip);
            if (existsObj != null)
            {
                message = "ip已存在";
                return 0;
            }

            if (!CheckProxyIp(ip))
            {
                message = "ip不可用";
                return 0;
            }

            // Existence was verified above, so the insert is told to skip its own check.
            int insertResult = ProxyIpManager.InsertProxyIp(ip, ipArea, out existsObj, out message, checkExists: false);
            if (insertResult != 1)
            {
                // Keep the message produced by InsertProxyIp.
                return 0;
            }

            message = "ip插入成功";
            return 1;
        }
Пример #3
0
        /// <summary>
        /// Program entry point. The run types come from <paramref name="args"/> or, when none
        /// are given, from one space-separated line read from standard input.
        /// </summary>
        /// <remarks>
        /// If the first run type contains "1" or "0", <c>WorkItemManager.SetAllStop</c> is called
        /// with that value and the program exits. Otherwise the program waits until
        /// <c>WorkItemManager.CheckPassSpider()</c> succeeds and then either runs each
        /// "group.name" spider from FxtSpider.RunSource.dll in sequence, or (when the first run
        /// type contains "IP代理") starts one thread per "namespace.class.method" argument.
        /// </remarks>
        /// <param name="args">Command-line run types.</param>
        /// <exception cref="ArgumentException">A run type does not have the required dot-separated parts.</exception>
        /// <exception cref="InvalidOperationException">No spider type could be resolved for a run type.</exception>
        static void Main(string[] args)
        {
            string[] runType = args;
            if (args == null || args.Length < 1)
            {
                // Console.ReadLine returns null at end of input; blank entries are dropped so that
                // stray spaces do not turn into empty run types.
                string paraStr = Console.ReadLine();
                runType = paraStr == null
                    ? new string[0]
                    : paraStr.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
                if (runType.Length < 1)
                {
                    Console.Write("未提供运行参数");
                    return;
                }
            }

            // NOTE(review): these are substring tests, so any first argument that merely contains
            // the digit 1 or 0 is treated as a pause/start command — confirm this is intended.
            if (runType[0].Contains("1"))
            {
                WorkItemManager.SetAllStop(1);
                Console.Write("爬取服务器暂停成功");
                return;
            }
            else if (runType[0].Contains("0"))
            {
                WorkItemManager.SetAllStop(0);
                Console.Write("爬取服务器启动成功");
                return;
            }

            // Poll once a minute while the pass check fails (the check looks for a
            // maintenance program running against the database).
            while (!WorkItemManager.CheckPassSpider())
            {
                Thread.Sleep(60000);
            }

            if (!runType[0].Contains("IP代理"))
            {
                #region (set the log name)
                // Group the second part of every "group.name" argument under its first part;
                // the grouping is only used to build the log file name.
                Dictionary<string, List<string>> namesByGroup = new Dictionary<string, List<string>>();
                foreach (string str in runType)
                {
                    string[] parts = str.Split('.');
                    if (parts.Length < 2)
                    {
                        throw new ArgumentException("Run type must have the form \"group.name\": " + str);
                    }
                    List<string> names;
                    if (!namesByGroup.TryGetValue(parts[0], out names))
                    {
                        names = new List<string>();
                        namesByGroup.Add(parts[0], names);
                    }
                    names.Add(parts[1]);
                }

                // Produces "group_name1.name2;" for every group.
                StringBuilder sb = new StringBuilder();
                foreach (KeyValuePair<string, List<string>> kvp in namesByGroup)
                {
                    sb.Append(kvp.Key).Append("_").Append(string.Join(".", kvp.Value.ToArray())).Append(";");
                }
                InitLogger("(" + sb.ToString() + ")");
                #endregion

                // Resolve the spider assembly once instead of once per run type.
                string   dllFill   = GetConfigDire() + "\\FxtSpider.RunSource.dll";
                Assembly runSource = Assembly.LoadFile(dllFill);
                foreach (string str in runType)
                {
                    string[] parts        = str.Split('.');
                    bool     useOtherCity = false;
                    Type     type         = runSource.GetType("FxtSpider.RunSource." + str);
                    if (type == null)
                    {
                        // No dedicated type for this name: fall back to the group's "其他城市" type
                        // and hand it the name through CityName.
                        type = runSource.GetType("FxtSpider.RunSource." + parts[0] + ".其他城市");
                        if (type == null)
                        {
                            throw new InvalidOperationException("No spider type found in " + dllFill + " for run type: " + str);
                        }
                        useOtherCity = true;
                    }

                    // A direct cast reports a wrong type as InvalidCastException instead of a later NullReferenceException.
                    INewDataRum rum = (INewDataRum)Activator.CreateInstance(type);
                    if (useOtherCity)
                    {
                        rum.CityName = parts[1];
                    }
                    rum.start();
                }
            }
            else
            {
                #region (set the log name)
                StringBuilder sb = new StringBuilder();
                foreach (string str in runType)
                {
                    sb.Append(str).Append(";");
                }
                InitLogger("(" + sb.ToString() + ")");
                #endregion

                // Resolve the spider assembly once instead of once per run type.
                string   dllFill   = GetConfigDire() + "\\FxtSpider.RunSource.dll";
                Assembly runSource = Assembly.LoadFile(dllFill);
                foreach (string str in runType)
                {
                    string[] parts = str.Split('.');
                    if (parts.Length < 2)
                    {
                        throw new ArgumentException("Run type must have the form \"namespace.class.method\": " + str);
                    }
                    object objClass = runSource.CreateInstance("FxtSpider.RunSource." + parts[0] + "." + parts[1]);
                    if (objClass != null)
                    {
                        if (parts.Length < 3)
                        {
                            throw new ArgumentException("Run type must have the form \"namespace.class.method\": " + str);
                        }
                        // NOTE(review): GetMethod returns null when the method name is unknown; the
                        // null is handed to ExecIpSpider unchanged, as before — confirm it copes.
                        MethodInfo method = objClass.GetType().GetMethod(parts[2]);
                        ArrayList  al     = new ArrayList();
                        al.Add(method);
                        al.Add(objClass);
                        Thread m_thread = new Thread(new ParameterizedThreadStart(ExecIpSpider));
                        m_thread.Start(al);
                    }
                }
            }
        }
Пример #4
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/>, strips line breaks and tabs from it, and
        /// applies every rule in <paramref name="dic"/> to the result.
        /// </summary>
        /// <remarks>
        /// A failed download is retried up to two times in total. After that, when
        /// <c>webObj.BlockadeOfIP</c> is set, the proxy IP is replaced up to three times. If the
        /// download still fails, <c>SysCodeManager.Code_1_1</c> is recorded and the rules run
        /// against an empty page.
        /// When <c>webObj.BlockadeOfIPType</c> equals <c>WebsiteManager.BlockadeOfIPType1</c> and
        /// the page matches <c>regex_checkcode</c> (a captcha page), the proxy IP is replaced up to
        /// three times; after that <c>SysCodeManager.Code_1_3</c> is recorded.
        /// A rule whose key contains "*" and that matches nothing on a non-empty, non-captcha page
        /// records <c>SysCodeManager.Code_1_2</c>, except on 搜房网 pages carrying the "sold" markers.
        /// </remarks>
        /// <param name="url">Page URL.</param>
        /// <param name="encoding">Page encoding, forwarded to <c>GetHtml</c>.</param>
        /// <param name="dic">Rules keyed by name; when null an empty dictionary is returned.</param>
        /// <param name="webObj">Website record; its ID, BlockadeOfIP and BlockadeOfIPType are read.</param>
        /// <param name="cityId">City id written to the spider error log.</param>
        /// <param name="referer">Forwarded to <c>GetHtml</c>.</param>
        /// <param name="keepAlive">Forwarded to <c>GetHtml</c>.</param>
        /// <param name="timeout">Forwarded to <c>GetHtml</c> (presumably milliseconds — confirm against GetHtml).</param>
        /// <returns>
        /// One entry per key of <paramref name="dic"/> holding the list returned by
        /// <c>GetStrByRegexByIndex</c>, plus a "NotData" entry containing "1" for 搜房网 pages that
        /// carry both "sold" markers.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="webObj"/> is null while <paramref name="dic"/> is not.</exception>
        public static Dictionary <string, List <string> > GetHtmlByRegex(string url, string encoding, Dictionary <string, RegexInfo> dic, 网站表 webObj, int cityId, string referer = null, bool keepAlive = false, int timeout = 60000)
        {
            Dictionary<string, List<string>> resultDic = new Dictionary<string, List<string>>();
            if (dic == null)
            {
                return resultDic;
            }
            if (webObj == null)
            {
                // Every path below reads webObj; fail before issuing any request.
                throw new ArgumentNullException("webObj");
            }

            const int maxNetworkRetries   = 2;
            const int maxProxyReplacements = 3;

            string nowProxyIp               = null;
            int    networkRetryCount        = 0;
            int    networkProxyReplacements = 0;
            int    captchaProxyReplacements = 0;
            string usedIps                  = ""; // comma-terminated list of proxy IPs tried, for the error log
            string resultHtml;
            bool   checkError               = false; // true once the captcha could not be avoided

            while (true)
            {
                // Start every attempt from an empty page so a failed download can never be
                // evaluated against the HTML of an earlier attempt.
                resultHtml = "";
                try
                {
                    resultHtml = GetHtml(url, encoding, proxyIp: nowProxyIp, referer: referer, keepAlive: keepAlive, timeout: timeout);
                }
                catch (Exception ex)
                {
                    if (networkRetryCount < maxNetworkRetries)
                    {
                        System.Threading.Thread.Sleep(3000);
                        networkRetryCount++;
                        continue;
                    }
                    if (webObj.BlockadeOfIP && networkProxyReplacements < maxProxyReplacements)
                    {
                        System.Threading.Thread.Sleep(2000);
                        networkProxyReplacements++;
                        nowProxyIp = ReplaceProxyIp(webObj, nowProxyIp);
                        usedIps    = usedIps + nowProxyIp + ",";
                        continue;
                    }

                    // Out of retries: record the network failure and carry on with the empty page.
                    log.Error(string.Format("SpiderHouse:(requestUrl:{0}--请求异常)", url), ex);
                    WaitUntilSpiderMayPass();
                    DatSpiderErrorLogManager.InsertError(cityId, webObj.ID, url, SysCodeManager.Code_1_1, "所用ip:" + usedIps);
                }

                resultHtml = Regex.Replace(resultHtml, @"(([\r\n])[\s]+|[\r\n]+|[\t]+)", "", RegexOptions.IgnoreCase);

                // Captcha detection only applies to sites with this blockade type.
                if (webObj.BlockadeOfIPType != WebsiteManager.BlockadeOfIPType1)
                {
                    break;
                }
                List<string> checkList = GetStrByRegexByIndex(resultHtml, regex_checkcode);
                if (checkList == null || checkList.Count < 1)
                {
                    break;
                }

                // A captcha page means the request itself got through, so the
                // network-failure proxy budget starts over.
                networkProxyReplacements = 0;
                if (captchaProxyReplacements < maxProxyReplacements)
                {
                    captchaProxyReplacements++;
                    nowProxyIp = ReplaceProxyIp(webObj, nowProxyIp);
                    usedIps    = usedIps + nowProxyIp + ",";
                    continue;
                }

                // Still a captcha after all replacements: record it and stop trying.
                checkError = true;
                WaitUntilSpiderMayPass();
                DatSpiderErrorLogManager.InsertError(cityId, webObj.ID, url, SysCodeManager.Code_1_3,
                                                     string.Format("所用ip:{0}", usedIps)
                                                     );
                log.Debug(string.Format("SpiderHouse:(requestUrl:{0}--请求异常:需输入验证码)", url));
                break;
            }

            // 搜房网 pages carrying both markers report the listing as sold; missing fields are expected there.
            bool soldOut = webObj.ID == WebsiteManager.搜房网_ID && resultHtml.Contains(">此房源已售出!<") && resultHtml.Contains(">您可以选择查看:<");

            foreach (KeyValuePair<string, RegexInfo> kvp in dic)
            {
                string       key  = kvp.Key;
                List<string> list = GetStrByRegexByIndex(resultHtml, kvp.Value);
                resultDic.Add(key, list);
                if (soldOut)
                {
                    continue;
                }
                // Only important rules (key contains "*") on a non-empty, non-captcha page with an
                // actual pattern are worth an error entry when they match nothing.
                bool shouldReport = key.Contains("*") && !string.IsNullOrEmpty(resultHtml) && !checkError && kvp.Value != null && !string.IsNullOrEmpty(kvp.Value.RegexStr);
                if (shouldReport && (list == null || list.Count < 1))
                {
                    DatSpiderErrorLogManager.InsertError(cityId, webObj.ID, url, SysCodeManager.Code_1_2,
                                                         string.Format("描述:{0},规则:{1},索引:{2},其他规则个数:{3},所用ip:{4}",
                                                                       key,
                                                                       kvp.Value.RegexStr,
                                                                       kvp.Value.RegexIndex,
                                                                       kvp.Value.RegexInfoList == null ? 0 : kvp.Value.RegexInfoList.Count,
                                                                       usedIps
                                                                       )
                                                         );
                }
            }

            if (soldOut)
            {
                resultDic.Add("NotData", new List<string> {
                    "1"
                });
            }
            return resultDic;
        }

        /// <summary>
        /// Blocks, polling once a minute, until <c>WorkItemManager.CheckPassSpider()</c> returns true
        /// (the check looks for a maintenance program running against the database).
        /// </summary>
        private static void WaitUntilSpiderMayPass()
        {
            while (!WorkItemManager.CheckPassSpider())
            {
                System.Threading.Thread.Sleep(60000);
            }
        }

        /// <summary>
        /// Marks <paramref name="currentProxyIp"/> as not effective for the site and returns the
        /// next effective proxy IP from <c>ProxyIpManager</c>.
        /// </summary>
        private static string ReplaceProxyIp(网站表 webObj, string currentProxyIp)
        {
            WaitUntilSpiderMayPass();
            ProxyIpManager.SetNotEffectiveProxyIp(webObj.ID, currentProxyIp);
            return ProxyIpManager.GetEffectiveProxyIp(webObj.ID);
        }