Пример #1
0
        /// <summary>
        /// Stores one monitoring snapshot of a spider run in the <c>spiderinfo</c> table and
        /// echoes the same figures to the console and the diary log.
        /// </summary>
        /// <remarks>
        /// The insert is attempted at most three times; a result of zero or more from
        /// <c>MySqlHelp.Insert</c> is treated as success. The statistics line is printed and
        /// logged after every attempt, whether it succeeded or not.
        /// NOTE(review): string values are embedded directly in the SQL text, so a quote inside
        /// any of them breaks the statement. Switch to parameters if MySqlHelp offers them.
        /// </remarks>
        /// <param name="spiderinfo">Snapshot to store; must not be null.</param>
        /// <returns><c>true</c> if one of the attempts succeeded, otherwise <c>false</c>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="spiderinfo"/> is null.</exception>
        public bool Control(Spiderinfo spiderinfo)
        {
            if (spiderinfo == null)
            {
                throw new ArgumentNullException(nameof(spiderinfo));
            }

            const int maxAttempts = 3;
            bool stored = false;

            #region Spider monitoring system
            // Format with the invariant culture and a fixed date pattern. With the thread culture
            // a date could come out in a form MySQL cannot parse, and a decimal could come out as
            // "0,5", which would shift the VALUES list by one column.
            string insertSql = string.Format(
                System.Globalization.CultureInfo.InvariantCulture,
                "insert ignore into spiderinfo(SpiderProgramName,TaskName,SiteName,TemplateName,StartDT,EndDT,RequestMount,CatchMount,StoreMount,ProxyRequestMount,ProxyPercent,VertifyMount,VertifyPercent,TotalMount,TotalLength,IPAddress,Ext) values('{0}','{1}','{2}','{3}','{4:yyyy-MM-dd HH:mm:ss}','{5:yyyy-MM-dd HH:mm:ss}',{6},{7},{8},{9},{10},{11},{12},{13},{14},'{15}','{16}')",
                spiderinfo.SpiderProgramName, spiderinfo.TaskName, spiderinfo.SiteName, spiderinfo.TemplateName,
                spiderinfo.StartDT, spiderinfo.EndDT, spiderinfo.RequestMount, spiderinfo.CatchMount,
                spiderinfo.StoreMount, spiderinfo.ProxyRequestMount, spiderinfo.ProxyPercent,
                spiderinfo.VertifyMount, spiderinfo.VertifyPercent, spiderinfo.TotalMount,
                spiderinfo.TotalLength, spiderinfo.IPAddress, spiderinfo.Ext);

            for (int attempt = 0; attempt < maxAttempts && !stored; attempt++)
            {
                int result = MySqlHelp.Insert(AliyunConn, insertSql, spiderinfo.TaskName + "插入数据失败记录.txt");
                stored = result >= 0;

                // Human-readable summary; it is rebuilt per attempt so the timestamp is current.
                string checkInfo =
                    string.Format("爬虫:{0} 请求量:{1} 抓取量:{2} 入库量:{3} 代理请求量:{4} 代理成功率:{5} 打码次数:{6} 成功打码:{7} 打码成功率:{8} 总抓取量:{9} 时间:{10}",
                                  spiderinfo.TaskName,
                                  spiderinfo.RequestMount, spiderinfo.CatchMount, spiderinfo.StoreMount, spiderinfo.ProxyRequestMount,
                                  spiderinfo.ProxyPercent, spiderinfo.VertifyMount, spiderinfo.VertifyTrueMount, spiderinfo.VertifyPercent,
                                  spiderinfo.TotalMount, DateTime.Now);
                Console.WriteLine("【{0}】{1}", result, checkInfo);
                CLog.DiaryLog(checkInfo, spiderinfo.TaskName + "(每半小时统计一次)_" + DateTime.Now.ToString("yyyyMMdd") + ".txt");
            }
            #endregion

            return stored;
        }
Пример #2
0
        /// <summary>
        /// Executes the given task sources in fixed-size concurrent batches.
        /// </summary>
        /// <remarks>
        /// Every URL is wrapped in a <c>MyParallel</c> request. When
        /// <c>StaticConfig.SpConfig.ActionLssNum</c> requests are queued, or the last URL is
        /// reached, the batch runs through <c>Parallel.Invoke</c>. The shared task lists are then
        /// handed to <c>GcTask</c> and reset, and the loop pauses for
        /// <c>StaticConfig.SpConfig.ActionSleepTime</c> seconds before <c>FixException</c> is
        /// called. A failing batch is logged and followed by a one-minute pause; it does not
        /// abort the run. The console title is refreshed after every URL.
        /// </remarks>
        /// <param name="allInfoUrls">Task sources to process; a null or empty list does nothing.</param>
        public void DoTask(List<TaskUrlConfig> allInfoUrls)
        {
            if (allInfoUrls == null || allInfoUrls.Count == 0)
            {
                return;
            }

            // A configured batch size below 1 would make the modulo below throw (zero) or never
            // match (negative), so fall back to running one request per batch.
            var batchSize = Math.Max(1, StaticConfig.SpConfig.ActionLssNum);

            // Day and hour of the last periodic mail sent by this call. Without it a mail would
            // go out for every URL processed during minute 0 of hours 0, 8 and 16.
            DateTime lastReportSlot = DateTime.MinValue;

            List<Action> lss = new List<Action>();

            for (int i = 0; i < allInfoUrls.Count; i++)
            {
                // Count this request in the spider statistics.
                StaticConfig.Spiderinfo.RequestMount++;
                StaticConfig.CountConfig.Itotal++;
                MyParallel grabAllInfo = new MyParallel
                {
                    // Task to execute.
                    TaskUrl = allInfoUrls[i],
                    // Proxy setup; the local proxy is the default.
                    // To use the Abuyun proxy, either enable the Fiddler proxy or replace the
                    // local proxy with the Abuyun endpoint.
                    ProxyConfig =
                    {
                        // [-or-] local Fiddler proxy forwarding to Abuyun
                        Proxy     = new WebProxy("192.168.1.1"),
                        // [-or-] Abuyun
                        //Proxy = new WebProxy("http://http-dyn.abuyun.com:9020"),
                        ProxyUser = "******",
                        ProxyPass = "******"
                    },
                    // Account cookie; left empty here because no account is needed.
                    CkConfig =
                    {
                        Cookie = ""
                    }
                };
                lss.Add(new MyParallel(grabAllInfo).MyRequest);

                // Batch boundary: the concurrency level comes from the configuration file.
                if (i % batchSize == batchSize - 1 || i == allInfoUrls.Count - 1)
                {
                    try
                    {
                        // Run the queued requests concurrently.
                        System.Threading.Tasks.Parallel.Invoke(lss.ToArray());
                        Console.WriteLine("***********************************************************");
                        // TODO: recycle tasks
                        GcTask(StaticConfig.TaskUrls, StaticConfig.AddTaskUrls);
                    }
                    catch (Exception ex)
                    {
                        // Parallel.Invoke reports failures as an AggregateException whose own
                        // message can be generic, so log the messages of the real causes.
                        string detail = ex.Message;
                        AggregateException aggregate = ex as AggregateException;
                        if (aggregate != null)
                        {
                            List<string> causes = new List<string>();
                            foreach (Exception inner in aggregate.Flatten().InnerExceptions)
                            {
                                causes.Add(inner.Message);
                            }
                            if (causes.Count > 0)
                            {
                                detail = string.Join(" | ", causes);
                            }
                        }

                        Console.WriteLine("并发出错:{0}>>>{1}", detail, DateTime.Now);
                        CLog.DiaryLog(detail,
                                      $"\\{StaticConfig.Spiderinfo.TaskName}并发出错\\{StaticConfig.Spiderinfo.TaskName}并发出错_{DateTime.Now:yyyyMMdd}.txt");
                        System.Threading.Thread.Sleep(new TimeSpan(0, 1, 0));
                    }

                    // Reset the per-batch state.
                    lss.Clear();
                    StaticConfig.TaskUrls    = new List<TaskUrlConfig>();
                    StaticConfig.AddTaskUrls = new List<TaskUrlConfig>();
                    System.Threading.Thread.Sleep(new TimeSpan(0, 0, StaticConfig.SpConfig.ActionSleepTime));
                    System.GC.Collect();

                    // TODO: send the mail notification for request errors
                    FixException(StaticConfig.SpConfig.ActionEmail,
                                 StaticConfig.Spiderinfo.TaskName, StaticConfig.CountConfig.ErrorCodeStr);
                    Console.WriteLine("***********************************************************");
                }

                // Status line shown in the console title for a quick view of progress.
                StaticConfig.CountConfig.EMessage = string.Format("{0}[{1:MMddHHmm}]【{2}/{3}>{4:0.000}-{5}】[{6}/{7}]",
                                                                  StaticConfig.Spiderinfo.TaskName, StaticConfig.SpConfig.DateKs, StaticConfig.CountConfig.Itrue,
                                                                  StaticConfig.CountConfig.Itotal,
                                                                  ((double)StaticConfig.CountConfig.Itrue / StaticConfig.CountConfig.Itotal),
                                                                  StaticConfig.CountConfig.Ifasle, i, allInfoUrls.Count);
                Console.Title = StaticConfig.CountConfig.EMessage;

                #region Notification mail (every 8 hours)
                // Optional self-notification; adjust or remove as needed.
                DateTime now = DateTime.Now;
                if (now.Hour % 8 == 0 && now.Minute == 0)
                {
                    DateTime slot = now.Date.AddHours(now.Hour);
                    if (slot != lastReportSlot)
                    {
                        lastReportSlot = slot;
                        MessageEail("每8个小时反馈");
                    }
                }
                #endregion
            }
        }
Пример #3
0
        /// <summary>
        /// Starts a worker thread that stores the spider statistics about every half hour.
        /// </summary>
        /// <remarks>
        /// While <c>Flg</c> is true, a snapshot is written whenever the current minute is 0 or 30.
        /// The first time <c>Flg</c> is seen false after having been true, one more snapshot is
        /// written. Each write first adds <c>CatchMount</c> to the latest <c>TotalMount</c> stored
        /// for this spider (or to the in-memory value when that lookup fails), inserts the row
        /// with up to three attempts, then resets the per-interval counters and moves
        /// <c>StartDT</c> to now. The thread loops forever and is started as a foreground thread.
        /// </remarks>
        public void AutomaticCount()
        {
            // True while a closing snapshot is still owed once Flg turns false.
            bool finalWriteOwed = false;

            #region Automatic spider statistics
            Thread rdsTh = new Thread(() =>
            {
                while (true)
                {
                    // Read Flg once so the whole decision below is based on a single value.
                    bool running = Flg;
                    if (running)
                    {
                        finalWriteOwed = true;
                    }

                    bool onHalfHour = DateTime.Now.Minute % 30 == 0;
                    bool writeNow   = running ? onHalfHour : finalWriteOwed;
                    if (!writeNow)
                    {
                        Thread.Sleep(new TimeSpan(0, 0, 59));
                        continue;
                    }

                    long totalMount = TotalMount;
                    try
                    {
                        string sql = string.Format(
                            "select TotalMount from spiderinfo where SpiderProgramName='{0}' and TaskName='{1}' and SiteName='{2}' and TemplateName='{3}' order by id DESC limit 0,1",
                            SpiderProgramName, TaskName, SiteName, TemplateName);
                        var history = SpiderHelp.SaveModule.MySqlHelp.Select(AliyunConn, sql);
                        // No earlier row (first run) is normal; keep the in-memory total.
                        if (history != null && history.Rows.Count > 0)
                        {
                            totalMount = long.Parse(history.Rows[0][0].ToString());
                        }
                    }
                    catch (Exception ex)
                    {
                        // Best effort: continue with the in-memory total, but record the failure
                        // so it is no longer lost silently.
                        CLog.DiaryLog("查询历史总抓取量失败:" + ex.Message,
                                      TaskName + "(每半小时统计一次)_" + DateTime.Now.ToString("yyyyMMdd") + ".txt");
                    }

                    TotalMount        = totalMount + CatchMount;
                    EndDT             = DateTime.Now;
                    ProxyRequestMount = RequestMount;
                    if (CatchMount > 0 && CatchMount <= RequestMount)
                    {
                        ProxyPercent = Convert.ToDecimal(((double)CatchMount / RequestMount).ToString("0.000"));
                    }
                    if (VertifyTrueMount > 0 && VertifyTrueMount <= VertifyMount)
                    {
                        VertifyPercent = Convert.ToDecimal(((double)VertifyTrueMount / VertifyMount).ToString("0.000"));
                    }

                    // Invariant culture and a fixed date pattern keep the statement valid whatever
                    // the thread culture is (no "0,5" decimals, no locale-specific dates).
                    string rdsSql =
                        string.Format(
                            System.Globalization.CultureInfo.InvariantCulture,
                            "insert ignore into spiderinfo(SpiderProgramName,TaskName,SiteName,TemplateName,StartDT,EndDT,RequestMount,CatchMount,StoreMount,ProxyRequestMount,ProxyPercent,VertifyMount,VertifyPercent,TotalMount,TotalLength,IPAddress,Ext) values ('{0}','{1}','{2}','{3}','{4:yyyy-MM-dd HH:mm:ss}','{5:yyyy-MM-dd HH:mm:ss}',{6},{7},{8},{9},{10},{11},{12},{13},{14},'{15}','{16}');",
                            SpiderProgramName, TaskName, SiteName, TemplateName,
                            StartDT, EndDT, RequestMount, CatchMount,
                            StoreMount, ProxyRequestMount, ProxyPercent,
                            VertifyMount, VertifyPercent, TotalMount, TotalLength,
                            IPAddress, Ext);

                    int itryMax = 3;
                    while (itryMax > 0)
                    {
                        int iflg = SpiderHelp.SaveModule.MySqlHelp.Insert(AliyunConn, rdsSql,
                                                                          TaskName + "_数据监控失败记录");
                        // If the spider has stopped, this write was the closing snapshot.
                        finalWriteOwed = Flg;
                        if (iflg >= 0)
                        {
                            itryMax = 0;
                        }
                        else
                        {
                            itryMax--;
                        }
                        string checkInfo =
                            string.Format(
                                "爬虫:{0} 请求量:{1} 抓取量:{2} 入库量:{3} 代理请求量:{4} 代理成功率:{5} 打码次数:{6} 成功打码:{7} 打码成功率:{8} 总抓取量:{9} 时间:{10}",
                                TaskName,
                                RequestMount, CatchMount, StoreMount,
                                ProxyRequestMount,
                                ProxyPercent, VertifyMount, VertifyTrueMount,
                                VertifyPercent,
                                TotalMount, DateTime.Now);
                        CLog.DiaryLog(checkInfo,
                                      TaskName + "(每半小时统计一次)_" + DateTime.Now.ToString("yyyyMMdd") + ".txt");
                    }

                    // Start a fresh interval.
                    StartDT           = DateTime.Now;
                    VertifyPercent    = 0;
                    VertifyMount      = 0;
                    VertifyTrueMount  = 0;
                    StoreMount        = 0;
                    ProxyRequestMount = 0;
                    CatchMount        = 0;
                    RequestMount      = 0;
                    ProxyPercent      = 0;
                    TotalLength       = 0;

                    // Sleep past the current minute so the same half-hour mark is not hit twice.
                    Thread.Sleep(new TimeSpan(0, 1, 1));
                }
            });
            rdsTh.Start();
            #endregion
        }
Пример #4
0
 /// <summary>
 /// Seeds the task table with the initial list-page task sources.
 /// </summary>
 /// <remarks>
 /// Builds 214 task records and inserts them with multi-row statements of up to 100 rows each,
 /// trying every statement at most three times. Progress is written to the console and the
 /// console title. Any exception is printed and written to the diary log and is not rethrown.
 /// </remarks>
 private void TaskAdd()
 {
     try
     {
         const int lastPage = 214;
         var pending = new List<TaskUrlConfig>();
         int page = 1;
         while (page <= lastPage)
         {
             // The page number is not part of the URL here; "XXX" is kept from the original.
             string pageUrl = $"XXX";
             pending.Add(new TaskUrlConfig
             {
                 CompanyName = "无",
                 Uid         = "无",
                 Tab         = "list",
                 Url         = pageUrl,
                 Md5         = MyConvert.ToUserMd5(pageUrl),
                 Method      = "get",
                 ICount      = 0,
                 IState      = 0,
                 Queue_time  = DateTime.Now,
                 Done_time   = DateTime.Now
             });
             page++;
         }
         Console.WriteLine($@"共计任务:【{pending.Count}】>>>{DateTime.Now}");

         const int batchSize = 100;
         DateTime  startedAt  = DateTime.Now;
         string    insertHead = $"INSERT IGNORE INTO {actionTable}(CompanyName,Uid,Tab,Url,Md5,Method,ICount,IState,Queue_time,Done_time) VALUES";
         var       rows       = new List<string>();
         int       index      = 0;
         foreach (TaskUrlConfig task in pending)
         {
             rows.Add(
                 $"('{task.CompanyName}','{task.Uid}','{task.Tab}','{task.Url}','{task.Md5}','{task.Method}',{task.ICount},{task.IState},now(),now())");

             bool batchFull = index % batchSize == batchSize - 1;
             bool lastTask  = index == pending.Count - 1;
             if (batchFull || lastTask)
             {
                 // One multi-row statement per batch.
                 string statement    = insertHead + string.Join(",", rows);
                 int    attemptsLeft = 3;
                 do
                 {
                     int result = spideBll.Insert(statement, $"{taskName}任务入库异常");
                     attemptsLeft = result >= 0 ? 0 : attemptsLeft - 1;
                     Console.WriteLine("入库【{0}】>>>{1}", result, DateTime.Now);
                 } while (attemptsLeft > 0);

                 Console.WriteLine(@"**************************************************");
                 rows.Clear();
             }

             Console.Title = $@"{taskName}任务入库[{startedAt:MMddHHmm}]【{index}/{pending.Count}】";
             index++;
         }
     }
     catch (Exception ex)
     {
         Console.WriteLine($@"{ex.Message}>>>{DateTime.Now}");
         CLog.DiaryLog(ex.Message, $"\\{taskName}任务源入库异常\\{actionTable}任务源入库异常_{DateTime.Now:yyyyMMdd}.txt");
     }
 }