示例#1
0
        /// <summary>
        /// Handles a crawl error: forwards it to the crawler's own error handler,
        /// invalidates the proxy IP when requested, optionally re-queues the URL
        /// for another attempt, and logs the exception message.
        /// </summary>
        /// <param name="args">
        /// Error details for the failed request (URL, depth, proxy, exception and
        /// the <c>needChangeIp</c> / <c>needTryAgain</c> flags).
        /// </param>
        private static void CrawlErrorEvent(CrawlErrorEventArgs args)
        {
            simpleCrawler.ErrorReceive(args);

            // The proxy IP is flagged as restricted / unable to reach the site.
            if (args.needChangeIp)
            {
                IPInvalidProcess(args.IpProx);
            }

            // args.Exception is not guaranteed to be set; fall back to an empty
            // message instead of throwing from inside the error handler.
            var message = args.Exception != null ? args.Exception.Message : string.Empty;

            // Each retry pushes the URL deeper so that it eventually exceeds
            // Settings.Depth and stops being re-queued. The step must be at least 1:
            // with integer division, Settings.Depth / 10 is 0 for depths below 10,
            // which would re-queue a failing URL forever.
            var nextDepth = args.Depth + Math.Max(1, Settings.Depth / 10);

            // Re-queue failed requests (e.g. timeouts) unless failed URLs are ignored.
            if (args.needTryAgain && Settings.IgnoreFailUrl == false)
            {
                if (args.Depth <= Settings.Depth)
                {
                    UrlQueue.Instance.EnQueue(new UrlInfo(args.Url)
                    {
                        Depth = nextDepth
                    });
                }

                var proxyIp = args.IpProx != null ? args.IpProx.IP : string.Empty;
                Console.WriteLine(string.Format("{0}重试深度{1}{2}", message, nextDepth, proxyIp));
            }

            Console.WriteLine(message);
        }
 /// <summary>
 /// Error handler. When the exception message indicates a timeout ("超时") or a
 /// failed connection attempt ("连接尝试失败"), queues a database update that marks
 /// the record identified by the URL's "keyNo" (or, failing that, "unique")
 /// parameter with detailInfo = "2" and isTimeOut = "1", then logs the event.
 /// Any exception raised while doing so is caught and logged.
 /// </summary>
 /// <param name="args">Error details; the exception and the URL are read.</param>
 public void ErrorReceive(CrawlErrorEventArgs args)
 {
     try
     {
         if (args.Exception != null && (args.Exception.Message.Contains("超时") || args.Exception.Message.Contains("连接尝试失败")))
         {
             // "keyNo" is tried first (per the original note: the relationship-graph
             // style URL); "unique" is the fallback parameter name.
             var guid = GetUrlParam(args.Url, "keyNo");
             if (string.IsNullOrEmpty(guid))
             {
                 guid = GetUrlParam(args.Url, "unique");
             }

             // NOTE(review): guid may still be null/empty here if neither parameter
             // is present; the update is queued regardless, as before.
             var curUpdateBson = new BsonDocument().Add("detailInfo", "2").Add("isTimeOut", "1");
             DBChangeQueue.Instance.EnQueue(new StorageData()
             {
                 Document = curUpdateBson, Name = DataTableName, Type = StorageType.Update, Query = Query.EQ("eGuid", guid)
             });
             Console.WriteLine(string.Format("发生超时操作:{0}{1}", args.Exception.Message, args.Url));
         }
     }
     catch (Exception ex)
     {
         // The format string previously contained a "{1}" placeholder with no
         // matching argument, which made string.Format throw FormatException
         // from inside this handler. Only the message is supplied, so only
         // "{0}" is used.
         Console.WriteLine(string.Format("进行错误处理时候发生了如下错误:{0}", ex.Message));
     }
 }
        /// <summary>
        /// Error callback for a failed crawl request. This implementation performs no action.
        /// </summary>
        /// <param name="args">Details of the failed request; not used.</param>
        public void ErrorReceive(CrawlErrorEventArgs args)
        {
            // No error handling is performed here.
        }
 /// <summary>
 /// Error callback: writes the URL of the failed request to the console,
 /// followed by the text "出错".
 /// </summary>
 /// <param name="args">Details of the failed request; only its Url is read.</param>
 public void ErrorReceive(CrawlErrorEventArgs args)
 {
     var failedUrl = args.Url;
     Console.WriteLine(string.Format("{0}出错", failedUrl));
 }