/// <summary>
/// Crawl error callback: forwards the error to the crawler's own handler, reports the proxy
/// when an IP change is requested, and re-queues the URL for a retry at a greater depth.
/// </summary>
/// <param name="args">Error details (URL, depth, proxy, exception and the retry / change-IP flags).</param>
private static void CrawlErrorEvent(CrawlErrorEventArgs args)
{
    simpleCrawler.ErrorReceive(args);

    // The proxy could not be used for this request; hand it over for invalidation.
    if (args.needChangeIp)
    {
        IPInvalidProcess(args.IpProx);
    }

    // The exception may be absent; never let the error handler itself throw on a null dereference.
    var message = args.Exception != null ? args.Exception.Message : string.Empty;

    // Each retry is pushed deeper so that it eventually exceeds Settings.Depth and stops.
    // Settings.Depth / 10 evaluates to 0 under integer division when Settings.Depth is below 10,
    // which would retry at the same depth forever, so the step is clamped to at least 1.
    var depthStep = Settings.Depth / 10;
    if (depthStep < 1)
    {
        depthStep = 1;
    }
    var nextDepth = args.Depth + depthStep;

    // On a retryable failure, consider re-adding the URL (unless failed URLs are ignored).
    if (args.needTryAgain && Settings.IgnoreFailUrl == false)
    {
        if (args.Depth <= Settings.Depth)
        {
            UrlQueue.Instance.EnQueue(new UrlInfo(args.Url) { Depth = nextDepth });
        }

        Console.WriteLine(string.Format("{0}重试深度{1}{2}", message, nextDepth, args.IpProx != null ? args.IpProx.IP : string.Empty));
    }

    Console.WriteLine(message);
}
/// <summary>
/// Error handler: when the failure message indicates a timeout or a failed connection attempt,
/// queues a database update that flags the matching record as timed out.
/// </summary>
/// <param name="args">Error details; the exception message and the URL are inspected.</param>
public void ErrorReceive(CrawlErrorEventArgs args)
{
    try
    {
        var isTimeout = args.Exception != null
            && (args.Exception.Message.Contains("超时") || args.Exception.Message.Contains("连接尝试失败"));
        if (!isTimeout)
        {
            return;
        }

        // The record key is read from the "keyNo" URL parameter, falling back to "unique".
        var guid = GetUrlParam(args.Url, "keyNo");
        if (string.IsNullOrEmpty(guid))
        {
            guid = GetUrlParam(args.Url, "unique");
        }

        var curUpdateBson = new BsonDocument().Add("detailInfo", "2").Add("isTimeOut", "1");
        DBChangeQueue.Instance.EnQueue(new StorageData()
        {
            Document = curUpdateBson,
            Name = DataTableName,
            Type = StorageType.Update,
            Query = Query.EQ("eGuid", guid)
        });

        Console.WriteLine(string.Format("发生超时操作:{0}{1}", args.Exception.Message, args.Url));
    }
    catch (Exception ex)
    {
        // The format string previously contained "{0}{1}" with a single argument, which makes
        // string.Format throw FormatException from inside this handler; only {0} is used now.
        Console.WriteLine(string.Format("进行错误处理时候发生了如下错误:{0}", ex.Message));
    }
}
/// <summary>
/// Error handler. This implementation intentionally performs no work.
/// </summary>
/// <param name="args">Error details; unused here.</param>
public void ErrorReceive(CrawlErrorEventArgs args)
{
    // Nothing to do for crawl errors in this implementation.
}
/// <summary>
/// Error handler: writes the failing URL followed by an error marker to the console.
/// </summary>
/// <param name="args">Error details; only the URL is read.</param>
public void ErrorReceive(CrawlErrorEventArgs args)
{
    var line = string.Format("{0}出错", args.Url);
    Console.WriteLine(line);
}