/// <summary>
/// Runs the collect-and-publish workflow for the current book. Each pass collects the
/// chapter list, makes sure the book exists online, then collects and submits every chapter
/// the online copy is still missing. After a complete pass the method sleeps for 24 hours
/// and starts another pass; it only returns after an exception ends a pass (the exception
/// is logged, not propagated).
/// </summary>
/// <param name="interval">
/// Base delay in milliseconds slept before each pass, randomized by roughly 30% in either
/// direction. Values of 0 or less skip the delay.
/// </param>
public void Execute(int interval)
{
    try
    {
        // Loop rather than calling Execute recursively after the 24-hour sleep: the recursive
        // form added one stack frame (and one nested try block) per cycle for the lifetime of
        // the process. Any exception still leaves the loop and is logged once below.
        while (true)
        {
            // Pause briefly before starting the pass.
            if (interval > 0)
            {
                int step = (int)Math.Round(interval * 0.3);
                int interval2 = new Random().Next(interval - step, interval + step);
                Thread.Sleep(interval2);
            }

            Log.Show(string.Format("<C{0}> {1} ", _CollectionModel.CollectionId, _CollectionModel.Name), ConsoleColor.DarkGreen);

            // While a chapter-list URL is pending, collect that page. The whole chapter list
            // has to be collected into the collection model before chapters are compared.
            // NOTE(review): this loop ends only if buildModel() clears or advances
            // currentUrl_ChapterList - confirm against buildModel.
            while (!string.IsNullOrEmpty(this.currentUrl_ChapterList))
            {
                // URL of the chapter-list page to collect.
                currenttask.Url = currentUrl_ChapterList;
                currenttask.Load(_CollectionModel.IsUTF8);

                // Fill the collected data into the model.
                buildModel();
            }

            // Chapter list collected: ask the online service for the current state of this book.
            WxReceiveBookDto receiveResult = publishBiz.doGetBookInfo(this._CollectionModel.CollectionId);
            if (receiveResult.id == 0)
            {
                // The book is not online yet, so submit it first.
                publishBiz.doSubmitBook(this._CollectionModel);
            }

            // Fetch the online state again to compare it with the freshly collected chapter list.
            receiveResult = publishBiz.doGetBookInfo(this._CollectionModel.CollectionId);
            if (receiveResult.id == 0)
            {
                throw new Exception("线上图书信息不存在");
            }

            wxchapter nextChapter = CollectionModel.GetNextChapter(this._CollectionModel.chapterList, receiveResult.maxchapterid);
            while (nextChapter != null)
            {
                try
                {
                    // Short random pause between chapters.
                    int intervalX = new Random().Next(100, 500);
                    Thread.Sleep(intervalX);

                    // Collect the content of this chapter, following detail URLs while one is pending.
                    this.currentUrl_ChapterDetail = nextChapter.chapterUrl;
                    while (!string.IsNullOrEmpty(this.currentUrl_ChapterDetail))
                    {
                        currenttask.Url = this.currentUrl_ChapterDetail;
                        currenttask.Load(_CollectionModel.IsUTF8);

                        // Fill the collected data into the chapter model.
                        buildModel(nextChapter);
                    }

                    if (string.IsNullOrEmpty(nextChapter.Id))
                    {
                        throw new Exception("没有采集到章节id数据");
                    }

                    publishBiz.doSubmitChapter(nextChapter);

                    // A successful submission resets the failure counter.
                    retryTimes = 0;

                    Log.Show(string.Format("<C{0}> {1} 灌入章节数据: {2} ", _CollectionModel.CollectionId, _CollectionModel.Name, nextChapter.Title), ConsoleColor.DarkGreen);

                    // Re-read the online state and pick the next chapter to submit.
                    receiveResult = publishBiz.doGetBookInfo(this._CollectionModel.CollectionId);
                    nextChapter = CollectionModel.GetNextChapter(this._CollectionModel.chapterList, receiveResult.maxchapterid);
                }
                catch (Exception exChapter)
                {
                    // Back off for a few seconds, then retry the same chapter
                    // (nextChapter is unchanged when the attempt fails).
                    int intervalW = new Random().Next(2 * 1000, 8 * 1000);
                    Thread.Sleep(intervalW);

                    Log.Show(string.Format("<C{0}> {1} error: {2}, chapter:{3} ", _CollectionModel.CollectionId, _CollectionModel.Name, exChapter.Message, this.currentUrl_ChapterDetail), ConsoleColor.DarkRed);
                    Log.Show(string.Format("<C{0}> {1} retryTimes: {2}", _CollectionModel.CollectionId, _CollectionModel.Name, retryTimes));

                    retryTimes++;
                    if (retryTimes >= 8)
                    {
                        // Give up after 8 failures without an intervening success. A bare
                        // "throw;" keeps the original stack trace ("throw exChapter;" reset it).
                        throw;
                    }
                }
            }

            // One complete pass is done; clear the collected data and wait a long time before checking again.
            Log.Show(string.Format("<C{0}> {1} 完成一次完整流程,暂时没有更新,休息24小时再次检查 ", _CollectionModel.CollectionId, _CollectionModel.Name), ConsoleColor.DarkGreen);

            // Reset the state so the next pass collects the chapter list from the start URL again.
            this._CollectionModel.chapterList = new List<wxchapter>();
            this.currentUrl_ChapterList = this._CollectionModel.Url;

            // Sleep for 24 hours, then run the next pass.
            Thread.Sleep(24 * 60 * 60 * 1000);
        }
    }
    catch (Exception ex)
    {
        Log.Show(string.Format("<C{0}> {1} error: {2} ", _CollectionModel.CollectionId, _CollectionModel.Name, ex.Message), ConsoleColor.DarkRed);
    }
}
/// <summary>
/// Performs one collection pass that saves a book locally: loads the start page for the
/// book's basic information, writes the name and intro through LocalFileIO, collects the
/// chapter list, then loads each chapter and writes its content through LocalFileIO.
/// Any exception ends the pass and is logged rather than propagated.
/// </summary>
/// <param name="interval">
/// Base delay in milliseconds slept before the pass, randomized by roughly 30% in either
/// direction. Values of 0 or less skip this delay; a fixed 30-second wait always follows.
/// </param>
public void Execute(int interval)
{
    try
    {
        // Optional jittered pause before doing anything.
        if (interval > 0)
        {
            int spread = (int)Math.Round(interval * 0.3);
            Random jitter = new Random();
            int pause = jitter.Next(interval - spread, interval + spread);
            Thread.Sleep(pause);
        }

        // Fixed 30-second wait.
        Thread.Sleep(30 * 1000);

        // Collect the book's basic information (id, title, intro) from the start page.
        currenttask.Url = BaseUrl;
        currenttask.PageType = PageTypeEnum.StartupPage;
        currenttask.Load(_CollectionModel.IsUTF8);

        // Fill the collected data into the model.
        buildModel();

        // Write the name and intro files.
        LocalFileIO.writeName(this._CollectionModel.Name, this.sortName);
        LocalFileIO.writeIntro(this._CollectionModel.Name, this._CollectionModel.Intr, this.sortName);

        // While a chapter-list URL is pending, collect that page; the whole chapter list
        // has to be gathered into the collection model before chapters are processed.
        while (!string.IsNullOrEmpty(this.CurrentUrl_ChapterList))
        {
            // URL of the chapter-list page to collect.
            currenttask.Url = CurrentUrl_ChapterList;
            currenttask.PageType = PageTypeEnum.ListPage1;
            currenttask.Load(_CollectionModel.IsUTF8);

            // Fill the collected data into the model.
            buildModel();
        }

        // Chapter list is complete: collect and save every chapter in turn.
        foreach (var chapter in this._CollectionModel.chapterList)
        {
            // Random pause of 10 to 60 seconds between chapters.
            Random chapterJitter = new Random();
            int chapterPause = chapterJitter.Next(10 * 1000, 60 * 1000);
            Thread.Sleep(chapterPause);

            // Collect the content of this chapter, following detail URLs while one is pending.
            // NOTE(review): PageType is not changed before these loads, so it still holds the
            // value left by the steps above - confirm this is intended.
            this.currentUrl_ChapterDetail = chapter.chapterUrl;
            while (!string.IsNullOrEmpty(this.currentUrl_ChapterDetail))
            {
                currenttask.Url = this.currentUrl_ChapterDetail;
                currenttask.Load(_CollectionModel.IsUTF8);

                // Fill the collected data into the chapter model.
                buildModel(chapter);
            }

            // Turn the HTML line/paragraph tags into platform line breaks before saving.
            string lineBreak = System.Environment.NewLine;
            string plainText = chapter.Content;
            plainText = plainText.Replace("<br>", lineBreak);
            plainText = plainText.Replace("<p>", lineBreak);
            plainText = plainText.Replace("</p>", lineBreak);
            // NOTE(review): as rendered here both arguments look like a single space; confirm
            // whether the first was meant to be a non-breaking space or an "&nbsp;" entity.
            plainText = plainText.Replace(" ", " ");

            // Save this chapter.
            LocalFileIO.writeChpater(this._CollectionModel.Name, chapter.Title, plainText, this.sortName);

            string savedMessage = string.Format("<C> {0} 写入章节数据: {1} ", _CollectionModel.Name, chapter.Title);
            Log.Show(savedMessage, ConsoleColor.DarkGreen);
        }

        // One complete pass is done.
        string doneMessage = string.Format("<C{0}> {1} 完成一次完整流程,暂时没有更新,休息24小时再次检查 ", _CollectionModel.CollectionId, _CollectionModel.Name);
        Log.Show(doneMessage, ConsoleColor.DarkGreen);

        // Clear the collected chapters and point the list URL back at the book's URL.
        this._CollectionModel.chapterList = new List<wxchapter>();
        this.CurrentUrl_ChapterList = this._CollectionModel.Url;
    }
    catch (Exception ex)
    {
        string errorMessage = string.Format("<C{0}> {1} error: {2} ", _CollectionModel.CollectionId, _CollectionModel.Name, ex.Message);
        Log.Show(errorMessage, ConsoleColor.DarkRed);
    }
}