/// <summary>
/// Builds a <c>NovelDetails</c> from a list entry and fills in its introduction text.
/// </summary>
/// <param name="Info">List entry whose Name, NovelId, Url, Cover and Author are copied.</param>
/// <returns>The populated details object.</returns>
private NovelDetails GetDetails(NovelList Info)
{
    // Copy the fields that are already known from the list entry.
    var details = new NovelDetails
    {
        Name = Info.Name,
        NovelId = Info.NovelId,
        Url = Info.Url,
        Cover = Info.Cover,
        Author = Info.Author
    };

    // Fetch the introduction for the novel's URL, trim it, then drop every space character.
    details.Introduction = biqudu.GetIntroduction(Info.Url).Trim().Replace(" ", "");

    return details;
}
/// <summary>
/// Fetches the novel detail page and extracts the novel's details with the configured rule.
/// </summary>
/// <param name="novelKey">Value substituted for the "{NovelKey}" placeholder in the rule's novel URL.</param>
/// <returns>The details matched from the detail page.</returns>
/// <exception cref="SpiderException">
/// Thrown when the page comes back empty, when the rule's error pattern matches the page,
/// or when the final <c>Utils.ObjectIsNotNull</c> check fails.
/// </exception>
public async Task<NovelDetails> GetNovelInfo(string novelKey)
{
    // Build the detail-page URL; combine it with the site URL when it does not already contain it.
    var detailUrl = _rule.NovelUrl.Replace("{NovelKey}", novelKey);
    if (detailUrl.Contains(_rule.SiteUrl) == false)
    {
        detailUrl = UtilityHelper.Combine(_rule.SiteUrl, detailUrl);
    }

    Logger.ColorConsole2("抓取小说详情:" + detailUrl);

    var pageHtml = await HtmlHelper.Get(detailUrl);
    if (string.IsNullOrWhiteSpace(pageHtml))
    {
        throw new SpiderException("小说详情页无法访问");
    }

    // The rule carries a pattern that marks an error page; treat a match as failure.
    var isErrorPage = Regex.IsMatch(pageHtml, _rule.NovelErr.Pattern);
    if (isErrorPage)
    {
        throw new SpiderException("匹配到小说页面错误标识,失败");
    }

    // Extract each field with its own rule, in the same order as before.
    var details = new NovelDetails
    {
        Name = RegexMatch(_rule.NovelName, pageHtml),
        ImageUrl = RegexMatch(_rule.NovelImage, pageHtml),
        Sort = RegexMatch(_rule.NovelClassify, pageHtml),
        Author = RegexMatch(_rule.NovelAuthor, pageHtml),
        State = RegexIsMatch(_rule.NovelState, pageHtml) ? 1 : 0,
        Des = RegexMatch(_rule.NovelDes, pageHtml),
        // Chapter catalog.
        ChapterIndex = RegexMatch(_rule.ChapterIndex, pageHtml)
    };

    // NOTE(review): the meaning of the "Des"/"ChapterIndex" arguments depends on
    // Utils.ObjectIsNotNull, which is not visible here - confirm against its definition.
    var allMatched = Utils.ObjectIsNotNull(details, "Des", "ChapterIndex");
    if (!allMatched)
    {
        throw new SpiderException("获取小说详情失败,有匹配不到的值");
    }

    return details;
}
/// <summary>
/// Creates the dialog for the given novel and uses that novel as the data context.
/// </summary>
/// <param name="_Novel">Novel details stored in <c>Novel</c> and exposed through <c>DataContext</c>.</param>
public TextDialog(NovelDetails _Novel)
    : this()
{
    this.Novel = _Novel;

    // Read the value back from Novel, exactly as the assignment order requires.
    DataContext = this.Novel;
}
/// <summary>
/// Crawls the chapters that are missing from an already stored novel, appends them to the
/// novel's chapter index and refreshes the stored novel record.
/// </summary>
/// <param name="spider">Spider used to fetch the chapter list, chapter content, category and cover.</param>
/// <param name="novelKey">Novel key passed through to the spider calls.</param>
/// <param name="info">Freshly crawled details; supplies the name/author lookup key, chapter index, state, category, cover URL and description.</param>
/// <param name="model">Not referenced by this method.</param>
/// <returns>A task that completes when the update has finished or been skipped.</returns>
private async Task ProcessUpdate(Spider spider, string novelKey, NovelDetails info, NovelInfo model)
{
    var chapterIndex = info.ChapterIndex;

    var novelInfo = _novelInfoRepository.FindOrDefault(x => x.Name == info.Name && x.Author == info.Author);
    if (novelInfo == null)
    {
        return;
    }

    // Existing index record for this novel.
    var oldIndexes = _novelIndexRepository.FindOrDefault(x => x.Id == novelInfo.IndexId);
    if (oldIndexes == null)
    {
        // Without an index record there is nothing to append to. Previously this case crawled
        // and inserted chapters and then failed with a NullReferenceException when the index
        // was updated; bail out before doing any work instead.
        Logger.ColorConsole2(string.Format("{0} 索引目录不存在,跳过更新", novelInfo.Name), ConsoleColor.Red);
        return;
    }

    // Chapter names already stored.
    var oldChapterList = oldIndexes.Indexex.Select(x => x.ChapterName).ToList();

    // Latest chapter list from the site.
    var chapterList = await spider.GetNovelChapterList(novelKey, chapterIndex);
    var newChapterList = chapterList.Select(x => x.Key).ToList();

    // Compare both lists to decide whether anything has to be added, and from which position.
    int updateIndex = 0;
    if (!ChapterListNeedUpdate(oldChapterList, newChapterList, out updateIndex))
    {
        return;
    }

    // Index entries created during this run.
    var indexes = new List<Index>();

    for (int i = updateIndex; i < chapterList.Count; i++)
    {
        if (!_isWorking)
        {
            break;
        }

        var chapter = chapterList[i];
        try
        {
            var content = await spider.GetContent(novelKey, chapterIndex, chapter.Value);
            var chapterId = ObjectId.NextId();
            var chapterEntity = new NovelChapter()
            {
                Id = chapterId,
                NovelId = novelInfo.Id,
                ChapterName = chapter.Key,
                UpdateTime = DateTime.Now,
                WordCount = Utils.GetWordCount(content),
                Content = content
            };
            _novelChapterRepository.Insert(novelInfo.Id, chapterEntity);
            indexes.Add(new Index() { ChapterId = chapterId, ChapterName = chapter.Key });

            // Pause between chapters without blocking the calling thread.
            await Task.Delay(500);
        }
        catch (SpiderException ex)
        {
            Logger.Error("{0}-{1} 小说章节抓取失败:{2}", chapter.Key, chapter.Value, ex.Message);

            if (_options.SpiderOptions.错误章节处理 == 错误章节处理.停止本书_继续采集下一本)
            {
                // Configured to stop this book on a failed chapter.
                Logger.ColorConsole2(string.Format("{0}-{1} 错误章节处理.停止本书_继续采集下一本", chapter.Key, chapter.Value), ConsoleColor.Red);
                break;
            }
            else if (_options.SpiderOptions.错误章节处理 == 错误章节处理.入库章节名_继续采集下一章)
            {
                // Configured to store the chapter name with empty content and carry on.
                Logger.ColorConsole2(string.Format("{0}-{1} 错误章节处理.入库章节名_继续采集下一章", chapter.Key, chapter.Value), ConsoleColor.Red);
                var chapterId = ObjectId.NextId();
                var chapterEntity = new NovelChapter()
                {
                    Id = chapterId,
                    NovelId = novelInfo.Id,
                    ChapterName = chapter.Key,
                    UpdateTime = DateTime.Now,
                    WordCount = 0,
                    Content = ""
                };
                _novelChapterRepository.Insert(novelInfo.Id, chapterEntity);
                indexes.Add(new Index() { ChapterId = chapterId, ChapterName = chapter.Key });
            }
        }
        catch (Exception ex)
        {
            Logger.Fatal(ex, "ProcessEngine.ProcessUpdate");
            break;
        }
    }

    // Append the new entries to the stored index.
    oldIndexes.Indexex.AddRange(indexes);
    _novelIndexRepository.Update(x => x.Id == oldIndexes.Id, oldIndexes);

    // Refresh the novel record.
    var latest = oldIndexes.Indexex.LastOrDefault();
    novelInfo.State = info.State;
    novelInfo.UpdateTime = DateTime.Now;
    novelInfo.LatestChapter = latest?.ChapterName;
    novelInfo.LatestChapterId = latest?.ChapterId;

    // Optional refreshes, each controlled by its own spider option.
    if (_options.SpiderOptions.自动更新分类)
    {
        novelInfo.Sort = spider.MatchSort(info.Sort);
    }
    if (_options.SpiderOptions.自动更新封面)
    {
        novelInfo.Cover = spider.DownLoadImageToBase64(info.ImageUrl);
    }
    if (_options.SpiderOptions.自动更新简介)
    {
        novelInfo.Des = info.Des;
    }

    _novelInfoRepository.Update(x => x.Id == novelInfo.Id, novelInfo);
}
/// <summary>
/// Crawls every chapter of a novel and stores its chapters, its chapter index and its
/// novel record under newly generated ids.
/// </summary>
/// <param name="spider">Spider used to fetch the cover, chapter list, chapter content and category.</param>
/// <param name="novelKey">Novel key passed through to the spider calls.</param>
/// <param name="info">Crawled details; supplies the chapter index, cover URL, name, author, category, state and description.</param>
/// <returns>A task that completes when the index and novel record have been inserted.</returns>
private async Task ProcessAdd(Spider spider, string novelKey, NovelDetails info)
{
    var chapterIndex = info.ChapterIndex;

    // Ids for the novel and for its chapter index.
    var novelId = ObjectId.NextId();
    var novelIndexId = ObjectId.NextId();

    // Novel cover.
    var novelCover = spider.DownLoadImageToBase64(info.ImageUrl);

    /*
     * 1 >>> Fetch the chapter list
     */
    var chapterList = await spider.GetNovelChapterList(novelKey, chapterIndex);
    var indexes = new List<Index>();

    // Chapters are fetched one at a time with a delay in between, otherwise the crawler is easily banned.
    for (int i = 0; i < chapterList.Count; i++)
    {
        if (!_isWorking)
        {
            break;
        }

        var chapter = chapterList[i];
        try
        {
            var content = await spider.GetContent(novelKey, chapterIndex, chapter.Value);
            var chapterId = ObjectId.NextId();
            var chapterEntity = new NovelChapter()
            {
                Id = chapterId,
                NovelId = novelId,
                ChapterName = chapter.Key,
                UpdateTime = DateTime.Now,
                WordCount = Utils.GetWordCount(content),
                Content = content
            };
            _novelChapterRepository.Insert(novelId, chapterEntity);
            indexes.Add(new Index() { ChapterId = chapterId, ChapterName = chapter.Key });

            // Pause between chapters without blocking the calling thread.
            await Task.Delay(500);
        }
        catch (SpiderException ex)
        {
            Logger.Error("{0}-{1} 小说章节抓取失败:{2}", chapter.Key, chapter.Value, ex.Message);

            if (_options.SpiderOptions.错误章节处理 == 错误章节处理.停止本书_继续采集下一本)
            {
                // Configured to stop this book on a failed chapter.
                Logger.ColorConsole2(string.Format("{0}-{1} 错误章节处理.停止本书_继续采集下一本", chapter.Key, chapter.Value), ConsoleColor.Red);
                break;
            }
            else if (_options.SpiderOptions.错误章节处理 == 错误章节处理.入库章节名_继续采集下一章)
            {
                // Configured to store the chapter name with empty content and carry on.
                Logger.ColorConsole2(string.Format("{0}-{1} 错误章节处理.入库章节名_继续采集下一章", chapter.Key, chapter.Value), ConsoleColor.Red);
                var chapterId = ObjectId.NextId();
                var chapterEntity = new NovelChapter()
                {
                    Id = chapterId,
                    NovelId = novelId,
                    ChapterName = chapter.Key,
                    UpdateTime = DateTime.Now,
                    WordCount = 0,
                    Content = ""
                };
                _novelChapterRepository.Insert(novelId, chapterEntity);
                indexes.Add(new Index() { ChapterId = chapterId, ChapterName = chapter.Key });
            }
        }
        catch (Exception ex)
        {
            Logger.Fatal(ex, "ProcessEngine.ProcessAdd");
            break;
        }
    }

    /*
     * 2 >>> Write the chapter index
     */
    var novelIndex = new NovelIndex()
    {
        Id = novelIndexId,
        NovelId = novelId,
        UpdateTime = DateTime.Now,
        Indexex = indexes
    };
    _novelIndexRepository.Insert(novelIndex);

    /*
     * 3 >>> Write the novel record
     */
    var latest = indexes.LastOrDefault();
    var novelInfo = new NovelInfo()
    {
        Id = novelId,
        Name = info.Name,
        Author = info.Author,
        Sort = spider.MatchSort(info.Sort),
        State = info.State,
        Des = info.Des,
        Cover = novelCover,
        CreateTime = DateTime.Now,
        UpdateTime = DateTime.Now,
        LatestChapter = latest?.ChapterName,
        LatestChapterId = latest?.ChapterId,
        IndexId = novelIndexId
    };
    _novelInfoRepository.Insert(novelInfo);
}