Exemplo n.º 1
0
        /// <summary>
        /// Builds a <see cref="NovelDetails"/> from a list entry: copies the basic fields and
        /// fills in the introduction obtained from <c>biqudu.GetIntroduction</c>.
        /// </summary>
        /// <param name="Info">List entry supplying the name, id, URL, cover and author.</param>
        /// <returns>
        /// The populated details. The introduction is trimmed and has every space character removed.
        /// </returns>
        private NovelDetails GetDetails(NovelList Info)
        {
            // Copy the fields that are already known from the list entry.
            var details = new NovelDetails
            {
                Name    = Info.Name,
                NovelId = Info.NovelId,
                Url     = Info.Url,
                Cover   = Info.Cover,
                Author  = Info.Author
            };

            // Fetch the introduction for this novel's URL, then normalise it:
            // trim the ends first, then drop all remaining ' ' characters.
            string trimmed = biqudu.GetIntroduction(Info.Url).Trim();
            details.Introduction = trimmed.Replace(" ", "");

            return details;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Downloads the novel's info page and extracts its details with the configured rule.
        /// </summary>
        /// <param name="novelKey">
        /// Value substituted for the <c>{NovelKey}</c> placeholder in the rule's novel URL.
        /// </param>
        /// <returns>The details matched from the info page.</returns>
        /// <exception cref="SpiderException">
        /// Thrown when the page comes back empty/whitespace, when the page matches the rule's
        /// error pattern, or when the final <c>Utils.ObjectIsNotNull</c> check fails.
        /// </exception>
        public async Task<NovelDetails> GetNovelInfo(string novelKey)
        {
            // Build the info-page URL; when it does not already contain the site URL,
            // combine the two.
            var pageUrl = _rule.NovelUrl.Replace("{NovelKey}", novelKey);
            bool containsSiteUrl = pageUrl.Contains(_rule.SiteUrl);

            if (!containsSiteUrl)
            {
                pageUrl = UtilityHelper.Combine(_rule.SiteUrl, pageUrl);
            }

            Logger.ColorConsole2("抓取小说详情:" + pageUrl);

            var html = await HtmlHelper.Get(pageUrl);

            if (string.IsNullOrWhiteSpace(html))
            {
                throw new SpiderException("小说详情页无法访问");
            }

            // The rule carries a pattern that identifies an error page; fail fast on a match.
            bool isErrorPage = Regex.IsMatch(html, _rule.NovelErr.Pattern);

            if (isErrorPage)
            {
                throw new SpiderException("匹配到小说页面错误标识,失败");
            }

            // Extract every field from the page; initializers run in the order written.
            var details = new NovelDetails
            {
                Name         = RegexMatch(_rule.NovelName, html),
                ImageUrl     = RegexMatch(_rule.NovelImage, html),
                Sort         = RegexMatch(_rule.NovelClassify, html),
                Author       = RegexMatch(_rule.NovelAuthor, html),
                State        = RegexIsMatch(_rule.NovelState, html) ? 1 : 0,
                Des          = RegexMatch(_rule.NovelDes, html),
                ChapterIndex = RegexMatch(_rule.ChapterIndex, html) // chapter table of contents
            };

            // NOTE(review): "Des" and "ChapterIndex" are presumably property names the check
            // treats specially (likely exempt from it) - confirm against Utils.ObjectIsNotNull.
            bool allMatched = Utils.ObjectIsNotNull(details, "Des", "ChapterIndex");

            if (!allMatched)
            {
                throw new SpiderException("获取小说详情失败,有匹配不到的值");
            }

            return details;
        }
Exemplo n.º 3
0
 /// <summary>
 /// Runs the parameterless constructor, then stores the given novel and uses it as the
 /// data context.
 /// </summary>
 /// <param name="_Novel">Novel details assigned to <c>Novel</c> and exposed as the data context.</param>
 public TextDialog(NovelDetails _Novel) : this()
 {
     Novel = _Novel;

     // Read back through Novel, exactly as assigned above, for the data context.
     DataContext = Novel;
 }
Exemplo n.º 4
0
        /// <summary>
        /// Updates a novel that is already stored: scrapes the chapters that are not yet recorded,
        /// appends them to the stored index and refreshes the novel record.
        /// </summary>
        /// <remarks>
        /// Returns without doing anything when the novel (matched by name and author) or its index
        /// document cannot be found, or when <c>ChapterListNeedUpdate</c> reports no new chapters.
        /// </remarks>
        /// <param name="spider">Spider used to fetch the chapter list, chapter content, category and cover.</param>
        /// <param name="novelKey">Key of the novel, passed through to the spider calls.</param>
        /// <param name="info">Freshly scraped details of the novel.</param>
        /// <param name="model">Not used by this method.</param>
        /// <returns>A task that completes when the update has finished.</returns>
        private async Task ProcessUpdate(Spider spider, string novelKey, NovelDetails info, NovelInfo model)
        {
            var chapterIndex = info.ChapterIndex;
            var novelInfo    = _novelInfoRepository.FindOrDefault(x => x.Name == info.Name && x.Author == info.Author);

            if (novelInfo == null)
            {
                return;
            }

            // Existing index document for this novel.
            var oldIndexes = _novelIndexRepository.FindOrDefault(x => x.Id == novelInfo.IndexId);

            if (oldIndexes == null)
            {
                // Without an index document there is nothing to append to. Bail out before
                // scraping so chapters are not inserted and then orphaned by a
                // NullReferenceException when the index is updated below.
                Logger.Error("{0}-{1} 小说索引目录不存在,跳过更新:{2}", info.Name, info.Author, novelInfo.IndexId);
                return;
            }

            // Compare the stored chapter names with the freshly scraped ones.
            var oldChapterList = oldIndexes.Indexex.Select(x => x.ChapterName).ToList();
            var chapterList    = await spider.GetNovelChapterList(novelKey, chapterIndex);
            var newChapterList = chapterList.Select(x => x.Key).ToList();
            int updateIndex    = 0;

            if (!ChapterListNeedUpdate(oldChapterList, newChapterList, out updateIndex))
            {
                return;
            }

            // Index entries for the chapters stored during this run.
            var indexes = new List<Index>();

            for (int i = updateIndex; i < chapterList.Count; i++)
            {
                // Stop scraping as soon as the engine is told to stop working.
                if (!_isWorking)
                {
                    break;
                }

                var chapter = chapterList[i];
                try
                {
                    var content = await spider.GetContent(novelKey, chapterIndex, chapter.Value);

                    var chapterId     = ObjectId.NextId();
                    var chapterEntity = new NovelChapter()
                    {
                        Id          = chapterId,
                        NovelId     = novelInfo.Id,
                        ChapterName = chapter.Key,
                        UpdateTime  = DateTime.Now,
                        WordCount   = Utils.GetWordCount(content),
                        Content     = content
                    };
                    _novelChapterRepository.Insert(novelInfo.Id, chapterEntity);
                    indexes.Add(new Index()
                    {
                        ChapterId = chapterId, ChapterName = chapter.Key
                    });

                    // Throttle between chapters without blocking the thread.
                    await Task.Delay(500);
                }
                catch (SpiderException ex)
                {
                    Logger.Error("{0}-{1} 小说章节抓取失败:{2}", chapter.Key, chapter.Value, ex.Message);

                    if (_options.SpiderOptions.错误章节处理 == 错误章节处理.停止本书_继续采集下一本)
                    {
                        // Policy: give up on this novel; what was scraped so far is still saved below.
                        Logger.ColorConsole2(string.Format("{0}-{1} 错误章节处理.停止本书_继续采集下一本", chapter.Key, chapter.Value), ConsoleColor.Red);
                        break;
                    }
                    else if (_options.SpiderOptions.错误章节处理 == 错误章节处理.入库章节名_继续采集下一章)
                    {
                        // Policy: store a placeholder chapter (name only, empty content) and carry on.
                        Logger.ColorConsole2(string.Format("{0}-{1} 错误章节处理.入库章节名_继续采集下一章", chapter.Key, chapter.Value), ConsoleColor.Red);
                        var chapterId     = ObjectId.NextId();
                        var chapterEntity = new NovelChapter()
                        {
                            Id          = chapterId,
                            NovelId     = novelInfo.Id,
                            ChapterName = chapter.Key,
                            UpdateTime  = DateTime.Now,
                            WordCount   = 0,
                            Content     = ""
                        };
                        _novelChapterRepository.Insert(novelInfo.Id, chapterEntity);
                        indexes.Add(new Index()
                        {
                            ChapterId = chapterId, ChapterName = chapter.Key
                        });
                    }
                }
                catch (Exception ex)
                {
                    Logger.Fatal(ex, "ProcessEngine.ProcessUpdate");
                    break;
                }
            }

            // Append the newly stored chapters to the index document.
            oldIndexes.Indexex.AddRange(indexes);
            _novelIndexRepository.Update(x => x.Id == oldIndexes.Id, oldIndexes);

            // Refresh the novel record; the latest chapter is the last index entry, if any.
            var latest = oldIndexes.Indexex.LastOrDefault();
            novelInfo.State           = info.State;
            novelInfo.UpdateTime      = DateTime.Now;
            novelInfo.LatestChapter   = latest?.ChapterName;
            novelInfo.LatestChapterId = latest?.ChapterId;

            // Optional refreshes, each controlled by its own spider option.
            if (_options.SpiderOptions.自动更新分类)
            {
                novelInfo.Sort = spider.MatchSort(info.Sort);
            }
            if (_options.SpiderOptions.自动更新封面)
            {
                novelInfo.Cover = spider.DownLoadImageToBase64(info.ImageUrl);
            }
            if (_options.SpiderOptions.自动更新简介)
            {
                novelInfo.Des = info.Des;
            }

            _novelInfoRepository.Update(x => x.Id == novelInfo.Id, novelInfo);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Adds a new novel: scrapes its chapters one by one, then stores the chapter index and
        /// the novel record.
        /// </summary>
        /// <remarks>
        /// The index and the novel record are written even when the chapter loop ends early
        /// (stop requested, error policy, or unexpected exception); they then cover only the
        /// chapters stored so far.
        /// </remarks>
        /// <param name="spider">Spider used to fetch the cover, chapter list, chapter content and category.</param>
        /// <param name="novelKey">Key of the novel, passed through to the spider calls.</param>
        /// <param name="info">Scraped details of the novel to add.</param>
        /// <returns>A task that completes when the novel has been stored.</returns>
        private async Task ProcessAdd(Spider spider, string novelKey, NovelDetails info)
        {
            var chapterIndex = info.ChapterIndex;
            // Id of the new novel.
            var novelId = ObjectId.NextId();
            // Id of the new chapter-index document.
            var novelIndexId = ObjectId.NextId();
            // Cover image, as returned by the spider's base64 download helper.
            var novelCover = spider.DownLoadImageToBase64(info.ImageUrl);

            /*
             * 1 >>> Fetch the chapter list
             */
            var chapterList = await spider.GetNovelChapterList(novelKey, chapterIndex);

            var indexes = new List<Index>();

            // Chapters are fetched one at a time with a delay in between; otherwise the
            // site is likely to block the spider.
            for (int i = 0; i < chapterList.Count; i++)
            {
                // Stop scraping as soon as the engine is told to stop working.
                if (!_isWorking)
                {
                    break;
                }

                var chapter = chapterList[i];
                try
                {
                    var content = await spider.GetContent(novelKey, chapterIndex, chapter.Value);

                    var chapterId     = ObjectId.NextId();
                    var chapterEntity = new NovelChapter()
                    {
                        Id          = chapterId,
                        NovelId     = novelId,
                        ChapterName = chapter.Key,
                        UpdateTime  = DateTime.Now,
                        WordCount   = Utils.GetWordCount(content),
                        Content     = content
                    };
                    _novelChapterRepository.Insert(novelId, chapterEntity);
                    indexes.Add(new Index()
                    {
                        ChapterId = chapterId, ChapterName = chapter.Key
                    });

                    // Throttle between chapters without blocking the thread.
                    await Task.Delay(500);
                }
                catch (SpiderException ex)
                {
                    Logger.Error("{0}-{1} 小说章节抓取失败:{2}", chapter.Key, chapter.Value, ex.Message);

                    if (_options.SpiderOptions.错误章节处理 == 错误章节处理.停止本书_继续采集下一本)
                    {
                        // Policy: give up on this novel; what was scraped so far is still saved below.
                        Logger.ColorConsole2(string.Format("{0}-{1} 错误章节处理.停止本书_继续采集下一本", chapter.Key, chapter.Value), ConsoleColor.Red);
                        break;
                    }
                    else if (_options.SpiderOptions.错误章节处理 == 错误章节处理.入库章节名_继续采集下一章)
                    {
                        // Policy: store a placeholder chapter (name only, empty content) and carry on.
                        Logger.ColorConsole2(string.Format("{0}-{1} 错误章节处理.入库章节名_继续采集下一章", chapter.Key, chapter.Value), ConsoleColor.Red);
                        var chapterId     = ObjectId.NextId();
                        var chapterEntity = new NovelChapter()
                        {
                            Id          = chapterId,
                            NovelId     = novelId,
                            ChapterName = chapter.Key,
                            UpdateTime  = DateTime.Now,
                            WordCount   = 0,
                            Content     = ""
                        };
                        _novelChapterRepository.Insert(novelId, chapterEntity);
                        indexes.Add(new Index()
                        {
                            ChapterId = chapterId, ChapterName = chapter.Key
                        });
                    }
                }
                catch (Exception ex)
                {
                    Logger.Fatal(ex, "ProcessEngine.ProcessAdd");
                    break;
                }
            }

            /*
             * 2 >>> Store the chapter index
             */
            var novelIndex = new NovelIndex()
            {
                Id         = novelIndexId,
                NovelId    = novelId,
                UpdateTime = DateTime.Now,
                Indexex    = indexes
            };

            _novelIndexRepository.Insert(novelIndex);

            /*
             * 3 >>> Store the novel record
             */
            // The latest chapter is the last index entry, if any chapter was stored.
            var latest    = indexes.LastOrDefault();
            var novelInfo = new NovelInfo()
            {
                Id              = novelId,
                Name            = info.Name,
                Author          = info.Author,
                Sort            = spider.MatchSort(info.Sort),
                State           = info.State,
                Des             = info.Des,
                Cover           = novelCover,
                CreateTime      = DateTime.Now,
                UpdateTime      = DateTime.Now,
                LatestChapter   = latest?.ChapterName,
                LatestChapterId = latest?.ChapterId,
                IndexId         = novelIndexId
            };

            _novelInfoRepository.Insert(novelInfo);
        }