/// <summary>
/// Uses the analyzer to extract the novel information found on one page of the given
/// novel type (category) listing, and returns a dictionary of novel names and their addresses.
/// </summary>
/// <param name="typeUrl">
/// URL of the novel type page; it is resolved against <c>Analyzer.NovelSiteIndexUrl</c> before use.
/// </param>
/// <param name="pageIndex">Page number passed to <c>Analyzer.BuildNovelTypePageUrl</c>.</param>
/// <returns>The dictionary produced by <c>Analyzer.GetNovelInfosDic</c> for the downloaded page.</returns>
/// <exception cref="ArgumentNullException"><paramref name="typeUrl"/> is null or empty.</exception>
/// <exception cref="NullPatternException"><c>Analyzer.NovelNamePattern</c> is null or empty.</exception>
public Dictionary<string, string> GetNovelUrls(string typeUrl, int pageIndex)
{
    if (string.IsNullOrEmpty(typeUrl))
    {
        throw new ArgumentNullException(nameof(typeUrl));
    }

    // Without a name pattern the analyzer has nothing to match against.
    if (string.IsNullOrEmpty(Analyzer.NovelNamePattern))
    {
        throw new NullPatternException();
    }

    var absoluteTypeUrl = HttpHelper.RelateToAbsolute(Analyzer.NovelSiteIndexUrl, typeUrl);
    var pageUrl = Analyzer.BuildNovelTypePageUrl(absoluteTypeUrl, pageIndex);
    var pageSource = HttpHelper.DownloadSource(pageUrl, Analyzer.Encode);

    return Analyzer.GetNovelInfosDic(pageSource);
}
/// <summary>
/// Downloads the page at <paramref name="referenceUrl"/> and reads the total page count from it.
/// </summary>
/// <param name="referenceUrl">
/// URL of the page to inspect; it is resolved against <c>Analyzer.NovelSiteIndexUrl</c> before use.
/// </param>
/// <returns>
/// The value returned by <c>Analyzer.GetTotalPage</c> when it is positive; otherwise 1.
/// Also 1 when <c>Analyzer.TotalPagePattern</c> is null or empty.
/// </returns>
public int GetTotalPageCount(string referenceUrl)
{
    // The page is downloaded before the pattern check, so the request is made
    // even when no total-page pattern is configured.
    var absoluteUrl = HttpHelper.RelateToAbsolute(Analyzer.NovelSiteIndexUrl, referenceUrl);
    var pageSource = HttpHelper.DownloadSource(absoluteUrl, Analyzer.Encode);

    if (string.IsNullOrEmpty(Analyzer.TotalPagePattern))
    {
        return 1;
    }

    // Get the highest page number; anything non-positive falls back to a single page.
    int parsedCount = Analyzer.GetTotalPage(pageSource);
    return parsedCount > 0 ? parsedCount : 1;
}
/// <summary>
/// Downloads the page at <paramref name="referenceUrl"/> and builds a <c>Novel</c> from the
/// author, description, cover path and chapter-list URL that the analyzer extracts from it.
/// </summary>
/// <param name="referenceUrl">
/// URL of the page to download; it is also the base used to resolve the chapter-list URL.
/// </param>
/// <returns>
/// A new <c>Novel</c> with <c>Author</c>, <c>Description</c>, <c>CoverUrl</c> and
/// <c>ChapterListUrl</c> set. The cover path is stored exactly as the analyzer returned it.
/// </returns>
public Novel GetNovelInfo(string referenceUrl)
{
    var pageSource = HttpHelper.DownloadSource(referenceUrl, Analyzer.Encode);

    var authorName = Analyzer.GetAuthor(pageSource);
    var description = Analyzer.GetDescription(pageSource);
    var coverPath = Analyzer.GetNovelCoverPath(pageSource);
    var chapterListUrl = HttpHelper.RelateToAbsolute(referenceUrl, Analyzer.GetChaptersUrl(pageSource));

    var novel = new Novel();
    var writer = new Author();
    writer.Name = authorName;
    novel.Author = writer;
    novel.Description = description;
    novel.CoverUrl = coverPath;
    novel.ChapterListUrl = chapterListUrl;
    return novel;
}
/// <summary>
/// Starts one background task per entry in <paramref name="typeDic"/>. Each task walks the
/// listing pages of that novel type, fetches the details of every novel found, and queues
/// the novel for download.
/// </summary>
/// <param name="typeDic">Novel type name mapped to the URL of that type's listing page.</param>
/// <remarks>
/// Processing a single novel is best-effort: a failure is reported through
/// <see cref="System.Diagnostics.Trace"/> and the remaining novels are still processed.
/// </remarks>
private void ProcessNovelUrls(Dictionary<string, string> typeDic)
{
    typeDic.ForEach(couple =>
    {
        // Fire-and-forget: the started task is neither awaited nor stored.
        Task.Factory.StartNew(() =>
        {
            var typeName = couple.Key.RemoveSpace();

            // NOTE(review): the Find result is dereferenced directly; presumably every key in
            // typeDic has a matching TypeList entry - confirm, otherwise this task faults here.
            var typeId = TypeList.Find(type => type.TypeName.Equals(typeName)).Id;
            var typeUrl = HttpHelper.RelateToAbsolute(_novelSpider.Analyzer.NovelSiteIndexUrl, couple.Value);
            var totalPageCount = _novelSpider.GetTotalPageCount(typeUrl);

            // Page indices 0 .. totalPageCount - 1 are requested.
            for (var index = 0; index < totalPageCount; index++)
            {
                var novelUrlDic = _novelSpider.GetNovelUrls(typeUrl, index);
                novelUrlDic.ForEach(novelUrl =>
                {
                    try
                    {
                        var tempUrl = HttpHelper.RelateToAbsolute(typeUrl, novelUrl.Value);
                        var novel = _novelSpider.GetNovelInfo(tempUrl);
                        if (novel != null)
                        {
                            novel.Name = novelUrl.Key.RemoveSpace().RemoveHtmlTags();
                            // The cover path is resolved against the novel page it came from.
                            novel.CoverUrl = HttpHelper.RelateToAbsolute(tempUrl, novel.CoverUrl);
                            _novelDownloadQueue.Enqueue(new KeyValuePair<int, Novel>(typeId, novel));
                            StartDownloadNovel();
                            StartDownloadChapter();
                        }
                    }
                    catch (Exception ex)
                    {
                        // Keep going with the next novel, but leave a trace of what was skipped
                        // instead of discarding the failure silently.
                        System.Diagnostics.Trace.TraceWarning(
                            "Skipped novel '{0}' ({1}): {2}",
                            novelUrl.Key,
                            novelUrl.Value,
                            ex);
                    }
                });
            }
        });
    });
}
/// <summary>
/// Fetches the chapter entries listed at <c>novel.ChapterListUrl</c> and enqueues one
/// <c>Chapter</c> per entry on the chapter download queue.
/// </summary>
/// <param name="novel">
/// The novel whose chapters are queued; its <c>ChapterListUrl</c> is the base for resolving
/// chapter URLs and its <c>Id</c> is stored on every chapter.
/// </param>
private void ProcessChapters(Novel novel)
{
    // Sort records the order in which the entries are enumerated, starting at 0.
    var position = 0;
    foreach (var entry in _novelSpider.GetChapterUrls(novel.ChapterListUrl))
    {
        var compactTitle = entry.Key.RemoveSpace();
        var absoluteUrl = HttpHelper.RelateToAbsolute(novel.ChapterListUrl, entry.Value);

        var chapter = new Chapter();
        chapter.Title = entry.Key;
        chapter.Url = absoluteUrl;
        chapter.Sort = position;
        chapter.TitleWithNoSpace = compactTitle;
        chapter.NovelId = novel.Id;

        _chapterDownloadQueue.Enqueue(chapter);
        position++;
    }
}