/// <summary>
/// Parses a book's index (table-of-contents) page into a <see cref="BookLink"/>.
/// </summary>
/// <remarks>
/// Every <c>tr</c> beneath a <c>table[@class='css']</c> is visited in document order.
/// A row containing a <c>td[@class='vcss']</c> cell starts a new issue; in any other row,
/// each <c>td[@class='ccss']</c> cell contributes its first anchor as a chapter of the
/// most recently started issue.
/// </remarks>
/// <param name="htmlDoc">Parsed HTML of the index page.</param>
/// <param name="baseUrl">
/// URL of the index page. Stored unchanged in <c>IndexPage</c>; a lower-cased copy with a
/// trailing "index.htm" trimmed is used as the prefix for chapter URLs.
/// </param>
/// <returns>
/// A <see cref="BookLink"/> with title, author, and the issues and chapters found.
/// The issue list is empty when the page has no matching tables.
/// </returns>
protected override BookLink GetChapterLinks(HtmlDocument htmlDoc, string baseUrl)
{
    var titleNode = htmlDoc.DocumentNode.SelectSingleNode("//div[@id='title']");
    var authorNode = htmlDoc.DocumentNode.SelectSingleNode("//div[@id='info']");

    BookLink bookLink = new BookLink
    {
        IndexPage = baseUrl,
        Title = Utilities.ToTraditional(titleNode?.InnerText),
        Author = authorNode?.InnerText
    };

    // NOTE(review): this lower-cases the entire URL, not just the file name - confirm the
    // target site serves chapter paths case-insensitively.
    string chapterBaseUrl = Utilities.TrimEnd(baseUrl.ToLower(), "index.htm");

    // SelectNodes returns null (not an empty collection) when nothing matches.
    List<HtmlNode> tableRows = new List<HtmlNode>();
    var tables = htmlDoc.DocumentNode.SelectNodes("//table[@class='css']");
    if (tables != null)
    {
        foreach (var tableNode in tables)
        {
            tableRows.AddRange(tableNode.Descendants("tr"));
        }
    }

    List<IssueLink> issueLinks = new List<IssueLink>();
    foreach (var row in tableRows)
    {
        var issueTitleCell = row.SelectSingleNode(".//td[@class='vcss']");
        if (issueTitleCell != null)
        {
            issueLinks.Add(new IssueLink
            {
                Title = Utilities.ToTraditional(issueTitleCell.InnerText)
            });
            continue;
        }

        // A single chapter row can hold more than one ccss cell, each with a chapter link.
        var chapterCells = row.SelectNodes(".//td[@class='ccss']");
        if (chapterCells == null)
        {
            continue;
        }

        // LastOrDefault (unlike Last) yields null on an empty list, so chapters that
        // appear before any issue heading are skipped instead of throwing.
        IssueLink currentIssue = issueLinks.LastOrDefault();
        if (currentIssue == null)
        {
            continue;
        }

        foreach (var chapterCell in chapterCells)
        {
            var link = chapterCell.Descendants("a").FirstOrDefault();
            if (link == null)
            {
                continue;
            }

            currentIssue.ChapterLinks.Add(new ChapterLink
            {
                Title = Utilities.ToTraditional(link.InnerText),
                Url = chapterBaseUrl + link.Attributes["href"]?.Value
            });
        }
    }

    bookLink.IssueLinks = issueLinks;
    return bookLink;
}
/// <summary>
/// Downloads an entire book, one issue at a time.
/// </summary>
/// <param name="bookLink">Title and issue links of the book to download.</param>
/// <param name="codePage">Code page forwarded to <c>GetIssueAsync</c> for every issue.</param>
/// <returns>
/// A <see cref="Book"/> carrying the title from <paramref name="bookLink"/> and one entry per
/// issue link, in order; <c>null</c> when <paramref name="bookLink"/> is null or has no issue links.
/// </returns>
public async Task<Book> GetBookAsync(BookLink bookLink, CodePage codePage)
{
    // Treat a missing issue list the same as an empty one instead of dereferencing null.
    if (bookLink?.IssueLinks == null || bookLink.IssueLinks.Count == 0)
    {
        return null;
    }

    Book book = new Book { Title = bookLink.Title };

    foreach (var issue in bookLink.IssueLinks)
    {
        book.Issues.Add(await GetIssueAsync(issue, codePage));

        // Wait SLEEP_MS after each issue - presumably to throttle requests to the site; confirm.
        await Task.Delay(SLEEP_MS);
    }

    return book;
}
/// <summary>
/// Finds the table-of-contents link on a book's main page, downloads that page, and
/// records the links to all of the book's chapters.
/// </summary>
/// <param name="bookDocument">Parsed HTML of the book's main page.</param>
/// <returns>
/// The <see cref="BookLink"/> built from the table-of-contents page. <c>LastUpdateTime</c>
/// is set only when a parsable last-update timestamp is found on the main page.
/// </returns>
protected override async Task<BookLink> FindBookIndexPageAsync(HtmlDocument bookDocument)
{
    // Book status: look for the cell whose text starts with the "last updated" label.
    // SelectNodes returns null when nothing matches, hence the null-conditional.
    var tdStatusNodes = bookDocument.DocumentNode.SelectNodes("//td[@width='20%']");
    string lastUpdateTimeString = tdStatusNodes?
        .FirstOrDefault(x => x.InnerText.StartsWith("最后更新:", StringComparison.Ordinal))
        ?.InnerText;
    if (lastUpdateTimeString != null)
    {
        lastUpdateTimeString = Utilities.TrimStart(lastUpdateTimeString, "最后更新:");
    }

    // Locate the link to the table-of-contents page.
    // NOTE(review): the result is passed on unchecked - confirm what FindTargetHref
    // returns when the link is absent.
    string indexHref = FindTargetHref(bookDocument, "小说目录");

    // Download and parse the table-of-contents page.
    string indexPageHtml = await GetPageBodyAsync(indexHref, CodePage.Gb2312);
    HtmlDocument indexPageDocument = new HtmlDocument();
    indexPageDocument.LoadHtml(indexPageHtml);

    BookLink bookLink = GetChapterLinks(indexPageDocument, indexHref);

    // TryParse returns false for a null or unparsable string, leaving LastUpdateTime untouched.
    if (DateTime.TryParse(lastUpdateTimeString, out DateTime lastTime))
    {
        bookLink.LastUpdateTime = lastTime;
    }

    return bookLink;
}