Пример #1
0
        /// <summary>
        /// Parses a book's index (table of contents) page into a <c>BookLink</c>:
        /// reads the title and author, then walks every row of the tables with class
        /// <c>css</c>, turning <c>vcss</c> cells into issues and the anchors inside
        /// <c>ccss</c> cells into chapters of the most recent issue.
        /// </summary>
        /// <param name="htmlDoc">The parsed index page.</param>
        /// <param name="baseUrl">
        /// URL of the index page. It is stored as-is in <c>IndexPage</c>; a lower-cased
        /// copy with the trailing "index.htm" trimmed is used as the prefix for chapter URLs.
        /// </param>
        /// <returns>
        /// A <c>BookLink</c> whose <c>IssueLinks</c> list is never null; it is empty when
        /// the page contains no matching tables or title rows.
        /// </returns>
        protected override BookLink GetChapterLinks(HtmlDocument htmlDoc, string baseUrl)
        {
            var titleNode  = htmlDoc.DocumentNode.SelectSingleNode("//div[@id='title']");
            var authorNode = htmlDoc.DocumentNode.SelectSingleNode("//div[@id='info']");

            BookLink bookLink = new BookLink
            {
                IndexPage = baseUrl,
                Title     = Utilities.ToTraditional(titleNode?.InnerText),
                Author    = authorNode?.InnerText
            };

            // Invariant lower-casing keeps the "index.htm" suffix match independent of
            // the current culture (e.g. the Turkish dotless i).
            baseUrl = Utilities.TrimEnd(baseUrl.ToLowerInvariant(), "index.htm");

            List<HtmlNode> tableRows = new List<HtmlNode>();

            // SelectNodes returns null, not an empty collection, when nothing matches.
            var tables = htmlDoc.DocumentNode.SelectNodes("//table[@class='css']");
            if (tables != null)
            {
                foreach (var tableNode in tables)
                {
                    tableRows.AddRange(tableNode.Descendants("tr"));
                }
            }

            List<IssueLink> issueLinks = new List<IssueLink>();

            // The issue that subsequent chapter rows belong to; null until the first title row.
            IssueLink currentIssue = null;

            foreach (var node in tableRows)
            {
                var titleRow = node.SelectSingleNode(".//td[@class='vcss']");
                if (titleRow != null)
                {
                    currentIssue = new IssueLink
                    {
                        Title = Utilities.ToTraditional(titleRow.InnerText)
                    };
                    issueLinks.Add(currentIssue);
                    continue;
                }

                // A chapter <tr> can hold more than one ccss cell, each with a chapter hyperlink.
                var chapterRows = node.SelectNodes(".//td[@class='ccss']");
                if (chapterRows == null || currentIssue == null)
                {
                    // Either the row has no chapter cells, or the chapters appear before
                    // any issue title and there is no issue to attach them to.
                    continue;
                }

                foreach (var chapterRow in chapterRows)
                {
                    var link = chapterRow.Descendants("a").FirstOrDefault();
                    if (link != null)
                    {
                        ChapterLink chapterLink = new ChapterLink();
                        chapterLink.Title = Utilities.ToTraditional(link.InnerText);
                        chapterLink.Url   = baseUrl + link.Attributes["href"]?.Value;
                        currentIssue.ChapterLinks.Add(chapterLink);
                    }
                }
            }

            bookLink.IssueLinks = issueLinks;
            return bookLink;
        }
Пример #2
0
        /// <summary>
        /// Downloads an entire book: fetches every issue listed in
        /// <paramref name="bookLink"/> one after another via <c>GetIssueAsync</c>,
        /// pausing <c>SLEEP_MS</c> milliseconds after each issue.
        /// </summary>
        /// <param name="bookLink">The book's title and issue links to download.</param>
        /// <param name="codePage">Passed through to <c>GetIssueAsync</c> for each issue.</param>
        /// <returns>
        /// The downloaded book, or <c>null</c> when <paramref name="bookLink"/> is null
        /// or has no issue links (a null or empty <c>IssueLinks</c>).
        /// </returns>
        public async Task<Book> GetBookAsync(BookLink bookLink, CodePage codePage)
        {
            // A missing IssueLinks list is treated like an empty one: nothing to download.
            if (bookLink?.IssueLinks == null || bookLink.IssueLinks.Count == 0)
            {
                return null;
            }

            Book book = new Book
            {
                Title = bookLink.Title
            };

            // Issues are fetched sequentially so they are added in index order.
            foreach (var issue in bookLink.IssueLinks)
            {
                book.Issues.Add(await GetIssueAsync(issue, codePage));

                // Pause between issues (presumably to throttle requests to the site).
                await Task.Delay(SLEEP_MS);
            }

            return book;
        }
Пример #3
0
        /// <summary>
        /// Starting from a book's main page, finds the hyperlink to its table-of-contents
        /// page, downloads that page, and records the links to all of the book's chapters.
        /// </summary>
        /// <param name="bookDocument">The parsed main page of the book.</param>
        /// <returns>
        /// The <c>BookLink</c> produced by <c>GetChapterLinks</c> for the index page, with
        /// <c>LastUpdateTime</c> set when the main page carries a parseable update date.
        /// </returns>
        protected override async Task<BookLink> FindBookIndexPageAsync(HtmlDocument bookDocument)
        {
            // Book status: look for the cell whose text starts with the "last updated" label.
            // SelectNodes returns null when nothing matches, hence the null-conditional.
            var    tdStatusNodes        = bookDocument.DocumentNode.SelectNodes("//td[@width='20%']");
            string lastUpdateTimeString = tdStatusNodes
                ?.FirstOrDefault(x => x.InnerText.StartsWith("最后更新:", StringComparison.Ordinal))
                ?.InnerText;

            if (lastUpdateTimeString != null)
            {
                // Strip the label so only the date text remains.
                lastUpdateTimeString = Utilities.TrimStart(lastUpdateTimeString, "最后更新:");
            }

            // Locate the link to the index (table of contents) page.
            string indexHref = FindTargetHref(bookDocument, "小说目录");

            // Download the index page body.
            string indexHtml = await GetPageBodyAsync(indexHref, CodePage.Gb2312);

            HtmlDocument bookPageDocument = new HtmlDocument();
            bookPageDocument.LoadHtml(indexHtml);

            // Parse the index page into issue/chapter links.
            BookLink bookLink = GetChapterLinks(bookPageDocument, indexHref);

            // TryParse returns false for null/unparseable text, leaving LastUpdateTime untouched.
            if (DateTime.TryParse(lastUpdateTimeString, out DateTime lastTime))
            {
                bookLink.LastUpdateTime = lastTime;
            }

            return bookLink;
        }