예제 #1
0
        /// <summary>
        /// Downloads the chapter page at <paramref name="link"/>, parses it into a chapter and
        /// appends the content of every additional page returned by <c>GetPagedChapterUrls</c>.
        /// </summary>
        /// <param name="link">Chapter link; its <c>Url</c> is downloaded and stored on the result.</param>
        /// <param name="options">Retrieval options; not read by this implementation.</param>
        /// <param name="token">Cancellation token forwarded to the download and parsing calls.</param>
        /// <returns>
        /// The parsed chapter, or <c>null</c> when <c>ParseChapter</c> yields no chapter for the first page.
        /// </returns>
        public override async Task<WebNovelChapter> GetChapterAsync(ChapterLink link,
                                                                    ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
                                                                    CancellationToken token         = default(CancellationToken))
        {
            string content = await GetWebPageAsync(link.Url, token);

            IHtmlDocument doc = await Parser.ParseAsync(content, token);

            // The extra page URLs are requested before ParseChapter runs; that order is kept as-is.
            var paged = GetPagedChapterUrls(doc.DocumentElement);

            WebNovelChapter chapter = ParseChapter(doc.DocumentElement, token);

            if (chapter == null)
            {
                return null;
            }

            chapter.Url            = link.Url;
            chapter.NextChapterUrl = UrlHelper.ToAbsoluteUrl(link.Url, chapter.NextChapterUrl);

            foreach (var page in paged)
            {
                string pageContent = await GetWebPageAsync(page, token);

                IHtmlDocument pageDoc = await Parser.ParseAsync(pageContent, token);

                // ParseChapter may return null (it is guarded for the first page above), so a
                // follow-up page that cannot be parsed is skipped instead of crashing the download.
                WebNovelChapter pageChapter = ParseChapter(pageDoc.DocumentElement, token);

                if (pageChapter != null)
                {
                    chapter.Content += pageChapter.Content;
                }
            }

            return chapter;
        }
        /// <summary>
        /// Downloads the page at <paramref name="link"/> and extracts the first <c>div</c> whose
        /// <c>class</c> attribute contains <c>post_body</c> as the chapter content.
        /// </summary>
        /// <param name="link">Chapter link; its <c>Url</c> is downloaded and stored on the result.</param>
        /// <param name="options">Retrieval options; not read by this implementation.</param>
        /// <param name="token">Cancellation token forwarded to the download and parse calls.</param>
        /// <returns>The chapter, or <c>null</c> when no matching <c>div</c> exists.</returns>
        public override async Task<WebNovelChapter> GetChapterAsync(ChapterLink link,
            ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
            CancellationToken token = default(CancellationToken))
        {
            string html = await GetWebPageAsync(link.Url, token);
            IHtmlDocument doc = await Parser.ParseAsync(html, token);

            IElement postBody = doc.All.FirstOrDefault(
                el => el.LocalName == "div"
                      && el.HasAttribute("class")
                      && el.GetAttribute("class").Contains("post_body"));

            if (postBody != null)
            {
                // Clean-up helpers run in this fixed order before the markup is read.
                RemoveNavigation(postBody);
                RemoveDonation(postBody);
                ExpandSpoilers(postBody);
                RemoveEmptyTags(postBody);

                var cleaned = CleanupHTML(postBody.InnerHtml);

                return new WebNovelChapter
                {
                    Url = link.Url,
                    Content = cleaned
                };
            }

            return null;
        }
        /// <summary>
        /// Downloads the page at <paramref name="link"/> and builds the chapter from the first
        /// element matching the <c>.chapter-content</c> selector.
        /// </summary>
        /// <param name="link">Chapter link; its <c>Url</c> is downloaded and stored on the result.</param>
        /// <param name="options">Retrieval options; not read by this implementation.</param>
        /// <param name="token">Cancellation token forwarded to the download and parse calls.</param>
        /// <returns>The chapter, or <c>null</c> when the selector matches nothing.</returns>
        public override async Task<WebNovelChapter> GetChapterAsync(ChapterLink link,
                                                                    ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
                                                                    CancellationToken token         = default(CancellationToken))
        {
            string html = await GetWebPageAsync(link.Url, token);
            IHtmlDocument doc = await Parser.ParseAsync(html, token);

            IElement chapterBody = doc.QuerySelector(".chapter-content");

            if (chapterBody != null)
            {
                // Clean-up helpers run in this fixed order before the content is produced.
                RemoveNavigation(chapterBody);
                RemoveAdvertisements(chapterBody);
                ExpandSpoilers(chapterBody);
                RemoveFontStyle(chapterBody);

                var cleaned = new ContentCleanup(BaseUrl).Execute(doc, chapterBody);

                return new WebNovelChapter
                {
                    Url = link.Url,
                    Content = cleaned
                };
            }

            return null;
        }
예제 #4
0
        /// <summary>
        /// Fetches the page behind <paramref name="link"/>, locates the first <c>div</c> whose
        /// <c>class</c> attribute contains <c>post_body</c>, cleans it and returns it as a chapter.
        /// </summary>
        /// <param name="link">Chapter link; its <c>Url</c> is downloaded and stored on the result.</param>
        /// <param name="options">Retrieval options; not read by this implementation.</param>
        /// <param name="token">Cancellation token forwarded to the download and parse calls.</param>
        /// <returns>The chapter, or <c>null</c> when no matching <c>div</c> exists.</returns>
        public override async Task<WebNovelChapter> GetChapterAsync(ChapterLink link,
                                                                    ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
                                                                    CancellationToken token         = default(CancellationToken))
        {
            string markup = await GetWebPageAsync(link.Url, token);
            IHtmlDocument doc = await Parser.ParseAsync(markup, token);

            IElement body = doc.All
                               .Where(el => el.LocalName == "div")
                               .Where(el => el.HasAttribute("class"))
                               .FirstOrDefault(el => el.GetAttribute("class").Contains("post_body"));

            if (body == null)
            {
                return null;
            }

            // Strip navigation and donation blocks, expand spoilers, then drop empty tags.
            RemoveNavigation(body);
            RemoveDonation(body);
            ExpandSpoilers(body);
            RemoveEmptyTags(body);

            var cleanedHtml = CleanupHTML(body.InnerHtml);

            var result = new WebNovelChapter
            {
                Url = link.Url,
                Content = cleanedHtml
            };

            return result;
        }
        /// <summary>
        /// Downloads the page at <paramref name="link"/> and returns the cleaned content of the
        /// element matching <c>#storytext</c>.
        /// </summary>
        /// <param name="link">Chapter link; its <c>Url</c> is downloaded.</param>
        /// <param name="options">Retrieval options; not read by this implementation.</param>
        /// <param name="token">Cancellation token forwarded to the download and parse calls.</param>
        /// <returns>
        /// A chapter carrying only <c>Content</c>, or <c>null</c> when the page has no
        /// <c>#storytext</c> element.
        /// </returns>
        public override async Task<WebNovelChapter> GetChapterAsync(ChapterLink link,
                                                                    ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
                                                                    CancellationToken token         = default(CancellationToken))
        {
            string content = await GetWebPageAsync(link.Url, token);

            IHtmlDocument doc = await Parser.ParseAsync(content, token);

            IElement storyElement = doc.QuerySelector("#storytext");

            // QuerySelector yields null when nothing matches; report "no chapter" instead of
            // handing a null element to the content clean-up.
            if (storyElement == null)
            {
                return null;
            }

            return new WebNovelChapter()
            {
                Content = new ContentCleanup(BaseUrl).Execute(doc, storyElement)
            };
        }
예제 #6
0
        /// <summary>
        /// Downloads the page at <paramref name="link"/>, parses it into a chapter and, when an
        /// element carrying one of <c>TitleClasses</c> exists, uses its trimmed text as the chapter name.
        /// </summary>
        /// <param name="link">Chapter link; its <c>Url</c> is downloaded and stored on the result.</param>
        /// <param name="options">Retrieval options; not read by this implementation.</param>
        /// <param name="token">Cancellation token forwarded to the download and parsing calls.</param>
        /// <returns>The parsed chapter, or <c>null</c> when <c>ParseChapter</c> yields no chapter.</returns>
        public override async Task<WebNovelChapter> GetChapterAsync(
            ChapterLink link,
            ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
            CancellationToken token = default(CancellationToken))
        {
            string baseContent = await GetWebPageAsync(link.Url, token);

            IHtmlDocument doc = await Parser.ParseAsync(baseContent, token);

            // The title element is looked up before ParseChapter runs; that order is kept as-is.
            IElement titleElement = doc.DocumentElement.FirstWhereHasClass(TitleClasses);

            WebNovelChapter chapter = ParseChapter(doc.DocumentElement, token);

            // Guard against a page that could not be parsed instead of dereferencing null below.
            if (chapter == null)
            {
                return null;
            }

            chapter.Url = link.Url;

            if (titleElement != null)
            {
                chapter.ChapterName = titleElement.Text().Trim();
            }

            return chapter;
        }
        /// <summary>
        /// Downloads the page at <paramref name="link"/>, parses it into a chapter and, when an
        /// element carrying one of <c>TitleClasses</c> exists, uses its trimmed text as the chapter name.
        /// </summary>
        /// <param name="link">Chapter link; its <c>Url</c> is downloaded, passed to the parser and stored on the result.</param>
        /// <param name="options">Retrieval options; not read by this implementation.</param>
        /// <param name="token">Cancellation token forwarded to the download and parsing calls.</param>
        /// <returns>The chapter produced by <c>ParseChapter</c> with <c>Url</c> (and possibly <c>ChapterName</c>) filled in.</returns>
        public override async Task<WebNovelChapter> GetChapterAsync(
            ChapterLink link,
            ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
            CancellationToken token         = default(CancellationToken))
        {
            string html = await GetWebPageAsync(link.Url, token);
            IHtmlDocument doc = await Parser.ParseAsync(html, token);

            // Looked up before ParseChapter runs, exactly as in the original statement order.
            IElement heading = doc.DocumentElement.FirstWhereHasClass(TitleClasses);

            WebNovelChapter result = ParseChapter(doc, link.Url, doc.DocumentElement, token);
            result.Url = link.Url;

            // Only overwrite the name when a title element was actually found.
            if (heading != null)
            {
                string title = heading.Text().Trim();
                result.ChapterName = title;
            }

            return result;
        }
예제 #8
0
        /// <summary>
        /// Downloads the page at <paramref name="link"/> and builds the chapter content by joining
        /// the inner HTML of the matching <c>sentence</c> elements with <c>&lt;br/&gt;&lt;br/&gt;</c>.
        /// </summary>
        /// <param name="link">Chapter link; its <c>Url</c> is downloaded.</param>
        /// <param name="options">Retrieval options; not read by this implementation.</param>
        /// <param name="token">Cancellation token forwarded to the download and parse calls.</param>
        /// <returns>
        /// A chapter with name, content and next-chapter link, or <c>null</c> when no element
        /// carrying one of <c>ChapterClasses</c> is found.
        /// </returns>
        public override async Task<WebNovelChapter> GetChapterAsync(ChapterLink link, ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
            CancellationToken token = default(CancellationToken))
        {
            string content = await GetWebPageAsync(link.Url, token);

            IHtmlDocument doc = await Parser.ParseAsync(content, token);

            IElement titleElement = doc.DocumentElement.FirstWhereHasClass(ChapterTitleClasses);
            IElement chapterElement = doc.DocumentElement.FirstWhereHasClass(ChapterClasses);

            // The title lookup is already treated as nullable below; the chapter container needs
            // the same care, otherwise a page without it fails when its children are enumerated.
            if (chapterElement == null)
            {
                return null;
            }

            var chContentElements = chapterElement.WhereHasClass(ChapterContentClasses, element => element.LocalName == "sentence");

            string contents = string.Join("<br/><br/>", chContentElements.Select(p => p.InnerHtml));

            // The href is taken as-is from the pager link; null when the link is absent.
            string nextChapter = doc.QuerySelector("ul.pager > li.next > a")?.GetAttribute("href");

            return new WebNovelChapter
            {
                ChapterName = titleElement?.TextContent,
                Content = contents,
                NextChapterUrl = nextChapter
            };
        }
 /// <summary>
 /// Virtual chapter-retrieval entry point. This base implementation does no work and always
 /// throws <see cref="NotImplementedException"/>.
 /// </summary>
 /// <param name="link">Link of the chapter to retrieve; unused here.</param>
 /// <param name="options">Retrieval options; unused here.</param>
 /// <param name="token">Cancellation token; unused here.</param>
 /// <returns>Never returns; the exception is thrown synchronously rather than through the task.</returns>
 /// <exception cref="NotImplementedException">Always thrown by this implementation.</exception>
 public virtual Task <WebNovelChapter> GetChapterAsync(ChapterLink link, ChapterRetrievalOptions options = default(ChapterRetrievalOptions), CancellationToken token = default(CancellationToken))
 {
     throw new NotImplementedException();
 }
예제 #10
0
        /// <summary>
        /// Downloads the page at <paramref name="link"/> and builds the chapter content by joining
        /// the inner HTML of the matching <c>sentence</c> elements with <c>&lt;br/&gt;&lt;br/&gt;</c>.
        /// </summary>
        /// <param name="link">Chapter link; its <c>Url</c> is downloaded.</param>
        /// <param name="options">Retrieval options; not read by this implementation.</param>
        /// <param name="token">Cancellation token forwarded to the download and parse calls.</param>
        /// <returns>
        /// A chapter with name, content and next-chapter link, or <c>null</c> when no element
        /// carrying one of <c>ChapterClasses</c> is found.
        /// </returns>
        public override async Task<WebNovelChapter> GetChapterAsync(ChapterLink link, ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
                                                                    CancellationToken token = default(CancellationToken))
        {
            string content = await GetWebPageAsync(link.Url, token);

            IHtmlDocument doc = await Parser.ParseAsync(content, token);

            IElement titleElement   = doc.DocumentElement.FirstWhereHasClass(ChapterTitleClasses);
            IElement chapterElement = doc.DocumentElement.FirstWhereHasClass(ChapterClasses);

            // The title lookup is handled as nullable further down; apply the same care to the
            // chapter container so a page without it yields "no chapter" instead of an exception.
            if (chapterElement == null)
            {
                return null;
            }

            var chContentElements = chapterElement.WhereHasClass(ChapterContentClasses, element => element.LocalName == "sentence");

            string contents    = string.Join("<br/><br/>", chContentElements.Select(p => p.InnerHtml));

            // The href is taken as-is from the pager link; null when the link is absent.
            string nextChapter = doc.QuerySelector("ul.pager > li.next > a")?.GetAttribute("href");

            return new WebNovelChapter
            {
                ChapterName = titleElement?.TextContent,
                Content = contents,
                NextChapterUrl = nextChapter
            };
        }
예제 #11
0
 /// <summary>
 /// Virtual chapter-retrieval entry point. This base implementation does no work and always
 /// throws <see cref="NotImplementedException"/>.
 /// </summary>
 /// <param name="link">Link of the chapter to retrieve; unused here.</param>
 /// <param name="options">Retrieval options; unused here.</param>
 /// <param name="token">Cancellation token; unused here.</param>
 /// <returns>Never returns; the exception is thrown synchronously rather than through the task.</returns>
 /// <exception cref="NotImplementedException">Always thrown by this implementation.</exception>
 public virtual Task<WebNovelChapter> GetChapterAsync(ChapterLink link, ChapterRetrievalOptions options = default(ChapterRetrievalOptions), CancellationToken token = default(CancellationToken))
 {
     throw new NotImplementedException();
 }
예제 #12
0
        /// <summary>
        /// Downloads the page at <paramref name="link"/> and assembles the chapter from the
        /// <c>sentence.translated</c> elements found inside <c>.chapter-body</c>.
        /// </summary>
        /// <param name="link">Chapter link; its <c>Url</c> is downloaded.</param>
        /// <param name="options">Retrieval options; not read by this implementation.</param>
        /// <param name="token">Cancellation token forwarded to the download and parse calls.</param>
        /// <returns>
        /// A chapter with name, cleaned content and next-chapter link, or <c>null</c> when the
        /// page has no <c>.chapter-body</c> element.
        /// </returns>
        public override async Task<WebNovelChapter> GetChapterAsync(ChapterLink link, ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
                                                                    CancellationToken token = default(CancellationToken))
        {
            string content = await GetWebPageAsync(link.Url, token);

            IHtmlDocument doc = await Parser.ParseAsync(content, token);

            IElement titleElement   = doc.DocumentElement.QuerySelector(".chapter-title");
            IElement chapterElement = doc.DocumentElement.QuerySelector(".chapter-body");

            // QuerySelector yields null when nothing matches; without this guard the very next
            // statement dereferences null on pages that lack a chapter body.
            if (chapterElement == null)
            {
                return null;
            }

            // Append an empty paragraph element inside each "sentence.translated" element so the
            // joined inner HTML below is split into paragraphs.
            foreach (IElement sentence in chapterElement.QuerySelectorAll("sentence.translated").ToList())
            {
                sentence.AppendChild(doc.CreateElement("P"));
            }

            var contentEl = doc.CreateElement("P");

            contentEl.InnerHtml = string.Join("", chapterElement
                                              .QuerySelectorAll("sentence.translated")
                                              .Select(x => x.InnerHtml));
            RemoveSpecialTags(doc, contentEl);

            // The href is taken as-is from the pager link; null when the link is absent.
            string nextChapter = doc.QuerySelector("ul.pager > li.next > a")?.GetAttribute("href");

            return new WebNovelChapter
            {
                ChapterName = titleElement?.GetInnerText(),
                Content = new ContentCleanup(BaseUrl).Execute(doc, contentEl),
                NextChapterUrl = nextChapter
            };
        }
예제 #13
0
        /// <summary>
        /// Downloads the chapter page at <paramref name="link"/>, parses it into a chapter and
        /// appends the content of every additional page returned by <c>GetPagedChapterUrls</c>.
        /// </summary>
        /// <param name="link">Chapter link; its <c>Url</c> is downloaded and stored on the result.</param>
        /// <param name="options">Retrieval options; not read by this implementation.</param>
        /// <param name="token">Cancellation token forwarded to the download and parsing calls.</param>
        /// <returns>
        /// The parsed chapter, or <c>null</c> when <c>ParseChapter</c> yields no chapter for the first page.
        /// </returns>
        public override async Task<WebNovelChapter> GetChapterAsync(ChapterLink link,
            ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
            CancellationToken token = default(CancellationToken))
        {
            string content = await GetWebPageAsync(link.Url, token);
            IHtmlDocument doc = await Parser.ParseAsync(content, token);

            // The extra page URLs are requested before ParseChapter runs; that order is kept as-is.
            var paged = GetPagedChapterUrls(doc.DocumentElement);

            WebNovelChapter chapter = ParseChapter(doc.DocumentElement, token);

            if (chapter == null)
            {
                return null;
            }

            chapter.Url = link.Url;
            chapter.NextChapterUrl = UrlHelper.ToAbsoluteUrl(link.Url, chapter.NextChapterUrl);

            foreach (var page in paged)
            {
                string pageContent = await GetWebPageAsync(page, token);

                IHtmlDocument pageDoc = await Parser.ParseAsync(pageContent, token);

                // ParseChapter may return null (it is guarded for the first page above), so a
                // follow-up page that cannot be parsed is skipped instead of crashing the download.
                WebNovelChapter pageChapter = ParseChapter(pageDoc.DocumentElement, token);

                if (pageChapter != null)
                {
                    chapter.Content += pageChapter.Content;
                }
            }

            return chapter;
        }
예제 #14
0
        /// <summary>
        /// Downloads the page at <paramref name="link"/> and returns the inner HTML of the
        /// <c>mw-content-text</c> element after rewriting its links and linked images.
        /// </summary>
        /// <remarks>
        /// The element with id <c>toc</c> is removed and <c>RemoveTables</c> is applied. Every anchor
        /// with an <c>href</c> gets that value HTML-decoded and made absolute against <c>BaseUrl</c>.
        /// When an anchor contains an <c>img</c>, all image attributes except <c>width</c> and
        /// <c>height</c> are dropped, the linked page is downloaded, and the first anchor inside the
        /// first <c>div</c> whose class is exactly <c>fullMedia</c> supplies the new image <c>src</c>.
        /// </remarks>
        /// <param name="link">Chapter link; its <c>Url</c> is downloaded and stored on the result.</param>
        /// <param name="options">Retrieval options; not read by this implementation.</param>
        /// <param name="token">Cancellation token forwarded to every download and parse call.</param>
        /// <returns>The chapter, or <c>null</c> when the page has no <c>mw-content-text</c> element.</returns>
        public override async Task<WebNovelChapter> GetChapterAsync(ChapterLink link,
                                                                    ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
                                                                    CancellationToken token         = default(CancellationToken))
        {
            string html = await GetWebPageAsync(link.Url, token);
            IHtmlDocument doc = await Parser.ParseAsync(html, token);

            IElement body = doc.GetElementById("mw-content-text");
            if (body == null)
            {
                return null;
            }

            doc.GetElementById("toc")?.Remove();
            RemoveTables(body);

            var anchors = body.Descendents<IElement>().Where(el => el.LocalName == "a");

            foreach (IElement anchor in anchors)
            {
                if (!anchor.HasAttribute("href"))
                {
                    continue;
                }

                string decodedHref = WebUtility.HtmlDecode(anchor.GetAttribute("href"));
                anchor.SetAttribute("href", UrlHelper.ToAbsoluteUrl(BaseUrl, decodedHref));

                IElement thumbnail = anchor.Descendents<IElement>().FirstOrDefault(el => el.LocalName == "img");
                if (thumbnail == null)
                {
                    continue;
                }

                // Snapshot the attribute list first so removal does not disturb the enumeration.
                var removable = thumbnail.Attributes
                                         .Where(a => a.LocalName != "width" && a.LocalName != "height")
                                         .ToList();
                foreach (var attribute in removable)
                {
                    thumbnail.RemoveAttribute(attribute.Name);
                }

                // The anchor now carries the absolute URL of the page describing the image.
                string imagePageUrl  = anchor.GetAttribute("href");
                string imagePageHtml = await GetWebPageAsync(imagePageUrl, token);
                IHtmlDocument imagePage = await Parser.ParseAsync(imagePageHtml, token);

                IElement fullImageLink = imagePage.Descendents<IElement>()
                                                  .Where(el => el.LocalName == "div"
                                                               && el.HasAttribute("class")
                                                               && el.GetAttribute("class") == "fullMedia")
                                                  .Select(el => el.Descendents<IElement>().FirstOrDefault(d => d.LocalName == "a"))
                                                  .FirstOrDefault();

                if (fullImageLink == null || !fullImageLink.HasAttribute("href"))
                {
                    continue;
                }

                string fullImageUrl = fullImageLink.GetAttribute("href");
                thumbnail.SetAttribute("src", UrlHelper.ToAbsoluteUrl(BaseUrl, fullImageUrl));
            }

            return new WebNovelChapter
            {
                Url = link.Url,
                Content = body.InnerHtml
            };
        }
예제 #15
0
        /// <summary>
        /// Retrieves the page behind <paramref name="link"/> and returns the inner HTML of its
        /// <c>mw-content-text</c> element with links made absolute and linked images re-pointed.
        /// </summary>
        /// <remarks>
        /// Steps: remove the element with id <c>toc</c>; call <c>RemoveTables</c>; for each anchor
        /// with an <c>href</c>, HTML-decode it and resolve it against <c>BaseUrl</c>; for anchors
        /// wrapping an <c>img</c>, strip every image attribute other than <c>width</c>/<c>height</c>,
        /// download the linked page and take the <c>href</c> of the first anchor inside the first
        /// <c>div</c> whose class equals <c>fullMedia</c> as the image <c>src</c>.
        /// </remarks>
        /// <param name="link">Chapter link; its <c>Url</c> is downloaded and stored on the result.</param>
        /// <param name="options">Retrieval options; not read by this implementation.</param>
        /// <param name="token">Cancellation token forwarded to every download and parse call.</param>
        /// <returns>The chapter, or <c>null</c> when the page has no <c>mw-content-text</c> element.</returns>
        public override async Task<WebNovelChapter> GetChapterAsync(ChapterLink link,
            ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
            CancellationToken token = default(CancellationToken))
        {
            string pageHtml = await GetWebPageAsync(link.Url, token);
            IHtmlDocument doc = await Parser.ParseAsync(pageHtml, token);

            IElement contentRoot = doc.GetElementById("mw-content-text");
            if (contentRoot == null)
            {
                return null;
            }

            doc.GetElementById("toc")?.Remove();
            RemoveTables(contentRoot);

            foreach (IElement anchor in contentRoot.Descendents<IElement>().Where(node => node.LocalName == "a"))
            {
                if (!anchor.HasAttribute("href"))
                {
                    continue;
                }

                string relativeHref = WebUtility.HtmlDecode(anchor.GetAttribute("href"));
                string absoluteHref = UrlHelper.ToAbsoluteUrl(BaseUrl, relativeHref);
                anchor.SetAttribute("href", absoluteHref);

                IElement image = anchor.Descendents<IElement>().FirstOrDefault(node => node.LocalName == "img");
                if (image == null)
                {
                    continue;
                }

                // Materialize the attributes to drop before mutating the element.
                var unwanted = image.Attributes
                                    .Where(attr => attr.LocalName != "width" && attr.LocalName != "height")
                                    .ToList();
                foreach (var attr in unwanted)
                {
                    image.RemoveAttribute(attr.Name);
                }

                // Re-read the href from the anchor, which now holds the absolute image-page URL.
                string mediaPageHtml = await GetWebPageAsync(anchor.GetAttribute("href"), token);
                IHtmlDocument mediaPage = await Parser.ParseAsync(mediaPageHtml, token);

                IElement mediaLink = mediaPage.Descendents<IElement>()
                                              .Where(node => node.LocalName == "div")
                                              .Where(node => node.HasAttribute("class"))
                                              .Where(node => node.GetAttribute("class") == "fullMedia")
                                              .Select(node => node.Descendents<IElement>().FirstOrDefault(inner => inner.LocalName == "a"))
                                              .FirstOrDefault();

                bool hasFullImage = mediaLink != null && mediaLink.HasAttribute("href");
                if (!hasFullImage)
                {
                    continue;
                }

                image.SetAttribute("src", UrlHelper.ToAbsoluteUrl(BaseUrl, mediaLink.GetAttribute("href")));
            }

            var chapter = new WebNovelChapter
            {
                Url = link.Url,
                Content = contentRoot.InnerHtml
            };

            return chapter;
        }