Ejemplo n.º 1
0
        /// <summary>
        /// Downloads the page behind <paramref name="link"/> and returns the cleaned-up inner HTML of the
        /// first <c>div</c> whose <c>class</c> attribute contains <c>post_body</c>.
        /// </summary>
        /// <param name="link">Chapter link whose <c>Url</c> is downloaded.</param>
        /// <param name="options">Not read by this implementation.</param>
        /// <param name="token">Forwarded to the download and parse calls.</param>
        /// <returns>
        /// A chapter carrying the link URL and the cleaned content, or <c>null</c> when no matching
        /// <c>div</c> exists in the document.
        /// </returns>
        public override async Task<WebNovelChapter> GetChapterAsync(ChapterLink link,
            ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
            CancellationToken token = default(CancellationToken))
        {
            string html = await GetWebPageAsync(link.Url, token);
            IHtmlDocument document = await Parser.ParseAsync(html, token);

            // Substring match on the raw class attribute, so multi-class values also qualify.
            IElement postBody = document.All.FirstOrDefault(el =>
                el.LocalName == "div"
                && el.HasAttribute("class")
                && el.GetAttribute("class").Contains("post_body"));

            if (postBody == null)
                return null;

            // The helpers below are handed the element itself, before its HTML is serialised.
            RemoveNavigation(postBody);
            RemoveDonation(postBody);
            ExpandSpoilers(postBody);
            RemoveEmptyTags(postBody);

            string cleaned = CleanupHTML(postBody.InnerHtml);

            WebNovelChapter chapter = new WebNovelChapter
            {
                Url = link.Url,
                Content = cleaned
            };

            return chapter;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Lazily converts anchor elements into <c>ChapterLink</c> instances.
        /// </summary>
        /// <param name="baseUrl">Base URL passed to <c>UrlHelper.ToAbsoluteUrl</c> together with each <c>href</c>.</param>
        /// <param name="linkElements">Candidate elements; only those whose local name is <c>a</c> are used.</param>
        /// <param name="linkFilter">Optional predicate applied before the anchor check; <c>null</c> means no extra filtering.</param>
        /// <returns>
        /// One link per anchor that has non-blank text, an <c>href</c> attribute and a non-empty resolved URL.
        /// </returns>
        protected virtual IEnumerable<ChapterLink> CollectChapterLinks(string baseUrl, IEnumerable<IElement> linkElements,
            Func<IElement, bool> linkFilter = null)
        {
            // The caller's filter runs first, then the anchor-only restriction.
            IEnumerable<IElement> candidates = linkFilter == null
                ? linkElements
                : linkElements.Where(linkFilter);

            foreach (IElement anchor in candidates.Where(el => el.LocalName == "a"))
            {
                bool hasText = !string.IsNullOrWhiteSpace(anchor.TextContent);

                if (hasText && anchor.HasAttribute("href"))
                {
                    string absoluteUrl = UrlHelper.ToAbsoluteUrl(baseUrl, anchor.GetAttribute("href"));

                    if (!string.IsNullOrEmpty(absoluteUrl))
                    {
                        yield return new ChapterLink
                        {
                            Name = WebUtility.HtmlDecode(anchor.TextContent),
                            Url = absoluteUrl
                        };
                    }
                }
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Downloads the page behind <paramref name="link"/>, parses it with <c>ParseChapter</c> and fills in
        /// the chapter URL and, when a title element is found, the chapter name.
        /// </summary>
        /// <param name="link">Chapter link whose <c>Url</c> is downloaded.</param>
        /// <param name="options">Not read by this implementation.</param>
        /// <param name="token">Forwarded to the download, parse and <c>ParseChapter</c> calls.</param>
        /// <returns>
        /// The parsed chapter, or <c>null</c> when <c>ParseChapter</c> yields no chapter.
        /// </returns>
        public override async Task<WebNovelChapter> GetChapterAsync(
            ChapterLink link,
            ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
            CancellationToken token = default(CancellationToken))
        {
            string baseContent = await GetWebPageAsync(link.Url, token);

            IHtmlDocument doc = await Parser.ParseAsync(baseContent, token);

            // Looked up before ParseChapter runs, so the lookup sees the document as it was parsed.
            IElement titleElement = doc.DocumentElement.FirstWhereHasClass(TitleClasses);

            WebNovelChapter chapter = ParseChapter(doc.DocumentElement, token);

            // Guard against a null result instead of failing with a NullReferenceException below.
            if (chapter == null)
                return null;

            chapter.Url = link.Url;

            if (titleElement != null)
                chapter.ChapterName = titleElement.Text().Trim();

            return chapter;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Lazily builds one <c>ChapterLink</c> per container element, using the first descendant anchor's
        /// <c>title</c> attribute as the name and its <c>href</c> attribute as the URL.
        /// </summary>
        /// <param name="baseUrl">Not read by this override; the <c>href</c> value is used as-is.</param>
        /// <param name="linkElements">Elements that are searched for a descendant anchor.</param>
        /// <param name="linkFilter">Not read by this override.</param>
        /// <returns>
        /// Links with <c>Unknown</c> set to <c>false</c>; containers without an anchor carrying both
        /// <c>title</c> and <c>href</c> are skipped.
        /// </returns>
        protected override IEnumerable<ChapterLink> CollectChapterLinks(string baseUrl, IEnumerable<IElement> linkElements, Func<IElement, bool> linkFilter = null)
        {
            foreach (IElement container in linkElements)
            {
                IElement anchor = container.Descendents<IElement>().FirstOrDefault(p => p.LocalName == "a");

                bool usable = anchor != null
                              && anchor.HasAttribute("title")
                              && anchor.HasAttribute("href");

                if (usable)
                {
                    yield return new ChapterLink
                    {
                        Name = anchor.GetAttribute("title"),
                        Url = anchor.GetAttribute("href"),
                        Unknown = false
                    };
                }
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Downloads the page behind <paramref name="link"/> and assembles the chapter text from the
        /// <c>sentence</c> elements found inside the chapter container.
        /// </summary>
        /// <param name="link">Chapter link whose <c>Url</c> is downloaded.</param>
        /// <param name="options">Not read by this implementation.</param>
        /// <param name="token">Forwarded to the download and parse calls.</param>
        /// <returns>
        /// A chapter with its name, content and next-chapter URL, or <c>null</c> when the page has no
        /// element matching <c>ChapterClasses</c>.
        /// </returns>
        public override async Task<WebNovelChapter> GetChapterAsync(ChapterLink link, ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
            CancellationToken token = default(CancellationToken))
        {
            string content = await GetWebPageAsync(link.Url, token);

            IHtmlDocument doc = await Parser.ParseAsync(content, token);

            IElement titleElement = doc.DocumentElement.FirstWhereHasClass(ChapterTitleClasses);
            IElement chapterElement = doc.DocumentElement.FirstWhereHasClass(ChapterClasses);

            // The title is optional (handled with ?. below); a missing chapter container means there is
            // nothing to extract, so report "no chapter" instead of dereferencing null.
            if (chapterElement == null)
                return null;

            var chContentElements = chapterElement.WhereHasClass(ChapterContentClasses, element => element.LocalName == "sentence");

            // Each matched element's HTML becomes one paragraph, separated by a double line break.
            string contents = string.Join("<br/><br/>", chContentElements.Select(p => p.InnerHtml));
            string nextChapter = doc.QuerySelector("ul.pager > li.next > a")?.GetAttribute("href");

            return new WebNovelChapter
            {
                ChapterName = titleElement?.TextContent,
                Content = contents,
                NextChapterUrl = nextChapter
            };
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Lazily turns every element that has an <c>href</c> attribute into a <c>ChapterLink</c> and marks
        /// it as known when its text contains one of <c>PossibleChapterNameParts</c>.
        /// </summary>
        /// <param name="baseUrl">Not read by this override; URLs are resolved against the <c>BaseUrl</c> member instead.</param>
        /// <param name="linkElements">Candidate elements.</param>
        /// <param name="linkFilter">Not read by this override.</param>
        /// <returns>
        /// One link per element with an <c>href</c>; <c>Unknown</c> is <c>false</c> only when the decoded
        /// text matches a known chapter-name part (culture-sensitive, case-insensitive).
        /// </returns>
        protected override IEnumerable<ChapterLink> CollectChapterLinks(string baseUrl, IEnumerable<IElement> linkElements, Func<IElement, bool> linkFilter = null)
        {
            foreach (IElement candidate in linkElements)
            {
                if (candidate.HasAttribute("href"))
                {
                    string title = WebUtility.HtmlDecode(candidate.TextContent);
                    string absoluteUrl = UrlHelper.ToAbsoluteUrl(BaseUrl, candidate.GetAttribute("href"));

                    ChapterLink result = new ChapterLink
                    {
                        Name = title,
                        Url = absoluteUrl,
                        Unknown = true
                    };

                    bool looksLikeChapter = PossibleChapterNameParts.Any(
                        part => title.IndexOf(part, StringComparison.CurrentCultureIgnoreCase) >= 0);

                    if (looksLikeChapter)
                        result.Unknown = false;

                    yield return result;
                }
            }
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Default implementation; overrides supply the actual chapter retrieval.
 /// </summary>
 /// <param name="link">Not read by this implementation.</param>
 /// <param name="options">Not read by this implementation.</param>
 /// <param name="token">Not read by this implementation.</param>
 /// <returns>Never returns normally.</returns>
 /// <exception cref="NotImplementedException">
 /// Always thrown, directly from the call rather than through the returned task, because the method
 /// is not <c>async</c>.
 /// </exception>
 public virtual Task<WebNovelChapter> GetChapterAsync(
     ChapterLink link,
     ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
     CancellationToken token = default(CancellationToken))
 {
     throw new NotImplementedException();
 }
Ejemplo n.º 8
0
        /// <summary>
        /// Downloads the chapter behind <paramref name="link"/> and appends the content of every further
        /// page URL returned by <c>GetPagedChapterUrls</c>.
        /// </summary>
        /// <param name="link">Chapter link whose <c>Url</c> is downloaded first.</param>
        /// <param name="options">Not read by this implementation.</param>
        /// <param name="token">Forwarded to every download, parse and <c>ParseChapter</c> call.</param>
        /// <returns>
        /// The chapter with content from all pages, or <c>null</c> when the first page cannot be parsed
        /// into a chapter. Follow-up pages that yield no chapter are skipped.
        /// </returns>
        public override async Task<WebNovelChapter> GetChapterAsync(ChapterLink link,
            ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
            CancellationToken token = default(CancellationToken))
        {
            string content = await GetWebPageAsync(link.Url, token);
            IHtmlDocument doc = await Parser.ParseAsync(content, token);

            // Page URLs are requested before ParseChapter is handed the same document element.
            var paged = GetPagedChapterUrls(doc.DocumentElement);

            WebNovelChapter chapter = ParseChapter(doc.DocumentElement, token);

            if (chapter == null)
                return null;

            chapter.Url = link.Url;
            chapter.NextChapterUrl = UrlHelper.ToAbsoluteUrl(link.Url, chapter.NextChapterUrl);

            foreach (var page in paged)
            {
                string pageContent = await GetWebPageAsync(page, token);

                IHtmlDocument pageDoc = await Parser.ParseAsync(pageContent, token);

                // ParseChapter can return null (see the check on the first page); skip such pages
                // rather than dereferencing the result and throwing a NullReferenceException.
                WebNovelChapter pageChapter = ParseChapter(pageDoc.DocumentElement, token);

                if (pageChapter != null)
                    chapter.Content += pageChapter.Content;
            }

            return chapter;
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Downloads the page behind <paramref name="link"/> and returns the inner HTML of its
        /// <c>mw-content-text</c> element after rewriting links and linked images.
        /// </summary>
        /// <remarks>
        /// Every anchor with an <c>href</c> gets that value HTML-decoded and passed through
        /// <c>UrlHelper.ToAbsoluteUrl</c> with <c>BaseUrl</c>. For anchors that wrap an <c>img</c>, the linked
        /// page is downloaded as well and the image <c>src</c> is replaced with the target of the first anchor
        /// inside that page's first <c>div</c> whose class is exactly <c>fullMedia</c>.
        /// </remarks>
        /// <param name="link">Chapter link whose <c>Url</c> is downloaded.</param>
        /// <param name="options">Not read by this implementation.</param>
        /// <param name="token">Forwarded to every download and parse call.</param>
        /// <returns>
        /// A chapter carrying the link URL and the processed HTML, or <c>null</c> when the page has no
        /// <c>mw-content-text</c> element.
        /// </returns>
        public override async Task<WebNovelChapter> GetChapterAsync(ChapterLink link,
            ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
            CancellationToken token = default(CancellationToken))
        {
            string pageHtml = await GetWebPageAsync(link.Url, token);
            IHtmlDocument document = await Parser.ParseAsync(pageHtml, token);

            IElement body = document.GetElementById("mw-content-text");

            if (body == null)
                return null;

            IElement toc = document.GetElementById("toc");

            if (toc != null)
                toc.Remove();

            RemoveTables(body);

            var anchors = body.Descendents<IElement>().Where(el => el.LocalName == "a");

            foreach (IElement anchor in anchors)
            {
                if (!anchor.HasAttribute("href"))
                    continue;

                string decodedHref = WebUtility.HtmlDecode(anchor.GetAttribute("href"));
                anchor.SetAttribute("href", UrlHelper.ToAbsoluteUrl(BaseUrl, decodedHref));

                IElement thumbnail = anchor.Descendents<IElement>().FirstOrDefault(el => el.LocalName == "img");

                // Plain text links need nothing beyond the href rewrite above.
                if (thumbnail == null)
                    continue;

                // Snapshot first: attributes are removed while the live collection would otherwise be enumerated.
                var removable = thumbnail.Attributes
                    .Where(a => a.LocalName != "width" && a.LocalName != "height")
                    .ToList();

                foreach (var attribute in removable)
                    thumbnail.RemoveAttribute(attribute.Name);

                // The anchor's (already rewritten) href is the page that is fetched for the full-size link.
                string mediaPageUrl = anchor.GetAttribute("href");
                string mediaPageHtml = await GetWebPageAsync(mediaPageUrl, token);
                IHtmlDocument mediaPage = await Parser.ParseAsync(mediaPageHtml, token);

                IElement fullSizeLink = mediaPage.Descendents<IElement>()
                    .Where(el => el.LocalName == "div"
                                 && el.HasAttribute("class")
                                 && el.GetAttribute("class") == "fullMedia")
                    .Select(el => el.Descendents<IElement>().FirstOrDefault(p => p.LocalName == "a"))
                    .FirstOrDefault();

                if (fullSizeLink != null && fullSizeLink.HasAttribute("href"))
                {
                    string fullSizeUrl = fullSizeLink.GetAttribute("href");
                    thumbnail.SetAttribute("src", UrlHelper.ToAbsoluteUrl(BaseUrl, fullSizeUrl));
                }
            }

            WebNovelChapter chapter = new WebNovelChapter
            {
                Url = link.Url,
                Content = body.InnerHtml
            };

            return chapter;
        }