/// <summary>
/// Downloads the chapter at <paramref name="link"/> and appends the content of every extra page
/// returned by <c>GetPagedChapterUrls</c> for the first page.
/// </summary>
/// <param name="link">Chapter link; its <c>Url</c> is fetched and stored on the returned chapter.</param>
/// <param name="options">Retrieval options; not read by this implementation.</param>
/// <param name="token">Cancellation token forwarded to every download and parse call.</param>
/// <returns>The assembled chapter, or <c>null</c> when <c>ParseChapter</c> yields nothing for the first page.</returns>
public override async Task<WebNovelChapter> GetChapterAsync(ChapterLink link, ChapterRetrievalOptions options = default(ChapterRetrievalOptions), CancellationToken token = default(CancellationToken))
{
    string content = await GetWebPageAsync(link.Url, token);
    IHtmlDocument doc = await Parser.ParseAsync(content, token);

    // The page list is collected before ParseChapter runs, preserving the original call order.
    var paged = GetPagedChapterUrls(doc.DocumentElement);

    WebNovelChapter chapter = ParseChapter(doc.DocumentElement, token);
    if (chapter == null)
    {
        return null;
    }

    chapter.Url = link.Url;
    chapter.NextChapterUrl = UrlHelper.ToAbsoluteUrl(link.Url, chapter.NextChapterUrl);

    foreach (var page in paged)
    {
        string pageContent = await GetWebPageAsync(page, token);
        IHtmlDocument pageDoc = await Parser.ParseAsync(pageContent, token);

        // ParseChapter may return null (the first page is checked for it above);
        // skip such a page instead of failing with a NullReferenceException.
        WebNovelChapter pageChapter = ParseChapter(pageDoc.DocumentElement, token);
        if (pageChapter == null)
        {
            continue;
        }

        chapter.Content += pageChapter.Content;
    }

    return chapter;
}
/// <summary>
/// Fetches the page at <paramref name="link"/> and returns the cleaned-up HTML of the first
/// <c>div</c> whose <c>class</c> attribute contains <c>post_body</c>.
/// </summary>
/// <param name="link">Chapter link; its <c>Url</c> is fetched and copied to the result.</param>
/// <param name="options">Retrieval options; not read by this implementation.</param>
/// <param name="token">Cancellation token forwarded to the download and parse calls.</param>
/// <returns>The chapter, or <c>null</c> when no matching <c>div</c> exists.</returns>
public override async Task<WebNovelChapter> GetChapterAsync(ChapterLink link, ChapterRetrievalOptions options = default(ChapterRetrievalOptions), CancellationToken token = default(CancellationToken))
{
    string html = await GetWebPageAsync(link.Url, token);
    IHtmlDocument document = await Parser.ParseAsync(html, token);

    IElement body = document.All
        .Where(e => e.LocalName == "div")
        .Where(e => e.HasAttribute("class"))
        .Where(e => e.GetAttribute("class").Contains("post_body"))
        .FirstOrDefault();

    if (body == null)
    {
        return null;
    }

    // In-place clean-up passes over the post body, in the original order.
    RemoveNavigation(body);
    RemoveDonation(body);
    ExpandSpoilers(body);
    RemoveEmptyTags(body);

    return new WebNovelChapter
    {
        Url = link.Url,
        Content = CleanupHTML(body.InnerHtml)
    };
}
/// <summary>
/// Fetches the page at <paramref name="link"/> and returns the cleaned content of the first
/// element matching the <c>.chapter-content</c> selector.
/// </summary>
/// <param name="link">Chapter link; its <c>Url</c> is fetched and copied to the result.</param>
/// <param name="options">Retrieval options; not read by this implementation.</param>
/// <param name="token">Cancellation token forwarded to the download and parse calls.</param>
/// <returns>The chapter, or <c>null</c> when the selector matches nothing.</returns>
public override async Task<WebNovelChapter> GetChapterAsync(ChapterLink link, ChapterRetrievalOptions options = default(ChapterRetrievalOptions), CancellationToken token = default(CancellationToken))
{
    string html = await GetWebPageAsync(link.Url, token);
    IHtmlDocument document = await Parser.ParseAsync(html, token);

    IElement chapterBody = document.QuerySelector(".chapter-content");
    if (chapterBody == null)
    {
        return null;
    }

    // In-place clean-up passes over the chapter body, in the original order.
    RemoveNavigation(chapterBody);
    RemoveAdvertisements(chapterBody);
    ExpandSpoilers(chapterBody);
    RemoveFontStyle(chapterBody);

    var cleanup = new ContentCleanup(BaseUrl);
    var cleaned = cleanup.Execute(document, chapterBody);

    var result = new WebNovelChapter();
    result.Url = link.Url;
    result.Content = cleaned;
    return result;
}
/// <summary>
/// Loads the chapter page and extracts the first <c>div</c> whose <c>class</c> attribute
/// contains the text <c>post_body</c>, then runs the clean-up helpers on it.
/// </summary>
/// <param name="link">Chapter link; its <c>Url</c> is fetched and copied to the result.</param>
/// <param name="options">Retrieval options; not read by this implementation.</param>
/// <param name="token">Cancellation token forwarded to the download and parse calls.</param>
/// <returns>The chapter, or <c>null</c> when the page has no such <c>div</c>.</returns>
public override async Task<WebNovelChapter> GetChapterAsync(ChapterLink link, ChapterRetrievalOptions options = default(ChapterRetrievalOptions), CancellationToken token = default(CancellationToken))
{
    string markup = await GetWebPageAsync(link.Url, token);
    IHtmlDocument page = await Parser.ParseAsync(markup, token);

    // First element in document order that is a div with a class attribute containing "post_body".
    IElement post = page.All.FirstOrDefault(
        e => e.LocalName == "div"
             && e.HasAttribute("class")
             && e.GetAttribute("class").Contains("post_body"));

    if (post != null)
    {
        RemoveNavigation(post);
        RemoveDonation(post);
        ExpandSpoilers(post);
        RemoveEmptyTags(post);

        var cleaned = CleanupHTML(post.InnerHtml);
        return new WebNovelChapter { Url = link.Url, Content = cleaned };
    }

    return null;
}
/// <summary>
/// Fetches the page at <paramref name="link"/> and returns the cleaned content of the
/// element with id <c>storytext</c>.
/// </summary>
/// <param name="link">Chapter link; its <c>Url</c> is fetched and copied to the result.</param>
/// <param name="options">Retrieval options; not read by this implementation.</param>
/// <param name="token">Cancellation token forwarded to the download and parse calls.</param>
/// <returns>The chapter, or <c>null</c> when the page has no <c>#storytext</c> element.</returns>
public override async Task<WebNovelChapter> GetChapterAsync(ChapterLink link, ChapterRetrievalOptions options = default(ChapterRetrievalOptions), CancellationToken token = default(CancellationToken))
{
    string content = await GetWebPageAsync(link.Url, token);
    IHtmlDocument doc = await Parser.ParseAsync(content, token);

    // QuerySelector returns null when nothing matches; do not hand a null element to the cleanup.
    var storyElement = doc.QuerySelector("#storytext");
    if (storyElement == null)
    {
        return null;
    }

    return new WebNovelChapter
    {
        // Record the source URL so the result carries the address it was loaded from.
        Url = link.Url,
        Content = new ContentCleanup(BaseUrl).Execute(doc, storyElement)
    };
}
/// <summary>
/// Fetches the page at <paramref name="link"/>, parses it with <c>ParseChapter</c>, and sets the
/// chapter name from the first element carrying one of <c>TitleClasses</c>, when such an element exists.
/// </summary>
/// <param name="link">Chapter link; its <c>Url</c> is fetched and stored on the returned chapter.</param>
/// <param name="options">Retrieval options; not read by this implementation.</param>
/// <param name="token">Cancellation token forwarded to the download, parse and <c>ParseChapter</c> calls.</param>
/// <returns>The parsed chapter, or <c>null</c> when <c>ParseChapter</c> returns <c>null</c>.</returns>
public override async Task<WebNovelChapter> GetChapterAsync(
    ChapterLink link,
    ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
    CancellationToken token = default(CancellationToken))
{
    string baseContent = await GetWebPageAsync(link.Url, token);
    IHtmlDocument doc = await Parser.ParseAsync(baseContent, token);

    // The title is looked up before ParseChapter runs, preserving the original call order.
    IElement titleElement = doc.DocumentElement.FirstWhereHasClass(TitleClasses);

    WebNovelChapter chapter = ParseChapter(doc.DocumentElement, token);
    if (chapter == null)
    {
        // Avoid a NullReferenceException on the assignments below when nothing could be parsed.
        return null;
    }

    chapter.Url = link.Url;

    if (titleElement != null)
    {
        chapter.ChapterName = titleElement.Text().Trim();
    }

    return chapter;
}
/// <summary>
/// Fetches the page at <paramref name="link"/>, parses it with the four-argument <c>ParseChapter</c>
/// overload, and sets the chapter name from the first element carrying one of <c>TitleClasses</c>, if any.
/// </summary>
/// <param name="link">Chapter link; its <c>Url</c> is fetched, passed to <c>ParseChapter</c>, and stored on the result.</param>
/// <param name="options">Retrieval options; not read by this implementation.</param>
/// <param name="token">Cancellation token forwarded to the download, parse and <c>ParseChapter</c> calls.</param>
/// <returns>The parsed chapter, or <c>null</c> when <c>ParseChapter</c> returns <c>null</c>.</returns>
public override async Task<WebNovelChapter> GetChapterAsync(
    ChapterLink link,
    ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
    CancellationToken token = default(CancellationToken))
{
    string baseContent = await GetWebPageAsync(link.Url, token);
    IHtmlDocument doc = await Parser.ParseAsync(baseContent, token);

    // The title is looked up before ParseChapter runs, preserving the original call order.
    IElement titleElement = doc.DocumentElement.FirstWhereHasClass(TitleClasses);

    WebNovelChapter chapter = ParseChapter(doc, link.Url, doc.DocumentElement, token);
    if (chapter == null)
    {
        // Guard the property writes below against a failed parse.
        return null;
    }

    chapter.Url = link.Url;

    if (titleElement != null)
    {
        chapter.ChapterName = titleElement.Text().Trim();
    }

    return chapter;
}
/// <summary>
/// Fetches the page at <paramref name="link"/> and builds a chapter from the <c>sentence</c> elements
/// (filtered by <c>ChapterContentClasses</c>) found under the first element carrying one of <c>ChapterClasses</c>.
/// </summary>
/// <param name="link">Chapter link; its <c>Url</c> is fetched.</param>
/// <param name="options">Retrieval options; not read by this implementation.</param>
/// <param name="token">Cancellation token forwarded to the download and parse calls.</param>
/// <returns>
/// The chapter with name, content and next-chapter link, or <c>null</c> when the page has no chapter container.
/// </returns>
public override async Task<WebNovelChapter> GetChapterAsync(ChapterLink link, ChapterRetrievalOptions options = default(ChapterRetrievalOptions), CancellationToken token = default(CancellationToken))
{
    string content = await GetWebPageAsync(link.Url, token);
    IHtmlDocument doc = await Parser.ParseAsync(content, token);

    IElement titleElement = doc.DocumentElement.FirstWhereHasClass(ChapterTitleClasses);
    IElement chapterElement = doc.DocumentElement.FirstWhereHasClass(ChapterClasses);
    if (chapterElement == null)
    {
        // Without a chapter container there is nothing to extract; do not dereference null below.
        return null;
    }

    var chContentElements = chapterElement.WhereHasClass(ChapterContentClasses, element => element.LocalName == "sentence");

    // Each matching element's inner HTML becomes one block, separated by two line breaks.
    string contents = string.Join("<br/><br/>", chContentElements.Select(p => p.InnerHtml));

    // The href is taken as-is from the pager link; null when the page has no such link.
    string nextChapter = doc.QuerySelector("ul.pager > li.next > a")?.GetAttribute("href");

    return new WebNovelChapter
    {
        ChapterName = titleElement?.TextContent,
        Content = contents,
        NextChapterUrl = nextChapter
    };
}
/// <summary>
/// Default chapter retrieval: not implemented. Derived classes are expected to override this.
/// </summary>
/// <param name="link">Chapter link; ignored here.</param>
/// <param name="options">Retrieval options; ignored here.</param>
/// <param name="token">Cancellation token; ignored here.</param>
/// <returns>Never returns.</returns>
/// <exception cref="NotImplementedException">
/// Always thrown. The method is not <c>async</c>, so the exception is raised directly at the call
/// rather than through the returned task.
/// </exception>
public virtual Task<WebNovelChapter> GetChapterAsync(
    ChapterLink link,
    ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
    CancellationToken token = default(CancellationToken))
{
    throw new NotImplementedException();
}
/// <summary>
/// Downloads the chapter page and joins the inner HTML of the <c>sentence</c> elements selected by
/// <c>ChapterContentClasses</c> inside the first element that has one of <c>ChapterClasses</c>.
/// </summary>
/// <param name="link">Chapter link; its <c>Url</c> is fetched.</param>
/// <param name="options">Retrieval options; not read by this implementation.</param>
/// <param name="token">Cancellation token forwarded to the download and parse calls.</param>
/// <returns>
/// The chapter with name, content and next-chapter link, or <c>null</c> when no chapter container is found.
/// </returns>
public override async Task<WebNovelChapter> GetChapterAsync(ChapterLink link, ChapterRetrievalOptions options = default(ChapterRetrievalOptions), CancellationToken token = default(CancellationToken))
{
    string content = await GetWebPageAsync(link.Url, token);
    IHtmlDocument doc = await Parser.ParseAsync(content, token);

    IElement titleElement = doc.DocumentElement.FirstWhereHasClass(ChapterTitleClasses);
    IElement chapterElement = doc.DocumentElement.FirstWhereHasClass(ChapterClasses);

    // The title is already treated as optional; the container needs the same care,
    // otherwise the WhereHasClass call below is made on a null element.
    if (chapterElement == null)
    {
        return null;
    }

    var chContentElements = chapterElement.WhereHasClass(
        ChapterContentClasses,
        element => element.LocalName == "sentence");

    string contents = string.Join("<br/><br/>", chContentElements.Select(p => p.InnerHtml));

    // Null when the pager has no "next" link.
    string nextChapter = doc.QuerySelector("ul.pager > li.next > a")?.GetAttribute("href");

    return new WebNovelChapter
    {
        ChapterName = titleElement?.TextContent,
        Content = contents,
        NextChapterUrl = nextChapter
    };
}
/// <summary>
/// Placeholder implementation of chapter retrieval; it performs no work and always throws.
/// </summary>
/// <param name="link">Chapter link; unused.</param>
/// <param name="options">Retrieval options; unused.</param>
/// <param name="token">Cancellation token; unused.</param>
/// <returns>Nothing; control never reaches a return.</returns>
/// <exception cref="NotImplementedException">
/// Thrown on every call, synchronously (the method is not declared <c>async</c>).
/// </exception>
public virtual Task<WebNovelChapter> GetChapterAsync(
    ChapterLink link,
    ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
    CancellationToken token = default(CancellationToken))
{
    throw new NotImplementedException();
}
/// <summary>
/// Fetches the page at <paramref name="link"/> and builds the chapter content from the
/// <c>sentence.translated</c> elements found inside <c>.chapter-body</c>.
/// </summary>
/// <param name="link">Chapter link; its <c>Url</c> is fetched.</param>
/// <param name="options">Retrieval options; not read by this implementation.</param>
/// <param name="token">Cancellation token forwarded to the download and parse calls.</param>
/// <returns>
/// The chapter with name, cleaned content and next-chapter link, or <c>null</c> when the page has no
/// <c>.chapter-body</c> element.
/// </returns>
public override async Task<WebNovelChapter> GetChapterAsync(ChapterLink link, ChapterRetrievalOptions options = default(ChapterRetrievalOptions), CancellationToken token = default(CancellationToken))
{
    string content = await GetWebPageAsync(link.Url, token);
    IHtmlDocument doc = await Parser.ParseAsync(content, token);

    IElement titleElement = doc.DocumentElement.QuerySelector(".chapter-title");
    IElement chapterElement = doc.DocumentElement.QuerySelector(".chapter-body");
    if (chapterElement == null)
    {
        // QuerySelector returns null when nothing matches; bail out instead of dereferencing it.
        return null;
    }

    // Run the selector once; the appended <p> children do not match it, so the set is unchanged.
    var translated = chapterElement.QuerySelectorAll("sentence.translated").ToList();

    // Append an empty paragraph to each "sentence.translated" element.
    foreach (var sentence in translated)
    {
        sentence.AppendChild(doc.CreateElement("P"));
    }

    // Collect the sentences (including the paragraphs just appended) into one container element.
    var contentEl = doc.CreateElement("P");
    contentEl.InnerHtml = string.Join("", translated.Select(x => x.InnerHtml));

    RemoveSpecialTags(doc, contentEl);

    // Null when the pager has no "next" link.
    string nextChapter = doc.QuerySelector("ul.pager > li.next > a")?.GetAttribute("href");

    return new WebNovelChapter
    {
        ChapterName = titleElement?.GetInnerText(),
        Content = new ContentCleanup(BaseUrl).Execute(doc, contentEl),
        NextChapterUrl = nextChapter
    };
}
/// <summary>
/// Retrieves a chapter that may span several pages: the first page is parsed with
/// <c>ParseChapter</c>, then the content of each URL from <c>GetPagedChapterUrls</c> is appended.
/// </summary>
/// <param name="link">Chapter link; its <c>Url</c> is fetched and stored on the returned chapter.</param>
/// <param name="options">Retrieval options; not read by this implementation.</param>
/// <param name="token">Cancellation token forwarded to every download and parse call.</param>
/// <returns>The assembled chapter, or <c>null</c> when the first page yields no chapter.</returns>
public override async Task<WebNovelChapter> GetChapterAsync(ChapterLink link, ChapterRetrievalOptions options = default(ChapterRetrievalOptions), CancellationToken token = default(CancellationToken))
{
    string content = await GetWebPageAsync(link.Url, token);
    IHtmlDocument doc = await Parser.ParseAsync(content, token);

    // Page URLs are gathered before ParseChapter touches the document (original order kept).
    var paged = GetPagedChapterUrls(doc.DocumentElement);

    WebNovelChapter chapter = ParseChapter(doc.DocumentElement, token);
    if (chapter == null)
    {
        return null;
    }

    chapter.Url = link.Url;
    chapter.NextChapterUrl = UrlHelper.ToAbsoluteUrl(link.Url, chapter.NextChapterUrl);

    foreach (var page in paged)
    {
        string pageContent = await GetWebPageAsync(page, token);
        IHtmlDocument pageDoc = await Parser.ParseAsync(pageContent, token);
        WebNovelChapter pageChapter = ParseChapter(pageDoc.DocumentElement, token);

        // ParseChapter is null-checked for the first page, so it can come back null here too;
        // only append when a follow-up page actually produced a chapter.
        if (pageChapter != null)
        {
            chapter.Content += pageChapter.Content;
        }
    }

    return chapter;
}
/// <summary>
/// Fetches the page at <paramref name="link"/> and returns the inner HTML of the element with id
/// <c>mw-content-text</c>, after removing the <c>toc</c> element and tables, making link targets
/// absolute, and re-pointing linked images at the target found on the linked page.
/// </summary>
/// <remarks>
/// For every anchor that wraps an <c>img</c>, the linked page is downloaded and the first anchor
/// inside a <c>div</c> whose class is exactly <c>fullMedia</c> supplies the new <c>src</c>.
/// All image attributes other than <c>width</c> and <c>height</c> are removed first, so an image
/// whose linked page has no such anchor is left without a <c>src</c>.
/// </remarks>
/// <param name="link">Chapter link; its <c>Url</c> is fetched and copied to the result.</param>
/// <param name="options">Retrieval options; not read by this implementation.</param>
/// <param name="token">Cancellation token forwarded to every download and parse call.</param>
/// <returns>The chapter, or <c>null</c> when the page has no <c>mw-content-text</c> element.</returns>
public override async Task<WebNovelChapter> GetChapterAsync(ChapterLink link, ChapterRetrievalOptions options = default(ChapterRetrievalOptions), CancellationToken token = default(CancellationToken))
{
    string pageHtml = await GetWebPageAsync(link.Url, token);
    IHtmlDocument page = await Parser.ParseAsync(pageHtml, token);

    IElement body = page.GetElementById("mw-content-text");
    if (body == null)
    {
        return null;
    }

    page.GetElementById("toc")?.Remove();
    RemoveTables(body);

    var anchors = body.Descendents<IElement>().Where(p => p.LocalName == "a");
    foreach (IElement anchor in anchors)
    {
        if (!anchor.HasAttribute("href"))
        {
            continue;
        }

        // Decode HTML entities in the href, then rewrite it as an absolute URL.
        string decodedHref = WebUtility.HtmlDecode(anchor.GetAttribute("href"));
        anchor.SetAttribute("href", UrlHelper.ToAbsoluteUrl(BaseUrl, decodedHref));

        IElement image = anchor.Descendents<IElement>().FirstOrDefault(p => p.LocalName == "img");
        if (image == null)
        {
            continue;
        }

        // Snapshot the attributes to drop before mutating the collection.
        var droppedAttributes = image.Attributes
            .Where(p => p.LocalName != "width" && p.LocalName != "height")
            .ToList();
        foreach (var attrib in droppedAttributes)
        {
            image.RemoveAttribute(attrib.Name);
        }

        // The anchor's (now absolute) href is the page to look the image up on.
        string mediaPageUrl = anchor.GetAttribute("href");
        string mediaPageHtml = await GetWebPageAsync(mediaPageUrl, token);
        IHtmlDocument mediaPage = await Parser.ParseAsync(mediaPageHtml, token);

        IElement fullImageAnchor = mediaPage.Descendents<IElement>()
            .Where(e => e.LocalName == "div")
            .Where(e => e.HasAttribute("class"))
            .Where(e => e.GetAttribute("class") == "fullMedia")
            .Select(e => e.Descendents<IElement>().FirstOrDefault(p => p.LocalName == "a"))
            .FirstOrDefault();

        if (fullImageAnchor != null && fullImageAnchor.HasAttribute("href"))
        {
            string fullImageHref = fullImageAnchor.GetAttribute("href");
            image.SetAttribute("src", UrlHelper.ToAbsoluteUrl(BaseUrl, fullImageHref));
        }
    }

    return new WebNovelChapter
    {
        Url = link.Url,
        Content = body.InnerHtml
    };
}
/// <summary>
/// Downloads a wiki-style chapter page and returns the inner HTML of its <c>mw-content-text</c>
/// element once the <c>toc</c> element and tables are removed, hrefs are absolute, and linked
/// images have their <c>src</c> replaced.
/// </summary>
/// <remarks>
/// Each anchor containing an <c>img</c> triggers one extra download of the anchor's target page;
/// the new <c>src</c> comes from the first anchor under a <c>div</c> with class exactly
/// <c>fullMedia</c>. Image attributes other than <c>width</c>/<c>height</c> are removed beforehand,
/// so when no such anchor is found the image keeps no <c>src</c>.
/// </remarks>
/// <param name="link">Chapter link; its <c>Url</c> is fetched and copied to the result.</param>
/// <param name="options">Retrieval options; not read by this implementation.</param>
/// <param name="token">Cancellation token forwarded to every download and parse call.</param>
/// <returns>The chapter, or <c>null</c> when <c>mw-content-text</c> is missing.</returns>
public override async Task<WebNovelChapter> GetChapterAsync(ChapterLink link, ChapterRetrievalOptions options = default(ChapterRetrievalOptions), CancellationToken token = default(CancellationToken))
{
    string html = await GetWebPageAsync(link.Url, token);
    IHtmlDocument document = await Parser.ParseAsync(html, token);

    IElement wikiContent = document.GetElementById("mw-content-text");
    if (wikiContent == null)
    {
        return null;
    }

    document.GetElementById("toc")?.Remove();
    RemoveTables(wikiContent);

    foreach (IElement hyperlink in wikiContent.Descendents<IElement>().Where(p => p.LocalName == "a"))
    {
        if (hyperlink.HasAttribute("href"))
        {
            // Entity-decode the raw href and replace it with its absolute form.
            string rawHref = WebUtility.HtmlDecode(hyperlink.GetAttribute("href"));
            string absoluteHref = UrlHelper.ToAbsoluteUrl(BaseUrl, rawHref);
            hyperlink.SetAttribute("href", absoluteHref);

            IElement thumbnail = hyperlink.Descendents<IElement>().FirstOrDefault(p => p.LocalName == "img");
            if (thumbnail != null)
            {
                // ToList() snapshots the attributes so they can be removed while iterating.
                foreach (var attrib in thumbnail.Attributes.Where(p => !(p.LocalName == "width" || p.LocalName == "height")).ToList())
                {
                    thumbnail.RemoveAttribute(attrib.Name);
                }

                // Read the href back from the element, as the original does, and load that page.
                string detailUrl = hyperlink.GetAttribute("href");
                string detailHtml = await GetWebPageAsync(detailUrl, token);
                IHtmlDocument detailDoc = await Parser.ParseAsync(detailHtml, token);

                IElement fullMediaLink = (from e in detailDoc.Descendents<IElement>()
                                          where e.LocalName == "div" && e.HasAttribute("class")
                                          where e.GetAttribute("class") == "fullMedia"
                                          select e.Descendents<IElement>().FirstOrDefault(p => p.LocalName == "a")).FirstOrDefault();

                bool hasTarget = fullMediaLink != null && fullMediaLink.HasAttribute("href");
                if (hasTarget)
                {
                    thumbnail.SetAttribute("src", UrlHelper.ToAbsoluteUrl(BaseUrl, fullMediaLink.GetAttribute("href")));
                }
            }
        }
    }

    var chapter = new WebNovelChapter();
    chapter.Url = link.Url;
    chapter.Content = wikiContent.InnerHtml;
    return chapter;
}