/// <summary>
/// Moves the CSS that applies to the current document into inline attributes
/// and returns the serialized HTML together with any collected warnings.
/// </summary>
/// <param name="removeStyleElements">When <c>true</c>, the style and link nodes that were used as CSS sources are removed from the document.</param>
/// <param name="ignoreElements">CSS selector for STYLE elements to ignore (e.g. mobile-specific styles).</param>
/// <param name="css">A string containing an additional style-sheet for inlining.</param>
/// <param name="stripIdAndClassAttributes">When <c>true</c>, the "id" and "class" attributes are stripped after the styles have been applied.</param>
/// <param name="removeComments">When <c>true</c>, every comment node is removed from the document.</param>
/// <param name="keepMediaQueries">When <c>true</c> (and <paramref name="removeStyleElements"/> is also <c>true</c>), the collected media queries are written back into a new style element.</param>
/// <returns>An <see cref="InlineResult"/> holding the resulting HTML and the warnings gathered so far.</returns>
public InlineResult MoveCssInline(bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, bool keepMediaQueries = false)
{
    // Remember the options on the instance; they are stored in fields rather than passed along.
    _removeStyleElements = removeStyleElements;
    _stripIdAndClassAttributes = stripIdAndClassAttributes;
    _ignoreElements = ignoreElements;
    _keepMediaQueries = keepMediaQueries;
    _css = css;

    // Collect every CSS source: first the source nodes, then the link nodes.
    var styleNodes = CssSourceNodes();
    var linkNodes = CssLinkNodes();

    var sources = new List<ICssSource>(ConvertToStyleSources(styleNodes));
    sources.AddRange(ConvertToStyleSources(linkNodes));

    var blocks = GetCssBlocks(sources);

    // The source nodes are only dropped after their CSS blocks have been extracted.
    if (_removeStyleElements)
    {
        RemoveStyleElements(styleNodes);
        RemoveStyleElements(linkNodes);
    }

    var joined = Join(blocks);
    var styleBlocks = joined.Styles;
    var mediaQueries = joined.MediaQueries;

    // Selector clean-up -> element lookup -> merge -> apply.
    var usableSelectors = CleanUnsupportedSelectors(styleBlocks);
    var styledElements = FindElementsWithStyles(usableSelectors);
    StyleClassApplier.ApplyAllStyles(MergeStyleClasses(styledElements));

    if (_stripIdAndClassAttributes)
    {
        StripElementAttributes("id", "class");
    }

    if (removeStyleElements && _keepMediaQueries)
    {
        // NOTE(review): the media queries are joined with "," before being written into the
        // style element - confirm that this separator yields the intended CSS.
        var mediaStyle = _document.CreateElement("style");
        mediaStyle.TextContent = String.Join(",", mediaQueries);
        _document.Body.Prepend(mediaStyle);
    }

    if (removeComments)
    {
        // Snapshot the comment nodes first so that removal does not disturb the traversal.
        foreach (var commentNode in _document.Descendents<IComment>().ToList())
        {
            commentNode.Remove();
        }
    }

    var markup = _document.ToHtml(new AutoSelectedMarkupFormatter(_document.Doctype));
    return new InlineResult(markup, _warnings);
}
/// <summary>
/// Moves the CSS that applies to the current document into inline attributes,
/// optionally adding caller-supplied precompiled styles, and returns the
/// serialized HTML together with any collected warnings.
/// </summary>
/// <param name="removeStyleElements">When <c>true</c>, the style and link nodes that were used as CSS sources are removed from the document.</param>
/// <param name="ignoreElements">CSS selector for STYLE elements to ignore (e.g. mobile-specific styles).</param>
/// <param name="css">A string containing an additional style-sheet for inlining; it is appended after the document's own sources.</param>
/// <param name="stripIdAndClassAttributes">When <c>true</c>, the "id" and "class" attributes are stripped after the styles have been applied.</param>
/// <param name="removeComments">When <c>true</c>, every comment node is removed from the document.</param>
/// <param name="precompiledStyles">Optional selector-to-style entries that are added to the valid selectors before elements are matched; <c>null</c> adds nothing.</param>
/// <returns>An <see cref="InlineResult"/> holding the resulting HTML and the warnings gathered so far.</returns>
public InlineResult MoveCssInline(bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, SortedList<string, StyleClass> precompiledStyles = null)
{
    // Remember the options on the instance; they are stored in fields rather than passed along.
    _removeStyleElements = removeStyleElements;
    _stripIdAndClassAttributes = stripIdAndClassAttributes;
    _ignoreElements = ignoreElements;

    // Collect every CSS source: source nodes, link nodes, then the explicit css string.
    var styleNodes = CssSourceNodes();
    var linkNodes = CssLinkNodes();

    var sources = new List<ICssSource>(ConvertToStyleSources(styleNodes));
    sources.AddRange(ConvertToStyleSources(linkNodes));
    sources.AddRange(PreMailer.ConvertToStyleSources(css));

    var blocks = PreMailer.GetCssBlocks(sources);

    // The source nodes are only dropped after their CSS blocks have been extracted.
    if (_removeStyleElements)
    {
        RemoveStyleElements(styleNodes);
        RemoveStyleElements(linkNodes);
    }

    var usableSelectors = CleanUnsupportedSelectors(PreMailer.Join(blocks));

    if (precompiledStyles != null)
    {
        // Iterate over a snapshot of the entries, exactly as the list-based ForEach did.
        foreach (var entry in precompiledStyles.ToList())
        {
            usableSelectors.Add(entry.Key, entry.Value);
        }
    }

    // Element lookup -> merge -> apply.
    var styledElements = FindElementsWithStyles(usableSelectors);
    StyleClassApplier.ApplyAllStyles(MergeStyleClasses(styledElements));

    if (_stripIdAndClassAttributes)
    {
        StripElementAttributes("id", "class");
    }

    if (removeComments)
    {
        // Snapshot the comment nodes first so that removal does not disturb the traversal.
        foreach (var commentNode in _document.Descendents<IComment>().ToList())
        {
            commentNode.Remove();
        }
    }

    var markup = _document.ToHtml(new AutoSelectedMarkupFormatter(_document.Doctype));
    return new InlineResult(markup, _warnings);
}
/// <summary>
/// Moves the CSS that applies to the current document into inline attributes
/// and returns the serialized HTML together with any collected warnings.
/// </summary>
/// <param name="removeStyleElements">When <c>true</c>, the style and link nodes that were used as CSS sources are removed from the document.</param>
/// <param name="ignoreElements">CSS selector for STYLE elements to ignore (e.g. mobile-specific styles).</param>
/// <param name="css">A string containing an additional style-sheet for inlining.</param>
/// <param name="stripIdAndClassAttributes">When <c>true</c>, the "id" and "class" attributes are stripped after the styles have been applied.</param>
/// <param name="removeComments">When <c>true</c>, every comment node is removed from the document.</param>
/// <returns>An <see cref="InlineResult"/> built from the serialized document and the warnings gathered so far.</returns>
public InlineResult MoveCssInline(bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false)
{
    // Remember the options on the instance; they are stored in fields rather than passed along.
    _removeStyleElements = removeStyleElements;
    _stripIdAndClassAttributes = stripIdAndClassAttributes;
    _ignoreElements = ignoreElements;
    _css = css;

    // Collect every CSS source: first the source nodes, then the link nodes.
    var styleNodes = CssSourceNodes();
    var linkNodes = CssLinkNodes();

    var sources = new List<ICssSource>(ConvertToStyleSources(styleNodes));
    sources.AddRange(ConvertToStyleSources(linkNodes));

    var blocks = GetCssBlocks(sources);

    // The source nodes are only dropped after their CSS blocks have been extracted.
    if (_removeStyleElements)
    {
        RemoveStyleElements(styleNodes);
        RemoveStyleElements(linkNodes);
    }

    // Join -> selector clean-up -> element lookup -> merge -> apply.
    var usableSelectors = CleanUnsupportedSelectors(Join(blocks));
    var styledElements = FindElementsWithStyles(usableSelectors);
    StyleClassApplier.ApplyAllStyles(MergeStyleClasses(styledElements));

    if (_stripIdAndClassAttributes)
    {
        StripElementAttributes("id", "class");
    }

    if (removeComments)
    {
        // Snapshot the comment nodes first so that removal does not disturb the traversal.
        foreach (var commentNode in _document.Descendents<IComment>().ToList())
        {
            commentNode.Remove();
        }
    }

    // Serialize through a writer and hand its StringBuilder to the result.
    IMarkupFormatter formatter = GetMarkupFormatterForDocType();
    using (var writer = new StringWriter())
    {
        _document.ToHtml(writer, formatter);
        return new InlineResult(writer.GetStringBuilder(), _warnings);
    }
}
/// <summary>
/// Downloads and parses the page at <paramref name="baseUrl"/> and collects chapter links
/// from the first element matching <c>PageClasses</c>, falling back to the first
/// <c>article</c> element when no such element exists.
/// </summary>
/// <param name="baseUrl">URL of the page to download; also passed on to the link collector.</param>
/// <param name="token">Cancellation token forwarded to the download and the parser.</param>
/// <returns>The collected chapter links, or <c>EmptyLinks</c> when neither container is found.</returns>
public override async Task<IEnumerable<ChapterLink>> GetChapterLinksAsync(string baseUrl, CancellationToken token = default(CancellationToken))
{
    string pageHtml = await GetWebPageAsync(baseUrl, token);
    IHtmlDocument page = await Parser.ParseAsync(pageHtml, token);

    // Preferred container: the first element found via the PageClasses lookup.
    IElement container = page.DocumentElement.FirstWhereHasClass(PageClasses);
    if (container == null)
    {
        // Fallback: the first <article> element anywhere in the document.
        container = page.Descendents<IElement>().FirstOrDefault(p => p.LocalName == "article");
    }

    if (container != null)
    {
        return CollectChapterLinks(baseUrl, container.Descendents<IElement>());
    }

    return EmptyLinks;
}
/// <summary>
/// Downloads and parses the page at <paramref name="baseUrl"/> and collects chapter links
/// from the first <c>div</c> whose class attribute contains a class name that includes
/// (case-insensitively) one of the <c>PostClasses</c> entries.
/// </summary>
/// <param name="baseUrl">URL of the page to download; also passed on to the link collector.</param>
/// <param name="token">Cancellation token forwarded to the download and the parser.</param>
/// <returns>The collected chapter links, or a new empty list when no matching <c>div</c> is found.</returns>
public override async Task<IEnumerable<ChapterLink>> GetChapterLinksAsync(string baseUrl, CancellationToken token = default(CancellationToken))
{
    string pageHtml = await GetWebPageAsync(baseUrl, token);
    IHtmlDocument page = await Parser.ParseAsync(pageHtml, token);

    // Walk the document in order and stop at the first div that matches.
    IElement postContainer = null;
    foreach (IElement candidate in page.Descendents<IElement>())
    {
        if (candidate.LocalName != "div" || !candidate.HasAttribute("class"))
        {
            continue;
        }

        string[] classNames = candidate.GetAttribute("class").Split(' ');

        // Substring match: a class name only has to contain the wanted text, ignoring case.
        bool isMatch = false;
        foreach (var wanted in PostClasses)
        {
            if (classNames.Any(p => p.IndexOf(wanted, StringComparison.OrdinalIgnoreCase) >= 0))
            {
                isMatch = true;
                break;
            }
        }

        if (isMatch)
        {
            postContainer = candidate;
            break;
        }
    }

    if (postContainer == null)
    {
        return new List<ChapterLink>();
    }

    return CollectChapterLinks(baseUrl, postContainer.Descendents<IElement>());
}
/// <summary>
/// Downloads the chapter page behind <paramref name="link"/>, extracts the element with id
/// <c>mw-content-text</c>, makes its anchor targets absolute and, for anchors wrapping an
/// image, tries to point the image at the full-size file found on the linked page.
/// </summary>
/// <param name="link">Chapter link whose <c>Url</c> is downloaded.</param>
/// <param name="options">Retrieval options; not read by this implementation.</param>
/// <param name="token">Cancellation token forwarded to every download and parse call.</param>
/// <returns>
/// A <see cref="WebNovelChapter"/> carrying the link URL and the inner HTML of the content
/// element, or <c>null</c> when the content element is missing.
/// </returns>
public override async Task<WebNovelChapter> GetChapterAsync(ChapterLink link, ChapterRetrievalOptions options = default(ChapterRetrievalOptions), CancellationToken token = default(CancellationToken))
{
    string chapterHtml = await GetWebPageAsync(link.Url, token);
    IHtmlDocument chapterDoc = await Parser.ParseAsync(chapterHtml, token);

    IElement content = chapterDoc.GetElementById("mw-content-text");
    if (content == null)
    {
        return null;
    }

    // Drop the element with id "toc" (if any) and the tables before touching the links.
    chapterDoc.GetElementById("toc")?.Remove();
    RemoveTables(content);

    var anchors = content.Descendents<IElement>().Where(p => p.LocalName == "a");
    foreach (IElement anchor in anchors)
    {
        if (!anchor.HasAttribute("href"))
        {
            continue;
        }

        // Rewrite the (HTML-decoded) href as an absolute URL.
        string decodedHref = WebUtility.HtmlDecode(anchor.GetAttribute("href"));
        anchor.SetAttribute("href", UrlHelper.ToAbsoluteUrl(BaseUrl, decodedHref));

        IElement thumbnail = anchor.Descendents<IElement>().FirstOrDefault(p => p.LocalName == "img");
        if (thumbnail == null)
        {
            continue;
        }

        // Keep only width/height on the image; the attribute list is copied before removal.
        var discarded = thumbnail.Attributes
            .Where(p => p.LocalName != "width" && p.LocalName != "height")
            .ToList();
        foreach (var attribute in discarded)
        {
            thumbnail.RemoveAttribute(attribute.Name);
        }

        // Follow the anchor to the page it links to and parse it.
        string mediaPageUrl = anchor.GetAttribute("href");
        string mediaPageHtml = await GetWebPageAsync(mediaPageUrl, token);
        IHtmlDocument mediaPage = await Parser.ParseAsync(mediaPageHtml, token);

        // First anchor inside the first div whose class attribute is exactly "fullMedia".
        IElement fullImageAnchor = mediaPage.Descendents<IElement>()
            .Where(e => e.LocalName == "div")
            .Where(e => e.HasAttribute("class"))
            .Where(e => e.GetAttribute("class") == "fullMedia")
            .Select(e => e.Descendents<IElement>().FirstOrDefault(p => p.LocalName == "a"))
            .FirstOrDefault();

        if (fullImageAnchor == null || !fullImageAnchor.HasAttribute("href"))
        {
            // No full-size link found: the image is left without a src (it was stripped above).
            continue;
        }

        string fullImageHref = fullImageAnchor.GetAttribute("href");
        thumbnail.SetAttribute("src", UrlHelper.ToAbsoluteUrl(BaseUrl, fullImageHref));
    }

    return new WebNovelChapter
    {
        Url = link.Url,
        Content = content.InnerHtml
    };
}