Example #1
0
        /// <summary>
        /// Moves the CSS that applies to the current HTML document into inline style attributes.
        /// </summary>
        /// <param name="removeStyleElements">If set to <c>true</c>, the nodes the CSS was gathered from are removed from the document.</param>
        /// <param name="ignoreElements">CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.)</param>
        /// <param name="css">A string containing a style-sheet for inlining.</param>
        /// <param name="stripIdAndClassAttributes">If <c>true</c>, the <c>id</c> and <c>class</c> attributes are stripped once the styles have been applied.</param>
        /// <param name="removeComments">If <c>true</c>, every comment node is removed from the document; otherwise comments are left intact.</param>
        /// <param name="keepMediaQueries">If <c>true</c> (and <paramref name="removeStyleElements"/> is also <c>true</c>), the collected media queries are added back in a new <c>style</c> element prepended to the body.</param>
        /// <returns>An <see cref="InlineResult"/> holding the serialized document and the collected warnings.</returns>
        public InlineResult MoveCssInline(bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, bool keepMediaQueries = false)
        {
            // Keep the requested options on the instance.
            _css                       = css;
            _ignoreElements            = ignoreElements;
            _keepMediaQueries          = keepMediaQueries;
            _removeStyleElements       = removeStyleElements;
            _stripIdAndClassAttributes = stripIdAndClassAttributes;

            // Collect every CSS source available in the document.
            var styleNodes = CssSourceNodes();
            var linkNodes  = CssLinkNodes();

            var sources = new List<ICssSource>(ConvertToStyleSources(styleNodes));
            sources.AddRange(ConvertToStyleSources(linkNodes));

            var blocks = GetCssBlocks(sources);

            // The CSS has been read at this point, so the originating nodes can go if requested.
            if (_removeStyleElements)
            {
                RemoveStyleElements(styleNodes);
                RemoveStyleElements(linkNodes);
            }

            var joined       = Join(blocks);
            var styles       = joined.Styles;
            var mediaQueries = joined.MediaQueries;
            var selectors    = CleanUnsupportedSelectors(styles);
            var matched      = FindElementsWithStyles(selectors);
            var merged       = MergeStyleClasses(matched);

            StyleClassApplier.ApplyAllStyles(merged);

            if (_stripIdAndClassAttributes)
            {
                StripElementAttributes("id", "class");
            }

            if (removeStyleElements && _keepMediaQueries)
            {
                // NOTE(review): the entries are joined with ","; confirm that this is the
                // intended separator for the media-query text placed in the style element.
                var mediaStyle = _document.CreateElement("style");
                mediaStyle.TextContent = String.Join(",", mediaQueries);
                _document.Body.Prepend(mediaStyle);
            }

            if (removeComments)
            {
                // Snapshot the comment nodes first so removal does not happen mid-traversal.
                foreach (var comment in _document.Descendents<IComment>().ToList())
                {
                    comment.Remove();
                }
            }

            var formatter = new AutoSelectedMarkupFormatter(_document.Doctype);

            return new InlineResult(_document.ToHtml(formatter), _warnings);
        }
Example #2
0
        /// <summary>
        /// Moves the CSS that applies to the current HTML document into inline style attributes.
        /// </summary>
        /// <param name="removeStyleElements">If set to <c>true</c>, the nodes the CSS was gathered from are removed from the document.</param>
        /// <param name="ignoreElements">CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.)</param>
        /// <param name="css">A string containing a style-sheet for inlining.</param>
        /// <param name="stripIdAndClassAttributes">If <c>true</c>, the <c>id</c> and <c>class</c> attributes are stripped once the styles have been applied.</param>
        /// <param name="removeComments">If <c>true</c>, every comment node is removed from the document; otherwise comments are left intact.</param>
        /// <param name="precompiledStyles">Optional selector/style pairs; when not <c>null</c>, each entry is added to the selectors gathered from the CSS before elements are matched.</param>
        /// <returns>An <see cref="InlineResult"/> holding the serialized document and the collected warnings.</returns>
        public InlineResult MoveCssInline(bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, SortedList<string, StyleClass> precompiledStyles = null)
        {
            // Keep the requested options on the instance.
            _ignoreElements            = ignoreElements;
            _removeStyleElements       = removeStyleElements;
            _stripIdAndClassAttributes = stripIdAndClassAttributes;

            // Collect every CSS source: document nodes, linked sheets, then the supplied string.
            var styleNodes = CssSourceNodes();
            var linkNodes  = CssLinkNodes();

            var sources = new List<ICssSource>(ConvertToStyleSources(styleNodes));
            sources.AddRange(ConvertToStyleSources(linkNodes));
            sources.AddRange(PreMailer.ConvertToStyleSources(css));

            var blocks = PreMailer.GetCssBlocks(sources);

            // The CSS has been read at this point, so the originating nodes can go if requested.
            if (_removeStyleElements)
            {
                RemoveStyleElements(styleNodes);
                RemoveStyleElements(linkNodes);
            }

            var selectors = CleanUnsupportedSelectors(PreMailer.Join(blocks));

            if (precompiledStyles != null)
            {
                // Entries are added one by one, in the sorted list's own order.
                foreach (var entry in precompiledStyles)
                {
                    selectors.Add(entry.Key, entry.Value);
                }
            }

            var matched = FindElementsWithStyles(selectors);
            var merged  = MergeStyleClasses(matched);

            StyleClassApplier.ApplyAllStyles(merged);

            if (_stripIdAndClassAttributes)
            {
                StripElementAttributes("id", "class");
            }

            if (removeComments)
            {
                // Snapshot the comment nodes first so removal does not happen mid-traversal.
                foreach (var comment in _document.Descendents<IComment>().ToList())
                {
                    comment.Remove();
                }
            }

            var formatter = new AutoSelectedMarkupFormatter(_document.Doctype);

            return new InlineResult(_document.ToHtml(formatter), _warnings);
        }
Example #3
0
        /// <summary>
        /// Moves the CSS that applies to the current HTML document into inline style attributes.
        /// </summary>
        /// <param name="removeStyleElements">If set to <c>true</c>, the nodes the CSS was gathered from are removed from the document.</param>
        /// <param name="ignoreElements">CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.)</param>
        /// <param name="css">A string containing a style-sheet for inlining.</param>
        /// <param name="stripIdAndClassAttributes">If <c>true</c>, the <c>id</c> and <c>class</c> attributes are stripped once the styles have been applied.</param>
        /// <param name="removeComments">If <c>true</c>, every comment node is removed from the document; otherwise comments are left intact.</param>
        /// <returns>An <see cref="InlineResult"/> built from the serialized document and the collected warnings.</returns>
        public InlineResult MoveCssInline(bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false)
        {
            // Keep the requested options on the instance.
            _css                       = css;
            _ignoreElements            = ignoreElements;
            _removeStyleElements       = removeStyleElements;
            _stripIdAndClassAttributes = stripIdAndClassAttributes;

            // Collect every CSS source available in the document.
            var styleNodes = CssSourceNodes();
            var linkNodes  = CssLinkNodes();

            var sources = new List<ICssSource>(ConvertToStyleSources(styleNodes));
            sources.AddRange(ConvertToStyleSources(linkNodes));

            var blocks = GetCssBlocks(sources);

            // The CSS has been read at this point, so the originating nodes can go if requested.
            if (_removeStyleElements)
            {
                RemoveStyleElements(styleNodes);
                RemoveStyleElements(linkNodes);
            }

            var selectors = CleanUnsupportedSelectors(Join(blocks));
            var matched   = FindElementsWithStyles(selectors);
            var merged    = MergeStyleClasses(matched);

            StyleClassApplier.ApplyAllStyles(merged);

            if (_stripIdAndClassAttributes)
            {
                StripElementAttributes("id", "class");
            }

            if (removeComments)
            {
                // Snapshot the comment nodes first so removal does not happen mid-traversal.
                foreach (var comment in _document.Descendents<IComment>().ToList())
                {
                    comment.Remove();
                }
            }

            IMarkupFormatter formatter = GetMarkupFormatterForDocType();

            // Serialize into a writer and hand its underlying StringBuilder to the result.
            using (var writer = new StringWriter())
            {
                _document.ToHtml(writer, formatter);

                return new InlineResult(writer.GetStringBuilder(), _warnings);
            }
        }
Example #4
0
        /// <summary>
        /// Downloads and parses the page at <paramref name="baseUrl"/> and collects the chapter
        /// links found beneath its content container.
        /// </summary>
        /// <param name="baseUrl">Address of the page to download; also passed on to the link collector.</param>
        /// <param name="token">Cancellation token forwarded to the download and the parser.</param>
        /// <returns>
        /// The links collected from the container's descendants, or <c>EmptyLinks</c> when neither
        /// an element matching <c>PageClasses</c> nor an <c>article</c> element exists.
        /// </returns>
        public override async Task<IEnumerable<ChapterLink>> GetChapterLinksAsync(string baseUrl, CancellationToken token = default(CancellationToken))
        {
            string pageHtml = await GetWebPageAsync(baseUrl, token);
            IHtmlDocument document = await Parser.ParseAsync(pageHtml, token);

            // Preferred container: the first element matching one of the page classes.
            IElement container = document.DocumentElement.FirstWhereHasClass(PageClasses);

            if (container == null)
            {
                // Fallback: the first <article> element anywhere in the document.
                container = document.Descendents<IElement>().FirstOrDefault(p => p.LocalName == "article");
            }

            if (container != null)
            {
                return CollectChapterLinks(baseUrl, container.Descendents<IElement>());
            }

            return EmptyLinks;
        }
        /// <summary>
        /// Downloads and parses the page at <paramref name="baseUrl"/> and collects the chapter
        /// links found beneath the first <c>div</c> whose class names match <c>PostClasses</c>.
        /// </summary>
        /// <param name="baseUrl">Address of the page to download; also passed on to the link collector.</param>
        /// <param name="token">Cancellation token forwarded to the download and the parser.</param>
        /// <returns>
        /// The links collected from the matching element's descendants, or an empty list when no
        /// <c>div</c> matches.
        /// </returns>
        public override async Task<IEnumerable<ChapterLink>> GetChapterLinksAsync(string baseUrl, CancellationToken token = default(CancellationToken))
        {
            string pageHtml = await GetWebPageAsync(baseUrl, token);
            IHtmlDocument document = await Parser.ParseAsync(pageHtml, token);

            // A div qualifies when any of its space-separated class names contains
            // (case-insensitively) any of the configured post classes.
            IElement postElement = document.Descendents<IElement>()
                                           .Where(e => e.LocalName == "div" && e.HasAttribute("class"))
                                           .FirstOrDefault(e =>
                                           {
                                               string[] classNames = e.GetAttribute("class").Split(' ');

                                               return PostClasses.Any(cl => classNames.Any(p => p.IndexOf(cl, StringComparison.OrdinalIgnoreCase) >= 0));
                                           });

            if (postElement != null)
            {
                return CollectChapterLinks(baseUrl, postElement.Descendents<IElement>());
            }

            return new List<ChapterLink>();
        }
Example #6
0
        /// <summary>
        /// Downloads the page behind <paramref name="link"/> and returns the inner HTML of its
        /// <c>mw-content-text</c> element after rewriting links and linked images.
        /// </summary>
        /// <param name="link">Chapter link whose <c>Url</c> is downloaded.</param>
        /// <param name="options">Retrieval options; not read by this implementation.</param>
        /// <param name="token">Cancellation token forwarded to every download and parse call.</param>
        /// <returns>
        /// A <see cref="WebNovelChapter"/> with the link's URL and the processed content, or
        /// <c>null</c> when the page has no <c>mw-content-text</c> element.
        /// </returns>
        public override async Task<WebNovelChapter> GetChapterAsync(ChapterLink link,
                                                                    ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
                                                                    CancellationToken token = default(CancellationToken))
        {
            string pageHtml = await GetWebPageAsync(link.Url, token);
            IHtmlDocument document = await Parser.ParseAsync(pageHtml, token);

            IElement content = document.GetElementById("mw-content-text");

            if (content == null)
            {
                return null;
            }

            // Drop the element with id "toc", if the page has one.
            IElement toc = document.GetElementById("toc");

            if (toc != null)
            {
                toc.Remove();
            }

            RemoveTables(content);

            // Lazily walk every anchor that carries an href.
            var anchors = content.Descendents<IElement>()
                                 .Where(p => p.LocalName == "a" && p.HasAttribute("href"));

            foreach (IElement anchor in anchors)
            {
                // Make the (HTML-decoded) link target absolute.
                string decodedHref = WebUtility.HtmlDecode(anchor.GetAttribute("href"));

                anchor.SetAttribute("href", UrlHelper.ToAbsoluteUrl(BaseUrl, decodedHref));

                IElement image = anchor.Descendents<IElement>().FirstOrDefault(p => p.LocalName == "img");

                if (image == null)
                {
                    continue;
                }

                // Keep only width/height on the image; the list is materialized before removal.
                var unwantedAttributes = image.Attributes
                                              .Where(p => p.LocalName != "width" && p.LocalName != "height")
                                              .ToList();

                foreach (var attribute in unwantedAttributes)
                {
                    image.RemoveAttribute(attribute.Name);
                }

                // Follow the anchor to the linked page and look for the full-size image link there.
                string imagePageUrl  = anchor.GetAttribute("href");
                string imagePageHtml = await GetWebPageAsync(imagePageUrl, token);

                IHtmlDocument imagePage = await Parser.ParseAsync(imagePageHtml, token);

                // First div with class exactly "fullMedia", projected to its first anchor (may be null).
                IElement fullImageAnchor = imagePage.Descendents<IElement>()
                                                    .Where(e => e.LocalName == "div"
                                                                && e.HasAttribute("class")
                                                                && e.GetAttribute("class") == "fullMedia")
                                                    .Select(e => e.Descendents<IElement>().FirstOrDefault(p => p.LocalName == "a"))
                                                    .FirstOrDefault();

                if (fullImageAnchor != null && fullImageAnchor.HasAttribute("href"))
                {
                    string fullImageUrl = fullImageAnchor.GetAttribute("href");

                    image.SetAttribute("src", UrlHelper.ToAbsoluteUrl(BaseUrl, fullImageUrl));
                }
            }

            return new WebNovelChapter
            {
                Url = link.Url,
                Content = content.InnerHtml
            };
        }