/// <summary>
/// Downloads the page behind <paramref name="link"/>, parses it and builds a chapter from it.
/// </summary>
/// <param name="link">Chapter link; only its <c>Url</c> is read here.</param>
/// <param name="options">Not used by this implementation.</param>
/// <param name="token">Cancellation token passed to the download and the parser.</param>
/// <returns>A chapter holding the title text, the cleaned content and the "next" pager URL (null when absent).</returns>
public override async Task <WebNovelChapter> GetChapterAsync(ChapterLink link, ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
                                                                     CancellationToken token = default(CancellationToken))
        {
            string        html = await GetWebPageAsync(link.Url, token);
            IHtmlDocument doc  = await Parser.ParseAsync(html, token);

            IElement titleElement   = doc.DocumentElement.QuerySelector(".chapter-title");
            IElement chapterElement = doc.DocumentElement.QuerySelector(".chapter-body");

            // Give every "sentence.translated" element an empty P as its last child,
            // so the joined markup below carries a paragraph marker after each sentence.
            foreach (IElement sentence in chapterElement.QuerySelectorAll("sentence.translated").ToList())
            {
                sentence.AppendChild(doc.CreateElement("P"));
            }

            // Collect the inner markup of all translated sentences into one detached P element.
            IElement contentEl      = doc.CreateElement("P");
            var      translatedHtml = chapterElement
                                      .QuerySelectorAll("sentence.translated")
                                      .Select(sentence => sentence.InnerHtml);

            contentEl.InnerHtml = string.Join("", translatedHtml);
            RemoveSpecialTags(doc, contentEl);

            string nextChapter = doc.QuerySelector("ul.pager > li.next > a")?.GetAttribute("href");

            var chapter = new WebNovelChapter();

            chapter.ChapterName    = titleElement?.GetInnerText();
            chapter.Content        = new ContentCleanup(BaseUrl).Execute(doc, contentEl);
            chapter.NextChapterUrl = nextChapter;
            return(chapter);
        }
        /// <summary>
        /// Creates a hidden <c>input</c> element with the given name and value.
        /// </summary>
        /// <param name="document">Document used to create the element.</param>
        /// <param name="name">Value of the element's <c>name</c> attribute.</param>
        /// <param name="value">Value of the element's <c>value</c> attribute.</param>
        /// <returns>The new element; this method does not insert it into the document tree.</returns>
        public static INode CreateHidden(this IHtmlDocument document, string name, string value)
        {
            // Exactly one element is created; the original also created a second,
            // immediately discarded "input" element.
            var hiddenInput = document.CreateElement("input");

            hiddenInput.SetAttribute("name", name);
            hiddenInput.SetAttribute("value", value);
            hiddenInput.SetAttribute("type", "hidden");
            return(hiddenInput);
        }
Example #3
0
        /// <summary>
        /// Rewrites the "#pageBar" navigation: the first and last cells of the "pages_str" row
        /// get placeholder previous/next links, the first cell of the last row is emptied and
        /// tagged "tdPages", and a script that hands the serialized posts to the page is added to the head.
        /// </summary>
        /// <param name="indexDoc">Document whose page bar and head are modified.</param>
        /// <param name="postStrings">Post strings; they are serialized in reverse order.</param>
        private void SetNav(IHtmlDocument indexDoc, IEnumerable <string> postStrings)
        {
            var pagerCells = indexDoc.QuerySelectorAll("#pageBar tr.pages_str td");
            var prevCell   = pagerCells.First();
            var nextCell   = pagerCells.Last();

            // "Previous" link replaces whatever the first cell contained.
            var prevLink = indexDoc.CreateElement("a");

            prevLink.TextContent = "< предыдущая";
            prevLink.SetAttribute("href", "#");
            prevLink.Id        = "anchorPrev";
            prevCell.InnerHtml = "";
            prevCell.AppendChild(prevLink);

            // "Next" link replaces whatever the last cell contained.
            var nextLink = indexDoc.CreateElement("a");

            nextLink.TextContent = "следующая >";
            nextLink.SetAttribute("href", "#");
            nextLink.Id        = "anchorNext";
            nextCell.InnerHtml = "";
            nextCell.AppendChild(nextLink);

            // The page-number cell is only emptied and given an id here;
            // no page links are generated by this method.
            var pagesCell = indexDoc.QuerySelectorAll("#pageBar tr").Last().QuerySelector("td");

            pagesCell.InnerHtml = "";
            pagesCell.Id        = "tdPages";

            var jsonSettings = new JsonSerializerSettings
            {
                ContractResolver = new CamelCasePropertyNamesContractResolver(),
                Formatting       = Formatting.Indented
            };
            var serializedData = JsonConvert.SerializeObject(postStrings.Reverse(), jsonSettings);

            // Inline script: publishes the posts and starts paging once the DOM is ready.
            var script = indexDoc.CreateElement("script");

            script.SetAttribute("type", "text/javascript");
            script.InnerHtml = $@"
            $(function(){{
                postStrings = {serializedData};
                initPages({Constants.ArchivePageSize});
            }});
                
            ";
            indexDoc.QuerySelector("head").AppendChild(script);
        }
Example #4
0
        /// <summary>
        /// Builds a <see cref="Post"/> from a post element.
        /// </summary>
        /// <param name="parent">Document used to create replacement elements.</param>
        /// <param name="doc">The post element; its id is expected to be "post" followed by the numeric post id.</param>
        /// <returns>
        /// The parsed post. <c>PostHtml</c> is set only when a ".postbody" element exists;
        /// <c>IsIgnored</c> is set when the body contains the "DON'T DO IT!!" link.
        /// </returns>
        public static Post ParsePost(IHtmlDocument parent, IElement doc)
        {
            var post = new Post();

            post.User   = UserHandler.ParseUserFromPost(doc);
            post.PostId = Convert.ToInt64(doc.Id.Replace("post", ""));

            // Drop the author cell from the element. A post without one is tolerated
            // instead of failing with a NullReferenceException.
            doc.QuerySelector(@"[class*=""userid""]")?.Remove();

            post.HasSeen = doc.QuerySelector(@"[class=""seen1""]") != null || doc.QuerySelector(@"[class=""seen2""]") != null;

            var threadBody = doc.QuerySelector(".postbody");

            if (threadBody != null)
            {
                var jerkBody = threadBody.QuerySelector(@"a[title=""DON'T DO IT!!""]");
                if (jerkBody != null)
                {
                    post.IsIgnored = true;
                }
                else
                {
                    // Swap every imgur GIF for a wrapper holding a static "h.jpg" poster image;
                    // the original URL is kept in a data attribute.
                    var imgurGifs = threadBody.QuerySelectorAll(@"[src*=""imgur.com""][src*="".gif""]");
                    for (var i = 0; i < imgurGifs.Length; i++)
                    {
                        var imgurGif    = imgurGifs[i];
                        var originalUrl = imgurGif.GetAttribute("src");
                        var posterUrl   = originalUrl.Replace(".gif", "h.jpg");

                        var div = parent.CreateElement("div");
                        div.ClassList.Add("gifWrap");

                        var newImgur = parent.CreateElement("img");
                        newImgur.ClassList.Add("imgurGif");
                        newImgur.SetAttribute("data-originalurl", originalUrl);
                        newImgur.SetAttribute("data-posterurl", posterUrl);
                        newImgur.SetAttribute("src", posterUrl);

                        div.AppendChild(newImgur);
                        imgurGif.Replace(div);
                    }

                    // Attachment sources are prefixed with the forum host.
                    var attachments = threadBody.QuerySelectorAll(@"[src*=""attachment.php""]");
                    foreach (var attachment in attachments)
                    {
                        attachment.SetAttribute("src", $"https://forums.somethingawful.com/{attachment.GetAttribute("src")}");
                    }
                }
                post.PostHtml = HtmlEncode(threadBody.InnerHtml);
            }

            return(post);
        }
Example #5
0
        /// <summary>
        /// Walks the element children of <paramref name="parentElement"/>. Every two-character
        /// "hN" tag with N greater than 1 is registered in the tree and gets a named anchor
        /// prepended to its content; any other element with child nodes is searched recursively.
        /// </summary>
        /// <param name="document">Document used to create the anchor elements.</param>
        /// <param name="parentElement">Element whose children are inspected.</param>
        private void ParseHTagsAndAddAnchors(IHtmlDocument document, IElement parentElement)
        {
            foreach (IElement child in parentElement.Children)
            {
                string lowerName   = child.NodeName.ToLower();
                string headingText = child.TextContent;

                bool looksLikeHeading = lowerName.StartsWith("h") && lowerName.Length == 2;

                if (!looksLikeHeading)
                {
                    if (child.HasChildNodes)
                    {
                        ParseHTagsAndAddAnchors(document, child);
                    }
                    continue;
                }

                // The digit after "h" is the tree level; a failed parse leaves the level at 0.
                int level;
                int.TryParse(lowerName.Replace("h", ""), out level);

                // Levels 0 and 1 (bad markup, or H1) are not added to the tree.
                if (level <= 1)
                {
                    continue;
                }

                Item treeItem = _tree.AddItemAtLevel(level, headingText);

                // Put a named anchor at the start of the heading's own content as a jump target.
                IElement anchor = document.CreateElement("a");
                anchor.SetAttribute("name", treeItem.Id);

                child.InnerHtml = anchor.OuterHtml + child.InnerHtml;
            }
        }
Example #6
0
        /// <summary>
        /// Replaces every <c>h{from}</c> element with an <c>h{to}</c> element that keeps the
        /// inner markup and the text alignment style. When <paramref name="to"/> is 5 the
        /// heading is handed to <c>ReplaceChildElementByText</c> instead.
        /// </summary>
        /// <param name="document">Document that is searched and used to create elements.</param>
        /// <param name="from">Heading level to look for.</param>
        /// <param name="to">Heading level to convert to.</param>
        protected virtual void TransformHeadings(IHtmlDocument document, int from, int to)
        {
            foreach (var heading in document.QuerySelectorAll($"h{from}"))
            {
                var container = heading.Parent;

                if (to == 5)
                {
                    ReplaceChildElementByText(container, heading, document);
                    continue;
                }

                var replacement = document.CreateElement($"h{to}");
                replacement.InnerHtml = heading.InnerHtml;

                // Carry over the text alignment, when the source heading declares one.
                var alignment = heading.Style?.TextAlign;
                if (!string.IsNullOrEmpty(alignment))
                {
                    replacement.Style.TextAlign = alignment;
                }

                container.ReplaceChild(replacement, heading);
            }
        }
Example #7
0
        /// <summary>
        /// Moves the CSS that applies to the current document into inline attributes.
        /// </summary>
        /// <param name="removeStyleElements">When <c>true</c>, the style and link nodes the CSS was read from are removed.</param>
        /// <param name="ignoreElements">CSS selector for STYLE elements to ignore (e.g. mobile-specific styles).</param>
        /// <param name="css">A string containing an additional style sheet for inlining.</param>
        /// <param name="stripIdAndClassAttributes">When <c>true</c>, "id" and "class" attributes are stripped.</param>
        /// <param name="removeComments">When <c>true</c>, comment nodes are removed from the document.</param>
        /// <param name="keepMediaQueries">When <c>true</c> together with <paramref name="removeStyleElements"/>, the media queries are written back in a new style element.</param>
        /// <returns>The serialized document together with the collected warnings.</returns>
        public InlineResult MoveCssInline(bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, bool keepMediaQueries = false)
        {
            // Remember the options; the helper methods read them from the fields.
            _removeStyleElements       = removeStyleElements;
            _stripIdAndClassAttributes = stripIdAndClassAttributes;
            _ignoreElements            = ignoreElements;
            _keepMediaQueries          = keepMediaQueries;
            _css = css;

            // Collect every CSS source: source nodes first, then link nodes.
            var styleNodes = CssSourceNodes();
            var linkNodes  = CssLinkNodes();
            var sources    = new List <ICssSource>(ConvertToStyleSources(styleNodes));

            sources.AddRange(ConvertToStyleSources(linkNodes));

            var blocks = GetCssBlocks(sources);

            if (_removeStyleElements)
            {
                RemoveStyleElements(styleNodes);
                RemoveStyleElements(linkNodes);
            }

            // Join the blocks, keep the selectors that can be used, and apply the merged styles.
            var joined         = Join(blocks);
            var mediaQueries   = joined.MediaQueries;
            var usableStyles   = CleanUnsupportedSelectors(joined.Styles);
            var styledElements = FindElementsWithStyles(usableStyles);

            StyleClassApplier.ApplyAllStyles(MergeStyleClasses(styledElements));

            if (_stripIdAndClassAttributes)
            {
                StripElementAttributes("id", "class");
            }

            // The style elements are gone, so the media queries are put back at the top of the body.
            if (removeStyleElements && _keepMediaQueries)
            {
                var mediaStyle = _document.CreateElement("style");
                mediaStyle.TextContent = String.Join(",", mediaQueries);
                _document.Body.Prepend(mediaStyle);
            }

            if (removeComments)
            {
                // Snapshot first: removing nodes while walking the descendants is not safe.
                foreach (var commentNode in _document.Descendents <IComment>().ToList())
                {
                    commentNode.Remove();
                }
            }

            var markup = _document.ToHtml(new AutoSelectedMarkupFormatter(_document.Doctype));

            return(new InlineResult(markup, _warnings));
        }
Example #8
0
        /// <summary>
        /// Appends a <c>script</c> element with the given type and text content to the document head.
        /// </summary>
        /// <param name="doc">Document whose head receives the element.</param>
        /// <param name="text">Text content of the script element.</param>
        /// <param name="type">Value of the element's <c>type</c> attribute.</param>
        public static void SaveDefault(IHtmlDocument doc, string text, string type)
        {
            var holder = doc.CreateElement("script");

            holder.SetAttribute("type", type);
            holder.TextContent = text;

            doc.Head.AppendChild(holder);
        }
Example #9
0
        /// <summary>
        /// Applies the stored meta-element commands to the document head. The current meta
        /// elements are first passed to <c>SaveDefault</c> under "text/default-meta-elements".
        /// </summary>
        /// <param name="store">Source of the meta-element commands; nothing happens when it has none.</param>
        /// <param name="doc">Document whose head is read and modified.</param>
        private void SetMetaElements(IHeadElementHelperStore store, IHtmlDocument doc)
        {
            if (store.MetaElementCommands.Count == 0)
            {
                return;
            }

            var metaTags = doc.Head.QuerySelectorAll("meta[name],meta[property],meta[http-equiv]").Cast <IHtmlMetaElement>().ToList();

            var metaElements = metaTags.Select(m => new MetaElement
            {
                Name      = m.Name ?? "",
                Property  = m.GetAttribute("property") ?? "",
                HttpEquiv = m.HttpEquivalent ?? "",
                Content   = m.Content
            });

            SaveDefault(doc, metaElements, "text/default-meta-elements");

            foreach (var cmd in store.MetaElementCommands)
            {
                // A command targets the first tag that matches any of its non-empty keys.
                var meta = metaTags.FirstOrDefault(m =>
                                                   (cmd.Element.Name != "" && cmd.Element.Name == m.Name) ||
                                                   (cmd.Element.Property != "" && cmd.Element.Property == m.GetAttribute("property")) ||
                                                   (cmd.Element.HttpEquiv != "" && cmd.Element.HttpEquiv == m.HttpEquivalent));

                if (cmd.Operation == MetaElementOperations.Set)
                {
                    if (meta == null)
                    {
                        // Direct cast: an unexpected element type fails here with a clear
                        // InvalidCastException rather than a later NullReferenceException.
                        meta = (IHtmlMetaElement)doc.CreateElement("meta");
                        if (cmd.Element.Name != "")
                        {
                            meta.Name = cmd.Element.Name;
                        }
                        if (cmd.Element.Property != "")
                        {
                            meta.SetAttribute("property", cmd.Element.Property);
                        }
                        if (cmd.Element.HttpEquiv != "")
                        {
                            meta.HttpEquivalent = cmd.Element.HttpEquiv;
                        }
                        doc.Head.AppendChild(meta);
                        metaTags.Add(meta);
                    }
                    meta.Content = cmd.Element.Content;
                }
                else if (cmd.Operation == MetaElementOperations.Remove)
                {
                    if (meta != null)
                    {
                        // The selector above matches any descendant of the head, so the tag is
                        // detached from its actual parent; Head.RemoveChild would throw for a
                        // meta that is not a direct child of the head.
                        meta.Remove();
                        metaTags.Remove(meta);
                    }
                }
            }
        }
Example #10
0
        /// <summary>
        /// Removes every existing <c>script</c> element and appends two external script
        /// references to the head: the jQuery file first, then the diary script file.
        /// </summary>
        /// <param name="doc">Document to modify.</param>
        /// <param name="dirPrefix">Prefix placed in front of the account pages directory in each script URL.</param>
        private void AddScripts(IHtmlDocument doc, string dirPrefix)
        {
            foreach (var existing in doc.QuerySelectorAll("script"))
            {
                existing.Remove();
            }

            string jquerySrc = dirPrefix + Constants.AccountPagesDir + "/" + Constants.JQueryFileName;
            string diarySrc  = dirPrefix + Constants.AccountPagesDir + "/" + Constants.DiaryJsFileName;

            // Order matters for the page: jQuery is appended before the diary script.
            foreach (var src in new[] { jquerySrc, diarySrc })
            {
                var reference = doc.CreateElement("script");

                reference.SetAttribute("type", "text/javascript");
                reference.SetAttribute("src", src);
                doc.QuerySelector("head").AppendChild(reference);
            }
        }
Example #11
0
        /// <summary>
        /// Converts block quotes whose markup contains "margin:0px 0px 0px 40px" into
        /// <c>p</c> elements with a left margin of 40px per nesting level.
        /// </summary>
        /// <remarks>
        /// A matching quote whose first child element is another blockquote only raises the
        /// nesting level. The first matching quote without such a child is turned into the
        /// paragraph, which replaces the outermost quote of the chain for levels 1 to 4;
        /// for deeper levels nothing is replaced. The level is then reset to 1.
        /// </remarks>
        /// <param name="blockQuotes">Block quote elements to inspect, in iteration order.</param>
        /// <param name="document">Document used to create the paragraphs.</param>
        protected virtual void TransformBlockQuotes(IHtmlCollection <IElement> blockQuotes, IHtmlDocument document)
        {
            int   depth        = 1;
            INode lastWrapping = null;

            foreach (var quote in blockQuotes)
            {
                var quoteParent = quote.Parent;

                if (!quote.OuterHtml.ToLower().Contains("margin:0px 0px 0px 40px"))
                {
                    continue;
                }

                bool wrapsAnotherQuote = quote.ChildElementCount > 0 && quote.Children[0].TagName.ToLower() == "blockquote";
                if (wrapsAnotherQuote)
                {
                    // Remember the most recent wrapping quote and go one level deeper.
                    lastWrapping = quote;
                    depth++;
                    continue;
                }

                var paragraph = document.CreateElement("p");
                // Drop P as nested P is not allowed in clean html
                // TODO: do this in a better way
                paragraph.InnerHtml = quote.InnerHtml.Replace("<p>", "").Replace("</p>", "").Replace("<P>", "").Replace("</P>", "");
                paragraph.SetAttribute("style", $"margin-left:{depth * 40}px;");

                if (depth == 1)
                {
                    quoteParent.ReplaceChild(paragraph, quote);
                }
                else if (depth <= 4)
                {
                    // Climb (depth - 2) parents from the last wrapping quote to reach the
                    // outermost quote of the chain, then replace that one.
                    INode outermost = lastWrapping;
                    for (int hops = depth - 2; hops > 0; hops--)
                    {
                        outermost = outermost.Parent;
                    }
                    outermost.Parent.ReplaceChild(paragraph, outermost);
                }

                depth = 1;
            }
        }
Example #12
0
        /// <summary>
        /// Wraps the text nodes that are direct children of <paramref name="target"/> in
        /// <c>p</c> elements, using the trimmed and normalized text as the paragraph content.
        /// </summary>
        /// <param name="target">Element whose direct child text nodes are converted.</param>
        private void PostProcessTransformDanglingTextToPElem(IElement target)
        {
            // ChildNodes is used because Children only contains element nodes, so a
            // NodeType.Text filter on Children can never match anything.
            // The list is a snapshot: nodes are replaced while iterating.
            var textNodes = target.ChildNodes.Where(n => n.NodeType == NodeType.Text).ToList();

            foreach (var textNode in textNodes)
            {
                // Whitespace between elements is not "dangling text"; wrapping it
                // would only produce empty paragraphs.
                if (string.IsNullOrWhiteSpace(textNode.TextContent))
                {
                    continue;
                }

                var newElem = rootDoc.CreateElement("p");
                newElem.TextContent = textNode.TextContent.RegexTrimAndNormalize();

                textNode.Parent?.ReplaceChild(newElem, textNode);
            }
        }
Example #13
0
        /// <summary>
        /// Converts each &lt;a&gt; and &lt;img&gt; uri in the given element, and its descendants, to an absolute URI.
        /// Links whose href starts with "javascript:" are replaced by their content instead.
        /// Elements with a missing or blank href/src are left untouched.
        /// </summary>
        /// <param name="articleContent">The node in which to fix all relative uri</param>
        /// <param name="uri">The base uri</param>
        /// <param name="doc">The document used to create replacement nodes</param>
        internal static void FixRelativeUris(IElement articleContent, Uri uri, IHtmlDocument doc)
        {
            var links = NodeUtility.GetAllNodesWithTag(articleContent, new string[] { "a" });

            NodeUtility.ForEachNode(links, (link) =>
            {
                var anchor = (IElement)link;
                var href   = anchor.GetAttribute("href");
                if (String.IsNullOrWhiteSpace(href))
                {
                    return;
                }

                // Ordinal prefix test: a culture-sensitive IndexOf may treat ignorable
                // characters as a match at position 0.
                if (!href.StartsWith("javascript:", StringComparison.Ordinal))
                {
                    anchor.SetAttribute("href", uri.ToAbsoluteURI(href));
                    return;
                }

                // Remove links with javascript: URIs, since
                // they won't work after scripts have been removed from the page.
                if (link.ChildNodes.Length == 1 && link.ChildNodes[0].NodeType == NodeType.Text)
                {
                    // if the link only contains simple text content, it can be converted to a text node
                    var text = doc.CreateTextNode(link.TextContent);
                    link.Parent.ReplaceChild(text, link);
                }
                else
                {
                    // if the link has multiple children, they should all be preserved
                    var container = doc.CreateElement("span");
                    while (link.ChildNodes.Length > 0)
                    {
                        // AppendChild moves the node, so the first child changes every pass.
                        container.AppendChild(link.ChildNodes[0]);
                    }
                    link.Parent.ReplaceChild(container, link);
                }
            });

            var imgs = NodeUtility.GetAllNodesWithTag(articleContent, new string[] { "img" });

            NodeUtility.ForEachNode(imgs, (img) =>
            {
                var image = (IElement)img;
                var src   = image.GetAttribute("src");
                if (!String.IsNullOrWhiteSpace(src))
                {
                    image.SetAttribute("src", uri.ToAbsoluteURI(src));
                }
            });
        }
Example #14
0
 /// <summary>
 /// Replaces every descendant of <paramref name="element"/> that
 /// <c>IsUntransformableBlockElement</c> reports, and that still has a parent element,
 /// with a <c>div</c> holding the same child nodes.
 /// </summary>
 /// <param name="element">Root whose descendants are inspected.</param>
 /// <param name="document">Document used to create the replacement divs.</param>
 private void CleanHtml(IElement element, IHtmlDocument document)
 {
     // Work on a snapshot: the tree is modified while it is being walked.
     var descendants = element.QuerySelectorAll("*").ToList();

     foreach (var candidate in descendants)
     {
         if (candidate.ParentElement == null || !IsUntransformableBlockElement(candidate))
         {
             continue;
         }

         // Move the children over one at a time; AppendChild detaches each from the old node.
         var wrapper = document.CreateElement("div");
         while (candidate.HasChildNodes)
         {
             wrapper.AppendChild(candidate.FirstChild);
         }

         // Put the div where the unsupported node was.
         candidate.ParentElement.ReplaceChild(wrapper, candidate);
     }
 }
Example #15
0
 /// <summary>
 /// Follows the chain of single child elements starting at <paramref name="ell"/> for as long
 /// as each tag is listed in <c>voidptags</c>. When the chain ends in an element with no child
 /// elements and only whitespace text, a <c>br</c> with class "breakline" is appended to it.
 /// </summary>
 /// <param name="doc">Document used to create the <c>br</c> element.</param>
 /// <param name="ell">Element at which the check starts.</param>
 private static void DetectVoidParagraph(IHtmlDocument doc, IElement ell)
 {
     IElement current = ell;

     while (voidptags.Contains(current.TagName.ToLower()))
     {
         if (current.Children.Length == 0 && string.IsNullOrWhiteSpace(current.TextContent))
         {
             var lineBreak = doc.CreateElement("br");
             lineBreak.ClassName = "breakline";
             current.AppendChild(lineBreak);
             return;
         }

         // Only a lone child element is followed; anything else ends the search.
         if (current.Children.Length != 1)
         {
             return;
         }

         current = current.Children[0];
     }
 }
Example #16
0
        /// <summary>
        /// Replaces every <c>h{from}</c> element with an <c>h{to}</c> element that carries only
        /// the text content. When <paramref name="to"/> is 5 the heading is handed to
        /// <c>ReplaceChildElementByText</c> instead.
        /// </summary>
        /// <param name="document">Document that is searched and used to create elements.</param>
        /// <param name="from">Heading level to look for.</param>
        /// <param name="to">Heading level to convert to.</param>
        protected virtual void TransformHeadings(IHtmlDocument document, int from, int to)
        {
            foreach (var heading in document.QuerySelectorAll($"h{from}"))
            {
                var owner = heading.Parent;

                if (to == 5)
                {
                    ReplaceChildElementByText(owner, heading, document);
                    continue;
                }

                // Only the text is copied; nested markup inside the heading is dropped.
                var retagged = document.CreateElement($"h{to}");
                retagged.TextContent = heading.TextContent;
                owner.ReplaceChild(retagged, heading);
            }
        }
Example #17
0
        /// <summary>
        /// Rewrites parts of the page: prepends a warning to every form that has a password
        /// input, turns the inactive "Encyklopedie" menu link into a "Kurzy" link to "/kurz"
        /// (marking it active when <paramref name="path"/> is "kurz"), and changes the logo text.
        /// </summary>
        /// <param name="document">Document to modify.</param>
        /// <param name="path">Current path; compared against "kurz".</param>
        public void ModifyTree(IHtmlDocument document, string path)
        {
            // Forms without a password field are left alone.
            var passwordForms = document.QuerySelectorAll("form")
                                .Where(f => !(f.QuerySelector("input[type=password]") is null));

            foreach (var form in passwordForms)
            {
                var banner = document.CreateElement("div");
                banner.SetAttribute("style", "color: red; font-size: 3em; font-weight: bold");
                banner.TextContent = "Web běží na magické proxy, které byste měli věřit!!!";
                form.Prepend(banner);
            }

            var menuLink = document.QuerySelectorAll("#menu ul li:not(.active) a")
                           .FirstOrDefault(a => a.TextContent.Trim() == "Encyklopedie");

            if (!(menuLink is null))
            {
                menuLink.TextContent = "Kurzy";
                menuLink.SetAttribute("href", "/kurz");

                if ("kurz" == path)
                {
                    // Move the "active" marker from the current items to the rewritten link's parent.
                    foreach (var activeItem in document.QuerySelectorAll("#menu ul li.active"))
                    {
                        activeItem.ClassList.Remove("active");
                    }
                    menuLink.ParentElement.ClassList.Add("active");
                }
            }

            var logoLink = document.QuerySelector("#logo > h1:nth-child(1) > a:nth-child(1)");

            if (!(logoLink is null))
            {
                logoLink.TextContent = "KSP Hacked Edition";
            }
        }
Example #18
0
        /// <summary>
        /// Inserts a <c>P</c> element with a placeholder text in front of every <c>img</c> and
        /// <c>iframe</c> element that has a parent. The text names the element kind and its
        /// "src" value (empty when there is no such attribute).
        /// </summary>
        /// <param name="document">Document that is searched and modified.</param>
        protected virtual void ImageIFramePlaceHolders(IHtmlDocument document)
        {
            var targets = document.QuerySelectorAll("img").Union(document.QuerySelectorAll("iframe"));

            foreach (var target in targets)
            {
                var srcAttribute = target.Attributes.FirstOrDefault(a => a.Name.Equals("src", StringComparison.InvariantCultureIgnoreCase));
                string srcValue  = srcAttribute != null ? srcAttribute.Value : "";

                string kind = target is IHtmlImageElement
                              ? "Image"
                              : (target is IHtmlInlineFrameElement ? "IFrame" : "");

                // The placeholder goes directly before the image or iframe it describes.
                var marker = document.CreateElement("P");
                marker.TextContent = $"***{kind} placeholder for source {srcValue}***";

                target.Parent?.InsertBefore(marker, target);
            }
        }
        /// <summary>
        /// Pulls the content behind the "a.LinkMore" links into <paramref name="doc"/>.
        /// </summary>
        /// <remarks>
        /// In <c>OnDemand</c> mode each link's <c>onclick</c> attribute supplies the request URL;
        /// the downloaded script's <c>innerHTML</c> payload is written into the span with id
        /// "more" + part name. In <c>FullPage</c> mode the page behind the first link is
        /// downloaded once and, for every link, the nodes between the named start and end
        /// anchors are copied into a new hidden span placed after the link.
        /// </remarks>
        /// <param name="doc">Document to modify.</param>
        /// <returns>
        /// <c>false</c> when the more type is <c>Preloaded</c> or no usable links exist;
        /// otherwise <c>true</c>.
        /// </returns>
        protected async Task <bool> FixMore(IHtmlDocument doc)
        {
            // NOTE(review): presumably this populates _moreType, which is read below - confirm.
            await DetectMoreType();

            if (this._moreType == DiaryMoreLinksType.Preloaded)
            {
                return(false);
            }

            var moreLinks = doc.QuerySelectorAll("a.LinkMore");

            // Links without an href, or pointing at "#more", carry nothing to load.
            var actualLinks = (from moreLink in moreLinks
                               let href = moreLink.GetAttribute("href")
                               where !string.IsNullOrEmpty(href) && href.ToLower() != "#more"
                               select moreLink).ToList();

            if (actualLinks.Count <= 0)
            {
                return(false);
            }

            if (_moreType == DiaryMoreLinksType.OnDemand)
            {
                // The onclick handler holds two quoted strings: the part name first, then the path.
                // A link without an onclick attribute is treated as having no matches instead of
                // making Regex.Matches throw on a null input.
                var dataToLoad = (from link in actualLinks
                                  let matches = Regex.Matches(link.GetAttribute("onclick") ?? string.Empty, @"\""([^\""]*)\""")
                                  where matches.Count > 1
                                  select new
                                  {
                                      LinkElement  = link,
                                      Url          = $"http://{_diaryName}.diary.ru{matches[1].Groups[1].Value}?post={matches[0].Groups[1].Value}&js",
                                      MorePartName = matches[0].Groups[1].Value
                                  }).ToList();

                var resources = dataToLoad.Select(d => new DownloadResource {
                    Url = d.Url
                });
                var downloadResults = await _dataDownloader.Download(resources);

                // Pair every request with its download result by URL.
                var results = (from d in dataToLoad
                               from r in downloadResults
                               where d.Url == r.Resource.Url
                               select new { d.LinkElement, d.Url, r.DownloadedData, d.MorePartName })
                              .ToList();

                foreach (var r in results)
                {
                    var match = Regex.Match(r.DownloadedData.AsAnsiString(), @"innerHTML\s*=\s*'([^']*)'");
                    if (!match.Success)
                    {
                        continue;
                    }
                    var htmlText    = match.Groups[1].Value;
                    var spanId      = $"more{r.MorePartName}";
                    var spanElement = doc.QuerySelector($"#{spanId}");
                    if (spanElement == null)
                    {
                        continue;
                    }
                    spanElement.InnerHtml = htmlText;
                }
            }
            else if (_moreType == DiaryMoreLinksType.FullPage)
            {
                // Only the first link's page is downloaded; every link is resolved against it.
                var resource = new DownloadResource {
                    Url = actualLinks[0].GetAttribute("href")
                };
                var downloadResult = await _dataDownloader.Download(resource, false, 1000);

                var docFull = await _parser.ParseAsync(downloadResult.DownloadedData.AsAnsiString());

                foreach (var link in actualLinks)
                {
                    var match = Regex.Match(link.GetAttribute("href"), @"\/p(\d*).html?\?oam#(.*)$");
                    if (!match.Success)
                    {
                        continue;
                    }
                    var postNum  = match.Groups[1].Value;
                    var moreName = match.Groups[2].Value;

                    // The span id below drops the first four characters of the fragment;
                    // a shorter fragment cannot be mapped, so the link is skipped rather
                    // than letting Substring throw.
                    if (moreName.Length < 4)
                    {
                        continue;
                    }

                    var elementStart = docFull.QuerySelector($"a[name='{moreName}']");
                    var elementEnd   = docFull.QuerySelector($"a[name='{moreName}end']");
                    if (elementStart == null || elementEnd == null)
                    {
                        continue;
                    }

                    // Gather the siblings between the start and end anchors into a scratch div
                    // so their markup can be read as one string.
                    var newDiv = docFull.CreateElement("div");
                    elementStart.Before(newDiv);
                    var nodesToCopy = new List <INode>();
                    var currentNode = elementStart.NextSibling;
                    while (currentNode != null)
                    {
                        if (currentNode == elementEnd)
                        {
                            break;
                        }
                        nodesToCopy.Add(currentNode);
                        currentNode = currentNode.NextSibling;
                    }

                    foreach (var el in nodesToCopy)
                    {
                        newDiv.AppendChild(el);
                    }

                    var moreHtml = newDiv.InnerHtml;

                    var moreSpanId  = "more" + postNum + "m" + moreName.Substring(4);
                    var newMoreSpan = doc.CreateElement("span");
                    newMoreSpan.Id = moreSpanId;

                    newMoreSpan.Style.Display    = "none";
                    newMoreSpan.Style.Visibility = "hidden";
                    link.After(newMoreSpan);
                    link.Id = "link" + moreSpanId;
                    newMoreSpan.InnerHtml = moreHtml;
                }
            }

            return(true);
        }
Example #20
0
        /// <summary>
        /// Replaces every table in <paramref name="tables"/> with a newly built structure:
        /// a div (class "canvasRteResponsiveTable") wrapping a div (classes "tableLeftAlign" and "tableWrapper")
        /// wrapping a table/tbody that holds a plain-text copy of the rows found in the first body of the source table.
        /// Header cells (th) become td cells whose text is wrapped in a strong element; rowspan and colspan
        /// values other than "1" are carried over to the new cells.
        /// </summary>
        /// <param name="tables">Tables to rewrite. A table that is detached from the DOM, is not an HTML table element or has no body is left untouched.</param>
        /// <param name="document">Document used to create the replacement elements.</param>
        protected virtual void TransformTables(IHtmlCollection <IElement> tables, IHtmlDocument document)
        {
            // TODO: what about nested tables?
            // Iterate over a snapshot: each table is swapped out of the DOM at the end of its iteration,
            // which must not interfere with the enumeration of the passed-in collection.
            foreach (var table in tables.ToList())
            {
                var parent     = table.Parent;
                var sourceBody = (table as IHtmlTableElement)?.Bodies.FirstOrDefault();
                if (parent == null || sourceBody == null)
                {
                    // Nothing to read rows from, or nowhere to put the result.
                    continue;
                }

                // <div class="canvasRteResponsiveTable">
                var newTableElement = document.CreateElement("div");
                newTableElement.ClassName = "canvasRteResponsiveTable";

                // <div class="tableLeftAlign tableWrapper">
                // Possible alignments: tableLeftAlign, tableCenterAlign and tableRightAlign, since wiki does not have this option default to left align
                var innerDiv = document.CreateElement("div");
                innerDiv.ClassList.Add(new string[] { "tableLeftAlign", "tableWrapper" });
                newTableElement.AppendChild(innerDiv);

                // <table class="..." title="Table">
                // "ms-rteTable-default" (basic grid lines), a missing class and any class that does not
                // end in a numeric style code all map to the neutral simple style.
                string tableClassName = "simpleTableStyleNeutral";
                if (!string.IsNullOrEmpty(table.ClassName) &&
                    !table.ClassName.Equals("ms-rteTable-default", StringComparison.InvariantCultureIgnoreCase) &&
                    int.TryParse(table.ClassName.ToLower().Replace("ms-rtetable-", ""), out int tableStyleCode))
                {
                    tableClassName = TableStyleCodeToName(tableStyleCode);
                }

                var tableElement = document.CreateElement("table");
                tableElement.ClassName = tableClassName;
                tableElement.SetAttribute("title", "Table");
                innerDiv.AppendChild(tableElement);

                // <tbody>
                var tableBody = document.CreateElement("tbody");
                tableElement.AppendChild(tableBody);

                // Only the rows that are direct children of the first body are converted.
                // TODO: col and row spans are not yet supported in RTE but do seem to work...verify
                foreach (var row in sourceBody.Children.Where(p => p.TagName.Equals("tr", StringComparison.InvariantCultureIgnoreCase)))
                {
                    var newRow = document.CreateElement("tr");

                    // Walk the cells in document order so that rows mixing th and td keep their column order.
                    foreach (var cell in row.Children)
                    {
                        bool isHeader = cell.TagName.Equals("th", StringComparison.InvariantCultureIgnoreCase);
                        if (!isHeader && !cell.TagName.Equals("td", StringComparison.InvariantCultureIgnoreCase))
                        {
                            continue;
                        }

                        var newCell = document.CreateElement("td");
                        if (isHeader)
                        {
                            // Header cells are emitted as regular cells with bold text.
                            var headerText = document.CreateElement("strong");
                            headerText.TextContent = cell.TextContent;
                            newCell.AppendChild(headerText);
                        }
                        else
                        {
                            newCell.TextContent = cell.TextContent;
                        }

                        // take over row and col spans
                        CopySpanAttribute(cell, newCell, "rowspan");
                        CopySpanAttribute(cell, newCell, "colspan");

                        newRow.AppendChild(newCell);
                    }

                    tableBody.AppendChild(newRow);
                }

                // Swap old table with new table
                parent.ReplaceChild(newTableElement, table);
            }
        }

        /// <summary>
        /// Copies the given span attribute from <paramref name="source"/> to <paramref name="target"/> unless it is missing, empty or "1".
        /// </summary>
        private static void CopySpanAttribute(IElement source, IElement target, string attributeName)
        {
            var span = source.GetAttribute(attributeName);
            if (!string.IsNullOrEmpty(span) && span != "1")
            {
                target.SetAttribute(attributeName, span);
            }
        }
Example #21
0
        /// <summary>
        /// Converts a XAML property value into HTML below <paramref name="outerElement"/>.
        /// A <c>XamlTextValue</c> becomes the text content of <paramref name="outerElement"/>.
        /// A <c>XamlObject</c> is mapped by its element type name to an HTML element (div, table, button, span or input),
        /// sized through SetFixedWidth/SetFixedHeight or "100%", filled through ParseProperty and appended to <paramref name="outerElement"/>.
        /// </summary>
        /// <param name="object">The XAML value to convert.</param>
        /// <param name="htmlDocument">Document used to create the HTML elements.</param>
        /// <param name="outerElement">Element that receives the generated content.</param>
        /// <returns>The element created for the object, or null for text values and element types that are not handled.</returns>
        private static IElement ParseObject(XamlPropertyValue @object, IHtmlDocument htmlDocument, IElement outerElement)
        {
            var xamlObject = @object as XamlObject;
            if (xamlObject == null)
            {
                // Text values do not get an element of their own; they become the text of the enclosing element.
                var text = @object as XamlTextValue;
                if (text != null)
                {
                    outerElement.TextContent = text.Text;
                }

                return null;
            }

            IElement element = null;

            // A grid is appended to outerElement and has its properties and children parsed while it is built.
            bool isGrid = false;

            switch (xamlObject.ElementType.Name)
            {
                case "Viewbox": //todo: stretch, zoom??
                case "Border":
                case "Image":
                case "Rectangle":
                    element = htmlDocument.CreateElement("div");
                    break;

                case "Canvas":
                    element = htmlDocument.CreateElement("div");
                    ((IHtmlElement)element).Style.Position = "absolute";
                    break;

                case "StackPanel":
                case "DockPanel":
                    element = htmlDocument.CreateElement("div");
                    ((IHtmlElement)element).Style.Display = "flex";
                    ((IHtmlElement)element).Style.FlexDirection = "column";
                    break;

                case "WrapPanel":
                    element = htmlDocument.CreateElement("div");
                    ((IHtmlElement)element).Style.Display = "flex";
                    ((IHtmlElement)element).Style.FlexWrap = "wrap";
                    ((IHtmlElement)element).Style.FlexDirection = "column";
                    break;

                case "Grid":
                    element = BuildGridTable(xamlObject, htmlDocument, outerElement);
                    isGrid = true;
                    break;

                case "Button":
                    element = htmlDocument.CreateElement("button");
                    break;

                case "TextBlock":
                    element = htmlDocument.CreateElement("span");
                    break;

                case "TextBox":
                    element = htmlDocument.CreateElement("input");
                    element.SetAttribute("type", "text");
                    break;

                default:
                    break;
            }

            if (element == null)
            {
                return null;
            }

            var htmlElement = (IHtmlElement)element;
            if (xamlObject.ParentObject != null && xamlObject.ParentObject.Instance is Grid)
            {
                // Inside a grid cell a stretched element fills the cell; otherwise it gets a fixed size.
                var frameworkElement = (FrameworkElement)xamlObject.Instance;

                if (frameworkElement.HorizontalAlignment != HorizontalAlignment.Stretch)
                {
                    SetFixedWidth(htmlElement, xamlObject);
                }
                else
                {
                    htmlElement.Style.Width = "100%";
                }

                if (frameworkElement.VerticalAlignment != VerticalAlignment.Stretch)
                {
                    SetFixedHeight(htmlElement, xamlObject);
                }
                else
                {
                    htmlElement.Style.Height = "100%";
                }
            }
            else
            {
                SetFixedWidth(htmlElement, xamlObject);
                SetFixedHeight(htmlElement, xamlObject);
            }

            if (!isGrid)
            {
                foreach (var xamlProperty in xamlObject.Properties)
                {
                    ParseProperty(xamlProperty, htmlDocument, htmlElement);
                }

                outerElement.AppendChild(element);
            }

            return element;
        }

        /// <summary>
        /// Renders a XAML Grid as a table appended to <paramref name="outerElement"/>: one tr per row definition and one td
        /// per column definition (at least one of each), each td holding a div into which the grid children
        /// assigned to that row and column are parsed.
        /// </summary>
        /// <returns>The created table element.</returns>
        private static IElement BuildGridTable(XamlObject xamlObject, IHtmlDocument htmlDocument, IElement outerElement)
        {
            var tbl = htmlDocument.CreateElement("table");
            ((IHtmlElement)tbl).Style.Width = "100%";
            ((IHtmlElement)tbl).Style.Height = "100%";
            outerElement.AppendChild(tbl);

            var grid = xamlObject.Instance as Grid;

            // The children are distributed over the cells below, every other property is applied to the table itself.
            foreach (var xamlProperty in xamlObject.Properties.Where(x => x.PropertyName != "Children"))
            {
                ParseProperty(xamlProperty, htmlDocument, (IHtmlElement)tbl);
            }

            // A grid without a "Children" property still produces its (empty) cells instead of
            // failing on a null reference.
            var children = xamlObject.Properties.FirstOrDefault(x => x.PropertyName == "Children");
            var childObjects = children != null
                ? children.CollectionElements.OfType<XamlObject>()
                : Enumerable.Empty<XamlObject>();

            int rowCount    = grid.RowDefinitions.Count > 0 ? grid.RowDefinitions.Count : 1;
            int columnCount = grid.ColumnDefinitions.Count > 0 ? grid.ColumnDefinitions.Count : 1;

            for (int rowIndex = 0; rowIndex < rowCount; rowIndex++)
            {
                var row = htmlDocument.CreateElement("tr");
                ((IHtmlElement)row).Style.VerticalAlign = "top";
                tbl.AppendChild(row);
                if (grid.RowDefinitions.Count > 0)
                {
                    ((IHtmlElement)row).Style.Height = ParseGridLenth(grid.RowDefinitions[rowIndex].Height);
                }
                row.ClassList.Add("visuGrid");

                for (int columnIndex = 0; columnIndex < columnCount; columnIndex++)
                {
                    var td = htmlDocument.CreateElement("td");
                    td.ClassList.Add("visuGrid");
                    row.AppendChild(td);

                    var cellContent = htmlDocument.CreateElement("div");
                    td.AppendChild(cellContent);

                    ((IHtmlElement)cellContent).Style.Width = "100%";
                    ((IHtmlElement)cellContent).Style.Height = "100%";

                    if (grid.ColumnDefinitions.Count > 0)
                    {
                        ((IHtmlElement)td).Style.Width = ParseGridLenth(grid.ColumnDefinitions[columnIndex].Width);
                    }

                    //Row Col Span should be used

                    // Copies of the loop counters for capture by the lambda.
                    var column = columnIndex;
                    var line   = rowIndex;
                    var cellChildren = childObjects.Where(x => Grid.GetColumn((UIElement)x.Instance) == column && Grid.GetRow((UIElement)x.Instance) == line);
                    foreach (var child in cellChildren)
                    {
                        ParseObject(child, htmlDocument, cellContent);
                    }
                }
            }

            return tbl;
        }
Example #22
0
        /// <summary>
        /// Rewrites the "ms-rte*" styling classes of each element and then restructures the element:
        /// <list type="bullet">
        /// <item><description>the "ms-rtestyle-normal" and "ms-rtethemebackcolor-" classes are removed;</description></item>
        /// <item><description>"ms-rtethemeforecolor-" and "ms-rteforecolor-" classes are replaced by a "fontColor..." class, "ms-rtebackcolor-" by a "highlightColor..." class
        /// and "ms-rtefontsize-" by a "fontSize..." class (the old class is dropped when no mapping exists);</description></item>
        /// <item><description>elements reported by IsStrikeThrough / IsUnderline are wrapped in an s / u element;</description></item>
        /// <item><description>a span that was not wrapped and has no classes left is handed to ReplaceChildElementByText;</description></item>
        /// <item><description>any other element except strong is wrapped in a span that takes over the element's classes.</description></item>
        /// </list>
        /// </summary>
        /// <param name="elementsToTransform">Elements to rewrite.</param>
        /// <param name="document">Document used to create the wrapper elements.</param>
        protected virtual void TransformElements(IHtmlCollection <IElement> elementsToTransform, IHtmlDocument document)
        {
            // Iterate over a snapshot: the loop re-parents elements, which must not interfere with
            // the enumeration of the passed-in collection.
            foreach (var element in elementsToTransform.ToList())
            {
                // rewrite normal style
                // <span class="ms-rteStyle-Normal">Norm</span>
                var rtestylenormal = element.ClassList.PartialMatch("ms-rtestyle-normal");
                if (!string.IsNullOrEmpty(rtestylenormal))
                {
                    element.ClassList.Remove(rtestylenormal);
                }

                // ================================
                // rewrite colors, back and fore color + size can be defined as class on a single span element
                // ================================
                // <span class="ms-rteThemeForeColor-5-0">red</span>
                var themeForeColor = element.ClassList.PartialMatch("ms-rtethemeforecolor-");
                if (!string.IsNullOrEmpty(themeForeColor))
                {
                    // Modern Theme colors
                    // Darker, Dark, Dark Alternate, Primary, Secondary
                    // Neutral Tertiary, Neutral Secondary, Primary alternate, Neutral primary, Neutral Dark
                    // Only the last character of the class name is used as theme code.
                    string colorName = null;
                    if (int.TryParse(themeForeColor.Substring(themeForeColor.Length - 1), out int themeCode))
                    {
                        colorName = ThemeCodeToForegroundColorName(themeCode);
                    }

                    SwapClass(element, themeForeColor, string.IsNullOrEmpty(colorName) ? null : $"fontColor{colorName}");
                }

                // <span class="ms-rteThemeBackColor-5-0">red</span>
                var rtethemebackcolor = element.ClassList.PartialMatch("ms-rtethemebackcolor-");
                if (!string.IsNullOrEmpty(rtethemebackcolor))
                {
                    // There are no themed back colors in modern, so for now drop the background color
                    element.ClassList.Remove(rtethemebackcolor);
                }

                //<span class="ms-rteForeColor-2" style="">Red,&#160;</span>
                //<sup class="ms-rteForeColor-10" style=""><strong style="">superscript</strong></sup>
                var rteforecolor = element.ClassList.PartialMatch("ms-rteforecolor-");
                if (!string.IsNullOrEmpty(rteforecolor))
                {
                    // Modern Theme colors
                    // Dark Red, Red, Orange, Yellow, Light green
                    // Green, Light Blue, Blue, Dark Blue, Purple
                    string colorName = null;
                    if (int.TryParse(rteforecolor.ToLower().Replace("ms-rteforecolor-", ""), out int colorCode))
                    {
                        colorName = ColorCodeToForegroundColorName(colorCode);
                    }

                    SwapClass(element, rteforecolor, string.IsNullOrEmpty(colorName) ? null : $"fontColor{colorName}");
                }

                // <sub class="ms-rteBackColor-2">lowerscript</sub>
                var rtebackcolor = element.ClassList.PartialMatch("ms-rtebackcolor-");
                if (!string.IsNullOrEmpty(rtebackcolor))
                {
                    // Modern Theme colors
                    // Dark Red, Red, Orange, Yellow, Light green
                    // Green, Light Blue, Blue, Dark Blue, Purple
                    string colorName = null;
                    if (int.TryParse(rtebackcolor.ToLower().Replace("ms-rtebackcolor-", ""), out int colorCode))
                    {
                        colorName = ColorCodeToBackgroundColorName(colorCode);
                    }

                    SwapClass(element, rtebackcolor, string.IsNullOrEmpty(colorName) ? null : $"highlightColor{colorName}");
                }

                // ================================
                // rewrite font size
                // ================================
                var rtefontsize = element.ClassList.PartialMatch("ms-rtefontsize-");
                if (!string.IsNullOrEmpty(rtefontsize))
                {
                    string fontSize = null;
                    if (int.TryParse(rtefontsize.ToLower().Replace("ms-rtefontsize-", ""), out int fontsizeCode))
                    {
                        fontSize = FontCodeToName(fontsizeCode);
                    }

                    SwapClass(element, rtefontsize, string.IsNullOrEmpty(fontSize) ? null : $"fontSize{fontSize}");
                }

                var parent = element.Parent;
                if (parent == null)
                {
                    // A detached element cannot be replaced or wrapped; the class rewrite above is all that applies.
                    continue;
                }

                // rewrite striked and underline
                // <span style="text-decoration&#58;line-through;">striked</span>
                // <span style="text-decoration&#58;underline;">underline</span>
                string decorationTag = IsStrikeThrough(element) ? "s" : (IsUnderline(element) ? "u" : null);
                bool replacementDone = decorationTag != null;
                if (replacementDone)
                {
                    WrapElement(element, document.CreateElement(decorationTag));
                }

                // No need to wrap a span into a new span
                if (element is IHtmlSpanElement)
                {
                    // if we still did not replace the span element and the span has no classes set anymore then we can replace it by text
                    if (!replacementDone && element.ClassList.Length == 0)
                    {
                        ReplaceChildElementByText(parent, element, document);
                    }
                }
                else if (!element.TagName.Equals("strong", StringComparison.InvariantCultureIgnoreCase))
                {
                    // Non span element with styling that was transformed will be wrapped in a span containing the styling which wraps a "clean" element.
                    // The wrap is done relative to the element's current parent, which is the s/u wrapper when
                    // the element was already wrapped above (it is then no longer a child of the original parent).
                    var styledSpan = document.CreateElement("span");
                    styledSpan.ClassList.Add(element.ClassList.ToArray());
                    element.ClassList.Remove(element.ClassList.ToArray());
                    WrapElement(element, styledSpan);
                }
            }
        }

        /// <summary>
        /// Removes <paramref name="oldClass"/> from the element and adds <paramref name="newClass"/> when one is given.
        /// </summary>
        private static void SwapClass(IElement element, string oldClass, string newClass)
        {
            element.ClassList.Remove(oldClass);
            if (!string.IsNullOrEmpty(newClass))
            {
                element.ClassList.Add(newClass);
            }
        }

        /// <summary>
        /// Puts <paramref name="wrapper"/> in the place of <paramref name="element"/> and moves the element into it.
        /// The element itself is moved (not copied through its HTML) so that it and its descendants stay part of
        /// the document and can still be transformed afterwards.
        /// </summary>
        private static void WrapElement(IElement element, IElement wrapper)
        {
            element.Parent.ReplaceChild(wrapper, element);
            wrapper.AppendChild(element);
        }
Example #23
0
 /// <summary>
 /// Creates a new element with the given tag name by delegating to <c>document.CreateElement</c>.
 /// </summary>
 /// <param name="name">Tag name passed through unchanged to <c>CreateElement</c>.</param>
 /// <returns>The element returned by <c>document.CreateElement</c>.</returns>
 public static IElement Create(string name)
 {
     var created = document.CreateElement(name);

     return created;
 }
Example #24
0
        /// <summary>
        /// Find all &lt;noscript&gt; that are located after &lt;img&gt; nodes, and which contain
        /// only one single &lt;img&gt; element. Replace the first image with the image from inside the
        /// &lt;noscript&gt; tag. This improves the quality of the
        /// images we use on some sites (e.g. Medium)
        /// </summary>
        /// <param name="doc">The document to operate on</param>
        internal static void UnwrapNoscriptImages(IHtmlDocument doc)
        {
            // Find img without source or attributes that might contains image, and remove it.
            // This is done to prevent a placeholder img is replaced by img from noscript in next step.
            // The element list is copied first because the callback removes images from the document
            // while the list is being walked.
            var imgs = doc.GetElementsByTagName("img").ToList();

            ForEachNode(imgs, (img) => {
                var imgElement = img as IElement;
                if (imgElement == null)
                {
                    return;
                }

                for (var i = 0; i < imgElement.Attributes.Length; i++)
                {
                    var attr = imgElement.Attributes[i];
                    switch (attr.Name)
                    {
                    case "src":
                    case "srcset":
                    case "data-src":
                    case "data-srcset":
                        return;
                    }

                    // Any attribute value that looks like an image file name counts as a source,
                    // regardless of the case of the extension (".jpg" as well as ".JPG").
                    if (Regex.IsMatch(attr.Value, @"\.(jpg|jpeg|png|webp)", RegexOptions.IgnoreCase))
                    {
                        return;
                    }
                }

                img.Parent.RemoveChild(img);
            });

            // Next find noscript and try to extract its image
            var noscripts = doc.GetElementsByTagName("noscript").ToList();

            ForEachNode(noscripts, (noscript) => {
                var noscriptElement = noscript as IElement;
                if (noscriptElement == null)
                {
                    return;
                }

                // Parse content of noscript and make sure it only contains image
                var tmp       = doc.CreateElement("div");
                tmp.InnerHtml = noscriptElement.InnerHtml;
                if (!IsSingleImage(tmp))
                {
                    return;
                }

                // If noscript has previous sibling and it only contains image,
                // replace it with noscript content. However we also keep old
                // attributes that might contains image.
                var prevElement = noscriptElement.PreviousElementSibling;
                if (prevElement == null || !IsSingleImage(prevElement))
                {
                    return;
                }

                var prevImg = prevElement;
                if (prevImg.TagName != "IMG")
                {
                    prevImg = prevElement.GetElementsByTagName("img")[0];
                }

                var newImg = tmp.GetElementsByTagName("img")[0];
                for (var i = 0; i < prevImg.Attributes.Length; i++)
                {
                    var attr = prevImg.Attributes[i];
                    if (attr.Value == "")
                    {
                        continue;
                    }

                    bool mayHoldImage = attr.Name == "src" || attr.Name == "srcset" ||
                                        Regex.IsMatch(attr.Value, @"\.(jpg|jpeg|png|webp)", RegexOptions.IgnoreCase);
                    if (!mayHoldImage || newImg.GetAttribute(attr.Name) == attr.Value)
                    {
                        continue;
                    }

                    // Do not overwrite an attribute the new image already has; keep the old value under a "data-old-" name.
                    var attrName = attr.Name;
                    if (newImg.HasAttribute(attrName))
                    {
                        attrName = "data-old-" + attrName;
                    }

                    newImg.SetAttribute(attrName, attr.Value);
                }

                noscript.Parent.ReplaceChild(tmp.FirstElementChild, prevElement);
            });
        }
Example #25
0
        protected virtual void TransformSpans(IHtmlCollection <IElement> spans, IHtmlDocument document)
        {
            foreach (var span in spans)
            {
                var parent = span.Parent;

                // rewrite normal style
                // <span class="ms-rteStyle-Normal">Norm</span>
                if (span.ClassName != null && span.ClassName.ToLower().Contains("ms-rtestyle-normal"))
                {
                    ReplaceChildElementByText(parent, span, document);
                    continue;
                }

                // rewrite striked
                // <span style="text-decoration&#58;line-through;">striked</span>
                if (span.OuterHtml.ToLower().Contains("text-decoration:line-through;"))
                {
                    var newElement = document.CreateElement("s");
                    newElement.TextContent = span.InnerHtml;
                    parent.ReplaceChild(newElement, span);
                    continue;
                }

                // rewrite underline
                // <span style="text-decoration&#58;underline;">underline</span>
                if (span.OuterHtml.ToLower().Contains("text-decoration:underline;"))
                {
                    var newElement = document.CreateElement("u");
                    newElement.TextContent = span.InnerHtml;
                    parent.ReplaceChild(newElement, span);
                    continue;
                }

                // ================================
                // rewrite colors
                // ================================
                // <span class="ms-rteThemeForeColor-5-0">red</span>
                if (span.ClassName != null && (span.ClassName.ToLower().StartsWith("ms-rtethemeforecolor-")))
                {
                    string newClass = null;

                    // Modern Theme colors
                    // Darker, Dark, Dark Alternate, Primary, Secondary
                    // Neutral Tertiary, Neutral Secondary, Primary alternate, Neutral primary, Neutral Dark
                    if (int.TryParse(span.ClassName.ToLower()[span.ClassName.ToLower().Length - 1].ToString(), out int themeCode))
                    {
                        string colorName = ThemeCodeToForegroundColorName(themeCode);
                        if (!string.IsNullOrEmpty(colorName))
                        {
                            newClass = $"fontColor{colorName}";
                        }
                    }

                    if (!string.IsNullOrEmpty(newClass))
                    {
                        // We mapped a color
                        span.ClassName = newClass;
                        continue;
                    }
                    else
                    {
                        // For now drop the color span
                        ReplaceChildElementByText(parent, span, document);
                        continue;
                    }
                }

                // <span class="ms-rteThemeBackColor-5-0">red</span>
                if (span.ClassName != null && span.ClassName.ToLower().StartsWith("ms-rtethemebackcolor-"))
                {
                    // There are no themed back colors in modern, so for now drop the color span and the background color
                    ReplaceChildElementByText(parent, span, document);
                    continue;
                }

                //<span class="ms-rteForeColor-2" style="">Red,&#160;</span>
                if (span.ClassName != null && span.ClassName.ToLower().StartsWith("ms-rteforecolor-"))
                {
                    // Modern Theme colors
                    // Dark Red, Red, Orange, Yellow, Light green
                    // Green, Light Blue, Blue, Dark Blue, Purple

                    string newClass = null;
                    if (int.TryParse(span.ClassName.ToLower().Replace("ms-rteforecolor-", ""), out int colorCode))
                    {
                        string colorName = ColorCodeToForegroundColorName(colorCode);
                        if (!string.IsNullOrEmpty(colorName))
                        {
                            newClass = $"fontColor{colorName}";
                        }
                    }

                    if (!string.IsNullOrEmpty(newClass))
                    {
                        // We mapped a color
                        span.ClassName = newClass;
                        continue;
                    }
                    else
                    {
                        // Let's go to default...meaning drop color info
                        ReplaceChildElementByText(parent, span, document);
                        continue;
                    }
                }

                if (span.ClassName != null && span.ClassName.ToLower().StartsWith("ms-rtebackcolor-"))
                {
                    // Modern Theme colors
                    // Dark Red, Red, Orange, Yellow, Light green
                    // Green, Light Blue, Blue, Dark Blue, Purple

                    string newClass = null;
                    if (int.TryParse(span.ClassName.ToLower().Replace("ms-rtebackcolor-", ""), out int colorCode))
                    {
                        string colorName = ColorCodeToBackgroundColorName(colorCode);
                        if (!string.IsNullOrEmpty(colorName))
                        {
                            newClass = $"highlightColor{colorName}";
                        }
                    }

                    if (!string.IsNullOrEmpty(newClass))
                    {
                        // We mapped a color
                        span.ClassName = newClass;
                        continue;
                    }
                    else
                    {
                        // Let's go to default...meaning drop color info
                        ReplaceChildElementByText(parent, span, document);
                        continue;
                    }
                }

                // ================================
                // rewrite font size
                // ================================
                if (span.ClassName != null && span.ClassName.ToLower().StartsWith("ms-rtefontsize-"))
                {
                    // Modern Theme colors
                    // Dark Red, Red, Orange, Yellow, Light green
                    // Green, Light Blue, Blue, Dark Blue, Purple

                    string newClass = null;
                    if (int.TryParse(span.ClassName.ToLower().Replace("ms-rtefontsize-", ""), out int fontsizeCode))
                    {
                        string fontSize = FontCodeToName(fontsizeCode);
                        if (!string.IsNullOrEmpty(fontSize))
                        {
                            newClass = $"fontSize{fontSize}";
                        }
                    }

                    if (!string.IsNullOrEmpty(newClass))
                    {
                        // We mapped a color
                        span.ClassName = newClass;
                        continue;
                    }
                    else
                    {
                        // Let's go to default...meaning font size info will be dropped
                        ReplaceChildElementByText(parent, span, document);
                        continue;
                    }
                }
            }
        }
Example #26
0
        /// <summary>
        /// Converts the href of each &lt;a&gt; and the src, poster and srcset of each media element
        /// (img, picture, figure, video, audio, source) in the given element, and its descendants,
        /// to an absolute URI by means of <c>ToAbsoluteURI</c>.
        /// Links whose href starts with <c>javascript:</c> are replaced by their content, since they
        /// cannot work once scripts have been removed from the page.
        /// </summary>
        /// <remarks>
        /// #ref URIs are presumably left untouched by <c>ToAbsoluteURI</c>; that helper is defined
        /// elsewhere and its behavior should be confirmed there.
        /// </remarks>
        /// <param name="articleContent">The node in which to fix all relative uri</param>
        /// <param name="uri">The base uri</param>
        /// <param name="doc">The document to operate on; used to create the replacement nodes</param>
        internal static void FixRelativeUris(IElement articleContent, Uri uri, IHtmlDocument doc)
        {
            var links = NodeUtility.GetAllNodesWithTag(articleContent, new string[] { "a" });

            NodeUtility.ForEachNode(links, (link) =>
            {
                // Only elements carry attributes; skip anything else rather than
                // dereferencing a failed cast.
                var anchor = link as IElement;
                if (anchor == null)
                {
                    return;
                }

                var href = anchor.GetAttribute("href");
                if (string.IsNullOrWhiteSpace(href))
                {
                    return;
                }

                // A URI scheme is not linguistic text, so compare ordinally.
                if (!href.StartsWith("javascript:", StringComparison.Ordinal))
                {
                    anchor.SetAttribute("href", uri.ToAbsoluteURI(href));
                    return;
                }

                // Remove links with javascript: URIs, since
                // they won't work after scripts have been removed from the page.
                if (anchor.ChildNodes.Length == 1 && anchor.ChildNodes[0].NodeType == NodeType.Text)
                {
                    // if the link only contains simple text content, it can be converted to a text node
                    var text = doc.CreateTextNode(anchor.TextContent);
                    anchor.Parent.ReplaceChild(text, anchor);
                }
                else
                {
                    // if the link has multiple children, they should all be preserved
                    var container = doc.CreateElement("span");
                    while (anchor.ChildNodes.Length > 0)
                    {
                        // Appending a child to the container removes it from the link,
                        // so index 0 always refers to the next remaining child.
                        container.AppendChild(anchor.ChildNodes[0]);
                    }
                    anchor.Parent.ReplaceChild(container, anchor);
                }
            });

            var medias = NodeUtility.GetAllNodesWithTag(articleContent, new string[] { "img", "picture", "figure", "video", "audio", "source" });

            NodeUtility.ForEachNode(medias, (media_node) =>
            {
                var media = media_node as IElement;
                if (media == null)
                {
                    return;
                }

                var src    = media.GetAttribute("src");
                var poster = media.GetAttribute("poster");
                var srcset = media.GetAttribute("srcset");

                if (src != null)
                {
                    media.SetAttribute("src", uri.ToAbsoluteURI(src));
                }

                if (poster != null)
                {
                    media.SetAttribute("poster", uri.ToAbsoluteURI(poster));
                }

                if (srcset != null)
                {
                    // Group 1 is passed through ToAbsoluteURI; groups 2 and 3 are appended unchanged.
                    // An unmatched group yields an empty Value, so no null handling is needed.
                    var newSrcset = RE_SrcSetUrl.Replace(srcset, (input) =>
                        uri.ToAbsoluteURI(input.Groups[1].Value) + input.Groups[2].Value + input.Groups[3].Value);

                    media.SetAttribute("srcset", newSrcset);
                }
            });
        }