Beispiel #1
0
        /// <summary>
        /// Cleans an HTML fragment: removes every tag listed in <c>UnwantedTags</c> (keeping its
        /// children in place), strips every attribute listed in <c>UnwantedAttributes</c>, and
        /// rewrites the <c>src</c> of images that <c>HandleImage</c> resolves to a media item.
        /// </summary>
        /// <param name="map">Data map handed through to <c>HandleImage</c>.</param>
        /// <param name="itemPath">Item path handed through to <c>HandleImage</c>.</param>
        /// <param name="html">The HTML to clean.</param>
        /// <returns>
        /// The inner HTML of the processed document. <paramref name="html"/> is returned unchanged
        /// when it is null or empty, or when it contains no top-level element or text nodes.
        /// </returns>
        public string CleanHtml(IDataMap map, string itemPath, string html)
        {
            if (String.IsNullOrEmpty(html))
            {
                return html;
            }

            var document = new HtmlDocument();
            document.LoadHtml(html);

            HtmlNodeCollection topLevelNodes = document.DocumentNode.SelectNodes("./*|./text()");
            if (topLevelNodes == null || !topLevelNodes.Any())
            {
                return html;
            }

            // Breadth-first walk: each dequeued node pushes its element/text children onto the queue.
            var nodes = new Queue<HtmlNode>(topLevelNodes);

            while (nodes.Count > 0)
            {
                var node       = nodes.Dequeue();
                // Invariant lowering so tag comparisons do not depend on the current thread culture.
                var nodeName   = node.Name.ToLowerInvariant();
                var parentNode = node.ParentNode;
                var childNodes = node.SelectNodes("./*|./text()");

                if (childNodes != null)
                {
                    foreach (var child in childNodes)
                    {
                        nodes.Enqueue(child);
                    }
                }

                if (UnwantedTags.Any(tag => tag == nodeName))
                {
                    // Unwrap the unwanted tag: hoist its children in front of it, then drop it.
                    if (childNodes != null)
                    {
                        foreach (var child in childNodes)
                        {
                            parentNode.InsertBefore(child, node);
                        }
                    }

                    parentNode.RemoveChild(node);
                }
                else if (node.HasAttributes)
                {
                    // The node is kept: strip the unwanted attributes from it.
                    foreach (string s in UnwantedAttributes)
                    {
                        node.Attributes.Remove(s);
                    }

                    if (nodeName.Equals("img"))
                    {
                        // An <img> may carry attributes but no src; the indexer yields null then,
                        // so check before dereferencing instead of throwing.
                        var srcAttribute = node.Attributes["src"];
                        if (srcAttribute != null)
                        {
                            MediaItem newImg = HandleImage(map, MediaParentItem, itemPath, srcAttribute.Value);
                            if (newImg != null)
                            {
                                // Point the image at the media item returned by HandleImage.
                                string newSrc = string.Format("-/media/{0}.ashx", newImg.ID.ToShortID());
                                node.SetAttributeValue("src", newSrc);
                            }
                        }
                    }
                }
            }

            return document.DocumentNode.InnerHtml;
        }
        /// <summary>
        /// Takes the next node off <paramref name="nodes"/>, queues its element/text children, and
        /// then either unwraps the node (when its tag is in <c>UnwantedTags</c>) or cleans its
        /// attributes and rewrites media references found in <c>src</c>/<c>href</c>.
        /// </summary>
        /// <param name="nodes">Work queue; must contain at least one node.</param>
        /// <param name="map">Data map handed through to <c>HandleMedia</c>.</param>
        /// <param name="itemPath">Item path handed through to <c>HandleMedia</c>.</param>
        private void HandleNextNode(Queue <HtmlNode> nodes, IDataMap map, string itemPath)
        {
            HtmlNode           current  = nodes.Dequeue();
            string             tagName  = current.Name.ToLower();
            HtmlNode           parent   = current.ParentNode;
            HtmlNodeCollection children = current.SelectNodes("./*|./text()");

            // Children are always queued, whether or not the current node survives.
            if (children != null)
            {
                foreach (HtmlNode queuedChild in children)
                {
                    nodes.Enqueue(queuedChild);
                }
            }

            if (UnwantedTags.Any(unwanted => unwanted == tagName))
            {
                // Unwrap: move the children in front of the node, then remove the node itself.
                if (children != null)
                {
                    foreach (HtmlNode hoistedChild in children)
                    {
                        parent.InsertBefore(hoistedChild, current);
                    }
                }

                parent.RemoveChild(current);
                return;
            }

            if (!current.HasAttributes)
            {
                // Nothing to clean or rewrite; the node stays as it is.
                return;
            }

            foreach (string attributeName in UnwantedAttributes)
            {
                current.Attributes.Remove(attributeName);
            }

            switch (tagName)
            {
                case "img":
                case "script":
                {
                    string source = current.Attributes.Contains("src") ? current.Attributes["src"].Value : string.Empty;
                    if (string.IsNullOrEmpty(source))
                    {
                        break;
                    }

                    MediaItem sourceMedia = HandleMedia(map, itemPath, source);
                    if (sourceMedia != null)
                    {
                        // Point src at the media item returned by HandleMedia.
                        current.SetAttributeValue("src", string.Format("-/media/{0}.ashx", sourceMedia.ID.ToShortID()));
                    }

                    break;
                }

                case "a":
                case "link":
                {
                    if (tagName == "a" && current.Attributes.Contains("target"))
                    {
                        // Anchors that open a new window get rel="noopener noreferrer".
                        string target = current.Attributes["target"].Value;
                        if (target.Equals("_blank", StringComparison.InvariantCultureIgnoreCase))
                        {
                            current.SetAttributeValue("rel", "noopener noreferrer");
                        }
                    }

                    string reference = current.Attributes.Contains("href") ? current.Attributes["href"].Value : string.Empty;
                    if (string.IsNullOrEmpty(reference))
                    {
                        break;
                    }

                    MediaItem linkedMedia = HandleMedia(map, itemPath, reference);
                    if (linkedMedia != null)
                    {
                        // Point href at the media item returned by HandleMedia.
                        current.SetAttributeValue("href", string.Format("-/media/{0}.ashx", linkedMedia.ID.ToShortID()));
                    }

                    break;
                }
            }
        }
        /// <summary>
        /// Takes the next node off <paramref name="nodes"/>, queues its element/text children, and
        /// then unwraps it (tag in <c>UnwantedTags</c>), cleans its attributes and re-links an
        /// image whose <c>src</c> is a dynamic link, or renames it according to <c>ReplaceTags</c>.
        /// </summary>
        /// <param name="nodes">Work queue; must contain at least one node.</param>
        /// <param name="map">Data map supplying the fallback language and target database.</param>
        /// <param name="itemPath">Item path handed through to <c>HandleMediaItem</c>.</param>
        /// <param name="importRow">Source item whose database is used to resolve linked media.</param>
        private void HandleNextNode(Queue <HtmlNode> nodes, IDataMap map, string itemPath, Item importRow)
        {
            var node       = nodes.Dequeue();
            // Invariant lowering so tag comparisons do not depend on the current thread culture.
            var nodeName   = node.Name.ToLowerInvariant();
            var parentNode = node.ParentNode;
            var childNodes = node.SelectNodes("./*|./text()");

            if (childNodes != null)
            {
                foreach (var child in childNodes)
                {
                    nodes.Enqueue(child);
                }
            }

            if (UnwantedTags.Any(tag => tag == nodeName))
            {
                // Unwrap the unwanted tag: hoist its children in front of it, then drop it.
                if (childNodes != null)
                {
                    foreach (var child in childNodes)
                    {
                        parentNode.InsertBefore(child, node);
                    }
                }

                parentNode.RemoveChild(node);
            }
            else if (node.HasAttributes)
            {
                // The node is kept: strip the unwanted attributes from it.
                foreach (string s in UnwantedAttributes)
                {
                    node.Attributes.Remove(s);
                }

                if (nodeName.Equals("img"))
                {
                    // An <img> may carry attributes but no src; the indexer yields null then.
                    // Treat that like an unparsable link instead of throwing.
                    var         srcAttribute = node.Attributes["src"];
                    DynamicLink dynamicLink;
                    if (srcAttribute == null || !DynamicLink.TryParse(srcAttribute.Value, out dynamicLink))
                    {
                        return;
                    }

                    // The linked item may no longer exist in the source database; skip the image
                    // rather than dereferencing a null item below.
                    Item linkedItem = importRow.Database.GetItem(dynamicLink.ItemId, dynamicLink.Language ?? map.ImportToLanguage);
                    if (linkedItem == null)
                    {
                        return;
                    }

                    MediaItem mediaItem       = linkedItem;
                    var       mediaParentItem = BuildMediaPath(map.ToDB, mediaItem.InnerItem.Paths.ParentPath);
                    MediaItem newImg          = HandleMediaItem(map, mediaParentItem, itemPath, mediaItem);
                    if (newImg != null)
                    {
                        // Point the image at the media item returned by HandleMediaItem.
                        string newSrc = string.Format("-/media/{0}.ashx", newImg.ID.ToShortID());
                        node.SetAttributeValue("src", newSrc);
                    }
                }
            }
            // NOTE(review): this branch is only reached for nodes WITHOUT attributes, so a tag
            // listed in ReplaceTags that carries any attribute is never renamed - confirm intent.
            else if (ReplaceTags.ContainsKey(nodeName))
            {
                node.Name = ReplaceTags[nodeName];
            }
        }