/// <summary>
/// Cleans an HTML fragment: elements whose name is listed in UnwantedTags are removed
/// (their children are kept in place), attributes listed in UnwantedAttributes are stripped,
/// and the <c>src</c> of each <c>img</c> is re-pointed at the media item returned by HandleImage.
/// </summary>
/// <param name="map">Data map forwarded to HandleImage.</param>
/// <param name="itemPath">Item path forwarded to HandleImage.</param>
/// <param name="html">The HTML to clean; returned unchanged when null, empty, or without child nodes.</param>
/// <returns>The inner HTML of the cleaned document, or the original string when there was nothing to process.</returns>
public string CleanHtml(IDataMap map, string itemPath, string html)
{
    if (String.IsNullOrEmpty(html))
    {
        return html;
    }

    var document = new HtmlDocument();
    document.LoadHtml(html);

    HtmlNodeCollection tryGetNodes = document.DocumentNode.SelectNodes("./*|./text()");
    if (tryGetNodes == null || !tryGetNodes.Any())
    {
        return html;
    }

    // Work queue: every node's element/text children are appended, so the whole tree is visited.
    var nodes = new Queue<HtmlNode>(tryGetNodes);
    while (nodes.Count > 0)
    {
        var node = nodes.Dequeue();
        var nodeName = node.Name.ToLower();
        var parentNode = node.ParentNode;

        var childNodes = node.SelectNodes("./*|./text()");
        if (childNodes != null)
        {
            foreach (var child in childNodes)
            {
                nodes.Enqueue(child);
            }
        }

        if (UnwantedTags.Any(tag => tag == nodeName))
        {
            // This node is one to remove: re-attach its children to the parent first
            // so only the wrapping tag disappears.
            if (childNodes != null)
            {
                foreach (var child in childNodes)
                {
                    parentNode.InsertBefore(child, node);
                }
            }

            parentNode.RemoveChild(node);
        }
        else if (node.HasAttributes)
        {
            // The node is kept: strip the unwanted attributes.
            foreach (string s in UnwantedAttributes)
            {
                node.Attributes.Remove(s);
            }

            // Replace images.
            if (nodeName.Equals("img"))
            {
                // The src attribute may be absent (or may just have been stripped above),
                // so look it up defensively instead of dereferencing a null attribute.
                string imgSrc = node.Attributes.Contains("src") ? node.Attributes["src"].Value : string.Empty;
                if (!string.IsNullOrEmpty(imgSrc))
                {
                    MediaItem newImg = HandleImage(map, MediaParentItem, itemPath, imgSrc);
                    if (newImg != null)
                    {
                        // Point the image at the media item's short-ID based URL.
                        string newSrc = string.Format("-/media/{0}.ashx", newImg.ID.ToShortID());
                        node.SetAttributeValue("src", newSrc);
                    }
                }
            }
        }
    }

    return document.DocumentNode.InnerHtml;
}
/// <summary>
/// Takes the next node off <paramref name="nodes"/>, queues its element/text children, and then either
/// removes the node (when its name is listed in UnwantedTags, keeping its children) or cleans its attributes.
/// For <c>img</c>/<c>script</c> the <c>src</c> attribute, and for <c>a</c>/<c>link</c> the <c>href</c> attribute,
/// is rewritten to a media URL when HandleMedia returns an item for the current value.
/// </summary>
/// <param name="nodes">Work queue of nodes still to process; must not be empty.</param>
/// <param name="map">Data map forwarded to HandleMedia.</param>
/// <param name="itemPath">Item path forwarded to HandleMedia.</param>
private void HandleNextNode(Queue<HtmlNode> nodes, IDataMap map, string itemPath)
{
    HtmlNode current = nodes.Dequeue();
    string tagName = current.Name.ToLower();
    HtmlNode owner = current.ParentNode;
    HtmlNodeCollection children = current.SelectNodes("./*|./text()");
    bool hasChildren = children != null;

    if (hasChildren)
    {
        foreach (HtmlNode descendant in children)
        {
            nodes.Enqueue(descendant);
        }
    }

    // Unwanted tag: lift the children into the parent, then drop the tag itself.
    if (UnwantedTags.Any(unwanted => unwanted == tagName))
    {
        if (hasChildren)
        {
            foreach (HtmlNode orphan in children)
            {
                owner.InsertBefore(orphan, current);
            }
        }

        owner.RemoveChild(current);
        return;
    }

    // Nodes without attributes are kept as they are.
    if (!current.HasAttributes)
    {
        return;
    }

    foreach (string attributeName in UnwantedAttributes)
    {
        current.Attributes.Remove(attributeName);
    }

    // Decide which attribute (if any) carries a media reference for this tag.
    string mediaAttribute;
    switch (tagName)
    {
        case "img":
        case "script":
            mediaAttribute = "src";
            break;
        case "a":
        case "link":
            mediaAttribute = "href";
            break;
        default:
            return;
    }

    // Anchors that open in a new window get rel="noopener noreferrer".
    if (tagName == "a" && current.Attributes.Contains("target"))
    {
        string target = current.Attributes["target"].Value;
        if (target.Equals("_blank", StringComparison.InvariantCultureIgnoreCase))
        {
            current.SetAttributeValue("rel", "noopener noreferrer");
        }
    }

    if (!current.Attributes.Contains(mediaAttribute))
    {
        return;
    }

    string reference = current.Attributes[mediaAttribute].Value;
    if (string.IsNullOrEmpty(reference))
    {
        return;
    }

    MediaItem media = HandleMedia(map, itemPath, reference);
    if (media == null)
    {
        return;
    }

    // Replace the reference with the media item's short-ID based URL.
    current.SetAttributeValue(mediaAttribute, string.Format("-/media/{0}.ashx", media.ID.ToShortID()));
}
/// <summary>
/// Takes the next node off <paramref name="nodes"/>, queues its element/text children, and then either
/// removes the node (when its name is listed in UnwantedTags, keeping its children), cleans its attributes,
/// or renames it according to ReplaceTags. For <c>img</c> nodes whose <c>src</c> parses as a dynamic link,
/// the referenced media item is looked up in the database of <paramref name="importRow"/>, passed to
/// HandleMediaItem, and the <c>src</c> is rewritten to the resulting item's media URL.
/// </summary>
/// <param name="nodes">Work queue of nodes still to process; must not be empty.</param>
/// <param name="map">Data map; supplies the target database and the fallback import language.</param>
/// <param name="itemPath">Item path forwarded to HandleMediaItem.</param>
/// <param name="importRow">Source item whose database is used to resolve the linked media item.</param>
private void HandleNextNode(Queue<HtmlNode> nodes, IDataMap map, string itemPath, Item importRow)
{
    var node = nodes.Dequeue();
    var nodeName = node.Name.ToLower();
    var parentNode = node.ParentNode;

    var childNodes = node.SelectNodes("./*|./text()");
    if (childNodes != null)
    {
        foreach (var child in childNodes)
        {
            nodes.Enqueue(child);
        }
    }

    if (UnwantedTags.Any(tag => tag == nodeName))
    {
        // This node is one to remove: re-attach its children to the parent first
        // so only the wrapping tag disappears.
        if (childNodes != null)
        {
            foreach (var child in childNodes)
            {
                parentNode.InsertBefore(child, node);
            }
        }

        parentNode.RemoveChild(node);
    }
    else if (node.HasAttributes)
    {
        // The node is kept: strip the unwanted attributes.
        foreach (string s in UnwantedAttributes)
        {
            node.Attributes.Remove(s);
        }

        // Replace images.
        if (nodeName.Equals("img"))
        {
            // The src attribute may be absent (or may just have been stripped above),
            // so look it up defensively instead of dereferencing a null attribute.
            string imgSrc = node.Attributes.Contains("src") ? node.Attributes["src"].Value : string.Empty;
            if (string.IsNullOrEmpty(imgSrc))
            {
                return;
            }

            DynamicLink dynamicLink;
            if (!DynamicLink.TryParse(imgSrc, out dynamicLink))
            {
                return;
            }

            MediaItem mediaItem = importRow.Database.GetItem(dynamicLink.ItemId, dynamicLink.Language ?? map.ImportToLanguage);
            if (mediaItem == null)
            {
                // The link points at an item that could not be loaded; leave the image untouched.
                return;
            }

            var mediaParentItem = BuildMediaPath(map.ToDB, mediaItem.InnerItem.Paths.ParentPath);
            MediaItem newImg = HandleMediaItem(map, mediaParentItem, itemPath, mediaItem);
            if (newImg != null)
            {
                // Point the image at the media item's short-ID based URL.
                string newSrc = string.Format("-/media/{0}.ashx", newImg.ID.ToShortID());
                node.SetAttributeValue("src", newSrc);
            }
        }
    }
    else if (ReplaceTags.ContainsKey(nodeName))
    {
        // NOTE(review): this branch is only reached for nodes WITHOUT attributes, so a tag that
        // carries any attribute is never renamed — confirm whether that is intended.
        node.Name = ReplaceTags[nodeName];
    }
}