/// <summary>
/// Handles a begin tag. Sets <c>_inTitle</c> when the tag is a title tag that is not
/// marked <c>Complete</c>, then emits the tag (with its attributes) if its name is listed
/// in <c>TagsToPreserve</c>, or emits the mapped replacement name if it is a key of
/// <c>ReplaceTags</c>. Tags found in neither table produce no output here.
/// </summary>
/// <param name="tag">The begin tag being processed.</param>
protected override void OnBeginTag(BeginTag tag)
{
    bool startsTitle = tag.NameEquals(HTMLTokens.Title) && !tag.Complete;
    if (startsTitle)
    {
        _inTitle = true;
    }

    // Both tables are queried with the invariant upper-cased tag name.
    string lookupKey = tag.Name.ToUpper(CultureInfo.InvariantCulture);

    if (TagsToPreserve.Contains(lookupKey))
    {
        // Preserved tags keep the name exactly as it appeared in the source.
        EmitTagAndAttributes(tag.Name, tag);
        return;
    }

    if (ReplaceTags.ContainsKey(lookupKey))
    {
        string replacementName = (string)ReplaceTags[lookupKey];
        EmitTagAndAttributes(replacementName, tag);
    }
}
/// <summary>
/// Handles an end tag. Implicit end tags are ignored entirely. Otherwise clears
/// <c>_inTitle</c> when the tag is a title tag, then emits the tag if its name is listed
/// in <c>TagsToPreserve</c>, or emits the mapped replacement name if it is a key of
/// <c>ReplaceTags</c>. Tags found in neither table produce no output here.
/// </summary>
/// <param name="tag">The end tag being processed.</param>
protected override void OnEndTag(EndTag tag)
{
    if (!tag.Implicit)
    {
        if (tag.NameEquals(HTMLTokens.Title))
        {
            _inTitle = false;
        }

        // Both tables are queried with the invariant upper-cased tag name.
        string lookupKey = tag.Name.ToUpper(CultureInfo.InvariantCulture);
        bool isPreserved = TagsToPreserve.Contains(lookupKey);

        if (isPreserved)
        {
            // Preserved tags keep the name exactly as it appeared in the source.
            EmitTagAndAttributes(tag.Name, tag);
        }
        else if (ReplaceTags.ContainsKey(lookupKey))
        {
            string replacementName = (string)ReplaceTags[lookupKey];
            EmitTagAndAttributes(replacementName, tag);
        }
    }
}
/// <summary>
/// Dequeues the next HTML node, queues its element and text children for later
/// processing, and then cleans the node itself:
/// unwanted tags are removed while their children are kept in place, unwanted
/// attributes are stripped, image sources are rewritten to the imported media item,
/// and tags listed in <c>ReplaceTags</c> are renamed.
/// </summary>
/// <param name="nodes">Work queue of nodes still to be processed; must not be empty.</param>
/// <param name="map">Data map supplying the target database and fallback import language.</param>
/// <param name="itemPath">Path passed through to <c>HandleMediaItem</c>.</param>
/// <param name="importRow">Source item whose database is used to resolve linked media.</param>
private void HandleNextNode(Queue<HtmlNode> nodes, IDataMap map, string itemPath, Item importRow)
{
    var node = nodes.Dequeue();

    // Invariant casing keeps tag-name matching independent of the current culture
    // (for example "IMG" does not lower-case to "img" under a Turkish culture).
    var nodeName = node.Name.ToLowerInvariant();
    var parentNode = node.ParentNode;

    var childNodes = node.SelectNodes("./*|./text()");
    if (childNodes != null)
    {
        foreach (var child in childNodes)
        {
            nodes.Enqueue(child);
        }
    }

    if (UnwantedTags.Any(tag => tag == nodeName))
    {
        // Removing the node: hoist its children into the parent first so their
        // content is not lost together with the unwanted wrapper.
        if (childNodes != null)
        {
            foreach (var child in childNodes)
            {
                parentNode.InsertBefore(child, node);
            }
        }

        parentNode.RemoveChild(node);
        return;
    }

    if (node.HasAttributes)
    {
        foreach (string s in UnwantedAttributes)
        {
            node.Attributes.Remove(s);
        }

        if (nodeName.Equals("img"))
        {
            RewriteImageSource(node, map, itemPath, importRow);
        }
    }

    // Tag replacement is evaluated independently of attribute handling. Previously it
    // sat in an else-branch of the HasAttributes check, so any tag carrying an
    // attribute was never replaced.
    if (ReplaceTags.ContainsKey(nodeName))
    {
        node.Name = ReplaceTags[nodeName];
    }
}

/// <summary>
/// Points an image node's <c>src</c> at the media item produced by
/// <c>HandleMediaItem</c>. The node is left untouched when it has no <c>src</c>,
/// when the source is not a dynamic link, when the linked item cannot be loaded,
/// or when no new media item is returned.
/// </summary>
private void RewriteImageSource(HtmlNode node, IDataMap map, string itemPath, Item importRow)
{
    // The attribute indexer yields null for a missing attribute, and "src" may also
    // have been stripped above if it is configured as an unwanted attribute.
    var srcAttribute = node.Attributes["src"];
    if (srcAttribute == null)
    {
        return;
    }

    DynamicLink dynamicLink;
    if (!DynamicLink.TryParse(srcAttribute.Value, out dynamicLink))
    {
        return;
    }

    // Resolve as a plain Item first so a failed lookup can be detected before the
    // media wrapper is dereferenced.
    Item linkedItem = importRow.Database.GetItem(dynamicLink.ItemId, dynamicLink.Language ?? map.ImportToLanguage);
    if (linkedItem == null)
    {
        return;
    }

    MediaItem mediaItem = linkedItem;
    var mediaParentItem = BuildMediaPath(map.ToDB, mediaItem.InnerItem.Paths.ParentPath);
    MediaItem newImg = HandleMediaItem(map, mediaParentItem, itemPath, mediaItem);
    if (newImg != null)
    {
        string newSrc = string.Format("-/media/{0}.ashx", newImg.ID.ToShortID().ToString());
        node.SetAttributeValue("src", newSrc);
    }
}
/// <summary>
/// Used as a part of HTML thinning to remove extraneous nodes. Starting at
/// <paramref name="node"/>, writes a thinned rendering of that node, its descendants,
/// and all of its following siblings into <paramref name="escapedText"/>.
/// Text nodes are HTML-encoded; tags in the preserve list are kept (attributes are
/// written by <c>StripAttributes</c>); tags that are keys of <c>ReplaceTags</c> are
/// emitted under their replacement name; all other tags are dropped while their
/// children are still processed.
/// </summary>
/// <param name="node">The first node to process; a null node produces no output.</param>
/// <param name="escapedText">Builder that receives the thinned markup.</param>
/// <param name="preserveImages">
/// When true, <c>PreserveTagsWithImages</c> is used as the preserve list instead of
/// <c>PreserveTags</c>.
/// </param>
/// <param name="progress">Ticked once for every non-text node visited.</param>
private static void StripChildNodes(IHTMLDOMNode node, StringBuilder escapedText, bool preserveImages, TickableProgressTick progress)
{
    ArrayList preserveTags = preserveImages ? PreserveTagsWithImages : PreserveTags;

    // Siblings are walked iteratively so that stack depth depends only on how deeply
    // the markup is nested, not on how many siblings a node has.
    for (IHTMLDOMNode current = node; current != null; current = current.nextSibling)
    {
        // Text nodes: emit the encoded text and move on to the next sibling.
        if (current.nodeType == HTMLDocumentHelper.HTMLDOMNodeTypes.TextNode)
        {
            object textValue = current.nodeValue;
            if (textValue != null)
            {
                escapedText.Append(HttpUtility.HtmlEncode(textValue.ToString()));
            }
            continue;
        }

        progress.Tick();

        string nodeName = current.nodeName;

        // Markup to write after the children have been processed; null when the
        // element was dropped or has already been closed.
        string pendingClose = null;

        if (preserveTags.Contains(nodeName))
        {
            // Opening tag, with attributes filtered by StripAttributes.
            escapedText.Append("<").Append(nodeName);
            StripAttributes((IHTMLElement)current, escapedText);

            if (!current.hasChildNodes())
            {
                // Childless elements are self-closed, except IFRAME which is written
                // with an explicit end tag.
                if (nodeName == HTMLTokens.IFrame)
                {
                    escapedText.Append("></").Append(nodeName).Append(">");
                }
                else
                {
                    escapedText.Append("/>");
                }
            }
            else
            {
                escapedText.Append(">");

                // The close is decided here, from the same list that opened the tag.
                // Re-checking PreserveTags afterwards left tags that appear only in
                // PreserveTagsWithImages unclosed.
                pendingClose = "</" + nodeName + ">";
            }
        }
        else if (ReplaceTags.Contains(nodeName))
        {
            string replacement = (string)ReplaceTags[nodeName];

            if (!current.hasChildNodes())
            {
                escapedText.Append("<").Append(replacement).Append("/>");
            }
            else if (IsChildlessTag(replacement))
            {
                // A childless replacement cannot wrap content, so it is written as a
                // self-closed tag after the children.
                pendingClose = "<" + replacement + "/>";
            }
            else
            {
                escapedText.Append("<").Append(replacement).Append(">");
                pendingClose = "</" + replacement + ">";
            }
        }

        // Children are processed even when the element itself was dropped.
        if (current.firstChild != null)
        {
            StripChildNodes(current.firstChild, escapedText, preserveImages, progress);
        }

        if (pendingClose != null)
        {
            escapedText.Append(pendingClose);
        }
    }
}