/// <summary>
/// Used as a part of HTML thinning: appends a reduced rendering of <paramref name="node"/>,
/// its descendants, and every sibling that follows it to <paramref name="escapedText"/>.
/// </summary>
/// <remarks>
/// Text nodes are appended HTML-encoded. Element nodes whose name is in the active
/// preserve list are re-emitted with their attributes filtered by StripAttributes;
/// element nodes whose name is a key in ReplaceTags are emitted using the mapped
/// replacement tag; any other node contributes no markup of its own, but its
/// children are still processed.
/// Siblings are walked iteratively and only descent into children recurses, so the
/// stack depth follows the nesting depth of the markup rather than the sibling count.
/// </remarks>
/// <param name="node">The first node to process; its following siblings are processed as well. A null node is a no-op.</param>
/// <param name="escapedText">The builder that receives the thinned HTML.</param>
/// <param name="preserveImages">When true, PreserveTagsWithImages is used as the preserve list instead of PreserveTags.</param>
/// <param name="progress">Ticked once for every non-text node that is visited.</param>
private static void StripChildNodes(IHTMLDOMNode node, StringBuilder escapedText, bool preserveImages, TickableProgressTick progress)
{
    // The same list must drive both the opening and the closing tag decision.
    ArrayList preserveTags = preserveImages ? PreserveTagsWithImages : PreserveTags;

    for (IHTMLDOMNode current = node; current != null; current = current.nextSibling)
    {
        // Text nodes carry no markup: encode the text and move on to the next sibling.
        if (current.nodeType == HTMLDocumentHelper.HTMLDOMNodeTypes.TextNode)
        {
            object text = current.nodeValue;
            if (text != null)
            {
                escapedText.Append(HttpUtility.HtmlEncode(text.ToString()));
            }
            continue;
        }

        progress.Tick();

        string nodeName = current.nodeName;

        // Markup that has to be written after the children have been processed.
        // It is captured at the moment the tag is opened so that the closing markup
        // always matches what was actually emitted.
        string pendingClose = null;

        if (preserveTags.Contains(nodeName))
        {
            // Opening tag, with the attributes filtered by StripAttributes.
            escapedText.Append('<').Append(nodeName);
            StripAttributes((IHTMLElement)current, escapedText);

            if (current.hasChildNodes())
            {
                // Leave the tag open; it is closed once the children are written.
                escapedText.Append('>');
                pendingClose = "</" + nodeName + ">";
            }
            else if (nodeName == HTMLTokens.IFrame)
            {
                // IFrames are always written with an explicit end tag rather than
                // in self-closing form.
                escapedText.Append("></").Append(nodeName).Append('>');
            }
            else
            {
                escapedText.Append("/>");
            }
        }
        else if (ReplaceTags.Contains(nodeName))
        {
            string replacement = (string)ReplaceTags[nodeName];

            if (!current.hasChildNodes())
            {
                // No children: the replacement tag alone is enough.
                escapedText.Append('<').Append(replacement).Append("/>");
            }
            else if (IsChildlessTag(replacement))
            {
                // The replacement cannot wrap content, so it is emitted as a
                // self-closed tag after the children instead of around them.
                pendingClose = "<" + replacement + "/>";
            }
            else
            {
                escapedText.Append('<').Append(replacement).Append('>');
                pendingClose = "</" + replacement + ">";
            }
        }

        // Children are processed even when the current node itself was dropped.
        IHTMLDOMNode firstChild = current.firstChild;
        if (firstChild != null)
        {
            StripChildNodes(firstChild, escapedText, preserveImages, progress);
        }

        if (pendingClose != null)
        {
            escapedText.Append(pendingClose);
        }
    }
}