/// <summary>
/// Walks <paramref name="html"/> element by element and re-emits only the tags that
/// appear in the active replacement table (optionally substituted by another tag name),
/// together with re-escaped text. Whitespace handling is delegated to
/// <c>WhitespaceBuffer</c>, except inside &lt;pre&gt; where every element is written as-is.
/// </summary>
/// <param name="html">Markup to process. Must not be null (its length is read up front).</param>
/// <param name="preserveImages">When false, tags that resolve to <c>TAG_IMG</c> are dropped.</param>
/// <param name="strict">Selects <c>_tagSpecsStrict</c> instead of <c>_tagSpecs</c> as the tag table.</param>
/// <param name="modifyReplacements">
/// Optional callbacks; when the array is non-null each one is invoked on a clone of the
/// selected tag table before parsing starts.
/// </param>
/// <returns>
/// The contents of the main buffer (prefixed with the leading buffer when a block-like
/// tag was seen), or the contents of the leading buffer if nothing was ever written to
/// the main buffer.
/// </returns>
private string ThinInternal(string html, bool preserveImages, bool strict, params ModifyReplacement[] modifyReplacements)
{
    Hashtable tagTable = strict ? _tagSpecsStrict : _tagSpecs;

    if (modifyReplacements != null)
    {
        // The callbacks get a clone, not the table stored in the field.
        tagTable = (Hashtable)tagTable.Clone();
        foreach (ModifyReplacement adjust in modifyReplacements)
        {
            adjust(tagTable);
        }
    }

    // Receives whatever the whitespace buffer emits before the first real content.
    // It only reaches the final result if a block-level tag shows up.
    StringBuilder leadBuffer = new StringBuilder(10);

    // Receives everything else.
    StringBuilder bodyBuffer = new StringBuilder(html.Length);

    // Whichever of the two buffers is currently being written to.
    StringBuilder current = leadBuffer;

    // True while inside <pre>, where whitespace must be kept verbatim.
    bool insidePre = false;

    // Becomes true once any Block, Paragraph or Break class tag has been seen.
    bool sawBlock = false;

    // Start in the Paragraph state: an implicit <p> is inserted unless the first
    // non-whitespace element is itself a block.
    WhitespaceBuffer pendingWhitespace = new WhitespaceBuffer();
    pendingWhitespace.Promote(WhitespaceClass.Paragraph);

    SimpleHtmlParser reader = new SimpleHtmlParser(html);
    for (Element node = reader.Next(); node != null; node = reader.Next())
    {
        if (node is Tag)
        {
            ThinTag(node, tagTable, preserveImages, bodyBuffer, ref current, ref pendingWhitespace, ref insidePre, ref sawBlock);
        }
        else if (node is Text)
        {
            ThinText(node, bodyBuffer, ref current, ref pendingWhitespace, insidePre);
        }
    }

    // Nothing was ever routed to the main buffer: hand back the leading buffer alone.
    if (!ReferenceEquals(bodyBuffer, current))
    {
        return current.ToString();
    }

    if (sawBlock)
    {
        bodyBuffer.Insert(0, leadBuffer.ToString());
    }

    // The whitespace buffer may still hold content here; that is intentional, since
    // trailing whitespace is meant to be dropped.
    return bodyBuffer.ToString();
}

// Handles one tag element: looks it up in the tag table, applies substitution and the
// image filter, lets the whitespace buffer see it, and writes the begin/end tag.
private void ThinTag(Element node, Hashtable tagTable, bool preserveImages, StringBuilder bodyBuffer, ref StringBuilder current, ref WhitespaceBuffer pendingWhitespace, ref bool insidePre, ref bool sawBlock)
{
    string lowered = ((Tag)node).Name.ToLower(CultureInfo.InvariantCulture);

    // Tags that are not in the table are dropped.
    TagDesc spec = (TagDesc)tagTable[lowered];
    if (spec == null)
    {
        return;
    }

    // Use the substitute tag when one is configured (e.g. <DIV> becomes <P>).
    string emittedName = spec.Substitute ?? lowered;

    // Images are dropped unless the caller asked to keep them.
    if (!preserveImages && emittedName == TAG_IMG)
    {
        return;
    }

    ElementClass kind = WhitespaceBuffer.ClassifyTag(emittedName, spec.TagType);
    if (kind == ElementClass.Block || kind == ElementClass.Paragraph || kind == ElementClass.Break)
    {
        sawBlock = true;
    }

    // Outside <pre>, the whitespace buffer gets first look at the element. A true
    // result means there is nothing more to write for it here (presumably the buffer
    // absorbed it -- confirm against WhitespaceBuffer.ProcessElementClass).
    if (!insidePre && WhitespaceBuffer.ProcessElementClass(ref pendingWhitespace, current, kind, true))
    {
        return;
    }

    // From the first element that gets past the whitespace buffer onwards, write to
    // the main buffer.
    current = bodyBuffer;

    BeginTag opening = node as BeginTag;
    if (opening != null)
    {
        WriteBeginTag(spec, emittedName, opening.Attributes, current);
        if (emittedName == TAG_PRE)
        {
            insidePre = true;
        }
        return;
    }

    EndTag closing = node as EndTag;
    if (closing != null)
    {
        // Implicit end tags and end tags of empty elements produce no output.
        if (!closing.Implicit && spec.TagType != TagType.Empty)
        {
            string closeMarkup = string.Format(CultureInfo.InvariantCulture, "</{0}>", emittedName);
            current.Append(closeMarkup);
        }
        if (emittedName == TAG_PRE)
        {
            insidePre = false;
        }
    }
}

// Handles one text element: normalizes its entity escaping, lets the whitespace buffer
// see it, and appends it to the main buffer.
private void ThinText(Element node, StringBuilder bodyBuffer, ref StringBuilder current, ref WhitespaceBuffer pendingWhitespace, bool insidePre)
{
    // Unescape then re-escape the raw text before it is written.
    string unescaped = HtmlUtils.UnEscapeEntities(node.RawText, HtmlUtils.UnEscapeMode.NonMarkupText);
    string escaped = HtmlUtils.EscapeEntities(unescaped);

    if (!insidePre && WhitespaceBuffer.ProcessElementClass(ref pendingWhitespace, current, WhitespaceBuffer.ClassifyText(escaped), false))
    {
        return;
    }

    current = bodyBuffer;
    current.Append(escaped);
}