/// <summary>
/// Feeds one classified element into the pending whitespace state.
/// Whitespace classes (Paragraph, Break, Space) are folded into
/// <paramref name="whitespace"/> (allocating the buffer when it is null) and the
/// method returns true, telling the caller not to emit the element itself.
/// For Block and NotBlock, any pending buffer is rendered into
/// <paramref name="output"/> and then cleared, and the method returns false.
/// </summary>
/// <param name="whitespace">Pending whitespace buffer; may be null on entry and may be set to null or to a new buffer on exit.</param>
/// <param name="output">Builder that receives the rendered whitespace when a non-whitespace element arrives.</param>
/// <param name="elclass">Classification of the element being processed.</param>
/// <param name="isBeginTag">Not read by this method.</param>
/// <returns>true if the element was whitespace and was absorbed into the buffer; otherwise false.</returns>
public static bool ProcessElementClass(ref WhitespaceBuffer whitespace, StringBuilder output, ElementClass elclass, bool isBeginTag)
{
    bool isWhitespaceClass =
        elclass == ElementClass.Paragraph ||
        elclass == ElementClass.Break ||
        elclass == ElementClass.Space;

    if (isWhitespaceClass)
    {
        // Lazily create the buffer, then record the strongest whitespace seen so far.
        if (whitespace == null)
        {
            whitespace = new WhitespaceBuffer();
        }

        whitespace.Promote((WhitespaceClass)elclass);
        return true;
    }

    bool isContentClass =
        elclass == ElementClass.Block ||
        elclass == ElementClass.NotBlock;

    if (isContentClass)
    {
        // Real content arrived: flush whatever whitespace was pending, then reset.
        if (whitespace != null)
        {
            output.Append(whitespace.Render(elclass));
            whitespace = null;
        }

        return false;
    }

    // Any other value is unexpected; report it and treat it as non-whitespace.
    Trace.Fail("Unknown element class " + elclass.ToString());
    return false;
}
/// <summary>
/// Updates the pending whitespace buffer and the output builder for one classified element.
/// Paragraph, Break and Space are accumulated into <paramref name="whitespace"/>
/// (created on demand) and reported as whitespace, so the caller should not write the
/// element to the output. Block and NotBlock cause any pending buffer to be rendered into
/// <paramref name="output"/> and discarded.
/// </summary>
/// <param name="whitespace">Pending whitespace buffer, passed by reference; may be null.</param>
/// <param name="output">Destination for rendered whitespace.</param>
/// <param name="elclass">Classification of the current element.</param>
/// <param name="isBeginTag">Not read by this method.</param>
/// <returns>true when the element was whitespace; false otherwise (including unknown classes).</returns>
public static bool ProcessElementClass(ref WhitespaceBuffer whitespace, StringBuilder output, ElementClass elclass, bool isBeginTag)
{
    bool absorbedAsWhitespace = false;

    switch (elclass)
    {
        case ElementClass.Space:
        case ElementClass.Break:
        case ElementClass.Paragraph:
            if (whitespace == null)
            {
                whitespace = new WhitespaceBuffer();
            }

            whitespace.Promote((WhitespaceClass)elclass);
            absorbedAsWhitespace = true;
            break;

        case ElementClass.NotBlock:
        case ElementClass.Block:
            // Non-whitespace content: emit the buffered whitespace (if any) and reset it.
            if (whitespace != null)
            {
                output.Append(whitespace.Render(elclass));
                whitespace = null;
            }

            break;

        default:
            Trace.Fail("Unknown element class " + elclass.ToString());
            break;
    }

    return absorbedAsWhitespace;
}
/// <summary>
/// Parses <paramref name="html"/> and rebuilds it using only the tags present in the
/// selected tag table, collapsing whitespace-class elements through a
/// <c>WhitespaceBuffer</c> except while inside a preformatted (TAG_PRE) element.
/// </summary>
/// <param name="html">Markup to process.</param>
/// <param name="preserveImages">When false, tags whose effective name equals TAG_IMG are dropped.</param>
/// <param name="strict">When true, the strict tag table is used instead of the default one.</param>
/// <param name="modifyReplacements">Optional callbacks applied to a clone of the tag table before processing.</param>
/// <returns>The rebuilt markup; trailing buffered whitespace is not emitted.</returns>
private string ThinInternal(string html, bool preserveImages, bool strict, params ModifyReplacement[] modifyReplacements)
{
    Hashtable tagTable = _tagSpecs;
    if (strict)
    {
        tagTable = _tagSpecsStrict;
    }

    if (modifyReplacements != null)
    {
        // Work on a copy so the callbacks do not alter the shared table instance.
        tagTable = (Hashtable)tagTable.Clone();
        for (int i = 0; i < modifyReplacements.Length; i++)
        {
            modifyReplacements[i](tagTable);
        }
    }

    // Receives the rendering of the leading whitespace buffer. Whether it ends up
    // in the result depends on whether any block-level tags are present.
    StringBuilder leadingOutput = new StringBuilder(10);

    // Receives everything else.
    StringBuilder mainOutput = new StringBuilder(html.Length);

    // Points at whichever of the two builders is currently being written.
    StringBuilder output = leadingOutput;

    SimpleHtmlParser parser = new SimpleHtmlParser(html);

    // Whitespace is kept as-is while inside a TAG_PRE element.
    bool insidePre = false;

    // Start with a pending paragraph: insert an implicit <p> unless the first
    // non-whitespace element is a block.
    WhitespaceBuffer pendingWhitespace = new WhitespaceBuffer();
    pendingWhitespace.Promote(WhitespaceClass.Paragraph);

    bool sawBlock = false;

    for (Element el = parser.Next(); null != el; el = parser.Next())
    {
        if (el is Tag)
        {
            Tag tag = (Tag)el;
            string lowerName = tag.Name.ToLower(CultureInfo.InvariantCulture);
            TagDesc desc = (TagDesc)tagTable[lowerName];

            // Tags missing from the table are dropped.
            if (desc == null)
            {
                continue;
            }

            // Use the substitute tag when one is configured (e.g. <DIV> becomes <P>).
            string tagName = desc.Substitute ?? lowerName;

            // Special case for images.
            if (!preserveImages && tagName == TAG_IMG)
            {
                continue;
            }

            bool isBeginTag = el is BeginTag;
            ElementClass elClass = WhitespaceBuffer.ClassifyTag(tagName, desc.TagType);

            if (elClass == ElementClass.Block || elClass == ElementClass.Paragraph || elClass == ElementClass.Break)
            {
                sawBlock = true;
            }

            if (!insidePre)
            {
                // Whitespace-class tags are absorbed into the buffer and not written.
                if (WhitespaceBuffer.ProcessElementClass(ref pendingWhitespace, output, elClass, true))
                {
                    continue;
                }
            }

            output = mainOutput;

            if (isBeginTag)
            {
                WriteBeginTag(desc, tagName, ((BeginTag)el).Attributes, output);
                if (tagName == TAG_PRE)
                {
                    insidePre = true;
                }
            }
            else if (el is EndTag)
            {
                if (!((EndTag)el).Implicit && desc.TagType != TagType.Empty)
                {
                    output.Append(string.Format(CultureInfo.InvariantCulture, "</{0}>", tagName));
                }

                if (tagName == TAG_PRE)
                {
                    insidePre = false;
                }
            }

            continue;
        }

        if (!(el is Text))
        {
            continue;
        }

        // Normalize entities: unescape, then escape again.
        string text = HtmlUtils.EscapeEntities(
            HtmlUtils.UnEscapeEntities(el.RawText, HtmlUtils.UnEscapeMode.NonMarkupText));

        if (!insidePre)
        {
            if (WhitespaceBuffer.ProcessElementClass(ref pendingWhitespace, output, WhitespaceBuffer.ClassifyText(text), false))
            {
                continue;
            }
        }

        output = mainOutput;
        output.Append(text);
    }

    // Prepend the leading whitespace rendering only when a block-level element was
    // seen and output has switched over to the main builder.
    if (sawBlock && ReferenceEquals(mainOutput, output))
    {
        output.Insert(0, leadingOutput.ToString());
    }

    // The whitespace buffer may still hold content here; that is intentional,
    // since trailing whitespace is meant to be dropped.
    return output.ToString();
}
/// <summary>
/// Walks the elements of <paramref name="html"/> and produces a reduced version that
/// keeps only tags found in the active tag table (optionally renamed to their substitute),
/// re-escapes text, and routes whitespace-class elements through a <c>WhitespaceBuffer</c>
/// unless a TAG_PRE element is currently open.
/// </summary>
/// <param name="html">Markup to process.</param>
/// <param name="preserveImages">If false, tags resolving to TAG_IMG are skipped.</param>
/// <param name="strict">Selects the strict tag table when true.</param>
/// <param name="modifyReplacements">Optional delegates invoked on a cloned tag table.</param>
/// <returns>The reduced markup as a string.</returns>
private string ThinInternal(string html, bool preserveImages, bool strict, params ModifyReplacement[] modifyReplacements)
{
    Hashtable specs = _tagSpecs;
    if (strict)
    {
        specs = _tagSpecsStrict;
    }

    if (modifyReplacements != null)
    {
        // Callbacks operate on a clone rather than on the table referenced by the field.
        specs = (Hashtable)specs.Clone();
        foreach (ModifyReplacement adjust in modifyReplacements)
        {
            adjust(specs);
        }
    }

    // Holds the rendering of the leading whitespace buffer; it is only merged into
    // the result when block-level tags are present.
    StringBuilder leadingOutput = new StringBuilder(10);

    // Holds all remaining output.
    StringBuilder mainOutput = new StringBuilder(html.Length);

    // The builder currently being written to.
    StringBuilder output = leadingOutput;

    SimpleHtmlParser parser = new SimpleHtmlParser(html);

    // True between an emitted TAG_PRE begin tag and its end tag.
    bool keepWhitespace = false;

    // Seed the buffer with a paragraph: insert an implicit <p> unless the first
    // non-whitespace element is a block.
    WhitespaceBuffer buffered = new WhitespaceBuffer();
    buffered.Promote(WhitespaceClass.Paragraph);

    bool hasBlock = false;

    while (true)
    {
        Element el = parser.Next();
        if (null == el)
        {
            break;
        }

        if (el is Tag)
        {
            Tag currentTag = (Tag)el;
            string lowerName = currentTag.Name.ToLower(CultureInfo.InvariantCulture);
            TagDesc desc = (TagDesc)specs[lowerName];

            // Unknown tags are dropped.
            if (desc == null)
            {
                continue;
            }

            // Replace the tag with its substitute if one is defined (e.g. <DIV> becomes <P>).
            string tagName = desc.Substitute;
            if (tagName == null)
            {
                tagName = lowerName;
            }

            // Special case for images.
            if (!preserveImages && tagName == TAG_IMG)
            {
                continue;
            }

            bool beginTag = el is BeginTag;
            ElementClass elClass = WhitespaceBuffer.ClassifyTag(tagName, desc.TagType);

            switch (elClass)
            {
                case ElementClass.Block:
                case ElementClass.Paragraph:
                case ElementClass.Break:
                    hasBlock = true;
                    break;
            }

            bool absorbed = !keepWhitespace
                && WhitespaceBuffer.ProcessElementClass(ref buffered, output, elClass, true);
            if (absorbed)
            {
                continue;
            }

            output = mainOutput;

            if (beginTag)
            {
                BeginTag opening = (BeginTag)el;
                WriteBeginTag(desc, tagName, opening.Attributes, output);
                if (tagName == TAG_PRE)
                {
                    keepWhitespace = true;
                }
            }
            else if (el is EndTag)
            {
                EndTag closing = (EndTag)el;
                if (!closing.Implicit && desc.TagType != TagType.Empty)
                {
                    output.Append(string.Format(CultureInfo.InvariantCulture, "</{0}>", tagName));
                }

                if (tagName == TAG_PRE)
                {
                    keepWhitespace = false;
                }
            }
        }
        else if (el is Text)
        {
            string unescaped = HtmlUtils.UnEscapeEntities(el.RawText, HtmlUtils.UnEscapeMode.NonMarkupText);
            string text = HtmlUtils.EscapeEntities(unescaped);

            bool absorbed = !keepWhitespace
                && WhitespaceBuffer.ProcessElementClass(ref buffered, output, WhitespaceBuffer.ClassifyText(text), false);
            if (absorbed)
            {
                continue;
            }

            output = mainOutput;
            output.Append(text);
        }
    }

    // Merge the leading rendering in front of the main output only when a
    // block-level element was seen and the main builder is the active one.
    if (hasBlock && ReferenceEquals(mainOutput, output))
    {
        output.Insert(0, leadingOutput.ToString());
    }

    // Anything still buffered is trailing whitespace and is deliberately dropped.
    return output.ToString();
}