示例#1
0
            /// <summary>
            /// Feeds one classified element into the pending-whitespace state.
            /// Whitespace-like classes (Paragraph, Break, Space) are absorbed into
            /// <paramref name="whitespace"/>; Block and NotBlock classes flush any pending
            /// whitespace into <paramref name="output"/> and clear the buffer.
            /// </summary>
            /// <param name="whitespace">
            /// The pending whitespace buffer. Created here when null and a whitespace class
            /// arrives; reset to null after it has been rendered.
            /// </param>
            /// <param name="output">Receives the rendered whitespace when a flush happens.</param>
            /// <param name="elclass">The class of the element being processed.</param>
            /// <param name="isBeginTag">Not read by this method.</param>
            /// <returns>
            /// True when the element was whitespace (the caller should not emit it);
            /// false otherwise, including for an unrecognized class.
            /// </returns>
            public static bool ProcessElementClass(ref WhitespaceBuffer whitespace, StringBuilder output, ElementClass elclass, bool isBeginTag)
            {
                bool isWhitespaceClass = elclass == ElementClass.Paragraph
                    || elclass == ElementClass.Break
                    || elclass == ElementClass.Space;

                if (isWhitespaceClass)
                {
                    // Lazily create the buffer, then fold this element into it.
                    if (whitespace == null)
                    {
                        whitespace = new WhitespaceBuffer();
                    }

                    // The element class is cast straight to WhitespaceClass; presumably the two
                    // enums share values for these three members -- verify against their declarations.
                    whitespace.Promote((WhitespaceClass)elclass);
                    return true;
                }

                bool isContentClass = elclass == ElementClass.Block || elclass == ElementClass.NotBlock;
                if (isContentClass)
                {
                    // Real content follows: emit whatever whitespace was pending, then start fresh.
                    if (whitespace != null)
                    {
                        output.Append(whitespace.Render(elclass));
                        whitespace = null;
                    }

                    return false;
                }

                // Any other value is unexpected; report it and treat the element as non-whitespace.
                Trace.Fail("Unknown element class " + elclass.ToString());
                return false;
            }
 /// <summary>
 /// Updates the pending whitespace buffer and the output for a single element class.
 /// Block and NotBlock elements cause pending whitespace to be rendered into
 /// <paramref name="output"/>; Paragraph, Break and Space elements are accumulated in
 /// <paramref name="whitespace"/> instead of being written.
 /// </summary>
 /// <param name="whitespace">Pending whitespace buffer; may be null on entry and on exit.</param>
 /// <param name="output">Destination for rendered whitespace.</param>
 /// <param name="elclass">Class of the element under consideration.</param>
 /// <param name="isBeginTag">Not read by this method.</param>
 /// <returns>
 /// True if the element was whitespace and therefore must not be added to the output
 /// by the caller; false in every other case.
 /// </returns>
 public static bool ProcessElementClass(ref WhitespaceBuffer whitespace, StringBuilder output, ElementClass elclass, bool isBeginTag)
 {
     // Content elements: flush pending whitespace (if any) and report "not whitespace".
     if (elclass == ElementClass.Block || elclass == ElementClass.NotBlock)
     {
         if (whitespace == null)
         {
             return false;
         }

         output.Append(whitespace.Render(elclass));
         whitespace = null;
         return false;
     }

     bool absorbable = elclass == ElementClass.Paragraph
         || elclass == ElementClass.Break
         || elclass == ElementClass.Space;

     if (!absorbable)
     {
         // Neither content nor whitespace: flag the unexpected value.
         Trace.Fail("Unknown element class " + elclass.ToString());
         return false;
     }

     // Whitespace elements: make sure a buffer exists, then merge this element into it.
     if (whitespace == null)
     {
         whitespace = new WhitespaceBuffer();
     }

     whitespace.Promote((WhitespaceClass)elclass);
     return true;
 }
        /// <summary>
        /// Rewrites <paramref name="html"/> using a tag table: tags missing from the table are
        /// skipped, tags may be replaced by a substitute name, text is entity-normalized, and
        /// runs of whitespace-class elements are collapsed through a <c>WhitespaceBuffer</c>.
        /// </summary>
        /// <param name="html">The markup to process.</param>
        /// <param name="preserveImages">When false, tags whose resulting name is TAG_IMG are skipped.</param>
        /// <param name="strict">Selects <c>_tagSpecsStrict</c> instead of <c>_tagSpecs</c> as the tag table.</param>
        /// <param name="modifyReplacements">
        /// Optional callbacks that are each handed a cloned copy of the tag table to adjust.
        /// </param>
        /// <returns>The contents of whichever output buffer is current when parsing ends.</returns>
        private string ThinInternal(string html, bool preserveImages, bool strict, params ModifyReplacement[] modifyReplacements)
        {
            Hashtable tagTable = strict ? _tagSpecsStrict : _tagSpecs;

            if (modifyReplacements != null)
            {
                // Work on a clone so the callbacks never touch the shared table.
                tagTable = (Hashtable)tagTable.Clone();
                for (int i = 0; i < modifyReplacements.Length; i++)
                {
                    modifyReplacements[i](tagTable);
                }
            }

            // Receives output produced by the initial whitespace buffer. It only reaches the
            // final result if a block-level tag was encountered.
            StringBuilder leadingOutput = new StringBuilder(10);
            // Receives everything else.
            StringBuilder mainOutput = new StringBuilder(html.Length);
            // Points at whichever of the two buffers is currently being written.
            StringBuilder output = leadingOutput;

            SimpleHtmlParser parser = new SimpleHtmlParser(html);

            // True between <pre> and </pre>, where whitespace handling is bypassed.
            bool insidePre = false;
            bool sawBlock = false;

            // Start with an implicit paragraph pending; it is inserted unless the first
            // non-whitespace element is a block.
            WhitespaceBuffer pendingWhitespace = new WhitespaceBuffer();
            pendingWhitespace.Promote(WhitespaceClass.Paragraph);

            for (Element el = parser.Next(); null != el; el = parser.Next())
            {
                Tag tag = el as Tag;
                if (tag != null)
                {
                    string lowerName = tag.Name.ToLower(CultureInfo.InvariantCulture);

                    // Tags without a table entry are dropped.
                    TagDesc desc = (TagDesc)tagTable[lowerName];
                    if (desc == null)
                    {
                        continue;
                    }

                    // Use the substitute name when the table supplies one (e.g. <DIV> becomes <P>).
                    string tagName = desc.Substitute ?? lowerName;

                    // Images are dropped unless the caller asked to keep them.
                    if (tagName == TAG_IMG && !preserveImages)
                    {
                        continue;
                    }

                    ElementClass elClass = WhitespaceBuffer.ClassifyTag(tagName, desc.TagType);
                    if (elClass == ElementClass.Block || elClass == ElementClass.Paragraph || elClass == ElementClass.Break)
                    {
                        sawBlock = true;
                    }

                    // NOTE(review): isBeginTag is passed as a constant true even for end tags;
                    // confirm that is intended before relying on that argument.
                    if (!insidePre && WhitespaceBuffer.ProcessElementClass(ref pendingWhitespace, output, elClass, true))
                    {
                        continue;
                    }

                    // First real content switches writing over to the main buffer.
                    output = mainOutput;

                    BeginTag opening = el as BeginTag;
                    if (opening != null)
                    {
                        WriteBeginTag(desc, tagName, opening.Attributes, output);
                        if (tagName == TAG_PRE)
                        {
                            insidePre = true;
                        }
                    }
                    else
                    {
                        EndTag closing = el as EndTag;
                        if (closing != null)
                        {
                            // Only explicit end tags of non-empty tag types are written out.
                            if (!closing.Implicit && desc.TagType != TagType.Empty)
                            {
                                output.AppendFormat(CultureInfo.InvariantCulture, "</{0}>", tagName);
                            }

                            if (tagName == TAG_PRE)
                            {
                                insidePre = false;
                            }
                        }
                    }
                }
                else if (el is Text)
                {
                    // Unescape and re-escape so entities come out in a normalized form.
                    string unescaped = HtmlUtils.UnEscapeEntities(el.RawText, HtmlUtils.UnEscapeMode.NonMarkupText);
                    string text = HtmlUtils.EscapeEntities(unescaped);

                    if (!insidePre && WhitespaceBuffer.ProcessElementClass(ref pendingWhitespace, output, WhitespaceBuffer.ClassifyText(text), false))
                    {
                        continue;
                    }

                    output = mainOutput;
                    output.Append(text);
                }
            }

            // Prepend the leading buffer only when a block was seen and the main buffer is in use.
            if (sawBlock && ReferenceEquals(output, mainOutput))
            {
                output.Insert(0, leadingOutput.ToString());
            }

            // The whitespace buffer may still hold content here; trailing whitespace is dropped on purpose.
            return output.ToString();
        }
示例#4
0
        /// <summary>
        /// Produces a thinned copy of <paramref name="html"/>. Each parsed tag is looked up in a
        /// tag table (skipped when absent, renamed when the entry has a substitute), text nodes
        /// are unescaped and re-escaped, and whitespace-class elements are funneled through a
        /// <c>WhitespaceBuffer</c> rather than written directly.
        /// </summary>
        /// <param name="html">Source markup.</param>
        /// <param name="preserveImages">If false, tags resolving to TAG_IMG are omitted.</param>
        /// <param name="strict">If true, <c>_tagSpecsStrict</c> is used; otherwise <c>_tagSpecs</c>.</param>
        /// <param name="modifyReplacements">
        /// Optional delegates invoked, in order, on a clone of the selected tag table.
        /// </param>
        /// <returns>The text of the output buffer that is current once the parser is exhausted.</returns>
        private string ThinInternal(string html, bool preserveImages, bool strict, params ModifyReplacement[] modifyReplacements)
        {
            Hashtable specs = strict ? _tagSpecsStrict : _tagSpecs;
            if (modifyReplacements != null)
            {
                // Customizations are applied to a private copy of the table.
                specs = (Hashtable)specs.Clone();
                foreach (ModifyReplacement customize in modifyReplacements)
                {
                    customize(specs);
                }
            }

            // Output of the leading whitespace buffer; kept only if a block-level tag shows up.
            StringBuilder leadingOutput = new StringBuilder(10);
            // Output of everything after that.
            StringBuilder mainOutput = new StringBuilder(html.Length);
            // The buffer currently being appended to.
            StringBuilder output = leadingOutput;

            SimpleHtmlParser parser = new SimpleHtmlParser(html);

            // Whitespace processing is skipped while inside a <pre> element.
            bool keepWhitespace = false;
            bool blockSeen = false;

            // An implicit <p> is pending from the start; it is used unless the first
            // non-whitespace element is a block.
            WhitespaceBuffer buffered = new WhitespaceBuffer();
            buffered.Promote(WhitespaceClass.Paragraph);

            Element el;
            while (null != (el = parser.Next()))
            {
                if (!(el is Tag))
                {
                    // Non-tag elements: only text is handled, anything else is ignored.
                    if (el is Text)
                    {
                        string text = HtmlUtils.EscapeEntities(
                            HtmlUtils.UnEscapeEntities(el.RawText, HtmlUtils.UnEscapeMode.NonMarkupText));

                        bool absorbed = !keepWhitespace
                            && WhitespaceBuffer.ProcessElementClass(ref buffered, output, WhitespaceBuffer.ClassifyText(text), false);
                        if (!absorbed)
                        {
                            output = mainOutput;
                            output.Append(text);
                        }
                    }

                    continue;
                }

                string lowerName = ((Tag)el).Name.ToLower(CultureInfo.InvariantCulture);

                // A tag with no entry in the table is dropped.
                TagDesc desc = (TagDesc)specs[lowerName];
                if (desc == null)
                {
                    continue;
                }

                // Prefer the substitute tag name when one is defined (e.g. <DIV> becomes <P>).
                string tagName = desc.Substitute;
                if (tagName == null)
                {
                    tagName = lowerName;
                }

                // Special case: images are removed unless explicitly preserved.
                bool isDroppedImage = !preserveImages && tagName == TAG_IMG;
                if (isDroppedImage)
                {
                    continue;
                }

                ElementClass elClass = WhitespaceBuffer.ClassifyTag(tagName, desc.TagType);
                switch (elClass)
                {
                    case ElementClass.Block:
                    case ElementClass.Paragraph:
                    case ElementClass.Break:
                        blockSeen = true;
                        break;
                }

                // NOTE(review): the isBeginTag argument is always true here, for begin and end
                // tags alike; confirm whether the actual tag kind was meant to be passed.
                if (!keepWhitespace && WhitespaceBuffer.ProcessElementClass(ref buffered, output, elClass, true))
                {
                    continue;
                }

                // From the first emitted element onward, write to the main buffer.
                output = mainOutput;

                if (el is BeginTag)
                {
                    WriteBeginTag(desc, tagName, ((BeginTag)el).Attributes, output);
                    if (tagName == TAG_PRE)
                    {
                        keepWhitespace = true;
                    }
                }
                else if (el is EndTag)
                {
                    // Implicit end tags and end tags of empty tag types are not written.
                    bool writeClosingTag = !((EndTag)el).Implicit && desc.TagType != TagType.Empty;
                    if (writeClosingTag)
                    {
                        output.AppendFormat(CultureInfo.InvariantCulture, "</{0}>", tagName);
                    }

                    if (tagName == TAG_PRE)
                    {
                        keepWhitespace = false;
                    }
                }
            }

            // The leading buffer is prepended only when a block was seen and the main buffer is active.
            if (blockSeen && ReferenceEquals(mainOutput, output))
            {
                output.Insert(0, leadingOutput.ToString());
            }

            // Anything still held in the whitespace buffer is trailing whitespace and is intentionally discarded.
            return output.ToString();
        }