示例#1
0
        private string ThinInternal(string html, bool preserveImages, bool strict, params ModifyReplacement[] modifyReplacements)
        {
            Hashtable replacements = _tagSpecs;

            if (strict)
            {
                replacements = _tagSpecsStrict;
            }

            if (modifyReplacements != null)
            {
                replacements = (Hashtable)replacements.Clone();
                foreach (ModifyReplacement modifyReplacement in modifyReplacements)
                {
                    modifyReplacement(replacements);
                }
            }

            // Will hold the results of the leading whitespace buffer.
            // This buffer may or may not make it into the final result,
            // depending on whether any block-level tags are present.
            StringBuilder leadingOutput = new StringBuilder(10);
            // Will hold the results of everything else.
            StringBuilder mainOutput = new StringBuilder(html.Length);

            // references whichever output buffer is current.
            StringBuilder output = leadingOutput;

            SimpleHtmlParser parser = new SimpleHtmlParser(html);
            Element          el;

            bool             preserveWhitespace = false; // <pre> blocks should preserve whitespace
            WhitespaceBuffer whitespaceBuffer   = new WhitespaceBuffer();

            whitespaceBuffer.Promote(WhitespaceClass.Paragraph);  // Insert an implicit <p> unless the first non-whitespace element is a block
            bool hasBlock = false;

            while (null != (el = parser.Next()))
            {
                if (el is Tag)
                {
                    Tag    t         = (Tag)el;
                    string lowerName = t.Name.ToLower(CultureInfo.InvariantCulture);

                    TagDesc desc = (TagDesc)replacements[lowerName];
                    // if this tag is not in the table, drop it
                    if (desc == null)
                    {
                        continue;
                    }

                    // Replace tag with substitute tag if necessary (e.g. <DIV> becomes <P>)
                    string tagName = desc.Substitute;
                    if (tagName == null)
                    {
                        tagName = lowerName;
                    }

                    // special case for images
                    if (!preserveImages && tagName == TAG_IMG)
                    {
                        continue;
                    }

                    bool beginTag = el is BeginTag;

                    ElementClass elClass = WhitespaceBuffer.ClassifyTag(tagName, desc.TagType);
                    hasBlock |= (elClass == ElementClass.Block || elClass == ElementClass.Paragraph || elClass == ElementClass.Break);
                    if (!preserveWhitespace && WhitespaceBuffer.ProcessElementClass(ref whitespaceBuffer, output, elClass, true))
                    {
                        continue;
                    }

                    output = mainOutput;

                    if (beginTag)
                    {
                        WriteBeginTag(desc, tagName, ((BeginTag)el).Attributes, output);
                        if (tagName == TAG_PRE)
                        {
                            preserveWhitespace = true;
                        }
                    }
                    else if (el is EndTag)
                    {
                        if (!((EndTag)el).Implicit && desc.TagType != TagType.Empty)
                        {
                            output.Append(string.Format(CultureInfo.InvariantCulture, "</{0}>", tagName));
                        }
                        if (tagName == TAG_PRE)
                        {
                            preserveWhitespace = false;
                        }
                    }
                }
                else if (el is Text)
                {
                    string text = el.RawText;
                    text = HtmlUtils.EscapeEntities(HtmlUtils.UnEscapeEntities(text, HtmlUtils.UnEscapeMode.NonMarkupText));

                    if (!preserveWhitespace && WhitespaceBuffer.ProcessElementClass(ref whitespaceBuffer, output, WhitespaceBuffer.ClassifyText(text), false))
                    {
                        continue;
                    }

                    output = mainOutput;

                    output.Append(text);
                }
            }

            if (hasBlock && ReferenceEquals(mainOutput, output))
            {
                output.Insert(0, leadingOutput.ToString());
            }

            // The whitespace buffer may not be empty at this point.  That's OK--we want to drop trailing whitespace

            return(output.ToString());
        }