コード例 #1
0
        /// <summary>
        /// Rewrites HTML so that tag names, attribute names, and style text are lowercase.
        /// </summary>
        /// <remarks>
        /// While rewriting, the <c>contenteditable</c>, <c>atomicselection</c> and <c>unselectable</c>
        /// attributes are dropped, and any begin tag for which <c>RemoveMeaninglessTags</c> returns
        /// true is left out of the output. Whenever a tag is left out, the whole pass is repeated on
        /// the result, because removing one tag may expose another tag that should be removed.
        /// </remarks>
        /// <param name="html">The markup to clean up.</param>
        /// <param name="xml">
        /// When true, attribute values are escaped with <c>HtmlUtils.EscapeEntitiesForXml</c> and text
        /// is unescaped and then re-escaped for XML. When false, attribute values are escaped with
        /// <c>HtmlUtils.EscapeEntities</c> and text only passes through <c>HtmlUtils.TidyNbsps</c>.
        /// </param>
        /// <returns>The cleaned-up markup.</returns>
        private string CleanupHtml(string html, bool xml)
        {
            bool needsCleanup;

            do
            {
                needsCleanup = false;
                StringBuilder output = new StringBuilder(html.Length);
                SimpleHtmlParser htmlParser = new SimpleHtmlParser(html);

                for (Element el; null != (el = htmlParser.Next());)
                {
                    BeginTag bt = el as BeginTag;
                    if (bt != null)
                    {
                        // NOTE(review): RemoveMeaninglessTags receives the parser, so it presumably
                        // consumes whatever belongs to the removed tag - confirm against its definition.
                        if (RemoveMeaninglessTags(htmlParser, bt))
                        {
                            // Dropping a tag may leave another removable tag behind, so schedule
                            // one more pass over the rewritten markup.
                            needsCleanup = true;
                            continue;
                        }

                        AppendCleanedBeginTag(output, bt, xml);
                    }
                    else if (el is EndTag)
                    {
                        output.Append("</")
                              .Append(((EndTag)el).Name.ToLower(CultureInfo.InvariantCulture))
                              .Append(">");
                    }
                    else if (el is Text)
                    {
                        string textHtml = HtmlUtils.TidyNbsps(el.RawText);
                        if (xml)
                        {
                            // Round-trip through unescape/escape so the text ends up XML-escaped.
                            string unescaped =
                                HtmlUtils.UnEscapeEntities(textHtml, HtmlUtils.UnEscapeMode.NonMarkupText);
                            textHtml = HtmlUtils.EscapeEntitiesForXml(unescaped, false);
                        }
                        output.Append(textHtml);
                    }
                    else if (el is StyleText)
                    {
                        output.Append(el.RawText.ToLower(CultureInfo.InvariantCulture));
                    }
                    else
                    {
                        // Every other element kind is copied through unchanged.
                        output.Append(el.RawText);
                    }
                }

                html = output.ToString();
            } while (needsCleanup);

            return html;
        }

        /// <summary>
        /// Appends a begin tag to <paramref name="output"/> with its tag name and attribute names
        /// lowercased, skipping the <c>contenteditable</c>, <c>atomicselection</c> and
        /// <c>unselectable</c> attributes.
        /// </summary>
        /// <param name="output">The builder receiving the rewritten tag.</param>
        /// <param name="bt">The begin tag to write.</param>
        /// <param name="xml">Selects XML escaping (true) or HTML escaping (false) for attribute values.</param>
        private void AppendCleanedBeginTag(StringBuilder output, BeginTag bt, bool xml)
        {
            output.Append("<");
            output.Append(bt.Name.ToLower(CultureInfo.InvariantCulture));

            // Tracks whether anything was written after the tag name; needed to decide whether
            // the residue still requires a separating space.
            bool wroteAttribute = false;

            foreach (Attr attr in bt.Attributes)
            {
                if (attr.NameEquals("contenteditable") || attr.NameEquals("atomicselection") ||
                    attr.NameEquals("unselectable"))
                {
                    continue;
                }

                wroteAttribute = true;
                output.Append(" ");
                output.Append(attr.Name.ToLower(CultureInfo.InvariantCulture));

                if (attr.Value == null)
                {
                    // Attribute without a value: only the name is written.
                    continue;
                }

                string attrVal = attr.Value;
                if (attr.NameEquals("style"))
                {
                    attrVal = LowerCaseCss(attrVal);
                }
                else if (attr.Name == attr.Value)
                {
                    // A value exactly equal to the attribute name is lowercased together with the
                    // name (presumably boolean attributes such as checked="checked" - TODO confirm).
                    attrVal = attrVal.ToLower(CultureInfo.InvariantCulture);
                }

                string escapedVal = xml
                    ? HtmlUtils.EscapeEntitiesForXml(attrVal, true)
                    : HtmlUtils.EscapeEntities(attrVal);
                output.Append("=\"").Append(escapedVal).Append("\"");
            }

            if (bt.HasResidue)
            {
                // Separate the residue from the tag name when no attribute was written. This covers
                // both a tag with no attributes and a tag whose attributes were all skipped above;
                // checking only Attributes.Length would glue the residue onto the tag name in the
                // latter case.
                if (!wroteAttribute)
                {
                    output.Append(" ");
                }
                output.Append(bt.Residue);
            }

            if (bt.Complete)
            {
                output.Append(" /");
            }
            output.Append(">");
        }