/// <summary>
/// Rewrites an HTML string into a normalized form: tag and attribute names are
/// lower-cased, the <c>contenteditable</c>, <c>atomicselection</c> and
/// <c>unselectable</c> attributes are dropped, style text is lower-cased, and
/// begin tags rejected by <c>RemoveMeaninglessTags</c> are omitted.
/// </summary>
/// <param name="html">The markup to clean up.</param>
/// <param name="xml">
/// When true, attribute values and text are escaped with
/// <c>HtmlUtils.EscapeEntitiesForXml</c>; otherwise attribute values are escaped with
/// <c>HtmlUtils.EscapeEntities</c> and text is emitted without extra escaping.
/// </param>
/// <returns>The cleaned-up markup.</returns>
/// <remarks>
/// The whole string is re-parsed and re-written until a pass completes without
/// any tag being removed.
/// </remarks>
private string CleanupHtml(string html, bool xml)
{
    bool tagWasRemoved;
    do
    {
        tagWasRemoved = false;
        StringBuilder cleaned = new StringBuilder(html.Length);
        SimpleHtmlParser parser = new SimpleHtmlParser(html);

        Element element;
        while ((element = parser.Next()) != null)
        {
            if (element is BeginTag)
            {
                BeginTag beginTag = (BeginTag)element;
                if (RemoveMeaninglessTags(parser, beginTag))
                {
                    // Dropping one tag can expose another removable tag,
                    // so schedule one more full pass over the result.
                    tagWasRemoved = true;
                    continue;
                }

                cleaned.Append("<");
                cleaned.Append(beginTag.Name.ToLower(CultureInfo.InvariantCulture));

                foreach (Attr attribute in beginTag.Attributes)
                {
                    // These three attributes are never written to the output.
                    bool isStripped = attribute.NameEquals("contenteditable")
                        || attribute.NameEquals("atomicselection")
                        || attribute.NameEquals("unselectable");
                    if (isStripped)
                    {
                        continue;
                    }

                    cleaned.Append(" ");
                    cleaned.Append(attribute.Name.ToLower(CultureInfo.InvariantCulture));

                    // A null value means the attribute is written as a bare name.
                    string value = attribute.Value;
                    if (value == null)
                    {
                        continue;
                    }

                    if (attribute.NameEquals("style"))
                    {
                        value = LowerCaseCss(value);
                    }
                    else if (attribute.Name == value)
                    {
                        // Value repeats the attribute name exactly: lower-case it as well.
                        value = value.ToLower(CultureInfo.InvariantCulture);
                    }

                    string escapedValue;
                    if (xml)
                    {
                        escapedValue = HtmlUtils.EscapeEntitiesForXml(value, true);
                    }
                    else
                    {
                        escapedValue = HtmlUtils.EscapeEntities(value);
                    }
                    cleaned.AppendFormat("=\"{0}\"", escapedValue);
                }

                if (beginTag.HasResidue)
                {
                    // With no attributes at all, a separator is inserted between
                    // the tag name and the residue.
                    if (beginTag.Attributes.Length == 0)
                    {
                        cleaned.Append(" ");
                    }
                    cleaned.Append(beginTag.Residue);
                }

                if (beginTag.Complete)
                {
                    cleaned.Append(" /");
                }
                cleaned.Append(">");
                continue;
            }

            if (element is EndTag)
            {
                string endTagName = ((EndTag)element).Name.ToLower(CultureInfo.InvariantCulture);
                cleaned.AppendFormat("</{0}>", endTagName);
                continue;
            }

            if (element is Text)
            {
                string text = HtmlUtils.TidyNbsps(element.RawText);
                if (xml)
                {
                    // Unescape first, then re-escape using the XML rules.
                    string unescaped = HtmlUtils.UnEscapeEntities(text, HtmlUtils.UnEscapeMode.NonMarkupText);
                    text = HtmlUtils.EscapeEntitiesForXml(unescaped, false);
                }
                cleaned.Append(text);
                continue;
            }

            if (element is StyleText)
            {
                cleaned.Append(element.RawText.ToLower(CultureInfo.InvariantCulture));
                continue;
            }

            // Every other element kind is copied through unchanged.
            cleaned.Append(element.RawText);
        }

        html = cleaned.ToString();
    }
    while (tagWasRemoved);

    return html;
}