//private static bool InvalidName(string name, params char[] alsoValid) {
//    if (name == "?xml") return false;
//    if (name.Length == 0) return true;
//    for (int i = 0; i < name.Length; i++) {
//        char c = name[i];
//        if (i == 0) {
//            if (!IsLetter(c)) return true;
//        } else if (!IsLetterOrDigit(c) && (alsoValid == null || !alsoValid.Contains(c)))
//            return true;
//    }
//    return false;
//}

/// <summary>
/// Reads a single tag from the input and writes its normalized form through <c>Out</c>,
/// then sets <c>Next</c> to the reader that should handle whatever follows.
/// </summary>
/// <remarks>
/// NOTE(review): presumably called right after the opening '&lt;' has been consumed - confirm
/// against the caller. Depending on what is read, one of these paths is taken:
/// <list type="bullet">
/// <item><description>Comment: re-emitted unless <c>RemoveComments</c> is set; trailing dashes
/// are trimmed and any inner "--" is replaced by a space.</description></item>
/// <item><description>Any other "!" declaration (doctype and such): skipped up to '&gt;'.</description></item>
/// <item><description>"?" processing instruction: consumed up to "?&gt;" and dropped.</description></item>
/// <item><description>Invalid tag name (empty, not starting with a letter, still containing ':'
/// after <c>stripPrefix</c>, or cut short by another '&lt;'): emitted as escaped text when at
/// least one tag is open, otherwise dropped.</description></item>
/// <item><description>Closing tag: passed to <c>Close</c>, except for "body" and "html".</description></item>
/// <item><description>Opening tag: nesting rules are applied, html/body are enforced when
/// <c>EnforceHtmlElement</c> is set, attributes are normalized, and script/style content is
/// wrapped in a CDATA section.</description></item>
/// </list>
/// Tag and attribute names are lower-cased with the invariant culture (unless
/// <c>KeepTagCase</c> is set) so that the literal comparisons below behave the same under
/// every current culture (e.g. "SCRIPT" must not become "scrıpt" under tr-TR).
/// </remarks>
private void ReadTag() {
    var block = ReadTagName();
    bool closer = false;
    if (block.Text.Length == 0 && block.Last == '/') {
        // An empty name terminated by '/' is the start of a closing tag; the real name follows.
        closer = true;
        block = ReadTagName();
    }

    var tagName = stripPrefix(KeepTagCase ? block.Text : block.Text.ToLowerInvariant());

    if (block.Text.Length > 0 && block.Text[0] == '!') {
        if (block.Text.Left(3) == "!--") {
            if (block.Last == '>' && block.Text.EndsWith("--", StringComparison.Ordinal)) {
                // The whole comment was already captured by the tag-name read.
                if (!RemoveComments) {
                    Out("<!--");
                    block.Text = block.Text.Substring(3);
                    if (block.Text.EndsWith("--", StringComparison.Ordinal)) {
                        block.Text = block.Text.Substring(0, block.Text.Length - 2);
                    }
                    // "--" may not appear inside an XML comment and a comment may not end in '-'.
                    Out(block.Text.TrimEnd('-').Replace("--", " "));
                    Out("-->");
                }
            } else {
                // The comment continues past what was read so far.
                // NOTE(review): the Length >= 3 check and the 3-character trim suggest ReadUntil
                // returns the text including the "-->" terminator - confirm.
                var comment = ReadUntil("-->");
                if (!RemoveComments && comment.Length >= 3) {
                    Out("<!--");
                    Out((block.Text.Substring(3) + block.Last + comment.Substring(0, comment.Length - 3)).TrimEnd('-').Replace("--", " "));
                    Out("-->");
                }
            }
        } else {
            //if (next.text.StartsWith("!DOCTYPE html", StringComparison.OrdinalIgnoreCase)) enforceHtmlElement = true;
            if (block.Last != '>') {
                block = ReadUntil('>'); // ignore this, doctypes and such
            }
        }
        Next = ReadText;
    } else if (tagName.Length == 0 && block.Last == '?') {
        // Processing instruction: consume it up to "?>" and emit nothing.
        ReadUntil("?>");
        //if (enforceHtmlElement && tagName.StartsWith("?xml"))
        //    enforceHtmlElement = false;
        ////if (openTags.Count == 0 && !hasXmlDeclaration) {
        ////    Out('<' + tagName + ' ' + text.TrimStart());
        ////    enforceHtmlElement = false;
        ////    hasXmlDeclaration = true;
        ////}
        Next = ReadText;
        return;
    } else if (tagName.Length == 0 || block.Last == '<' || !IsLetter(tagName[0]) || tagName.Contains(':')) {
        // Not a usable tag name: treat what was read as literal text.
        if (OpenTags.Count > 0) {
            Out(_LT);
            Out(ToXml(block.Text, Options.EntitiesOnly));
            if (block.Last == '<') {
                // Another tag starts right here; restart tag parsing.
                Next = ReadTag;
            } else if (block.Last == '&') {
                Next = ReadEntity;
            } else {
                if (block.Last == '>') {
                    Out(_GT);
                } else {
                    Out(block.Last);
                }
                Next = ReadText;
            }
        } else {
            // Nothing is open yet, so the stray text is dropped.
            Next = ReadText;
        }
    } else if (closer) {
        if (tagName != "body" && tagName != "html") {
            // body and html are skipped here: we'll close these manually.
            var openerIndex = OpenTags.LastIndexOf(tagName);
            Close(openerIndex);
        }
        Next = ReadText;
    } else {
        // 'enabled' is cleared for a duplicate html/body tag: its input is still consumed,
        // but neither the tag nor its attributes are written.
        bool enabled = true;
        bool selfClosing = _SelfClosingTags.Contains(tagName); // These are handled manually
        var attrs = new Dictionary<string, string>();

        // Only the first rule whose rule[0] contains this tag is applied; rule[1] and rule[2]
        // are handed to AutoClose as-is.
        foreach (var rule in _NestingRules) {
            if (rule[0].Contains(tagName)) {
                AutoClose(rule[1], rule[2]);
                break;
            }
        }
        //if (tagName == "li") {
        //    AutoClose(new[] { "ul", "ol" }, new[] { "li" });
        //} else if (tagName == "tr") {
        //    AutoClose(new[] { "table", "thead", "tbody" }, new[] { "td", "th", "tr" });
        //} else if (tagName == "td" || tagName == "th") {
        //    AutoClose(new[] { "table", "thead", "tbody" }, new[] { "td", "th" });
        //} else if (tagName == "p" || tagName == "blockquote" || tagName == "ul" || tagName == "ol") {
        //    AutoClose(new[] { "div", "table", "body" }, new[] { "p", "blockquote", "ul", "ol" });
        //} else if (tagName == "frame") {
        //    AutoClose(new[] { "frameset" }, new[] { "frame" });
        //} else if (tagName == "frameset") {
        //    AutoClose(new[] { "frame" }, new[] { "frame" });
        //}

        //if (!EnforceHtmlElement && OpenTags.Count == 0 && _CommonTags.Contains(tagName))
        //    EnforceHtmlElement = true;
        if (EnforceHtmlElement) {
            if (tagName != "html" && OpenTags.Count == 0) {
                // The document does not start with <html>: open one ourselves.
                OpenTags.Add("html");
                Out("<html>");
            } else if (tagName == "html" && OpenTags.Count > 0) {
                enabled = false;
            } else if (tagName == "body" && OpenTags.Contains("body")) {
                enabled = false;
            }

            // Content that appears before any head/body gets an implicit <body>.
            if (OpenTags.Count > 0 && tagName != "body" && tagName != "head" && !OpenTags.Contains("head") && !OpenTags.Contains("body")) {
                Out("<body>");
                OpenTags.Add("body");
            }
        }

        if (enabled) {
            // Self-closing tags and script/style are closed within this call, so they are
            // never pushed onto the open-tag list.
            if (!selfClosing && tagName != "script" && tagName != "style") {
                OpenTags.Add(tagName);
            }
            Out("<" + tagName);
            NumTagsWritten++;
        }

        // Read attributes until the tag's closing '>' (or the end of the input).
        while (block.Last != '>') {
            ReadWhileWhitespace();
            block = ReadAttrName();
            if (block.Text.Length == 0 && block.Last == 0) {
                break;
            }

            var attrName = KeepTagCase ? block.Text : block.Text.ToLowerInvariant();
            attrName = stripPrefix(attrName);
            if (attrName.Length == 0 || !IsLetter(attrName[0]) || attrName.Contains(':')) {
                // Not a usable attribute name; skip it.
                continue;
            }

            ReadWhileWhitespace();
            char? c = Peek();
            if (c == null) {
                break;
            }

            var attrValue = string.Empty;
            if (block.Last == '=' || c == '=') {
                if (c == '=') {
                    Input.Read();
                }
                ReadWhileWhitespace();
                c = Peek();
                if (c == null) {
                    break;
                }

                if (c == '\'') {
                    Input.Read();
                    block = Read(Modes.attrValueTick);
                    attrValue = '\'' + block.Text.Trim() + '\'';
                } else if (c == '"') {
                    Input.Read();
                    block = Read(Modes.attrValueQuote);
                    attrValue = '"' + block.Text.Trim() + '"';
                } else {
                    // Unquoted value: wrap it in double quotes.
                    block = Read(Modes.attrValue);
                    attrValue = '"' + block.Text + '"';
                }
            } else {
                // Attribute without a value: use its own name as the value.
                attrValue = '"' + attrName + '"';
            }

            if (enabled && !(RemoveXmlns && attrName == "xmlns")) {
                // A repeated attribute name keeps the last value seen.
                attrs[attrName] = ToXml(attrValue, Options.EntitiesOnly);
            }
        }

        foreach (var attr in attrs) {
            Out(' ' + attr.Key + '=' + attr.Value);
        }

        if (enabled && selfClosing) {
            Out('/');
        }
        if (enabled) {
            Out('>');
        }

        if (tagName == "script" || tagName == "style") {
            ReadWhileWhitespace();
            // NOTE(review): the trim below suggests ReadUntil returns the text including the
            // matched "</tagName" terminator - confirm.
            var text = ReadUntil("</" + tagName, StringComparison.OrdinalIgnoreCase);
            if (text.Length >= tagName.Length + 2) {
                text = text.Substring(0, text.Length - tagName.Length - 2).TrimEnd();
                ReadUntil('>');
            }

            if (text.Length > 0) {
                // Make sure the raw content sits inside a CDATA section: add comment-guarded
                // markers when missing, otherwise escape whatever lies outside the existing ones.
                var i = text.IndexOf("<![CDATA[", StringComparison.Ordinal);
                if (i == -1) {
                    text = "/*<![CDATA[*/" + text;
                } else {
                    var part = text.Substring(0, i);
                    text = ToXml(part, Options.EntitiesOnly) + text.Substring(i);
                }

                i = text.IndexOf("]]>", StringComparison.Ordinal);
                if (i == -1) {
                    text = text + "/*]]>*/";
                } else {
                    var part = text.Substring(i + 3);
                    text = text.Substring(0, i + 3) + ToXml(part, Options.EntitiesOnly);
                }
            }

            if (enabled) {
                Out(text + "</" + tagName + ">");
            }
        }

        Next = ReadText;
    }
}
/// <summary>
/// Tells whether <paramref name="tag"/> is currently regarded as closed.
/// </summary>
/// <param name="tag">The tag name to look up.</param>
/// <returns>
/// When <c>openFixed</c> is set: <c>true</c> if <c>OpenTags</c> does not contain the tag.
/// Otherwise: <c>true</c> if <c>ClosedTags</c> contains the tag.
/// </returns>
protected bool IsClosed(string tag) {
    if (openFixed) {
        return !OpenTags.Contains(tag);
    }

    return ClosedTags.Contains(tag);
}