Пример #1
0
        //private static bool InvalidName(string name, params char[] alsoValid) {
        //    if (name == "?xml") return false;
        //    if (name.Length == 0) return true;

        //    for (int i = 0; i < name.Length; i++) {
        //        char c = name[i];
        //        if (i == 0) {
        //            if (!IsLetter(c)) return true;
        //        } else if (!IsLetterOrDigit(c) && (alsoValid == null || !alsoValid.Contains(c)))
        //            return true;
        //    }
        //    return false;
        //}

        /// <summary>
        /// Parser state that handles one markup construct following a '&lt;':
        /// a comment, a declaration such as a doctype, a processing instruction,
        /// a closing tag, an opening tag with its attributes, or text that merely
        /// looks like a tag. Output is written through <c>Out</c> and the state to
        /// run next is stored in <c>Next</c>.
        /// </summary>
        /// <remarks>
        /// Tag and attribute names are lower-cased with the invariant culture
        /// (unless <c>KeepTagCase</c> is set) so that comparisons against literal
        /// names such as "script" or "body" do not depend on the current thread
        /// culture (e.g. the Turkish dotted/dotless I). Delimiter searches use
        /// ordinal comparison for the same reason.
        /// </remarks>
        private void ReadTag()
        {
            var  block  = ReadTagName();
            bool closer = false;

            // An empty name terminated by '/' means the input was "</": remember that
            // this is a closing tag and read the actual name that follows the slash.
            if (block.Text.Length == 0 && block.Last == '/')
            {
                closer = true;
                block  = ReadTagName();
            }

            var tagName = stripPrefix(KeepTagCase ? block.Text : block.Text.ToLowerInvariant());

            if (block.Text.Length > 0 && block.Text[0] == '!')
            {
                if (block.Text.Left(3) == "!--")
                {
                    if (block.Last == '>' && block.Text.EndsWith("--", StringComparison.Ordinal))
                    {
                        // The complete comment was already consumed as the "tag name".
                        if (!RemoveComments)
                        {
                            Out("<!--");
                            block.Text = block.Text.Substring(3);

                            // Checked again because the text may be shorter than two
                            // characters once the leading "!--" has been dropped.
                            if (block.Text.EndsWith("--", StringComparison.Ordinal))
                            {
                                block.Text = block.Text.Substring(0, block.Text.Length - 2);
                            }

                            // XML comments may not contain "--" nor end with '-':
                            // trailing dashes are trimmed and inner pairs blanked out.
                            Out(block.Text.TrimEnd('-').Replace("--", "  "));
                            Out("-->");
                        }
                    }
                    else
                    {
                        // The comment continues past the name block; read up to and
                        // including the terminator (the last three characters of the
                        // result are cut off below as that terminator).
                        var comment = ReadUntil("-->");
                        if (!RemoveComments && comment.Length >= 3)
                        {
                            Out("<!--");
                            Out((block.Text.Substring(3) + block.Last + comment.Substring(0, comment.Length - 3)).TrimEnd('-').Replace("--", "  "));
                            Out("-->");
                        }
                    }
                }
                else
                {
                    // Any other "<!...>" declaration (doctypes and such) is skipped
                    // without producing output.
                    if (block.Last != '>')
                    {
                        block = ReadUntil('>');
                    }
                }

                Next = ReadText;
            }
            else if (tagName.Length == 0 && block.Last == '?')
            {
                // Processing instruction / XML declaration: consume it and emit nothing.
                ReadUntil("?>");
                Next = ReadText;
            }
            else if (tagName.Length == 0 || block.Last == '<' || !IsLetter(tagName[0]) || tagName.Contains(':'))
            {
                // Not a usable tag name: treat what was read as text. It is only
                // written when some element is already open; otherwise it is dropped.
                if (OpenTags.Count > 0)
                {
                    Out(_LT);
                    Out(ToXml(block.Text, Options.EntitiesOnly));

                    if (block.Last == '<')
                    {
                        // Another '<' ended the block, so a new tag starts right here.
                        Next = ReadTag;
                    }
                    else if (block.Last == '&')
                    {
                        Next = ReadEntity;
                    }
                    else
                    {
                        if (block.Last == '>')
                        {
                            Out(_GT);
                        }
                        else
                        {
                            Out(block.Last);
                        }
                        Next = ReadText;
                    }
                }
                else
                {
                    Next = ReadText;
                }
            }
            else
            {
                if (closer)
                {
                    // </body> and </html> are ignored here; they are closed manually.
                    if (tagName != "body" && tagName != "html")
                    {
                        // NOTE(review): LastIndexOf presumably yields -1 when there is
                        // no matching opener; Close is assumed to cope with that - confirm.
                        var openerIndex = OpenTags.LastIndexOf(tagName);

                        Close(openerIndex);
                    }

                    Next = ReadText;
                }
                else
                {
                    bool enabled     = true;                                 // false => the tag itself is not written
                    bool selfClosing = _SelfClosingTags.Contains(tagName);   // written as <tag ... /> and never pushed on OpenTags
                    var  attrs       = new Dictionary <string, string>();

                    // Apply the first nesting rule whose trigger list (rule[0]) contains
                    // this tag; rule[1] and rule[2] are handed to AutoClose unchanged.
                    foreach (var rule in _NestingRules)
                    {
                        if (rule[0].Contains(tagName))
                        {
                            AutoClose(rule[1], rule[2]);
                            break;
                        }
                    }

                    if (EnforceHtmlElement)
                    {
                        if (tagName != "html" && OpenTags.Count == 0)
                        {
                            // First element is not <html>: open one implicitly.
                            OpenTags.Add("html");
                            Out("<html>");
                        }
                        else if (tagName == "html" && OpenTags.Count > 0)
                        {
                            // <html> while elements are already open: suppress it.
                            enabled = false;
                        }
                        else if (tagName == "body" && OpenTags.Contains("body"))
                        {
                            // Second <body>: suppress it.
                            enabled = false;
                        }

                        // Content outside both <head> and <body> gets an implicit <body>.
                        if (OpenTags.Count > 0 && tagName != "body" && tagName != "head" && !OpenTags.Contains("head") && !OpenTags.Contains("body"))
                        {
                            Out("<body>");
                            OpenTags.Add("body");
                        }
                    }

                    if (enabled)
                    {
                        // script/style are closed by this method itself further down,
                        // so they are never tracked as open tags.
                        if (!selfClosing && tagName != "script" && tagName != "style")
                        {
                            OpenTags.Add(tagName);
                        }

                        Out("<" + tagName);
                        NumTagsWritten++;
                    }

                    // Attributes are consumed even when the tag is suppressed so the
                    // reader ends up behind the closing '>'.
                    while (block.Last != '>')
                    {
                        ReadWhileWhitespace();
                        block = ReadAttrName();
                        if (block.Text.Length == 0 && block.Last == 0)
                        {
                            break;
                        }
                        var attrName = KeepTagCase ? block.Text : block.Text.ToLowerInvariant();

                        attrName = stripPrefix(attrName);
                        if (attrName.Length == 0 ||
                            !IsLetter(attrName[0]) ||
                            attrName.Contains(':'))
                        {
                            // Skip names that cannot be used as attribute names.
                            continue;
                        }

                        ReadWhileWhitespace();

                        char? c = Peek();
                        if (c == null)
                        {
                            break;
                        }

                        var attrValue = string.Empty;

                        // The '=' may have terminated the name block already, or it may
                        // still be pending in the input after optional whitespace.
                        if (block.Last == '=' || c == '=')
                        {
                            if (c == '=')
                            {
                                Input.Read();
                            }
                            ReadWhileWhitespace();
                            c = Peek();
                            if (c == null)
                            {
                                break;
                            }

                            if (c == '\'')
                            {
                                Input.Read();
                                block     = Read(Modes.attrValueTick);
                                attrValue = '\'' + block.Text.Trim() + '\'';
                            }
                            else if (c == '"')
                            {
                                Input.Read();
                                block     = Read(Modes.attrValueQuote);
                                attrValue = '"' + block.Text.Trim() + '"';
                            }
                            else
                            {
                                // Unquoted value: wrap it in double quotes.
                                block     = Read(Modes.attrValue);
                                attrValue = '"' + block.Text + '"';
                            }
                        }
                        else
                        {
                            // Attribute without a value: use its own name as the value.
                            attrValue = '"' + attrName + '"';
                        }

                        if (enabled && !(RemoveXmlns && attrName == "xmlns"))
                        {
                            // The indexer adds or overwrites, so the last occurrence of
                            // a duplicated attribute wins.
                            attrs[attrName] = ToXml(attrValue, Options.EntitiesOnly);
                        }
                    }

                    foreach (var attr in attrs)
                    {
                        Out(' ' + attr.Key + '=' + attr.Value);
                    }

                    if (enabled && selfClosing)
                    {
                        Out('/');
                    }
                    if (enabled)
                    {
                        Out('>');
                    }

                    if (tagName == "script" || tagName == "style")
                    {
                        // Raw-text elements: take everything up to the matching closing
                        // tag verbatim instead of parsing it as markup.
                        ReadWhileWhitespace();
                        var text = ReadUntil("</" + tagName, StringComparison.OrdinalIgnoreCase);
                        if (text.Length >= tagName.Length + 2)
                        {
                            // Drop the trailing "</name" and skip the rest of the closing tag.
                            text = text.Substring(0, text.Length - tagName.Length - 2).TrimEnd();
                            ReadUntil('>');
                        }

                        if (text.Length > 0)
                        {
                            // Ensure the content sits inside a CDATA section: add a
                            // comment-wrapped marker when none is present, otherwise
                            // escape only what lies outside the existing section.
                            var i = text.IndexOf("<![CDATA[", StringComparison.Ordinal);
                            if (i == -1)
                            {
                                text = "/*<![CDATA[*/" + text;
                            }
                            else
                            {
                                var part = text.Substring(0, i);
                                text = ToXml(part, Options.EntitiesOnly) + text.Substring(i);
                            }

                            i = text.IndexOf("]]>", StringComparison.Ordinal);
                            if (i == -1)
                            {
                                text = text + "/*]]>*/";
                            }
                            else
                            {
                                var part = text.Substring(i + 3);
                                text = text.Substring(0, i + 3) + ToXml(part, Options.EntitiesOnly);
                            }
                        }

                        if (enabled)
                        {
                            Out(text + "</" + tagName + ">");
                        }
                    }

                    Next = ReadText;
                }
            }
        }
Пример #2
0
 /// <summary>
 /// Tells whether <paramref name="tag"/> counts as closed.
 /// </summary>
 /// <param name="tag">Tag name to look up.</param>
 /// <returns>
 /// When <c>openFixed</c> is set: <c>true</c> if the tag is absent from
 /// <c>OpenTags</c>. Otherwise: <c>true</c> if the tag is present in
 /// <c>ClosedTags</c>.
 /// </returns>
 protected bool IsClosed(string tag)
 {
     if (openFixed)
     {
         return !OpenTags.Contains(tag);
     }

     return ClosedTags.Contains(tag);
 }