Пример #1
0
        /// <summary>
        /// Builds the lexer grammar once per type: the name/attribute-value parsers, one
        /// parser per fragment kind (open tag, close tag, comment, doctype, text, script),
        /// and the two compiled entry points <c>ScriptFragmentsParser</c> and
        /// <c>AllFragmentsParser</c>.
        /// </summary>
        static HtmlLexer()
        {
            // A tag/attribute name: a required IsLetterText prefix followed by any run of
            // letters, digits, '-', '_' or ':'. The two pieces are joined with string.Concat.
            Name = IsLetterText.Required()
                   .Then(Text(c => char.IsLetter(c) || char.IsDigit(c) || c == '-' || c == '_' || c == ':'), string.Concat);

            // An attribute value with quotes.  "test" / 'test'
            QuotedValue = Any(Quoted('"'), Quoted('\''));

            // An attribute value sans quote.
            // The attribute value can remain unquoted if it doesn't contain spaces or any of " ' ` = < or >.
            UnquotedValue =
                Text(c => !char.IsWhiteSpace(c) &&
                          c != '<' &&
                          c != '>' &&
                          c != '\'' &&
                          c != '"' &&
                          c != '=' &&
                          c != '`');

            // identifier="value"
            // Or identifier=value
            // Or identifier      (no '=' part: Value is left as null)
            var attribute = Name
                            .Map(key => new HtmlAttribute { Key = key })
                            .ThenIgnoreWhitespace()
                            .Then(
                                Equal
                                .ThenIgnoreWhitespace()
                                .ThenKeep(Any(QuotedValue, UnquotedValue))
                                .Optional(() => null),
                                (attr, val) => { attr.Value = val; return attr; });

            // A '/' immediately before the closing bracket marks a self-closing tag; absent means false.
            var isSelfClosing = Slash.Map(x => true).Optional(false);

            // <identifier attrs... /?>
            OpenFragment = StartPosition
                           .ThenIgnore(OpenBracket)
                           .ThenCreateFragment(FragmentType.Open)
                           .ThenSetValue(Name).ThenIgnoreWhitespace()
                           .Then(attribute.ThenIgnoreWhitespace().Many(), (node, attrs) => { node.Attributes = attrs.ToArray(); return node; })
                           .Then(isSelfClosing, (node, sc) => { node.IsSelfClosing = sc; return node; })
                           .ThenIgnore(CloseBracket)
                           .ThenSetEndPosition();

            // <!-- Comment -->
            // The fragment value is everything between the opening marker and the first "-->".
            var commentEnd = Text("-->");

            CommentFragment = StartPosition
                              .ThenIgnore(Text("<!--"))
                              .ThenCreateFragment(FragmentType.Comment)
                              .ThenSetValue(commentEnd.NotText())
                              .ThenIgnore(commentEnd)
                              .ThenSetEndPosition();

            // </identifier>
            CloseFragment = StartPosition
                            .ThenIgnore(OpenBracket)
                            .ThenIgnore(Slash)
                            .ThenCreateFragment(FragmentType.Close)
                            .ThenSetValue(Name)
                            .ThenIgnore(CloseBracket)
                            .ThenSetEndPosition();

            // <!DOCTYPE html  ... >
            // The fragment value is everything after the keyword up to (not including) '>'.
            var doctypeContent = Text(c => c != '>');

            // NOTE(review): IText is presumably the case-insensitive variant of Text, and the
            // 'true' passed to ThenIgnoreWhitespace presumably makes whitespace mandatory — confirm.
            DoctypeFragment = StartPosition
                              .ThenIgnore(OpenBracket)
                              .ThenIgnore(IText("!doctype"))
                              .ThenIgnoreWhitespace(true)
                              .ThenCreateFragment(FragmentType.Doctype)
                              .ThenSetValue(doctypeContent)
                              .ThenIgnore(CloseBracket)
                              .ThenSetEndPosition();

            // <script> requires special text handling, since open/close tags can appear with
            // reckless abandon within a <script> tag.
            // <script>document.write('<p>Doh!</p>');</script>
            //
            // Tag names are identifiers rather than linguistic text, so they are matched with an
            // ordinal, case-insensitive comparison. string.Equals is null-safe, so a fragment
            // without a value simply does not match.
            var endScript = CloseFragment.When(x => string.Equals(x.Value, "script", StringComparison.OrdinalIgnoreCase));

            // Everything up to the closing </script> is captured verbatim as a single text fragment.
            var scriptContents = StartPosition
                                 .ThenCreateFragment(FragmentType.Text)
                                 .ThenSetValue(endScript.NotText())
                                 .ThenSetEndPosition();

            // Produces exactly three fragments: the opening tag, the raw contents, the closing tag.
            ScriptFragments = OpenFragment
                              .When(x => string.Equals(x.Value, "script", StringComparison.OrdinalIgnoreCase))
                              .Map(x => new List<Fragment>(3) { x })
                              .Then(scriptContents, (list, content) => { list.Add(content); return list; })
                              .Then(endScript, (list, close) => { list.Add(close); return list; });

            // HTML Tags / Comments
            var htmlFragments = Any(DoctypeFragment, CommentFragment, CloseFragment, OpenFragment);

            // Text is everything else: a required run of characters up to the next '<'.
            TextFragment =
                StartPosition
                .ThenCreateFragment(FragmentType.Text)
                .ThenSetValue(Text(c => c != '<').Required())
                .ThenSetEndPosition();

            // Stray open bracket: a lone '<' emitted as a one-character text fragment.
            var stray =
                StartPosition
                .ThenCreateFragment(FragmentType.Text)
                .ThenSetValue(OpenBracket.Map(x => x.ToString()))
                .ThenSetEndPosition();

            // Text and HTML. The stray-bracket fallback is listed last; this presumably relies
            // on Any trying its alternatives in order — confirm against the combinator library.
            AllFragments = Any(
                htmlFragments,
                TextFragment,
                stray);

            ScriptFragmentsParser = ScriptFragments.CompileParser("ScriptFragmentsParser");
            AllFragmentsParser    = AllFragments.CompileParser("AllFragmentsParser");
        }