Beispiel #1
0
        Parser CharacterSet(Match match)
        {
            var alt        = new AlternativeParser();
            var inverse    = match.Text.StartsWith("[^", StringComparison.Ordinal);
            var characters = new List <char>();

            for (int i = 0; i < match.Matches.Count; i++)
            {
                Match child = match.Matches[i];
                if (child.Name == null)
                {
                    continue;
                }
                switch (child.Name)
                {
                case "character range":
                    var first = Character(child.Matches.First(r => r.Name == "character"));
                    var last  = Character(child.Matches.Last(r => r.Name == "character"));
                    if (first != null && last != null)
                    {
                        alt.Add(new CharRangeTerminal(first.Value, last.Value)
                        {
                            Inverse = inverse
                        });
                    }
                    break;

                case "character":
                    var character = Character(child);
                    if (character != null)
                    {
                        characters.Add(character.Value);
                    }
                    break;

                default:
                    throw new FormatException(string.Format("Invalid character set child for text '{0}'", child.Text));
                }
            }
            if (characters.Count > 0)
            {
                alt.Add(new CharSetTerminal(characters.ToArray())
                {
                    Inverse = inverse
                });
            }
            if (alt.Items.Count > 1)
            {
                return(alt);
            }
            if (alt.Items.Count > 0)
            {
                return(alt.Items[0]);
            }
            return(new UnaryParser());
            //throw new FormatException(string.Format("Character set has no characters '{0}'", match.Text));
        }
Beispiel #2
0
        public BnfGrammar(BnfStyle style = BnfStyle.All)
            : base("bnf")
        {
            Parser ows = Terminals.WhiteSpace.Repeat(0);
            Parser rws = Terminals.WhiteSpace.Repeat(1);

            if (style.HasFlag(BnfStyle.CommonTerminals))
            {
                foreach (var terminal in Terminals.GetTerminals())
                {
                    baseLookup[terminal.Item1] = terminal.Item2;
                }
            }

            if (style.HasFlag(BnfStyle.Comments))
            {
                // allow ebnf comments
                var comment = new GroupParser("(*", "*)");
                ows = (Terminals.WhiteSpace | comment).Repeat(0);
                rws = (Terminals.WhiteSpace | comment).Repeat(1);
            }

            literal = (
                (sq & (+!sq).WithName("value").Optional() & sq)
                | (dq & (+!dq).WithName("value").Optional() & dq)
                | (+Terminals.Set(" \n\r<[{(|)}]>").Inverse()).WithName("value")
                ).WithName("parser");


            RuleNameParser = "<" & Terminals.Set("<>\n\r").Inverse().Repeat().WithName("name") & ">";

            RuleParser = new AlternativeParser();             // defined later

            TermParser      = new AlternativeParser();
            TermParser.Name = "term";
            TermParser.Add((ruleName = RuleNameParser.Named("parser")).NotFollowedBy(ows & ruleSeparator));
            if (style.HasFlag(BnfStyle.Cardinality))
            {
                TermParser.Items.Add('(' & ows & RuleParser & ows & ')');
                TermParser.Items.Add(repeatRule   = ('{' & ows & RuleParser & ows & '}').WithName("parser"));
                TermParser.Items.Add(optionalRule = ('[' & ows & RuleParser & ows & ']').WithName("parser"));
            }
            TermParser.Items.Add(literal);

            var rep = -(~((+Terminals.WhiteSpace).WithName("ws")) & TermParser);

            rep.Name     = "rep";
            rep.AddMatch = false;
            list         = (TermParser & rep).WithName("parser");

            listRepeat = (list.Named("list") & ows & '|' & ~(ows & RuleParser.Named("expression"))).WithName("parser");
            RuleParser.Items.Add(listRepeat);
            RuleParser.Items.Add(list);
            RuleParser.Items.Add((ows & RuleNameParser & ows & ruleSeparator).Not() & Terminals.WhiteSpace.Inverse().Repeat().WithName("value").Named("parser"));

            rule         = (RuleNameParser.Named("ruleName") & ows & ruleSeparator & ows & RuleParser).WithName("parser");
            Expresssions = new AlternativeParser();
            Expresssions.Items.Add(rule);

            this.Inner = ows & (+Expresssions).SeparatedBy(rws) & ows;

            AttachEvents();
        }
Beispiel #3
0
        public EbnfGrammar(EbnfStyle style)
            : base("ebnf")
        {
            Style = style;
            DefineCommonNonTerminals = true;
            GenerateSpecialSequences();

            // terminals
            var comment         = style.HasFlag(EbnfStyle.BracketComments) ? new GroupParser("(*", "*)") : new GroupParser("/*", "*/");
            var ows             = -(Terminals.WhiteSpace | comment);
            var rws             = +(Terminals.WhiteSpace | comment);
            var hex_character   = ("#x" & +Terminals.HexDigit);
            var character       = (("\\" & Terminals.AnyChar) | hex_character | Terminals.AnyChar.Except("]")).WithName("character");
            var character_range = (character & "-" & character).WithName("character range");
            var character_set   = ("[" & ~(Parser)"^" & +(character_range | character) & "]").WithName("character set");
            var terminal_string = new StringParser {
                QuoteCharacters = new [] { '\"', '\'', '’' }, Name = "terminal string"
            };
            var special_sequence         = ("?" & (+Terminals.AnyChar).Until("?").WithName("name") & "?").WithName("special sequence");
            var meta_identifier_terminal = Terminals.Letter & -(Terminals.LetterOrDigit | '_');
            var integer = new NumberParser().WithName("integer");

            // nonterminals
            var    definition_list   = new RepeatParser(0).WithName("definition list");
            var    single_definition = new RepeatParser(1).WithName("single definition");
            var    term            = new SequenceParser().WithName("term");
            var    primary         = new AlternativeParser().WithName("primary");
            var    exception       = new UnaryParser("exception");
            var    factor          = new SequenceParser().WithName("factor");
            var    meta_identifier = new RepeatParser(1).WithName("meta identifier");
            var    syntax_rule     = new SequenceParser().WithName("syntax rule");
            var    rule_equals     = new AlternativeParser().WithName("equals");
            Parser meta_reference  = meta_identifier;

            Parser grouped_sequence = ("(" & ows & definition_list & ows & ")").WithName("grouped sequence");

            if (style.HasFlag(EbnfStyle.SquareBracketAsOptional))
            {
                primary.Add(("[" & ows & definition_list & ows & "]").WithName("optional sequence"));
            }

            if (!style.HasFlag(EbnfStyle.CardinalityFlags))
            {
                var repeated_sequence = ("{" & ows & definition_list & ows & "}").WithName("repeated sequence");
                primary.Add(repeated_sequence);
            }

            // rules
            meta_identifier.Inner     = meta_identifier_terminal;
            meta_identifier.Separator = +(Terminals.SingleLineWhiteSpace);
            if (!style.HasFlag(EbnfStyle.CommaSeparator))
            {
                // w3c identifiers must be a single word
                meta_identifier.Maximum = 1;
                meta_reference          = meta_reference.NotFollowedBy(ows & rule_equals);
            }
            primary.Add(grouped_sequence, meta_reference, terminal_string, special_sequence);
            if (style.HasFlag(EbnfStyle.CharacterSets) && !style.HasFlag(EbnfStyle.SquareBracketAsOptional))
            {
                // w3c supports character sets
                primary.Add(hex_character.Named("hex character"));
                primary.Add(character_set);
            }
            if (style.HasFlag(EbnfStyle.NumericCardinality))
            {
                factor.Add(~(integer & ows & "*" & ows));
            }
            factor.Add(primary);
            if (style.HasFlag(EbnfStyle.CardinalityFlags))
            {
                // w3c defines cardinality at the end of a factor
                var flags = style.HasFlag(EbnfStyle.SquareBracketAsOptional) ? "*+" : "?*+";
                factor.Add(~(ows & Terminals.Set(flags).WithName("cardinality")));
            }
            term.Add(factor, ~(ows & "-" & ows & exception));
            exception.Inner             = term;
            single_definition.Inner     = term;
            single_definition.Separator = style.HasFlag(EbnfStyle.CommaSeparator) ? (Parser)(ows & "," & ows) : ows;
            definition_list.Inner       = single_definition;
            definition_list.Separator   = ows & "|" & ows;
            rule_equals.Add(style.HasFlag(EbnfStyle.DoubleColonEquals) ? "::=" : "=", ":=");
            syntax_rule.Add(meta_identifier, ows, rule_equals, ows, definition_list);
            if (style.HasFlag(EbnfStyle.SemicolonTerminator))
            {
                syntax_rule.Add(ows, ";");                 // iso rules are terminated by a semicolon
            }
            var syntax_rules = +syntax_rule;

            syntax_rules.Separator = style.HasFlag(EbnfStyle.SemicolonTerminator) ? ows : rws;

            Inner = ows & syntax_rules & ows;

            AttachEvents();
        }
Beispiel #4
0
        public GoldGrammar()
            : base("gold")
        {
            Parser oldSeparator = DefaultSeparator;
            // Special Terminals

            ExceptParser      parameterCh   = Terminals.Printable - Terminals.Set("\"'");
            AlternativeParser nonterminalCh = Terminals.LetterOrDigit | Terminals.Set("_-. ");
            AlternativeParser terminalCh    = Terminals.LetterOrDigit | Terminals.Set("_-.");
            ExceptParser      literalCh     = Terminals.Printable - Terminals.Set('\'');
            ExceptParser      setLiteralCh  = Terminals.Printable - Terminals.Set("[]'");
            ExceptParser      setNameCh     = Terminals.Printable - Terminals.Set("{}");

            SequenceParser    parameterName = ('"' & (+parameterCh).WithName("value") & '"').Separate();
            SequenceParser    nonterminal   = ('<' & (+nonterminalCh).WithName("value") & '>').Separate();
            AlternativeParser terminal      =
                ((+terminalCh).WithName("terminal") | ('\'' & (-literalCh).WithName("literal") & '\'')).Separate();
            SequenceParser setLiteral =
                ('[' & +(setLiteralCh.WithName("ch") | '\'' & (-literalCh).WithName("ch") & '\'') & ']').WithName(
                    "setLiteral");
            SequenceParser setName = ('{' & (+setNameCh).WithName("value") & '}').WithName("setName");

            // Line-Based Grammar Declarations

            var    comments = new GroupParser("!*", "*!", "!");
            Parser newline  = Terminals.Eol;

            whitespace       = -(Terminals.SingleLineWhiteSpace | comments);
            DefaultSeparator = whitespace;
            RepeatParser      nlOpt = -newline;
            AlternativeParser nl    = +newline | Terminals.End;

            // Parameter Definition

            AlternativeParser parameterItem = parameterName | terminal | setLiteral | setName | nonterminal;

            RepeatParser parameterItems = +parameterItem;

            SequenceParser parameterBody = parameterItems & -(nlOpt & '|' & parameterItems);

            parameter =
                (parameterName.Named("name") & nlOpt & '=' & parameterBody.WithName("body") & nl).WithName("parameter");

            // Set Definition

            AlternativeParser setItem = setLiteral | setName;

            var setExp = new AlternativeParser {
                Name = "setExp"
            };

            setExp.Add((setExp & nlOpt & '+' & setItem).WithName("add"),
                       (setExp & nlOpt & '-' & setItem).WithName("sub"),
                       setItem);


            setDecl = (setName & nlOpt & '=' & setExp & nl).WithName("setDecl");

            //  Terminal Definition

            var regExp2 = new SequenceParser();

            OptionalParser kleeneOpt = (~((Parser)'+' | '?' | '*')).WithName("kleene");

            regExpItem = ((setLiteral & kleeneOpt)
                          | (setName & kleeneOpt)
                          | (terminal.Named("terminal") & kleeneOpt)
                          | ('(' & regExp2.Named("regExp2") & ')' & kleeneOpt)).WithName("regExpItem");

            RepeatParser regExpSeq = (+regExpItem).WithName("regExpSeq");

            regExp2.Items.Add(regExpSeq);
            regExp2.Items.Add(-('|' & regExpSeq));

            regExp = (regExpSeq & -(nlOpt & '|' & regExpSeq)).WithName("regExp");

            RepeatParser terminalName = +terminal;

            terminalDecl = (terminalName.Named("name") & nlOpt & '=' & regExp & nl).WithName("terminalDecl");

            // Rule Definition
            symbol = (terminal.Named("terminal") | nonterminal.Named("nonterminal")).WithName("symbol");

            handle = (-symbol).WithName("handle");
            SequenceParser handles = handle & -(nlOpt & '|' & handle);

            ruleDecl = (nonterminal.Named("name") & nlOpt & "::=" & handles & nl).WithName("ruleDecl");

            // Rules

            AlternativeParser definitionDecl = parameter | setDecl | terminalDecl | ruleDecl;

            RepeatParser content = -definitionDecl;

            Inner = nlOpt & content & nlOpt;

            DefaultSeparator = oldSeparator;
            AttachEvents();
        }