/// <summary>
/// Builds the parser structure used to read BNF grammar text.
/// </summary>
/// <param name="enhanced">
/// When true, every Parser-typed property of <c>Terminals</c> is registered by name in
/// <c>baseLookup</c>, and terms may also be written as "( )" groups, "{ }" repeats and "[ ]" optionals.
/// </param>
public BnfGrammar(bool enhanced = true)
    : base("bnf")
{
    if (enhanced)
    {
        // Expose the Parser-typed properties of Terminals under their property names.
#if CORECLR
        foreach (var terminalProperty in typeof(Terminals).GetTypeInfo().DeclaredProperties)
#else
        foreach (var terminalProperty in typeof(Terminals).GetProperties())
#endif
        {
#if CORECLR
            bool isParser = typeof(Parser).GetTypeInfo().IsAssignableFrom(terminalProperty.PropertyType.GetTypeInfo());
#else
            bool isParser = typeof(Parser).IsAssignableFrom(terminalProperty.PropertyType);
#endif
            if (!isParser)
            {
                continue;
            }

            // Read with a null target, i.e. as a static property.
            var terminal = terminalProperty.GetValue(null, null) as Parser;
            baseLookup[terminalProperty.Name] = terminal.Named(terminalProperty.Name);
        }
    }

    var endOfLine = sws & +(sws & Terminals.Eol);

    // A literal is a single- or double-quoted string; the text between the quotes is optional.
    var singleQuoted = sq & (+!sq).WithName("value").Optional() & sq;
    var doubleQuoted = dq & (+!dq).WithName("value").Optional() & dq;
    literal = (singleQuoted | doubleQuoted).WithName("parser");

    // A rule name is any run of characters other than '>' enclosed in angle brackets.
    RuleNameParser = "<" & Terminals.Set('>').Inverse().Repeat().WithName("name") & ">";

    // Created empty here; its alternatives are added further down, after the parsers that refer to it.
    RuleParser = new AlternativeParser();

    ruleName = RuleNameParser.Named("parser");
    TermParser = literal | ruleName;
    TermParser.Name = "term";

    if (enhanced)
    {
        TermParser.Items.Add('(' & sws & RuleParser & sws & ')');

        repeatRule = ('{' & sws & RuleParser & sws & '}').WithName("parser");
        TermParser.Items.Add(repeatRule);

        optionalRule = ('[' & sws & RuleParser & sws & ']').WithName("parser");
        TermParser.Items.Add(optionalRule);
    }

    // One term followed by any number of further terms, each optionally preceded by a named "ws" run.
    list = (TermParser & -(~((+Terminals.SingleLineWhiteSpace).WithName("ws")) & TermParser)).WithName("parser");

    // A list, a '|' and optionally a further expression.
    listRepeat = (list.Named("list") & ws & '|' & sws & ~(RuleParser.Named("expression"))).WithName("parser");

    // listRepeat is added before the plain list.
    RuleParser.Items.Add(listRepeat);
    RuleParser.Items.Add(list);

    rule = (~endOfLine & sws & RuleNameParser.Named("ruleName") & ws & ruleSeparator & sws & RuleParser & endOfLine).WithName("parser");

    Expresssions = new AlternativeParser();
    Expresssions.Items.Add(rule);

    this.Inner = ws & +Expresssions & ws;

    AttachEvents();
}
/// <summary>
/// Converts a matched character set into a parser for that set.
/// </summary>
/// <param name="match">Match whose named children are "character" and "character range" entries.</param>
/// <returns>
/// An alternative over all produced terminals when there is more than one, the single terminal when there
/// is exactly one, or an empty <c>UnaryParser</c> when the set produced none.
/// </returns>
/// <exception cref="FormatException">A named child is neither a "character" nor a "character range".</exception>
Parser CharacterSet(Match match)
{
    var alternatives = new AlternativeParser();

    // A leading "[^" marks the set as inverted; every terminal created below carries that flag.
    bool inverted = match.Text.StartsWith("[^", StringComparison.Ordinal);
    var singles = new List<char>();

    foreach (Match child in match.Matches)
    {
        string kind = child.Name;
        if (kind == null)
        {
            // Unnamed children are skipped.
            continue;
        }

        if (kind == "character range")
        {
            // The first and last "character" children are the two ends of the range.
            var lower = Character(child.Matches.First(r => r.Name == "character"));
            var upper = Character(child.Matches.Last(r => r.Name == "character"));
            if (lower != null && upper != null)
            {
                alternatives.Add(new CharRangeTerminal(lower.Value, upper.Value) { Inverse = inverted });
            }
        }
        else if (kind == "character")
        {
            var single = Character(child);
            if (single != null)
            {
                singles.Add(single.Value);
            }
        }
        else
        {
            throw new FormatException(string.Format("Invalid character set child for text '{0}'", child.Text));
        }
    }

    // All individually listed characters are folded into one set terminal.
    if (singles.Count > 0)
    {
        alternatives.Add(new CharSetTerminal(singles.ToArray()) { Inverse = inverted });
    }

    switch (alternatives.Items.Count)
    {
        case 0:
            // An empty set is tolerated and yields an empty parser (an earlier revision threw
            // a FormatException "Character set has no characters" here instead).
            return new UnaryParser();
        case 1:
            // No need for an alternative wrapper around a single terminal.
            return alternatives.Items[0];
        default:
            return alternatives;
    }
}
/// <summary>
/// An alternative made of three digit rules must fail on the provider's scanner input
/// and must leave the scanner offset at 0.
/// </summary>
public void NoMatchMatch()
{
    Rule d = Rule.AssignParser(null, Prims.Digit);
    AlternativeParser rp = d | d | d;
    IScanner scan = Provider.Scanner;

    ParserMatch m = rp.Parse(scan);

    // IsFalse states the intent directly instead of IsTrue(!...).
    Assert.IsFalse(m.Success);
    // Expected value goes first so a failure message reports expected/actual the right way round.
    Assert.AreEqual(0, scan.Offset);
}
/// <summary>
/// Parsing with an alternative of three "digit" rules is expected to fail on the provider's
/// input and to leave the scanner offset at 0.
/// </summary>
public void NoMatchMatch()
{
    var digit = new Rule("digit", Prims.Digit);
    AlternativeParser alternatives = digit | digit | digit;
    IScanner scanner = Provider.NewScanner;

    ParserMatch result = alternatives.Parse(scanner);

    Assert.IsFalse(result.Success);
    Assert.AreEqual(0, scanner.Offset);
}
/// <summary>
/// Builds the JSON grammar: an object or an array, optionally surrounded by whitespace.
/// Match events are disabled and matching is case sensitive, except for the boolean and
/// null terminals, which are explicitly case insensitive.
/// </summary>
public JsonGrammar()
    : base("json")
{
    EnableMatchEvents = false;
    CaseSensitive = true;

    // Terminals.
    var stringValue = new StringParser { AllowEscapeCharacters = true, Name = "string" };
    var numberValue = new NumberParser { AllowExponent = true, AllowSign = true, AllowDecimal = true, Name = "number" };
    var booleanValue = new BooleanTerminal { Name = "bool", TrueValues = new[] { "true" }, FalseValues = new[] { "false" }, CaseSensitive = false };
    var propertyName = new StringParser { AllowEscapeCharacters = true, Name = "name" };
    var nullValue = new LiteralTerminal { Value = "null", Name = "null", CaseSensitive = false };
    var spacing = new RepeatCharTerminal(char.IsWhiteSpace);
    // A comma with whitespace items on both sides.
    var comma = new RepeatCharTerminal(new RepeatCharItem(char.IsWhiteSpace), ',', new RepeatCharItem(char.IsWhiteSpace));

    // Named non-terminals: these are the matches a caller gets back.
    var objectRule = new SequenceParser { Name = "object" };
    var arrayRule = new SequenceParser { Name = "array" };
    var propertyRule = new SequenceParser { Name = "property" };

    // Rules. The object and array sequences are still empty when the value alternative is
    // created; they are filled in right after so that the definitions can refer to each other.
    AlternativeParser anyValue = stringValue | numberValue | objectRule | arrayRule | booleanValue | nullValue;
    objectRule.Add("{", (-propertyRule).SeparatedBy(comma), "}");
    propertyRule.Add(propertyName, ":", anyValue);
    arrayRule.Add("[", (-anyValue).SeparatedBy(comma), "]");

    // Separate sequence and repeating parsers by whitespace.
    anyValue.SeparateChildrenBy(spacing, false);

    // Allow whitespace before and after the top-level object or array.
    Inner = spacing & (objectRule | arrayRule) & spacing;
}
/// <summary>
/// Builds the EBNF grammar: one or more syntax rules of the form
/// "meta identifier (= or :=) definition list ;", surrounded by <c>cws</c>.
/// </summary>
public EbnfGrammar()
    : base("ebnf")
{
    DefineCommonNonTerminals = true;
    GenerateSpecialSequences();

    // Terminals. These are built before DefaultSeparator is switched below.
    AlternativeParser terminalString =
        ("'" & (+Terminals.AnyChar).Until("'").WithName("value") & "'")
        | ("\"" & (+Terminals.AnyChar).Until("\"").WithName("value") & "\"")
        | ("’" & (+Terminals.AnyChar).Until("’").WithName("value") & "’");
    SequenceParser specialSequence = ("?" & (+Terminals.AnyChar).Until("?").WithName("name") & "?").WithName("special sequence");
    SequenceParser metaIdentifierTerminal = Terminals.Letter & -(Terminals.LetterOrDigit | '_');
    var integer = new NumberParser();

    // Everything from here to the restore at the end is built with cws as the default separator.
    // NOTE(review): presumably parsers capture DefaultSeparator when they are created, so the
    // statement order in this constructor matters - confirm before reordering.
    Parser previousSeparator = DefaultSeparator;
    DefaultSeparator = cws;

    // Non-terminals: created empty first so the rules below can refer to each other.
    var definitionList = new UnaryParser("definition list");
    var singleDefinition = new UnaryParser("single definition");
    var term = new UnaryParser("term");
    var primary = new UnaryParser("primary");
    var exception = new UnaryParser("exception");
    var factor = new UnaryParser("factor");
    var metaIdentifier = new UnaryParser("meta identifier");
    var syntaxRule = new UnaryParser("syntax rule");
    var ruleEquals = new UnaryParser("equals");

    SequenceParser optionalSequence = ("[" & definitionList & "]").WithName("optional sequence");
    SequenceParser repeatedSequence = ("{" & definitionList & "}").WithName("repeated sequence");
    SequenceParser groupedSequence = ("(" & definitionList & ")").WithName("grouped sequence");

    // Rules.
    metaIdentifier.Inner = (+metaIdentifierTerminal).SeparatedBy(ws);
    primary.Inner = optionalSequence
        | repeatedSequence
        | specialSequence
        | groupedSequence
        | metaIdentifier
        | terminalString.Named("terminal string")
        | null;
    // Optional "integer *" prefix in front of a primary.
    factor.Inner = ~(integer.Named("integer") & "*") & primary;
    // Optional "- exception" suffix after a factor.
    term.Inner = factor & ~("-" & exception);
    exception.Inner = term;
    singleDefinition.Inner = term & -("," & term);
    definitionList.Inner = singleDefinition & -("|" & singleDefinition);
    ruleEquals.Inner = (Parser)"=" | ":=";
    syntaxRule.Inner = metaIdentifier & ruleEquals & definitionList & ";";

    Inner = cws & +syntaxRule & cws;

    DefaultSeparator = previousSeparator;

    AttachEvents();
}
/// <summary>
/// With the alternative digit | digit | letter, the parse must succeed with a match of
/// length 1 and advance the scanner by one.
/// NOTE(review): the test name suggests the provider's input starts with a letter, so only
/// the third alternative matches - confirm against the provider.
/// </summary>
public void ThirdMatch()
{
    Rule d = Rule.AssignParser(null, Prims.Digit);
    Rule l = Rule.AssignParser(null, Prims.Letter);
    AlternativeParser rp = d | d | l;
    IScanner scan = Provider.Scanner;

    ParserMatch m = rp.Parse(scan);

    Assert.IsTrue(m.Success);
    // Expected value goes first so a failure message reports expected/actual the right way round.
    Assert.AreEqual(1, m.Length);
    Assert.AreEqual(1, scan.Offset);
}
/// <summary>
/// Parsing with digit | digit | letter is expected to succeed with a match of length 1
/// and to move the scanner offset to 1.
/// </summary>
public void ThirdMatch()
{
    var digit = new Rule("digit", Prims.Digit);
    var letter = new Rule("letter", Prims.Letter);
    AlternativeParser alternatives = digit | digit | letter;
    IScanner scanner = Provider.NewScanner;

    ParserMatch result = alternatives.Parse(scanner);

    Assert.IsTrue(result.Success);
    Assert.AreEqual(1, result.Length);
    Assert.AreEqual(1, scanner.Offset);
}
/// <summary>
/// With the alternative letter | digit, the parse must succeed with a match of length 1
/// and advance the scanner by one.
/// NOTE(review): the test name suggests the provider's input starts with a letter, so the
/// first alternative matches - confirm against the provider.
/// </summary>
public void FirstMatch()
{
    Rule d = new Rule();
    Rule l = new Rule();
    d.Parser = Prims.Digit;
    l.Parser = Prims.Letter;
    AlternativeParser rp = l | d;
    IScanner scan = Provider.Scanner;

    ParserMatch m = rp.Parse(scan);

    Assert.IsTrue(m.Success);
    // Expected value goes first so a failure message reports expected/actual the right way round.
    Assert.AreEqual(1, m.Length);
    Assert.AreEqual(1, scan.Offset);
}
/// <summary>
/// Builds the parser structure used to read BNF grammar text.
/// </summary>
/// <param name="enhanced">
/// When true, the entries returned by <c>Terminals.GetTerminals()</c> are registered by name in
/// <c>baseLookup</c>, and terms may also be written as "( )" groups, "{ }" repeats and "[ ]" optionals.
/// </param>
public BnfGrammar(bool enhanced = true)
    : base("bnf")
{
    if (enhanced)
    {
        // Item1 is the lookup name, Item2 the parser registered under it.
        foreach (var entry in Terminals.GetTerminals())
        {
            baseLookup[entry.Item1] = entry.Item2.Named(entry.Item1);
        }
    }

    // A literal is a single-quoted string, a double-quoted string, or a bare run of characters
    // that are neither whitespace nor one of the grammar's bracket / '|' characters.
    literal = (
        (sq & (+!sq).WithName("value").Optional() & sq)
        | (dq & (+!dq).WithName("value").Optional() & dq)
        | (+Terminals.WhiteSpace.Inverse().Except(Terminals.Set("<[{(|)}]>"))).WithName("value")
    ).WithName("parser");

    // A rule name is any run of characters other than '<' and '>' enclosed in angle brackets.
    RuleNameParser = "<" & Terminals.Set("<>").Inverse().Repeat().WithName("name") & ">";

    // Created empty here; its alternatives are added further down, after the parsers that refer to it.
    RuleParser = new AlternativeParser();

    // A rule reference only counts as a term when it is not followed by the rule separator,
    // i.e. when it is not the start of the next rule definition.
    ruleName = RuleNameParser.Named("parser");
    TermParser = ruleName.NotFollowedBy(ows & ruleSeparator) | literal;
    TermParser.Name = "term";

    if (enhanced)
    {
        TermParser.Items.Add('(' & ows & RuleParser & ows & ')');

        repeatRule = ('{' & ows & RuleParser & ows & '}').WithName("parser");
        TermParser.Items.Add(repeatRule);

        optionalRule = ('[' & ows & RuleParser & ows & ']').WithName("parser");
        TermParser.Items.Add(optionalRule);
    }

    // Last alternative: a single bracket character taken as a value, as long as the text at
    // this point is not the head of a new rule definition.
    TermParser.Items.Add((ows & RuleNameParser & ows & ruleSeparator).Not() & Terminals.Set("<[{(}]>").WithName("value").Named("parser"));

    // One term followed by any number of further terms, each optionally preceded by named "ws".
    list = (TermParser & -(~(rws.Named("ws")) & TermParser)).WithName("parser");

    // A list, a '|' and optionally a further expression.
    listRepeat = (list.Named("list") & ows & '|' & ~(ows & RuleParser.Named("expression"))).WithName("parser");

    // listRepeat is added before the plain list.
    RuleParser.Items.Add(listRepeat);
    RuleParser.Items.Add(list);

    rule = (RuleNameParser.Named("ruleName") & ows & ruleSeparator & ows & RuleParser).WithName("parser");

    Expresssions = new AlternativeParser();
    Expresssions.Items.Add(rule);

    this.Inner = ows & (+Expresssions).SeparatedBy(rws) & ows;

    AttachEvents();
}
/// <summary>
/// Rebuilds <c>separator</c> from the <c>Comment</c> and <c>Whitespace</c> parsers:
/// a repetition over whichever of the two are set, or null when neither is.
/// </summary>
void CreateSeparator()
{
    var choices = new AlternativeParser();

    // Comment is added first, then Whitespace.
    var comment = Comment;
    if (comment != null)
    {
        choices.Items.Add(comment);
    }

    var whitespace = Whitespace;
    if (whitespace != null)
    {
        choices.Items.Add(whitespace);
    }

    separator = choices.Items.Count > 0 ? -choices : null;
}
/// <summary>
/// Builds the parser structure used to read BNF grammar text.
/// </summary>
/// <param name="style">
/// Feature flags: <c>CommonTerminals</c> registers the entries of <c>Terminals.GetTerminals()</c>
/// in <c>baseLookup</c>; <c>Comments</c> lets "(* ... *)" groups appear wherever whitespace may;
/// <c>Cardinality</c> enables "( )" groups, "{ }" repeats and "[ ]" optionals.
/// </param>
public BnfGrammar(BnfStyle style = BnfStyle.All)
    : base("bnf")
{
    bool withCommonTerminals = style.HasFlag(BnfStyle.CommonTerminals);
    bool withComments = style.HasFlag(BnfStyle.Comments);
    bool withCardinality = style.HasFlag(BnfStyle.Cardinality);

    // Optional / required whitespace; replaced below when comments are allowed.
    Parser optionalSpace = Terminals.WhiteSpace.Repeat(0);
    Parser requiredSpace = Terminals.WhiteSpace.Repeat(1);

    if (withCommonTerminals)
    {
        // Item1 is the lookup name, Item2 the parser registered under it.
        foreach (var entry in Terminals.GetTerminals())
        {
            baseLookup[entry.Item1] = entry.Item2;
        }
    }

    if (withComments)
    {
        // Allow EBNF-style comments anywhere whitespace is accepted.
        var commentBlock = new GroupParser("(*", "*)");
        optionalSpace = (Terminals.WhiteSpace | commentBlock).Repeat(0);
        requiredSpace = (Terminals.WhiteSpace | commentBlock).Repeat(1);
    }

    // A literal is a single-quoted string, a double-quoted string, or a bare run of characters
    // excluding space, line breaks and the grammar's bracket / '|' characters.
    literal = (
        (sq & (+!sq).WithName("value").Optional() & sq)
        | (dq & (+!dq).WithName("value").Optional() & dq)
        | (+Terminals.Set(" \n\r<[{(|)}]>").Inverse()).WithName("value")
    ).WithName("parser");

    // A rule name is a run of characters other than '<', '>' and line breaks in angle brackets.
    RuleNameParser = "<" & Terminals.Set("<>\n\r").Inverse().Repeat().WithName("name") & ">";

    // Created empty here; its alternatives are added further down, after the parsers that refer to it.
    RuleParser = new AlternativeParser();

    TermParser = new AlternativeParser();
    TermParser.Name = "term";

    // A rule reference only counts as a term when it is not followed by the rule separator,
    // i.e. when it is not the start of the next rule definition.
    ruleName = RuleNameParser.Named("parser");
    TermParser.Add(ruleName.NotFollowedBy(optionalSpace & ruleSeparator));

    if (withCardinality)
    {
        TermParser.Items.Add('(' & optionalSpace & RuleParser & optionalSpace & ')');

        repeatRule = ('{' & optionalSpace & RuleParser & optionalSpace & '}').WithName("parser");
        TermParser.Items.Add(repeatRule);

        optionalRule = ('[' & optionalSpace & RuleParser & optionalSpace & ']').WithName("parser");
        TermParser.Items.Add(optionalRule);
    }

    // Literals are added after the bracketed forms.
    TermParser.Items.Add(literal);

    // Any number of further terms, each optionally preceded by named "ws".
    var furtherTerms = -(~((+Terminals.WhiteSpace).WithName("ws")) & TermParser);
    furtherTerms.Name = "rep";
    furtherTerms.AddMatch = false;

    list = (TermParser & furtherTerms).WithName("parser");

    // A list, a '|' and optionally a further expression.
    listRepeat = (list.Named("list") & optionalSpace & '|' & ~(optionalSpace & RuleParser.Named("expression"))).WithName("parser");

    // listRepeat is added before the plain list.
    RuleParser.Items.Add(listRepeat);
    RuleParser.Items.Add(list);

    // Last alternative: a run of non-whitespace taken as a value, as long as the text at
    // this point is not the head of a new rule definition.
    RuleParser.Items.Add((optionalSpace & RuleNameParser & optionalSpace & ruleSeparator).Not() & Terminals.WhiteSpace.Inverse().Repeat().WithName("value").Named("parser"));

    rule = (RuleNameParser.Named("ruleName") & optionalSpace & ruleSeparator & optionalSpace & RuleParser).WithName("parser");

    Expresssions = new AlternativeParser();
    Expresssions.Items.Add(rule);

    this.Inner = optionalSpace & (+Expresssions).SeparatedBy(requiredSpace) & optionalSpace;

    AttachEvents();
}
/// <summary>
/// Builds the EBNF grammar for the requested dialect.
/// </summary>
/// <param name="style">
/// Dialect flags. As used below: <c>BracketComments</c> selects "(* *)" over "/* */" comments;
/// <c>SquareBracketAsOptional</c> enables "[ ]" optional sequences (and disables character sets);
/// <c>CardinalityFlags</c> enables trailing "?", "*", "+" and disables "{ }" repeats;
/// <c>CommaSeparator</c> separates terms with "," and allows multi-word identifiers;
/// <c>CharacterSets</c> enables "#x.." characters and "[...]" sets; <c>NumericCardinality</c>
/// enables an "integer *" prefix; <c>DoubleColonEquals</c> selects "::=" over "=";
/// <c>SemicolonTerminator</c> ends each rule with ";".
/// </param>
public EbnfGrammar(EbnfStyle style)
    : base("ebnf")
{
    Style = style;
    DefineCommonNonTerminals = true;
    GenerateSpecialSequences();

    bool bracketComments = style.HasFlag(EbnfStyle.BracketComments);
    bool squareBracketAsOptional = style.HasFlag(EbnfStyle.SquareBracketAsOptional);
    bool cardinalityFlags = style.HasFlag(EbnfStyle.CardinalityFlags);
    bool commaSeparator = style.HasFlag(EbnfStyle.CommaSeparator);
    bool characterSets = style.HasFlag(EbnfStyle.CharacterSets);
    bool numericCardinality = style.HasFlag(EbnfStyle.NumericCardinality);
    bool doubleColonEquals = style.HasFlag(EbnfStyle.DoubleColonEquals);
    bool semicolonTerminator = style.HasFlag(EbnfStyle.SemicolonTerminator);

    // Terminals.
    var comment = bracketComments ? new GroupParser("(*", "*)") : new GroupParser("/*", "*/");
    var ows = -(Terminals.WhiteSpace | comment); // optional whitespace / comments
    var rws = +(Terminals.WhiteSpace | comment); // required whitespace / comments
    var hexCharacter = ("#x" & +Terminals.HexDigit);
    var character = (("\\" & Terminals.AnyChar) | hexCharacter | Terminals.AnyChar.Except("]")).WithName("character");
    var characterRange = (character & "-" & character).WithName("character range");
    var characterSet = ("[" & ~(Parser)"^" & +(characterRange | character) & "]").WithName("character set");
    var terminalString = new StringParser { QuoteCharacters = new [] { '\"', '\'', '’' }, Name = "terminal string" };
    var specialSequence = ("?" & (+Terminals.AnyChar).Until("?").WithName("name") & "?").WithName("special sequence");
    var metaIdentifierTerminal = Terminals.Letter & -(Terminals.LetterOrDigit | '_');
    var integer = new NumberParser().WithName("integer");

    // Non-terminals: created empty first so the rules below can refer to each other.
    var definitionList = new RepeatParser(0).WithName("definition list");
    var singleDefinition = new RepeatParser(1).WithName("single definition");
    var term = new SequenceParser().WithName("term");
    var primary = new AlternativeParser().WithName("primary");
    var exception = new UnaryParser("exception");
    var factor = new SequenceParser().WithName("factor");
    var metaIdentifier = new RepeatParser(1).WithName("meta identifier");
    var syntaxRule = new SequenceParser().WithName("syntax rule");
    var ruleEquals = new AlternativeParser().WithName("equals");

    Parser metaReference = metaIdentifier;
    Parser groupedSequence = ("(" & ows & definitionList & ows & ")").WithName("grouped sequence");

    // The order in which alternatives are added to primary is kept exactly as-is.
    if (squareBracketAsOptional)
    {
        primary.Add(("[" & ows & definitionList & ows & "]").WithName("optional sequence"));
    }

    if (!cardinalityFlags)
    {
        var repeatedSequence = ("{" & ows & definitionList & ows & "}").WithName("repeated sequence");
        primary.Add(repeatedSequence);
    }

    // Rules.
    metaIdentifier.Inner = metaIdentifierTerminal;
    metaIdentifier.Separator = +(Terminals.SingleLineWhiteSpace);
    if (!commaSeparator)
    {
        // W3C identifiers must be a single word. Without a "," between terms, a reference must
        // also not be followed by the rule-equals token (that would be the next rule's name).
        metaIdentifier.Maximum = 1;
        metaReference = metaReference.NotFollowedBy(ows & ruleEquals);
    }

    primary.Add(groupedSequence, metaReference, terminalString, specialSequence);

    if (characterSets && !squareBracketAsOptional)
    {
        // W3C supports character sets; skipped when "[" already opens an optional sequence.
        primary.Add(hexCharacter.Named("hex character"));
        primary.Add(characterSet);
    }

    if (numericCardinality)
    {
        // Optional "integer *" prefix in front of a primary.
        factor.Add(~(integer & ows & "*" & ows));
    }

    factor.Add(primary);

    if (cardinalityFlags)
    {
        // W3C defines cardinality at the end of a factor.
        var cardinalityChars = squareBracketAsOptional ? "*+" : "?*+";
        factor.Add(~(ows & Terminals.Set(cardinalityChars).WithName("cardinality")));
    }

    term.Add(factor, ~(ows & "-" & ows & exception));
    exception.Inner = term;

    singleDefinition.Inner = term;
    singleDefinition.Separator = commaSeparator ? (Parser)(ows & "," & ows) : ows;

    definitionList.Inner = singleDefinition;
    definitionList.Separator = ows & "|" & ows;

    ruleEquals.Add(doubleColonEquals ? "::=" : "=", ":=");

    syntaxRule.Add(metaIdentifier, ows, ruleEquals, ows, definitionList);
    if (semicolonTerminator)
    {
        // ISO rules are terminated by a semicolon.
        syntaxRule.Add(ows, ";");
    }

    // Without a terminator, consecutive rules must be separated by at least some whitespace/comment.
    var syntaxRules = +syntaxRule;
    syntaxRules.Separator = semicolonTerminator ? ows : rws;

    Inner = ows & syntaxRules & ows;

    AttachEvents();
}
/// <summary>
/// Builds the grammar for GOLD grammar files: any number of parameter, set, terminal and rule
/// declarations, with optional newlines before and after.
/// </summary>
public GoldGrammar()
    : base("gold")
{
    Parser savedSeparator = DefaultSeparator;

    // Special terminals. These are built before DefaultSeparator is switched below.
    ExceptParser parameterChar = Terminals.Printable - Terminals.Set("\"'");
    AlternativeParser nonterminalChar = Terminals.LetterOrDigit | Terminals.Set("_-. ");
    AlternativeParser terminalChar = Terminals.LetterOrDigit | Terminals.Set("_-.");
    ExceptParser literalChar = Terminals.Printable - Terminals.Set('\'');
    ExceptParser setLiteralChar = Terminals.Printable - Terminals.Set("[]'");
    ExceptParser setNameChar = Terminals.Printable - Terminals.Set("{}");

    SequenceParser parameterName = ('"' & (+parameterChar).WithName("value") & '"').Separate();
    SequenceParser nonterminal = ('<' & (+nonterminalChar).WithName("value") & '>').Separate();
    AlternativeParser terminal = ((+terminalChar).WithName("terminal") | ('\'' & (-literalChar).WithName("literal") & '\'')).Separate();
    SequenceParser setLiteral = ('[' & +(setLiteralChar.WithName("ch") | '\'' & (-literalChar).WithName("ch") & '\'') & ']').WithName("setLiteral");
    SequenceParser setName = ('{' & (+setNameChar).WithName("value") & '}').WithName("setName");

    // Line-based grammar declarations.
    var commentBlock = new GroupParser("!*", "*!", "!");
    Parser eol = Terminals.Eol;

    // From here until the restore at the end, single-line whitespace and comments are the default separator.
    // NOTE(review): presumably parsers capture DefaultSeparator when they are created, so the
    // statement order in this constructor matters - confirm before reordering.
    whitespace = -(Terminals.SingleLineWhiteSpace | commentBlock);
    DefaultSeparator = whitespace;

    RepeatParser optionalNewlines = -eol;
    AlternativeParser lineEnd = +eol | Terminals.End;

    // Parameter definition.
    AlternativeParser parameterItem = parameterName | terminal | setLiteral | setName | nonterminal;
    RepeatParser parameterItems = +parameterItem;
    SequenceParser parameterBody = parameterItems & -(optionalNewlines & '|' & parameterItems);
    parameter = (parameterName.Named("name") & optionalNewlines & '=' & parameterBody.WithName("body") & lineEnd).WithName("parameter");

    // Set definition. setExp refers to itself in its "add" and "sub" alternatives.
    AlternativeParser setItem = setLiteral | setName;
    var setExp = new AlternativeParser { Name = "setExp" };
    setExp.Add(
        (setExp & optionalNewlines & '+' & setItem).WithName("add"),
        (setExp & optionalNewlines & '-' & setItem).WithName("sub"),
        setItem);
    setDecl = (setName & optionalNewlines & '=' & setExp & lineEnd).WithName("setDecl");

    // Terminal definition. The nested expression is created empty and filled in once
    // regExpItem (which refers to it inside parentheses) exists.
    var nestedRegExp = new SequenceParser();
    OptionalParser kleene = (~((Parser)'+' | '?' | '*')).WithName("kleene");
    regExpItem = ((setLiteral & kleene)
        | (setName & kleene)
        | (terminal.Named("terminal") & kleene)
        | ('(' & nestedRegExp.Named("regExp2") & ')' & kleene)).WithName("regExpItem");
    RepeatParser regExpSequence = (+regExpItem).WithName("regExpSeq");
    nestedRegExp.Items.Add(regExpSequence);
    nestedRegExp.Items.Add(-('|' & regExpSequence));
    regExp = (regExpSequence & -(optionalNewlines & '|' & regExpSequence)).WithName("regExp");
    RepeatParser terminalName = +terminal;
    terminalDecl = (terminalName.Named("name") & optionalNewlines & '=' & regExp & lineEnd).WithName("terminalDecl");

    // Rule definition.
    symbol = (terminal.Named("terminal") | nonterminal.Named("nonterminal")).WithName("symbol");
    handle = (-symbol).WithName("handle");
    SequenceParser handleList = handle & -(optionalNewlines & '|' & handle);
    ruleDecl = (nonterminal.Named("name") & optionalNewlines & "::=" & handleList & lineEnd).WithName("ruleDecl");

    // Rules.
    AlternativeParser declaration = parameter | setDecl | terminalDecl | ruleDecl;
    RepeatParser declarations = -declaration;
    Inner = optionalNewlines & declarations & optionalNewlines;

    DefaultSeparator = savedSeparator;

    AttachEvents();
}
/// <summary>
/// Hooks the match events of the grammar's parsers so that, while a BNF definition is being
/// parsed, each match's <c>Tag</c> is set to the parser that the matched text describes.
/// </summary>
private void AttachEvents()
{
    // A rule reference resolves to a previously declared rule, then to a registered base
    // terminal, and otherwise falls back to a repeat of letters/digits carrying the rule's name.
    ruleName.Matched += m =>
    {
        Parser parser;
        string name = m["name"].Text;
        if (!parserLookup.TryGetValue(name, out parser) && !baseLookup.TryGetValue(name, out parser))
        {
            parser = Terminals.LetterOrDigit.Repeat();
            parser.Name = name;
        }
        m.Tag = parser;
    };

    literal.Matched += m => m.Tag = new LiteralTerminal(m["value"].Text);

    // optionalRule and repeatRule are only assigned by the constructor when the enhanced
    // syntax is enabled; without these guards a non-enhanced grammar would throw a
    // NullReferenceException here.
    if (optionalRule != null)
    {
        optionalRule.Matched += m => m.Tag = new OptionalParser((Parser)m["parser"].Tag);
    }

    if (repeatRule != null)
    {
        repeatRule.Matched += m => m.Tag = new RepeatParser((Parser)m["parser"].Tag, 0) { Separator = sws };
    }

    // Several children become a sequence (each "ws" child is replaced by sws);
    // a single term is passed through as-is.
    list.Matched += m =>
    {
        if (m.Matches.Count > 1)
        {
            var parser = new SequenceParser();
            foreach (Match child in m.Matches)
            {
                if (child.Parser.Name == "ws")
                {
                    parser.Items.Add(sws);
                }
                else if (child.Parser.Name == "term")
                {
                    parser.Items.Add((Parser)child["parser"].Tag);
                }
            }
            m.Tag = parser;
        }
        else
        {
            m.Tag = m["term"]["parser"].Tag;
        }
    };

    listRepeat.Matched += m =>
    {
        // Collapse alternatives to one alternative parser: reuse the expression's alternative
        // when it already is one, and put this list in front of it.
        var parser = (Parser)m["expression"]["parser"].Tag;
        AlternativeParser alt = parser as AlternativeParser ?? new AlternativeParser(parser);
        alt.Items.Insert(0, (Parser)m["list"]["parser"].Tag);
        m.Tag = alt;
    };

    // The Tag read here is the parser stored by the PreMatch handler below;
    // it gets the parsed definition as its Inner.
    rule.Matched += m =>
    {
        var parser = (UnaryParser)m.Tag;
        parser.Inner = (Parser)m["parser"].Tag;
        m.Tag = parser;
    };

    // Creates and registers the rule's parser by name; the rule named startParserName
    // becomes a Grammar, every other rule a UnaryParser.
    rule.PreMatch += m =>
    {
        string name = m["ruleName"]["name"].Text;
        Parser parser;
        if (name == startParserName)
        {
            parser = new Grammar(name);
        }
        else
        {
            parser = new UnaryParser(name);
        }
        m.Tag = parser;
        parserLookup[parser.Name] = parser;
    };
}