/// <summary>
/// Pulses a single 'a' token into a grammar whose "S" rule requires four "A"
/// symbols, where each "A" is either the 'a' lexer rule or the empty "E", and
/// verifies that the engine accepts and that the private chart reports a count of 2.
/// </summary>
public void Test_AycockHorspoolAlgorithm_That_Vulnerable_Grammar_Accepts_Input()
{
    // S' -> S ; S -> A A A A ; A -> 'a' | E ; E -> (empty)
    var aLexerRule = new TerminalLexerRule(
        new Terminal('a'),
        new TokenType("a"));
    var grammarBuilder = new GrammarBuilder("S'")
        .Production("S'", rule => rule.Rule("S"))
        .Production("S", rule => rule.Rule("A", "A", "A", "A"))
        .Production("A", rule => rule.Rule(aLexerRule).Rule("E"))
        .Production("E", rule => rule.Lambda());
    var grammar = grammarBuilder.ToGrammar();

    var engine = new ParseEngine(grammar);
    var token = new Token("a", 0, aLexerRule.TokenType);
    engine.Pulse(token);

    // The chart is a private field of the engine, so it is read through PrivateObject.
    var chart = new PrivateObject(engine).GetField("_chart") as Chart;
    Assert.IsNotNull(chart);
    Assert.AreEqual(2, chart.Count);
    Assert.IsTrue(engine.IsAccepted());
}
/// <summary>
/// Registers one lexer rule "M" built from a character class that combines two
/// ranges ('a'-'z' and 'A'-'Z') and verifies the grammar exposes exactly one lexer rule.
/// </summary>
public void Test_GrammarBuilder_That_LexerRule_With_Two_Calls_To_Range_Terminal_Method_Creates_One_LexerRule()
{
    var letters = new CharacterClassTerminal(
        new RangeTerminal('a', 'z'),
        new RangeTerminal('A', 'Z'));

    var grammar = new GrammarBuilder("A")
        .Production("A", rule => rule
            .Rule("B")
            .Rule("C"))
        .LexerRule("M", letters)
        .ToGrammar();

    Assert.AreEqual(1, grammar.LexerRules.Count);
}
/// <summary>
/// Declares "A" with only a lambda alternative and verifies the grammar contains
/// a single production whose right-hand side has no symbols.
/// </summary>
public void Test_GrammarBuilder_That_Production_With_No_RHS_Adds_Empty_Production_To_List()
{
    var grammar = new GrammarBuilder("A")
        .Production("A", rule => rule.Lambda())
        .ToGrammar();
    Assert.IsNotNull(grammar);

    var productions = grammar.Productions;
    Assert.AreEqual(1, productions.Count);

    var emptyProduction = productions[0];
    Assert.IsNotNull(emptyProduction);
    Assert.AreEqual(0, emptyProduction.RightHandSide.Count);
}
/// <summary>
/// Scans the text "abc123" one character at a time through a lexeme backed by a
/// grammar for exactly that character sequence; every scan must succeed and the
/// lexeme must be accepted afterwards.
/// </summary>
public void Test_Lexeme_That_Consumes_Character_Sequence()
{
    var sequenceGrammar = new GrammarBuilder("sequence")
        .Production("sequence", rule => rule
            .Rule('a', 'b', 'c', '1', '2', '3'))
        .ToGrammar();
    var engine = new ParseEngine(sequenceGrammar);
    var lexeme = new ParseEngineLexeme(engine, new TokenType("sequence"));

    foreach (var character in "abc123")
    {
        Assert.IsTrue(lexeme.Scan(character));
    }

    Assert.IsTrue(lexeme.IsAccepted());
}
/// <summary>
/// Enqueues two separately constructed states, both created from the grammar's
/// first production with the same arguments (0, 1), under index 0 of a fresh chart
/// and verifies that only one entry ends up in that set's Predictions.
/// </summary>
public void Test_Chart_That_Enqueue_Avoids_Duplicates()
{
    var grammar = new GrammarBuilder("L")
        .Production("L", rule => rule
            .Rule("L", new RangeTerminal('a', 'z'))
            .Rule(new RangeTerminal('a', 'z')))
        .ToGrammar();

    var original = new State(grammar.Productions[0], 0, 1);
    var duplicate = new State(grammar.Productions[0], 0, 1);

    var chart = new Chart();
    chart.Enqueue(0, original);
    chart.Enqueue(0, duplicate);

    var predictions = chart.EarleySets[0].Predictions;
    Assert.AreEqual(1, predictions.Count);
}
/// <summary>
/// Declares "A" with a single character alternative ('a') and verifies that the
/// resulting production has one right-hand-side symbol whose type is
/// <c>SymbolType.LexerRule</c>.
/// </summary>
public void Test_GrammarBuilder_That_Production_With_Character_RHS_Adds_Terminal()
{
    var grammar = new GrammarBuilder("A")
        .Production("A", rule => rule.Rule('a'))
        .ToGrammar();
    Assert.IsNotNull(grammar);
    Assert.AreEqual(1, grammar.Productions.Count);

    var rightHandSide = grammar.Productions[0].RightHandSide;
    Assert.AreEqual(1, rightHandSide.Count);

    var firstSymbol = rightHandSide[0];
    Assert.IsNotNull(firstSymbol);
    Assert.AreEqual(SymbolType.LexerRule, firstSymbol.SymbolType);
}
/// <summary>
/// Runs two competing lexemes ("there" and "therefore") over the input
/// "therefore" and verifies that the whole input is consumed and that exactly
/// one lexeme survives in an accepted state.
/// </summary>
public void Test_Lexeme_That_Matches_Longest_Acceptable_Token_When_Given_Ambiguity()
{
    var lexemeList = new List<ParseEngineLexeme>();

    const string There = "there";
    var thereGrammar = new GrammarBuilder(There)
        .Production(There, r => r
            .Rule('t', 'h', 'e', 'r', 'e'))
        .ToGrammar();
    var thereParseEngine = new ParseEngine(thereGrammar);
    var thereLexeme = new ParseEngineLexeme(thereParseEngine, new TokenType(There));
    lexemeList.Add(thereLexeme);

    const string Therefore = "therefore";
    var thereforeGrammar = new GrammarBuilder(Therefore)
        .Production(Therefore, r => r
            .Rule('t', 'h', 'e', 'r', 'e', 'f', 'o', 'r', 'e'))
        .ToGrammar();
    var parseEngine = new ParseEngine(thereforeGrammar);
    var thereforeLexeme = new ParseEngineLexeme(parseEngine, new TokenType(Therefore));
    lexemeList.Add(thereforeLexeme);

    var input = "therefore";

    // The index is declared outside the loop because it is asserted on afterwards.
    var i = 0;
    for (; i < input.Length; i++)
    {
        var character = input[i];

        // ToList forces every remaining lexeme to scan the character right away.
        var passedLexemes = lexemeList
            .Where(l => l.Scan(character))
            .ToList();

        // all existing lexemes have failed
        // fall back onto the lexemes that existed before
        // we read this character
        if (passedLexemes.Count == 0)
        {
            break;
        }

        lexemeList = passedLexemes;
    }

    // Expected value first, actual value second, so a failure message reads correctly.
    Assert.AreEqual(input.Length, i);
    Assert.AreEqual(1, lexemeList.Count);

    var remainingLexeme = lexemeList[0];
    Assert.IsNotNull(remainingLexeme);
    Assert.IsTrue(remainingLexeme.IsAccepted());
}
/// <summary>
/// Builds a grammar with productions for "S", "A" and "B" and verifies that
/// <c>RulesFor</c>, queried with a non-terminal named "A", yields exactly one
/// rule whose left-hand side value is "A".
/// </summary>
public void Test_Grammar_That_RulesFor_Returns_Rules_When_Production_Matches()
{
    // Only the "A" non-terminal is queried; the grammar itself is declared by name.
    var nonTerminalA = new NonTerminal("A");

    var grammarBuilder = new GrammarBuilder("S")
        .Production("S", r => r
            .Rule("A")
            .Rule("B"))
        .Production("A", r => r
            .Rule('a'))
        .Production("B", r => r
            .Rule('b'));
    var grammar = grammarBuilder.ToGrammar();

    var rules = grammar.RulesFor(nonTerminalA).ToList();

    Assert.AreEqual(1, rules.Count);
    Assert.AreEqual("A", rules[0].LeftHandSide.Value);
}
/// <summary>
/// Scans tab, carriage return, line feed, vertical tab, form feed and space
/// through a lexeme backed by a grammar of one or more whitespace terminals;
/// every scan must succeed and the lexeme must be accepted afterwards.
/// </summary>
public void Test_Lexeme_That_Consumes_Whitespace()
{
    // S -> W | W S ; W -> whitespace
    var whitespaceGrammar = new GrammarBuilder("S")
        .Production("S", rule => rule
            .Rule("W")
            .Rule("W", "S"))
        .Production("W", rule => rule
            .Rule(new WhitespaceTerminal()))
        .ToGrammar();

    var whitespaceRule = new GrammarLexerRule("whitespace", whitespaceGrammar);
    var engine = new ParseEngine(whitespaceRule.Grammar);
    var lexeme = new ParseEngineLexeme(engine, new TokenType("whitespace"));

    foreach (var character in "\t\r\n\v\f ")
    {
        Assert.IsTrue(lexeme.Scan(character));
    }

    Assert.IsTrue(lexeme.IsAccepted());
}
/// <summary>
/// Declares "A" with two alternatives ("B" and "C") and verifies the grammar
/// ends up with two productions.
/// </summary>
public void Test_GrammarBuilder_That_Production_With_Two_Calls_To_RuleBuilder_Rule_Method_Creates_Two_Productions()
{
    var grammar = new GrammarBuilder("A")
        .Production("A", rule => rule
            .Rule("B")
            .Rule("C"))
        .ToGrammar();

    var productionCount = grammar.Productions.Count;
    Assert.AreEqual(2, productionCount);
}
/// <summary>
/// Builds the regular-expression grammar once and stores it in <c>_regexGrammar</c>.
/// </summary>
static RegexGrammar()
{
    /*
     *  Grammar as declared by the builder calls below:
     *
     *  Regex                   -> Expression
     *                           | '^' Expression
     *                           | Expression '$'
     *                           | '^' Expression '$'
     *
     *  Expression              -> Term
     *                           | Term '|' Expression
     *                           | λ
     *
     *  Term                    -> Factor
     *                           | Factor Term
     *
     *  Factor                  -> Atom
     *                           | Atom Iterator
     *
     *  Atom                    -> '.'
     *                           | Character
     *                           | '(' Expression ')'
     *                           | Set
     *
     *  Iterator                -> '*' | '+' | '?'
     *
     *  Set                     -> PositiveSet
     *                           | NegativeSet
     *
     *  PositiveSet             -> '[' CharacterClass ']'
     *
     *  NegativeSet             -> '[' '^' CharacterClass ']'
     *
     *  CharacterClass          -> CharacterRange
     *                           | CharacterRange CharacterClass
     *
     *  CharacterRange          -> CharacterClassCharacter
     *                           | CharacterClassCharacter '-' CharacterClassCharacter
     *
     *  Character               -> NotMetaCharacter
     *                           | '\' any
     *
     *  CharacterClassCharacter -> NotCloseBracket
     *                           | '\' any
     *
     *  NotMetaCharacter        -> not-meta
     *
     *  NotCloseBracket         -> !]
     */

    // Non-terminal names.
    const string Regex = "Regex";
    const string Expression = "Expression";
    const string Term = "Term";
    const string Factor = "Factor";
    const string Atom = "Atom";
    const string Iterator = "Iterator";
    const string Set = "Set";
    const string PositiveSet = "PositiveSet";
    const string NegativeSet = "NegativeSet";
    const string CharacterClass = "CharacterClass";
    const string Character = "Character";
    const string CharacterRange = "CharacterRange";
    const string CharacterClassCharacter = "CharacterClassCharacter";
    const string NotCloseBracket = "NotCloseBracket";
    const string NotMetaCharacter = "NotMetaCharacter";

    // Single-character lexer rules.
    var caret = new TerminalLexerRule('^');
    var dollar = new TerminalLexerRule('$');
    var pipe = new TerminalLexerRule('|');
    var dot = new TerminalLexerRule('.');
    var openParen = new TerminalLexerRule('(');
    var closeParen = new TerminalLexerRule(')');
    var star = new TerminalLexerRule('*');
    var plus = new TerminalLexerRule('+');
    var question = new TerminalLexerRule('?');
    var openBracket = new TerminalLexerRule('[');
    var closeBracket = new TerminalLexerRule(']');

    // Negation of the ']' terminal.
    var notCloseBracketRule = new TerminalLexerRule(
        new NegationTerminal(new Terminal(']')),
        new TokenType("!]"));

    var dash = new TerminalLexerRule('-');
    var backslash = new TerminalLexerRule('\\');

    // NOTE(review): '|' is used as the alternation operator above but is not part
    // of this set, so the not-meta rule presumably also matches '|' - confirm
    // whether that is intended.
    var metaCharacters = new SetTerminal('.', '^', '$', '(', ')', '[', ']', '+', '*', '?', '\\');
    var notMetaRule = new TerminalLexerRule(
        new NegationTerminal(metaCharacters),
        new TokenType("not-meta"));

    var anyCharacter = new TerminalLexerRule(new AnyTerminal(), new TokenType("any"));

    // Productions and lexer rules are registered in the same order as before.
    _regexGrammar = new GrammarBuilder(Regex)
        .Production(Regex, rule => rule
            .Rule(Expression)
            .Rule(caret, Expression)
            .Rule(Expression, dollar)
            .Rule(caret, Expression, dollar))
        .Production(Expression, rule => rule
            .Rule(Term)
            .Rule(Term, pipe, Expression)
            .Lambda())
        .Production(Term, rule => rule
            .Rule(Factor)
            .Rule(Factor, Term))
        .Production(Factor, rule => rule
            .Rule(Atom)
            .Rule(Atom, Iterator))
        .Production(Atom, rule => rule
            .Rule(dot)
            .Rule(Character)
            .Rule(openParen, Expression, closeParen)
            .Rule(Set))
        .Production(Iterator, rule => rule
            .Rule(star)
            .Rule(plus)
            .Rule(question))
        .Production(Set, rule => rule
            .Rule(PositiveSet)
            .Rule(NegativeSet))
        .Production(PositiveSet, rule => rule
            .Rule(openBracket, CharacterClass, closeBracket))
        .Production(NegativeSet, rule => rule
            .Rule(openBracket, caret, CharacterClass, closeBracket))
        .Production(CharacterClass, rule => rule
            .Rule(CharacterRange)
            .Rule(CharacterRange, CharacterClass))
        .Production(CharacterRange, rule => rule
            .Rule(CharacterClassCharacter)
            .Rule(CharacterClassCharacter, dash, CharacterClassCharacter))
        .Production(Character, rule => rule
            .Rule(NotMetaCharacter)
            .Rule(backslash, anyCharacter))
        .Production(CharacterClassCharacter, rule => rule
            .Rule(NotCloseBracket)
            .Rule(backslash, anyCharacter))
        .Production(NotMetaCharacter, rule => rule
            .Rule(notMetaRule))
        .Production(NotCloseBracket, rule => rule
            .Rule(notCloseBracketRule))
        .LexerRule(caret)
        .LexerRule(dollar)
        .LexerRule(pipe)
        .LexerRule(dot)
        .LexerRule(openParen)
        .LexerRule(closeParen)
        .LexerRule(star)
        .LexerRule(plus)
        .LexerRule(question)
        .LexerRule(openBracket)
        .LexerRule(closeBracket)
        .LexerRule(notCloseBracketRule)
        .LexerRule(dash)
        .LexerRule(backslash)
        .LexerRule(notMetaRule)
        .LexerRule(anyCharacter)
        .ToGrammar();
}