public void Test_AycockHorspoolAlgorithm_That_Vulnerable_Grammar_Accepts_Input()
        {
            // Lexer rule that matches the single character 'a'.
            var aRule = new TerminalLexerRule(
                new Terminal('a'),
                new TokenType("a"));

            // S' -> S ; S -> A A A A ; A -> 'a' | E ; E -> <empty>
            // With a one-token input, three of the four A symbols have to
            // derive the empty string for the parse to be accepted.
            var builder = new GrammarBuilder("S'")
                .Production("S'", r => r.Rule("S"))
                .Production("S", r => r.Rule("A", "A", "A", "A"))
                .Production("A", r => r
                    .Rule(aRule)
                    .Rule("E"))
                .Production("E", r => r.Lambda());
            var grammar = builder.ToGrammar();

            var engine = new ParseEngine(grammar);
            var token = new Token("a", 0, aRule.TokenType);
            engine.Pulse(token);

            // The chart lives in a private field, so it is read through PrivateObject.
            var chart = new PrivateObject(engine).GetField("_chart") as Chart;

            Assert.IsNotNull(chart);
            // Presumably one chart entry per input position (before and after the token) - confirm.
            Assert.AreEqual(2, chart.Count);
            Assert.IsTrue(engine.IsAccepted());
        }
예제 #2
0
 public void Test_GrammarBuilder_That_LexerRule_With_Two_Calls_To_Range_Terminal_Method_Creates_One_LexerRule()
 {
     // Two ranges combined into one character class, registered as the single lexer rule "M".
     var lowerCase = new RangeTerminal('a', 'z');
     var upperCase = new RangeTerminal('A', 'Z');
     var letters = new CharacterClassTerminal(lowerCase, upperCase);

     var grammar = new GrammarBuilder("A")
         .Production("A", r => r
             .Rule("B")
             .Rule("C"))
         .LexerRule("M", letters)
         .ToGrammar();

     // Only one lexer rule was registered, regardless of how many ranges it contains.
     Assert.AreEqual(1, grammar.LexerRules.Count);
 }
예제 #3
0
        public void Test_GrammarBuilder_That_Production_With_No_RHS_Adds_Empty_Production_To_List()
        {
            // A -> <empty>
            var grammar = new GrammarBuilder("A")
                .Production("A", r => r.Lambda())
                .ToGrammar();
            Assert.IsNotNull(grammar);

            var productions = grammar.Productions;
            Assert.AreEqual(1, productions.Count);

            // The lambda rule must still be recorded, with nothing on its right-hand side.
            var emptyProduction = productions[0];
            Assert.IsNotNull(emptyProduction);
            Assert.AreEqual(0, emptyProduction.RightHandSide.Count);
        }
예제 #4
0
        public void Test_Lexeme_That_Consumes_Character_Sequence()
        {
            const string input = "abc123";

            // sequence -> 'a' 'b' 'c' '1' '2' '3'
            var builder = new GrammarBuilder("sequence")
                .Production("sequence", r => r
                    .Rule('a', 'b', 'c', '1', '2', '3'));
            var engine = new ParseEngine(builder.ToGrammar());
            var tokenType = new TokenType("sequence");
            var lexeme = new ParseEngineLexeme(engine, tokenType);

            // Every character of the input must be scanned successfully, in order.
            foreach (var character in input)
            {
                Assert.IsTrue(lexeme.Scan(character));
            }

            Assert.IsTrue(lexeme.IsAccepted());
        }
예제 #5
0
 public void Test_Chart_That_Enqueue_Avoids_Duplicates()
 {
     // L -> L [a-z] | [a-z]
     var letter = new RangeTerminal('a', 'z');
     var builder = new GrammarBuilder("L")
         .Production("L", r => r
             .Rule("L", letter)
             .Rule(new RangeTerminal('a','z')));
     var production = builder.ToGrammar().Productions[0];

     // Enqueue two separately constructed states that carry identical arguments
     // into the same set (index 0).
     var chart = new Chart();
     chart.Enqueue(0, new State(production, 0, 1));
     chart.Enqueue(0, new State(production, 0, 1));

     // The second enqueue must not add another prediction.
     Assert.AreEqual(1, chart.EarleySets[0].Predictions.Count);
 }
예제 #6
0
        public void Test_GrammarBuilder_That_Production_With_Character_RHS_Adds_Terminal()
        {
            // A -> 'a'
            var grammar = new GrammarBuilder("A")
                .Production("A", r => r.Rule('a'))
                .ToGrammar();
            Assert.IsNotNull(grammar);
            Assert.AreEqual(1, grammar.Productions.Count);

            var rightHandSide = grammar.Productions[0].RightHandSide;
            Assert.AreEqual(1, rightHandSide.Count);

            // The character literal is expected to surface as a lexer-rule symbol.
            var onlySymbol = rightHandSide[0];
            Assert.IsNotNull(onlySymbol);
            Assert.AreEqual(SymbolType.LexerRule, onlySymbol.SymbolType);
        }
예제 #7
0
        public void Test_Lexeme_That_Matches_Longest_Acceptable_Token_When_Given_Ambiguity()
        {
            // Two competing lexemes, "there" and "therefore", are fed the same input.
            // Only the one that can consume the entire input should survive.
            var lexemeList = new List<ParseEngineLexeme>();

            const string There = "there";
            var thereGrammar = new GrammarBuilder(There)
                .Production(There, r => r
                    .Rule('t', 'h', 'e', 'r', 'e'))
                .ToGrammar();
            var thereParseEngine = new ParseEngine(thereGrammar);
            var thereLexeme = new ParseEngineLexeme(thereParseEngine, new TokenType(There));
            lexemeList.Add(thereLexeme);

            const string Therefore = "therefore";
            var thereforeGrammar = new GrammarBuilder(Therefore)
                .Production(Therefore, r => r
                    .Rule('t', 'h', 'e', 'r', 'e', 'f', 'o', 'r', 'e'))
                .ToGrammar();
            var thereforeParseEngine = new ParseEngine(thereforeGrammar);
            var thereforeLexeme = new ParseEngineLexeme(thereforeParseEngine, new TokenType(Therefore));
            lexemeList.Add(thereforeLexeme);

            var input = "therefore";
            var i = 0;
            for (; i < input.Length; i++)
            {
                // Scan advances each lexeme as a side effect; ToList forces the
                // query to run now, while i still refers to the current character.
                var passedLexemes = lexemeList
                    .Where(l => l.Scan(input[i]))
                    .ToList();

                // all existing lexemes have failed
                // fall back onto the lexemes that existed before
                // we read this character
                if (passedLexemes.Count == 0)
                    break;

                lexemeList = passedLexemes;
            }

            // Expected value first, actual second, so a failure message reads correctly.
            Assert.AreEqual(input.Length, i);
            Assert.AreEqual(1, lexemeList.Count);
            var remainingLexeme = lexemeList[0];
            Assert.IsNotNull(remainingLexeme);
            Assert.IsTrue(remainingLexeme.IsAccepted());
        }
예제 #8
0
 public void Test_Grammar_That_RulesFor_Returns_Rules_When_Production_Matches()
 {
     // Only "A" is looked up below, so it is the only non-terminal constructed here.
     var nonTerminalA = new NonTerminal("A");

     // S -> A | B ; A -> 'a' ; B -> 'b'
     var grammarBuilder = new GrammarBuilder("S")
         .Production("S", r => r
             .Rule("A")
             .Rule("B"))
         .Production("A", r => r
             .Rule('a'))
         .Production("B", r => r
             .Rule('b'));
     var grammar = grammarBuilder.ToGrammar();

     // Exactly one production has "A" on its left-hand side.
     var rules = grammar.RulesFor(nonTerminalA).ToList();
     Assert.AreEqual(1, rules.Count);
     Assert.AreEqual("A", rules[0].LeftHandSide.Value);
 }
예제 #9
0
        public void Test_Lexeme_That_Consumes_Whitespace()
        {
            const string input = "\t\r\n\v\f ";

            // S -> W | W S ; W -> <whitespace>
            var whitespaceGrammar = new GrammarBuilder("S")
                .Production("S", r => r
                    .Rule("W")
                    .Rule("W", "S"))
                .Production("W", r => r.Rule(new WhitespaceTerminal()))
                .ToGrammar();

            // The parse engine is fed the grammar exposed by the lexer rule,
            // not the local grammar variable directly.
            var whitespaceRule = new GrammarLexerRule("whitespace", whitespaceGrammar);
            var engine = new ParseEngine(whitespaceRule.Grammar);
            var lexeme = new ParseEngineLexeme(engine, new TokenType("whitespace"));

            // Each whitespace character must be scanned successfully.
            foreach (var character in input)
            {
                Assert.IsTrue(lexeme.Scan(character));
            }

            Assert.IsTrue(lexeme.IsAccepted());
        }
예제 #10
0
 public void Test_GrammarBuilder_That_Production_With_Two_Calls_To_RuleBuilder_Rule_Method_Creates_Two_Productions()
 {
     // A -> B | C : each Rule call should yield its own production.
     var grammar = new GrammarBuilder("A")
         .Production("A", r => r
             .Rule("B")
             .Rule("C"))
         .ToGrammar();

     var productionCount = grammar.Productions.Count;
     Assert.AreEqual(2, productionCount);
 }
예제 #11
0
        /// <summary>
        /// Builds the grammar used to parse regular expressions and stores it in
        /// <c>_regexGrammar</c>. Runs once, as the type's static constructor.
        /// </summary>
        static RegexGrammar()
        {
            /*  Grammar as implemented by the builder below:
             *
             *  Regex                      ->   Expression |
             *                                  '^' Expression |
             *                                  Expression '$' |
             *                                  '^' Expression '$'
             *
             *  Expression                 ->   Term |
             *                                  Term '|' Expression |
             *                                  λ
             *
             *  Term                       ->   Factor |
             *                                  Factor Term
             *
             *  Factor                     ->   Atom |
             *                                  Atom Iterator
             *
             *  Atom                       ->   '.' |
             *                                  Character |
             *                                  '(' Expression ')' |
             *                                  Set
             *
             *  Iterator                   ->   '*' |
             *                                  '+' |
             *                                  '?'
             *
             *  Set                        ->   PositiveSet |
             *                                  NegativeSet
             *
             *  PositiveSet                ->   '[' CharacterClass ']'
             *
             *  NegativeSet                ->   '[' '^' CharacterClass ']'
             *
             *  CharacterClass             ->   CharacterRange |
             *                                  CharacterRange CharacterClass
             *
             *  CharacterRange             ->   CharacterClassCharacter |
             *                                  CharacterClassCharacter '-' CharacterClassCharacter
             *
             *  Character                  ->   NotMetaCharacter |
             *                                  '\' AnyCharacter
             *
             *  CharacterClassCharacter    ->   NotCloseBracket |
             *                                  '\' AnyCharacter
             *
             *  NotMetaCharacter           ->   any character except . ^ $ ( ) [ ] + * ? \
             *
             *  NotCloseBracket            ->   any character except ]
             *
             *  Note: an EscapeSequence alternative for Character was listed in an earlier
             *  version of this comment, but no such rule is registered below.
             */

            // Non-terminal names.
            const string Regex = "Regex";
            const string Expression = "Expression";
            const string Term = "Term";
            const string Factor = "Factor";
            const string Atom = "Atom";
            const string Iterator = "Iterator";
            const string Set = "Set";
            const string PositiveSet = "PositiveSet";
            const string NegativeSet = "NegativeSet";
            const string CharacterClass = "CharacterClass";
            const string Character = "Character";
            const string CharacterRange = "CharacterRange";
            const string CharacterClassCharacter = "CharacterClassCharacter";
            const string NotCloseBracket = "NotCloseBracket";
            const string NotMetaCharacter = "NotMetaCharacter";

            // Lexer rules for the literal characters and character sets used by the grammar.
            var caret = new TerminalLexerRule('^');
            var dollar = new TerminalLexerRule('$');
            var pipe = new TerminalLexerRule('|');
            var dot = new TerminalLexerRule('.');
            var openParen = new TerminalLexerRule('(');
            var closeParen = new TerminalLexerRule(')');
            var star = new TerminalLexerRule('*');
            var plus = new TerminalLexerRule('+');
            var question = new TerminalLexerRule('?');
            var openBracket = new TerminalLexerRule('[');
            var closeBracket = new TerminalLexerRule(']');
            // Negation of ']' : used for characters inside a bracketed set.
            var notCloseBracket = new TerminalLexerRule(new NegationTerminal(new Terminal(']')), new TokenType("!]"));
            var dash = new TerminalLexerRule('-');
            var backslash = new TerminalLexerRule('\\');
            // Negation of the listed meta characters.
            // NOTE(review): '|' is not in this set, so a pipe is also covered by not-meta
            // in addition to the dedicated pipe rule - confirm whether that is intended.
            var notMeta = new TerminalLexerRule(
                new NegationTerminal(
                    new SetTerminal('.', '^', '$', '(', ')', '[', ']', '+', '*', '?', '\\')),
                new TokenType("not-meta"));
            // Used after a backslash in both Character and CharacterClassCharacter.
            var any = new TerminalLexerRule(new AnyTerminal(), new TokenType("any"));

            // Productions first, then every lexer rule referenced by them.
            var grammarBuilder = new GrammarBuilder(Regex)
                .Production(Regex, r => r
                    .Rule(Expression)
                    .Rule(caret, Expression)
                    .Rule(Expression, dollar)
                    .Rule(caret, Expression, dollar))
                .Production(Expression, r => r
                    .Rule(Term)
                    .Rule(Term, pipe, Expression)
                    .Lambda())
                .Production(Term, r => r
                    .Rule(Factor)
                    .Rule(Factor, Term))
                .Production(Factor, r => r
                    .Rule(Atom)
                    .Rule(Atom, Iterator))
                .Production(Atom, r => r
                    .Rule(dot)
                    .Rule(Character)
                    .Rule(openParen, Expression, closeParen)
                    .Rule(Set))
                .Production(Iterator, r => r
                    .Rule(star)
                    .Rule(plus)
                    .Rule(question))
                .Production(Set, r => r
                    .Rule(PositiveSet)
                    .Rule(NegativeSet))
                .Production(PositiveSet, r => r
                    .Rule(openBracket, CharacterClass, closeBracket))
                .Production(NegativeSet, r => r
                    .Rule(openBracket, caret, CharacterClass, closeBracket))
                .Production(CharacterClass, r => r
                    .Rule(CharacterRange)
                    .Rule(CharacterRange, CharacterClass))
                .Production(CharacterRange, r => r
                    .Rule(CharacterClassCharacter)
                    .Rule(CharacterClassCharacter, dash, CharacterClassCharacter))
                .Production(Character, r => r
                    .Rule(NotMetaCharacter)
                    .Rule(backslash, any))
                .Production(CharacterClassCharacter, r => r
                    .Rule(NotCloseBracket)
                    .Rule(backslash, any))
                .Production(NotMetaCharacter, r => r
                    .Rule(notMeta))
                .Production(NotCloseBracket, r => r
                    .Rule(notCloseBracket))
                .LexerRule(caret)
                .LexerRule(dollar)
                .LexerRule(pipe)
                .LexerRule(dot)
                .LexerRule(openParen)
                .LexerRule(closeParen)
                .LexerRule(star)
                .LexerRule(plus)
                .LexerRule(question)
                .LexerRule(openBracket)
                .LexerRule(closeBracket)
                .LexerRule(notCloseBracket)
                .LexerRule(dash)
                .LexerRule(backslash)
                .LexerRule(notMeta)
                .LexerRule(any);
            _regexGrammar = grammarBuilder.ToGrammar();
        }