public void AycockHorspoolAlgorithmShouldAcceptVulnerableGrammar()
        {
            var a = new TerminalLexerRule(
                new CharacterTerminal('a'),
                new TokenType("a"));

            ProductionExpression
                SPrime = "S'",
                S = "S",
                A = "A",
                E = "E";

            SPrime.Rule = S;
            S.Rule = (Expr)S | A + A + A + A;
            A.Rule = (Expr)"a" | E;

            var expression = new GrammarExpression(
                SPrime,
                new[] { SPrime, S, A, E });

            var grammar = expression.ToGrammar();

            var parseEngine = new ParseEngine(grammar);
            parseEngine.Pulse(new Token("a", 0, a.TokenType));

            var privateObject = new PrivateObject(parseEngine);
            var chart = privateObject.GetField("_chart") as Chart;

            Assert.IsNotNull(chart);
            Assert.AreEqual(2, chart.Count);
            Assert.IsTrue(parseEngine.IsAccepted());
        }
示例#2
0
 public void ChartEnqueShouldAvoidDuplication()
 {
     ProductionExpression L = "L";
     var aToZ = new RangeTerminal('a', 'z');
     L.Rule = L + aToZ | aToZ;
     var grammar = new GrammarExpression(L, new[] { L }).ToGrammar();
     var chart = new Chart();
     var firstState = new NormalState(grammar.Productions[0], 0, 1);
     var secondState = new NormalState(grammar.Productions[0], 0, 1);
     chart.Enqueue(0, firstState);
     chart.Enqueue(0, secondState);
     Assert.AreEqual(1, chart.EarleySets[0].Predictions.Count);
 }
        public void ParseEngineLexemeShouldConsumeCharacterSequence()
        {
            ProductionExpression sequence = "sequence";
            sequence.Rule = (Expr)'a' + 'b' + 'c' + '1' + '2' + '3';

            var grammar = new GrammarExpression(sequence, new[] { sequence }).ToGrammar();
            var lexerRule = new GrammarLexerRule(new TokenType("sequence"), grammar);
            var lexeme = new ParseEngineLexeme(lexerRule);
            var input = "abc123";
            for (int i = 0; i < input.Length; i++)
                Assert.IsTrue(lexeme.Scan(input[i]));
            Assert.IsTrue(lexeme.IsAccepted());
        }
        public void ParseEngineLexemeShouldMatchLongestAcceptableTokenWhenGivenAmbiguity()
        {
            var lexemeList = new List<ParseEngineLexeme>();

            ProductionExpression There = "there";
            There.Rule = (Expr)'t' + 'h' + 'e' + 'r' + 'e';
            var thereGrammar = new GrammarExpression(There, new[] { There })
                .ToGrammar();
            var thereLexerRule = new GrammarLexerRule(new TokenType(There.ProductionModel.LeftHandSide.NonTerminal.Value), thereGrammar);
            var thereLexeme = new ParseEngineLexeme(thereLexerRule);
            lexemeList.Add(thereLexeme);

            ProductionExpression Therefore = "therefore";
            Therefore.Rule = (Expr)'t' + 'h' + 'e' + 'r' + 'e' + 'f' + 'o' + 'r' + 'e';
            var thereforeGrammar = new GrammarExpression(Therefore, new[] { Therefore })
                .ToGrammar();
            var thereforeLexerRule = new GrammarLexerRule(new TokenType(Therefore.ProductionModel.LeftHandSide.NonTerminal.Value), thereforeGrammar);
            var thereforeLexeme = new ParseEngineLexeme(thereforeLexerRule);
            lexemeList.Add(thereforeLexeme);

            var input = "therefore";
            var i = 0;
            for (; i < input.Length; i++)
            {
                var passedLexemes = lexemeList
                    .Where(l => l.Scan(input[i]))
                    .ToList();

                // all existing lexemes have failed
                // fall back onto the lexemes that existed before
                // we read this character
                if (passedLexemes.Count() == 0)
                    break;

                lexemeList = passedLexemes;
            }

            Assert.AreEqual(i, input.Length);
            Assert.AreEqual(1, lexemeList.Count);
            var remainingLexeme = lexemeList[0];
            Assert.IsNotNull(remainingLexeme);
            Assert.IsTrue(remainingLexeme.IsAccepted());
        }
        public void ParseEngineLexemeShouldConsumeWhitespace()
        {
            ProductionExpression
                S = "S",
                W = "W";

            S.Rule = W | W + S;
            W.Rule = new WhitespaceTerminal();

            var grammar = new GrammarExpression(S, new[] { S, W }).ToGrammar();

            var lexerRule = new GrammarLexerRule(
                "whitespace",
                grammar);

            var lexeme = new ParseEngineLexeme(lexerRule);
            var input = "\t\r\n\v\f ";
            for (int i = 0; i < input.Length; i++)
                Assert.IsTrue(lexeme.Scan(input[i]), $"Unable to recognize input[{i}]");
            Assert.IsTrue(lexeme.IsAccepted());
        }
        public void EbnfGrammarGeneratorShouldCreateGrammarForMultipleOptionals()
        {
            // R = 'b' ['a'] 'c' ['d']
            var definition = new EbnfDefinition(
                new EbnfBlockRule(
                    new EbnfRule(
                        new EbnfQualifiedIdentifier("R"),
                        new EbnfExpression(
                            new EbnfTermConcatenation(
                                new EbnfFactorLiteral("b"),
                                new EbnfTermConcatenation(
                                    new EbnfFactorOptional(
                                        new EbnfExpression(
                                            new EbnfTerm(
                                                new EbnfFactorLiteral("a")))),
                                    new EbnfTermConcatenation(
                                        new EbnfFactorLiteral("c"),
                                        new EbnfTerm(
                                            new EbnfFactorOptional(
                                                new EbnfExpression(
                                                    new EbnfTerm(
                                                        new EbnfFactorLiteral("d"))))))))))));
            var grammar = GenerateGrammar(definition);
            Assert.IsNotNull(grammar);
            Assert.IsNotNull(grammar.Start);

            ProductionExpression
                R = "R",
                optA = "[a]",
                optD = "[d]";

            R.Rule =
                (Expr)'b' + optA + 'c' + optD ;
            optA.Rule = 'a' | (Expr)null;
            optD.Rule = 'd' | (Expr)null;

            var expectedGrammar = new GrammarExpression(R, new[] { R, optA, optD }).ToGrammar();
            Assert.AreEqual(expectedGrammar.Productions.Count, grammar.Productions.Count);
        }
示例#7
0
        static EbnfGrammar()
        {
            BaseLexerRule
                settingIdentifier = CreateSettingIdentifierLexerRule(),
                notDoubleQuote = CreateNotDoubleQuoteLexerRule(),
                notSingleQuote = CreateNotSingleQuoteLexerRule(),
                identifier = CreateIdentifierLexerRule(),
                any = new TerminalLexerRule(new AnyTerminal(), "."),
                notCloseBracket = new TerminalLexerRule(
                    new NegationTerminal(new CharacterTerminal(']')), "[^\\]]"),
                notMeta = CreateNotMetaLexerRule(),
                escapeCharacter = CreateEscapeCharacterLexerRule(),
                whitespace = CreateWhitespaceLexerRule(),
                multiLineComment = CreateMultiLineCommentLexerRule();

            ProductionExpression
                definition = Definition,
                block = Block,
                rule = Rule,
                setting = Setting,
                lexerRule = LexerRule,
                qualifiedIdentifier = QualifiedIdentifier,
                expression = Expression,
                term = Term,
                factor = Factor,
                literal = Literal,
                grouping = Grouping,
                repetition = Repetition,
                optional = Optional,
                lexerRuleExpression = LexerRuleExpression,
                lexerRuleTerm = LexerRuleTerm,
                lexerRuleFactor = LexerRuleFactor;

            var regexGrammar = new RegexGrammar();
            var regexProductionReference = new ProductionReferenceExpression(regexGrammar);

            definition.Rule =
                block
                | block + definition;

            block.Rule =
                rule
                | setting
                | lexerRule;

            rule.Rule =
                qualifiedIdentifier + '=' + expression + ';';

            setting.Rule = (Expr)
                settingIdentifier + '=' + qualifiedIdentifier + ';';

            lexerRule.Rule =
                qualifiedIdentifier + '~' + lexerRuleExpression + ';';

            expression.Rule =
                term
                | term + '|' + expression;

            term.Rule =
                factor
                | factor + term;

            factor.Rule
                = qualifiedIdentifier
                | literal
                | '/' + regexProductionReference + '/'
                | repetition
                | optional
                | grouping;

            literal.Rule = (Expr)
                '"' + notDoubleQuote + '"'
                | (Expr)"'" + notSingleQuote + "'";

            repetition.Rule = (Expr)
                '{' + expression + '}';

            optional.Rule = (Expr)
                '[' + expression + ']';

            grouping.Rule = (Expr)
                '(' + expression + ')';

            qualifiedIdentifier.Rule =
                identifier
                | (Expr)identifier + '.' + qualifiedIdentifier;

            lexerRuleExpression.Rule =
                lexerRuleTerm
                | lexerRuleTerm + '|' + lexerRuleExpression;

            lexerRuleTerm.Rule =
                lexerRuleFactor
                | lexerRuleFactor + lexerRuleTerm;

            lexerRuleFactor.Rule =
                literal
                | '/' + regexProductionReference + '/';

            var grammarExpression = new GrammarExpression(
                definition,
                new[]
                {
                    definition,
                    block,
                    rule,
                    setting,
                    lexerRule,
                    expression,
                    term,
                    factor,
                    literal,
                    repetition,
                    optional,
                    grouping,
                    qualifiedIdentifier,
                    lexerRuleExpression,
                    lexerRuleTerm,
                    lexerRuleFactor
                },
                new[] { new LexerRuleModel(whitespace), new LexerRuleModel(multiLineComment) });
            _ebnfGrammar = grammarExpression.ToGrammar();
        }
示例#8
0
 public ParseTester(GrammarExpression expression)
     : this(expression.ToGrammar())
 {
 }
        public void EbnfGrammarGeneratorShouldCreateGrammarForOptional()
        {
            // R = ['a']
            var definition = new EbnfDefinition(
                new EbnfBlockRule(
                    new EbnfRule(
                        new EbnfQualifiedIdentifier("R"),
                        new EbnfExpression(
                            new EbnfTerm(
                                new EbnfFactorOptional(
                                    new EbnfExpression(
                                        new EbnfTerm(
                                            new EbnfFactorLiteral("a")))))))));

            var grammar = GenerateGrammar(definition);
            Assert.IsNotNull(grammar);
            Assert.IsNotNull(grammar.Start);

            ProductionExpression
                R = "R",
                optA = "[a]";

            R.Rule = optA;
            optA.Rule = 'a'
                | (Expr)null;

            var expectedGrammar = new GrammarExpression(R, new[] { R, optA }).ToGrammar();
            Assert.AreEqual(expectedGrammar.Productions.Count, grammar.Productions.Count);
        }