Exemplo n.º 1
0
        static BnfGrammar()
        {
            /*
             * Builds the grammar that describes BNF notation and stores it in _bnfGrammar.
             *
             *  <grammar>        ::= <rule> | <rule> <grammar>
             *  <rule>           ::= <identifier> "::=" <expression>
             *  <expression>     ::= <list> | <list> "|" <expression>
             *  <line-end>       ::= <EOL> | <line-end> <line-end>
             *  <list>           ::= <term> | <term> <list>
             *  <term>           ::= <literal> | <identifier>
             *  <identifier>     ::= "<" <rule-name> ">"
             *  <literal>        ::= '"' <text> '"' | "'" <text> "'"
             *
             * <line-end> is declared below but is not referenced by any other production here.
             */

            // Lexer rules supplied by helper factories of the enclosing class.
            var whitespace     = CreateWhitespaceLexerRule();
            var ruleName       = CreateRuleNameLexerRule();
            var implements     = CreateImplementsLexerRule();
            var eol            = CreateEndOfLineLexerRule();
            var notDoubleQuote = CreateNotDoubleQuoteLexerRule();
            var notSingleQuote = CreateNotSingleQuoteLexerRule();

            // Non-terminal symbols.
            var grammar    = new NonTerminal("grammar");
            var rule       = new NonTerminal("rule");
            var identifier = new NonTerminal("identifier");
            var expression = new NonTerminal("expression");
            var lineEnd    = new NonTerminal("line-end");
            var list       = new NonTerminal("list");
            var term       = new NonTerminal("term");
            var literal    = new NonTerminal("literal");

            // Single-character terminals.
            var lessThan    = new TerminalLexerRule('<');
            var greaterThan = new TerminalLexerRule('>');
            var doubleQuote = new TerminalLexerRule('"');
            var singleQuote = new TerminalLexerRule('\'');
            var pipe        = new TerminalLexerRule('|');

            var productions = new[]
            {
                new Production(grammar, rule),
                new Production(grammar, rule, grammar),
                new Production(rule, identifier, implements, expression),
                new Production(expression, list),
                new Production(expression, list, pipe, expression),
                new Production(lineEnd, eol),
                new Production(lineEnd, lineEnd, lineEnd),
                new Production(list, term),
                new Production(list, term, list),
                new Production(term, literal),
                new Production(term, identifier),
                new Production(identifier, lessThan, ruleName, greaterThan),
                new Production(literal, doubleQuote, notDoubleQuote, doubleQuote),
                new Production(literal, singleQuote, notSingleQuote, singleQuote)
            };

            // Whitespace is handed to the grammar as an ignore rule.
            var ignore = new[]
            {
                whitespace
            };

            _bnfGrammar = new Grammar(grammar, productions, ignore);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Pulses a single "a" token through a parse engine built from the grammar
        /// S' -> S; S -> S | A A A A; A -> "a" | E (E is never given a rule), then
        /// checks that the chart holds two entries and the input is accepted.
        /// </summary>
        public void AycockHorspoolAlgorithmShouldAcceptVulnerableGrammar()
        {
            var aLexerRule = new TerminalLexerRule(new CharacterTerminal('a'), new TokenName("a"));

            ProductionExpression SPrime = "S'";
            ProductionExpression S = "S";
            ProductionExpression A = "A";
            ProductionExpression E = "E";

            SPrime.Rule = S;
            S.Rule = (Expr)S | (A + A + A + A);
            A.Rule = (Expr)"a" | E;

            var allProductions = new[] { SPrime, S, A, E };
            var grammar = new GrammarExpression(SPrime, allProductions).ToGrammar();
            var engine = new ParseEngine(grammar);

            var singleA = new VerbatimToken(aLexerRule.TokenName, 0, "a");
            engine.Pulse(singleA);

            var chart = engine.Chart;

            Assert.IsNotNull(chart);
            Assert.AreEqual(2, chart.Count);
            Assert.IsTrue(engine.IsAccepted());
        }
Exemplo n.º 3
0
        /// <summary>
        /// Checks the accept/scan sequence of a lexeme created from the 'c' terminal rule:
        /// not accepted initially, first scan of 'c' succeeds and makes it accepted,
        /// a second scan of 'c' is rejected.
        /// </summary>
        public void TerminalLexemShouldWhileAcceptedContinuesToMatch()
        {
            var rule = new TerminalLexerRule('c');
            var lexeme = rule.CreateLexeme(0);

            var acceptedBeforeScan = lexeme.IsAccepted();
            Assert.IsFalse(acceptedBeforeScan);

            var firstScan = lexeme.Scan('c');
            Assert.IsTrue(firstScan);

            var acceptedAfterScan = lexeme.IsAccepted();
            Assert.IsTrue(acceptedAfterScan);

            var secondScan = lexeme.Scan('c');
            Assert.IsFalse(secondScan);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Builds the EBNF grammar and stores it in <c>_ebnfGrammar</c>.
        /// The start symbol is the grammar non-terminal, which has an empty production
        /// and a left-recursive "grammar rule" production. Whitespace is passed as the
        /// only ignore rule and the fourth constructor argument is null.
        /// </summary>
        static EbnfGrammar()
        {
            // Non-terminals, named after the class-level symbol names.
            var root              = new NonTerminal(Grammar);
            var ruleDef           = new NonTerminal(Rule);
            var lhs               = new NonTerminal(LeftHandSide);
            var expr              = new NonTerminal(Expression);
            var optionalExpr      = new NonTerminal(Optional);
            var repetitionExpr    = new NonTerminal(Repetition);
            var groupExpr         = new NonTerminal(Group);
            var alterationExpr    = new NonTerminal(Alteration);
            var concatenationExpr = new NonTerminal(Concatenation);

            // Punctuation terminals.
            var equalsSign = new TerminalLexerRule('=');
            var semi       = new TerminalLexerRule(';');
            var lBracket   = new TerminalLexerRule('[');
            var rBracket   = new TerminalLexerRule(']');
            var lBrace     = new TerminalLexerRule('{');
            var rBrace     = new TerminalLexerRule('}');
            var lParen     = new TerminalLexerRule('(');
            var rParen     = new TerminalLexerRule(')');
            var bar        = new TerminalLexerRule('|');
            var separator  = new TerminalLexerRule(',');

            // Lexer rules produced by helpers of the enclosing class.
            var identifierRule = Identifier();
            var terminalRule   = Terminal();

            var productions = new[]
            {
                // grammar
                new Production(root),
                new Production(root, root, ruleDef),

                // rule and its left-hand side
                new Production(ruleDef, lhs, equalsSign, expr, semi),
                new Production(lhs, identifierRule),

                // expression alternatives
                new Production(expr, identifierRule),
                new Production(expr, terminalRule),
                new Production(expr, optionalExpr),
                new Production(expr, repetitionExpr),
                new Production(expr, groupExpr),
                new Production(expr, alterationExpr),
                new Production(expr, concatenationExpr),

                // bracketed forms
                new Production(optionalExpr, lBracket, expr, rBracket),
                new Production(repetitionExpr, lBrace, expr, rBrace),
                new Production(groupExpr, lParen, expr, rParen),

                // binary forms
                new Production(alterationExpr, expr, bar, expr),
                new Production(concatenationExpr, expr, separator, expr)
            };

            var ignore = new[] { Whitespace() };

            _ebnfGrammar = new Grammar(root, productions, ignore, null);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Creates the grammar S -> S '+' M | M; M -> M '*' T | T; T -> digit,
        /// with S as the start production.
        /// </summary>
        /// <returns>The grammar produced by <c>GrammarExpression.ToGrammar()</c>.</returns>
        private static Grammar CreateExpressionGrammar()
        {
            var digitRule = new TerminalLexerRule(DigitTerminal.Instance, new TokenName("digit"));

            ProductionExpression sum = "S";
            ProductionExpression product = "M";
            ProductionExpression operand = "T";

            sum.Rule = (sum + '+' + product) | product;
            product.Rule = (product + '*' + operand) | operand;
            operand.Rule = digitRule;

            var expression = new GrammarExpression(sum, new[] { sum, product, operand });
            return expression.ToGrammar();
        }
        /// <summary>
        /// Feeds the tokens of "[1,2]" to a <c>DeterministicParseEngine</c> built from
        /// A -> '[' VR ']'; VR -> V | V ',' VR | (Expr)null; V -> number.
        /// Fails if any pulse is rejected or if the input is not accepted at the end.
        /// </summary>
        public void DeterministicParseEngineShouldParseRepeatingRightRecursiveRule()
        {
            var number       = new NumberLexerRule();
            var openBracket  = new TerminalLexerRule('[');
            var closeBracket = new TerminalLexerRule(']');
            var comma        = new TerminalLexerRule(',');

            ProductionExpression array = "A";
            ProductionExpression value = "V";
            ProductionExpression valueList = "VR";

            array.Rule = openBracket + valueList + closeBracket;
            valueList.Rule = value
                             | value + comma + valueList
                             | (Expr)null;
            value.Rule = number;

            var expression = new GrammarExpression(array, new[] { array, value, valueList });
            var engine = new DeterministicParseEngine(expression.ToGrammar());

            var tokens = new[]
            {
                new Token("[", 0, openBracket.TokenType),
                new Token("1", 1, number.TokenType),
                new Token(",", 2, comma.TokenType),
                new Token("2", 3, number.TokenType),
                new Token("]", 4, closeBracket.TokenType)
            };

            // Stop at the first token the engine rejects.
            foreach (var token in tokens)
            {
                if (engine.Pulse(token))
                {
                    continue;
                }

                Assert.Fail($"Failure parsing at position {engine.Location}");
            }

            if (!engine.IsAccepted())
            {
                Assert.Fail("Input was not accepted.");
            }
        }
Exemplo n.º 7
0
        /// <summary>
        /// Creates the grammar S -> S '+' M | M; M -> M '*' T | T; T -> digit,
        /// with S as the start production.
        /// </summary>
        /// <returns>The grammar produced by <c>GrammarExpression.ToGrammar()</c>.</returns>
        private static IGrammar CreateExpressionGrammar()
        {
            var digitRule = new TerminalLexerRule(new DigitTerminal(), new TokenType("digit"));

            ProductionExpression sum = "S";
            ProductionExpression product = "M";
            ProductionExpression operand = "T";

            sum.Rule = (sum + '+' + product) | product;
            product.Rule = (product + '*' + operand) | operand;
            operand.Rule = digitRule;

            var expression = new GrammarExpression(sum, new[] { sum, product, operand });
            return expression.ToGrammar();
        }
Exemplo n.º 8
0
        /// <summary>
        /// Builds the BNF grammar and stores it in <c>_bnfGrammar</c>.
        /// Productions registered by this constructor:
        /// <code>
        /// grammar    = rule | rule grammar ;
        /// rule       = identifier implements expression ;
        /// expression = list | list "|" expression ;
        /// line-end   = eol | line-end line-end ;
        /// list       = term | term list ;
        /// term       = literal | identifier ;
        /// identifier = "&lt;" rule-name "&gt;" ;
        /// literal    = double-quote-string | single-quote-string ;
        /// </code>
        /// The rule-name, implements, eol and quoted-string symbols are lexer rules obtained
        /// from helper methods of the enclosing class. Whitespace is passed as the only
        /// ignore rule and the fourth constructor argument is null.
        /// </summary>
        static BnfGrammar()
        {
            // Lexer rules from the class helpers.
            var whitespaceRule  = Whitespace();
            var ruleNameRule    = RuleName();
            var implementsRule  = Implements();
            var endOfLineRule   = EndOfLine();
            var doubleQuoteRule = DoubleQuoteString();
            var singleQuoteRule = SingleQuoteString();

            // Non-terminal symbols.
            var root           = new NonTerminal("grammar");
            var ruleDef        = new NonTerminal("rule");
            var ident          = new NonTerminal("identifier");
            var expr           = new NonTerminal("expression");
            var lineTerminator = new NonTerminal("line-end");
            var termList       = new NonTerminal("list");
            var termSymbol     = new NonTerminal("term");
            var literalSymbol  = new NonTerminal("literal");

            // Single-character terminals.
            var openAngle  = new TerminalLexerRule('<');
            var closeAngle = new TerminalLexerRule('>');
            var bar        = new TerminalLexerRule('|');

            var productions = new[]
            {
                new Production(root, ruleDef),
                new Production(root, ruleDef, root),
                new Production(ruleDef, ident, implementsRule, expr),
                new Production(expr, termList),
                new Production(expr, termList, bar, expr),
                new Production(lineTerminator, endOfLineRule),
                new Production(lineTerminator, lineTerminator, lineTerminator),
                new Production(termList, termSymbol),
                new Production(termList, termSymbol, termList),
                new Production(termSymbol, literalSymbol),
                new Production(termSymbol, ident),
                new Production(ident, openAngle, ruleNameRule, closeAngle),
                new Production(literalSymbol, doubleQuoteRule),
                new Production(literalSymbol, singleQuoteRule),
            };

            var ignore = new[] { whitespaceRule };

            _bnfGrammar = new Grammar(root, productions, ignore, null);
        }
Exemplo n.º 9
0
        /// <summary>
        /// Parses "aaaaa" with the right-recursive grammar A -> 'a' A | (Expr)null and
        /// checks the chart size and the item counts of the last Earley set.
        /// </summary>
        public void ParseEngineRightRecursionShouldNotBeCubicComplexity()
        {
            ProductionExpression A = "A";

            A.Rule =
                'a' + A
                | (Expr)null;

            var grammar = new GrammarExpression(A, new[] { A })
                          .ToGrammar();

            var input      = Tokenize("aaaaa");
            var recognizer = new ParseEngine(grammar);

            ParseInput(recognizer, input);

            var chart = GetChartFromParseEngine(recognizer);

            // Expected chart contents:
            // -- 0 --
            // A ->.a A         (0)   # Start
            // A ->.            (0)   # Start
            //
            // ...
            // -- n --
            // n    A -> a.A        (n-1)   # Scan a
            // n    A ->.a A        (n)     # Predict
            // n    A ->.           (n)     # Predict
            // n    A -> a A.       (n)     # Predict
            // n    A : A -> a A.   (0)     # Transition
            // n    A -> a A.       (0)     # Complete

            // One Earley set per input token plus the initial set.
            Assert.AreEqual(input.Count() + 1, chart.Count);
            var lastEarleySet = chart.EarleySets[chart.EarleySets.Count - 1];

            Assert.AreEqual(3, lastEarleySet.Completions.Count);
            Assert.AreEqual(1, lastEarleySet.Transitions.Count);
            Assert.AreEqual(1, lastEarleySet.Predictions.Count);
            Assert.AreEqual(1, lastEarleySet.Scans.Count);
        }
        /// <summary>
        /// Runs "aaaaa" through a <c>ParseTester</c> wrapping a <c>DeterministicParseEngine</c>
        /// over a <c>PreComputedGrammar</c> for the right-recursive rule
        /// A -> 'a' A | (Expr)null. The test contains no explicit assertions; it relies on
        /// <c>ParseTester.RunParse</c> to report failure.
        /// </summary>
        public void DeterministicParseEngineShouldParseInSubCubicTimeGivenRightRecursiveGrammar()
        {
            ProductionExpression A = "A";

            A.Rule =
                'a' + A
                | (Expr)null;

            var grammarExpression = new GrammarExpression(A, new[] { A });

            var parseTester = new ParseTester(
                new DeterministicParseEngine(
                    new PreComputedGrammar(grammarExpression.ToGrammar())));

            const string input = "aaaaa";

            parseTester.RunParse(input);
        }
Exemplo n.º 11
0
        /// <summary>
        /// Builds the PDL grammar from production expressions and stores the result of
        /// <c>GrammarExpression.ToGrammar()</c> in <c>_pdlGrammar</c>. The start production
        /// is <c>definition</c>; whitespace and multi-line comments are supplied as
        /// <c>LexerRuleModel</c> entries in the last constructor argument.
        /// </summary>
        static PdlGrammar()
        {
            // Lexer rules. NOTE(review): notDoubleQuote, notSingleQuote, any, notCloseBracket
            // and escapeCharacter are not referenced again in this constructor.
            BaseLexerRule
                settingIdentifier = SettingIdentifier(),
                notDoubleQuote    = NotDoubleQuote(),
                notSingleQuote    = NotSingleQuote(),
                identifier        = Identifier(),
                any             = new TerminalLexerRule(new AnyTerminal(), "."),
                notCloseBracket = new TerminalLexerRule(
                new NegationTerminal(new CharacterTerminal(']')), "[^\\]]"),
                escapeCharacter  = EscapeCharacter(),
                whitespace       = Whitespace(),
                multiLineComment = MultiLineComment();

            // One production expression per non-terminal, named by the class-level symbol names.
            ProductionExpression
                definition          = Definition,
                block               = Block,
                rule                = Rule,
                setting             = Setting,
                lexerRule           = LexerRule,
                qualifiedIdentifier = QualifiedIdentifier,
                expression          = Expression,
                term                = Term,
                factor              = Factor,
                literal             = Literal,
                grouping            = Grouping,
                repetition          = Repetition,
                optional            = Optional,
                lexerRuleExpression = LexerRuleExpression,
                lexerRuleTerm       = LexerRuleTerm,
                lexerRuleFactor     = LexerRuleFactor;

            // Regex literals between '/' delimiters refer to the separate regex grammar.
            var regexGrammar             = new RegexGrammar();
            var regexProductionReference = new ProductionReferenceExpression(regexGrammar);

            // definition: one or more blocks (right-recursive).
            definition.Rule =
                block
                | block + definition;

            // block: a rule, a setting or a lexer rule.
            block.Rule =
                rule
                | setting
                | lexerRule;

            // rule: qualifiedIdentifier '=' expression ';'
            rule.Rule =
                qualifiedIdentifier + '=' + expression + ';';

            // setting: settingIdentifier '=' qualifiedIdentifier ';'
            // The (Expr) cast applies to the leading lexer rule operand.
            setting.Rule = (Expr)
                           settingIdentifier + '=' + qualifiedIdentifier + ';';

            // lexerRule: qualifiedIdentifier '~' lexerRuleExpression ';'
            lexerRule.Rule =
                qualifiedIdentifier + '~' + lexerRuleExpression + ';';

            // expression: terms separated by '|' (right-recursive).
            expression.Rule =
                term
                | term + '|' + expression;

            // term: one or more factors in sequence (right-recursive).
            term.Rule =
                factor
                | factor + term;

            // factor: identifier, literal, '/' regex '/', repetition, optional or grouping.
            factor.Rule
                = qualifiedIdentifier
                  | literal
                  | '/' + regexProductionReference + '/'
                  | repetition
                  | optional
                  | grouping;

            // literal: a single-quoted or double-quoted string lexer rule.
            literal.Rule = (Expr)
                           new SingleQuoteStringLexerRule()
                           | new DoubleQuoteStringLexerRule();

            // repetition: '{' expression '}'
            repetition.Rule = (Expr)
                              '{' + expression + '}';

            // optional: '[' expression ']'
            optional.Rule = (Expr)
                            '[' + expression + ']';

            // grouping: '(' expression ')'
            grouping.Rule = (Expr)
                            '(' + expression + ')';

            // qualifiedIdentifier: identifiers separated by '.' (right-recursive).
            qualifiedIdentifier.Rule =
                identifier
                | (Expr)identifier + '.' + qualifiedIdentifier;

            // lexerRuleExpression: lexer rule terms separated by '|' (right-recursive).
            lexerRuleExpression.Rule =
                lexerRuleTerm
                | lexerRuleTerm + '|' + lexerRuleExpression;

            // lexerRuleTerm: one or more lexer rule factors in sequence (right-recursive).
            lexerRuleTerm.Rule =
                lexerRuleFactor
                | lexerRuleFactor + lexerRuleTerm;

            // lexerRuleFactor: a literal or '/' regex '/'.
            lexerRuleFactor.Rule =
                literal
                | '/' + regexProductionReference + '/';

            // Assemble the grammar: start production, all productions, then the two
            // lexer rule models for whitespace and multi-line comments.
            var grammarExpression = new GrammarExpression(
                definition,
                new[]
            {
                definition,
                block,
                rule,
                setting,
                lexerRule,
                expression,
                term,
                factor,
                literal,
                repetition,
                optional,
                grouping,
                qualifiedIdentifier,
                lexerRuleExpression,
                lexerRuleTerm,
                lexerRuleFactor
            },
                new[] { new LexerRuleModel(whitespace), new LexerRuleModel(multiLineComment) });

            _pdlGrammar = grammarExpression.ToGrammar();
        }
Exemplo n.º 12
0
        /// <summary>
        /// Checks that a lexeme created from the 'c' terminal rule accepts a scan of 'c'.
        /// NOTE(review): despite the test name, no reset call is made here.
        /// </summary>
        public void TerminalLexemeResetShouldClearPreExistingValues()
        {
            var rule = new TerminalLexerRule('c');
            var lexeme = rule.CreateLexeme(0);

            var scanned = lexeme.Scan('c');

            Assert.IsTrue(scanned);
        }
Exemplo n.º 13
0
        /// <summary>
        /// Checks that a freshly created lexeme of the 'c' terminal rule exposes
        /// <c>string.Empty</c> as its <c>Value</c> before anything is scanned.
        /// </summary>
        public void TerminalLexemeShouldInitializeCatpureToEmptyString()
        {
            var rule = new TerminalLexerRule('c');
            var lexeme = rule.CreateLexeme(0);

            var captured = lexeme.Value;

            Assert.AreEqual(string.Empty, captured);
        }
Exemplo n.º 14
0
        /// <summary>
        /// Builds the regular-expression grammar and assigns it to <c>grammar</c>.
        /// The start symbol is the regex non-terminal; the last two
        /// <c>ConcreteGrammar</c> constructor arguments are passed as null.
        /// </summary>
        static RegexGrammar()
        {
            // Lexer rules from the class helpers.
            var notMetaRule         = CreateNotMetaLexerRule();
            var notCloseBracketRule = CreateNotCloseBracketLexerRule();
            var escapeRule          = CreateEscapeCharacterLexerRule();

            // Non-terminals, named after the class-level symbol names.
            var regexSym          = NonTerminal.From(Regex);
            var expressionSym     = NonTerminal.From(Expression);
            var termSym           = NonTerminal.From(Term);
            var factorSym         = NonTerminal.From(Factor);
            var atomSym           = NonTerminal.From(Atom);
            var iteratorSym       = NonTerminal.From(Iterator);
            var setSym            = NonTerminal.From(Set);
            var positiveSetSym    = NonTerminal.From(PositiveSet);
            var negativeSetSym    = NonTerminal.From(NegativeSet);
            var classSym          = NonTerminal.From(CharacterClass);
            var rangeSym          = NonTerminal.From(CharacterRange);
            var characterSym      = NonTerminal.From(Character);
            var classCharacterSym = NonTerminal.From(CharacterClassCharacter);

            // Single-character terminals.
            var caretLit        = new TerminalLexerRule('^');
            var dollarLit       = new TerminalLexerRule('$');
            var pipeLit         = new TerminalLexerRule('|');
            var dotLit          = new TerminalLexerRule('.');
            var openParenLit    = new TerminalLexerRule('(');
            var closeParenLit   = new TerminalLexerRule(')');
            var starLit         = new TerminalLexerRule('*');
            var plusLit         = new TerminalLexerRule('+');
            var questionLit     = new TerminalLexerRule('?');
            var openBracketLit  = new TerminalLexerRule('[');
            var closeBracketLit = new TerminalLexerRule(']');
            var minusLit        = new TerminalLexerRule('-');

            var productions = new[]
            {
                // regex: expression with optional leading '^' and trailing '$'
                Production.From(regexSym, expressionSym),
                Production.From(regexSym, caretLit, expressionSym),
                Production.From(regexSym, expressionSym, dollarLit),
                Production.From(regexSym, caretLit, expressionSym, dollarLit),

                // expression / term / factor
                Production.From(expressionSym, termSym),
                Production.From(expressionSym, termSym, pipeLit, expressionSym),
                Production.From(termSym, factorSym),
                Production.From(termSym, factorSym, termSym),
                Production.From(factorSym, atomSym),
                Production.From(factorSym, atomSym, iteratorSym),

                // atom
                Production.From(atomSym, dotLit),
                Production.From(atomSym, characterSym),
                Production.From(atomSym, openParenLit, expressionSym, closeParenLit),
                Production.From(atomSym, setSym),

                // iterator
                Production.From(iteratorSym, starLit),
                Production.From(iteratorSym, plusLit),
                Production.From(iteratorSym, questionLit),

                // sets and character classes
                Production.From(setSym, positiveSetSym),
                Production.From(setSym, negativeSetSym),
                Production.From(negativeSetSym, openBracketLit, caretLit, classSym, closeBracketLit),
                Production.From(positiveSetSym, openBracketLit, classSym, closeBracketLit),
                Production.From(classSym, rangeSym),
                Production.From(classSym, rangeSym, classSym),
                Production.From(rangeSym, classCharacterSym),
                Production.From(rangeSym, classCharacterSym, minusLit, classCharacterSym),

                // characters
                Production.From(characterSym, notMetaRule),
                Production.From(characterSym, escapeRule),
                Production.From(classCharacterSym, notCloseBracketRule),
                Production.From(classCharacterSym, escapeRule)
            };

            grammar = new ConcreteGrammar(regexSym, productions, null, null);
        }
Exemplo n.º 15
0
        /// <summary>
        /// Builds the regular-expression grammar and stores it in <c>_regexGrammar</c>.
        /// The start symbol is the regex non-terminal; the last two <c>Grammar</c>
        /// constructor arguments are passed as null.
        /// </summary>
        static RegexGrammar()
        {
            // Lexer rules from the class helpers.
            var notMetaLexer         = NotMeta();
            var notCloseBracketLexer = NotCloseBracket();
            var escapeLexer          = EscapeCharacter();

            // Non-terminals, named after the class-level symbol names.
            var regexNode          = new NonTerminal(Regex);
            var expressionNode     = new NonTerminal(Expression);
            var termNode           = new NonTerminal(Term);
            var factorNode         = new NonTerminal(Factor);
            var atomNode           = new NonTerminal(Atom);
            var iteratorNode       = new NonTerminal(Iterator);
            var setNode            = new NonTerminal(Set);
            var positiveSetNode    = new NonTerminal(PositiveSet);
            var negativeSetNode    = new NonTerminal(NegativeSet);
            var classNode          = new NonTerminal(CharacterClass);
            var rangeNode          = new NonTerminal(CharacterRange);
            var characterNode      = new NonTerminal(Character);
            var classCharacterNode = new NonTerminal(CharacterClassCharacter);

            // Single-character terminals.
            var caretChar        = new TerminalLexerRule('^');
            var dollarChar       = new TerminalLexerRule('$');
            var pipeChar         = new TerminalLexerRule('|');
            var dotChar          = new TerminalLexerRule('.');
            var openParenChar    = new TerminalLexerRule('(');
            var closeParenChar   = new TerminalLexerRule(')');
            var starChar         = new TerminalLexerRule('*');
            var plusChar         = new TerminalLexerRule('+');
            var questionChar     = new TerminalLexerRule('?');
            var openBracketChar  = new TerminalLexerRule('[');
            var closeBracketChar = new TerminalLexerRule(']');
            var minusChar        = new TerminalLexerRule('-');

            var productions = new[]
            {
                // regex: expression with optional leading '^' and trailing '$'
                new Production(regexNode, expressionNode),
                new Production(regexNode, caretChar, expressionNode),
                new Production(regexNode, expressionNode, dollarChar),
                new Production(regexNode, caretChar, expressionNode, dollarChar),

                // expression / term / factor
                new Production(expressionNode, termNode),
                new Production(expressionNode, termNode, pipeChar, expressionNode),
                new Production(termNode, factorNode),
                new Production(termNode, factorNode, termNode),
                new Production(factorNode, atomNode),
                new Production(factorNode, atomNode, iteratorNode),

                // atom
                new Production(atomNode, dotChar),
                new Production(atomNode, characterNode),
                new Production(atomNode, openParenChar, expressionNode, closeParenChar),
                new Production(atomNode, setNode),

                // iterator
                new Production(iteratorNode, starChar),
                new Production(iteratorNode, plusChar),
                new Production(iteratorNode, questionChar),

                // sets and character classes
                new Production(setNode, positiveSetNode),
                new Production(setNode, negativeSetNode),
                new Production(negativeSetNode, openBracketChar, caretChar, classNode, closeBracketChar),
                new Production(positiveSetNode, openBracketChar, classNode, closeBracketChar),
                new Production(classNode, rangeNode),
                new Production(classNode, rangeNode, classNode),
                new Production(rangeNode, classCharacterNode),
                new Production(rangeNode, classCharacterNode, minusChar, classCharacterNode),

                // characters
                new Production(characterNode, notMetaLexer),
                new Production(characterNode, escapeLexer),
                new Production(classCharacterNode, notCloseBracketLexer),
                new Production(classCharacterNode, escapeLexer)
            };

            _regexGrammar = new Grammar(regexNode, productions, null, null);
        }