/// <summary>
/// Builds the grammar S' -> S; S -> S | A A A A; A -> "a" | E with the
/// expression DSL, pulses a single 'a' token through a <see cref="ParseEngine"/>,
/// and asserts that the engine accepts and that its chart holds two entries.
/// </summary>
public void AycockHorspoolAlgorithmShouldAcceptVulnerableGrammar()
        {
            // One production expression per non-terminal of the grammar.
            ProductionExpression sPrime = "S'";
            ProductionExpression s = "S";
            ProductionExpression nullableA = "A";
            ProductionExpression e = "E";

            sPrime.Rule = s;
            s.Rule = (Expr)s | nullableA + nullableA + nullableA + nullableA;
            nullableA.Rule = (Expr)"a" | e;
            // NOTE(review): "E" is never assigned a rule here; presumably an
            // unassigned rule is treated as the empty production - confirm.

            var grammar = new GrammarExpression(
                    sPrime,
                    new[] { sPrime, s, nullableA, e })
                .ToGrammar();

            // Lexer rule whose token type tags the single input token.
            var aLexerRule = new TerminalLexerRule(
                new CharacterTerminal('a'),
                new TokenType("a"));

            var engine = new ParseEngine(grammar);
            engine.Pulse(new Token("a", 0, aLexerRule.TokenType));

            // The chart is a private field, so it is read through reflection.
            var chart = new PrivateObject(engine).GetField("_chart") as Chart;

            Assert.IsNotNull(chart);
            Assert.AreEqual(2, chart.Count);
            Assert.IsTrue(engine.IsAccepted());
        }
Esempio n. 2
0
        /// <summary>
        /// Static initializer that assembles the BNF grammar from hand-written
        /// productions and stores it in <c>_bnfGrammar</c>. Whitespace is passed
        /// to the <see cref="Grammar"/> constructor as the only ignore rule.
        /// </summary>
        static BnfGrammar()
        {
            /*
             *  <grammar>        ::= <rule> | <rule> <grammar>
             *  <rule>           ::= "<" <rule-name> ">" "::=" <expression>
             *  <expression>     ::= <list> | <list> "|" <expression>
             *  <line-end>       ::= <EOL> | <line-end> <line-end>
             *  <list>           ::= <term> | <term> <list>
             *  <term>           ::= <literal> | "<" <rule-name> ">"
             *  <literal>        ::= '"' <text> '"' | "'" <text> "'"
             *
             *  In the productions below, the '"<" <rule-name> ">"' sequence is
             *  factored out into its own "identifier" non-terminal.
             */

            // Lexer rules supplied by helper factories.
            var whitespace = CreateWhitespaceLexerRule();
            var ruleName = CreateRuleNameLexerRule();
            var implements = CreateImplementsLexerRule();
            var eol = CreateEndOfLineLexerRule();
            var notDoubleQuote = CreateNotDoubleQuoteLexerRule();
            var notSingleQuote = CreateNotSingleQuoteLexerRule();

            // Non-terminals used on either side of the productions.
            var grammar = new NonTerminal("grammar");
            var rule = new NonTerminal("rule");
            var identifier = new NonTerminal("identifier");
            var expression = new NonTerminal("expression");
            var lineEnd = new NonTerminal("line-end");
            var list = new NonTerminal("list");
            var term = new NonTerminal("term");
            var literal = new NonTerminal("literal");

            // Single-character terminals.
            var lessThan = new TerminalLexerRule('<');
            var greaterThan = new TerminalLexerRule('>');
            var doubleQuote = new TerminalLexerRule('"');
            var singleQuote = new TerminalLexerRule('\'');
            var pipe = new TerminalLexerRule('|');

            var productions = new[]
            {
                new Production(grammar, rule),
                new Production(grammar, rule, grammar),
                new Production(rule, identifier, implements, expression),
                new Production(expression, list),
                new Production(expression, list, pipe, expression),
                // NOTE(review): line-end is defined but not referenced by any
                // other production in this list.
                new Production(lineEnd, eol),
                new Production(lineEnd, lineEnd, lineEnd),
                new Production(list, term),
                new Production(list, term, list),
                new Production(term, literal),
                new Production(term, identifier),
                new Production(identifier, lessThan, ruleName, greaterThan),
                new Production(literal, doubleQuote, notDoubleQuote, doubleQuote),
                new Production(literal, singleQuote, notSingleQuote, singleQuote)
            };

            var ignore = new[]
            {
                whitespace
            };

            _bnfGrammar = new Grammar(grammar, productions, ignore);
        }
        /// <summary>
        /// Builds the grammar S' -> S; S -> A A A A; A -> 'a' | E; E -> λ with
        /// <see cref="GrammarBuilder"/>, pulses a single 'a' token, and asserts
        /// that the engine accepts and that its chart holds two entries.
        /// </summary>
        public void Test_AycockHorspoolAlgorithm_That_Vulnerable_Grammar_Accepts_Input()
        {
            var aLexerRule = new TerminalLexerRule(
                new Terminal('a'),
                new TokenType("a"));

            var vulnerableGrammar = new GrammarBuilder("S'")
                .Production("S'", alternatives => alternatives.Rule("S"))
                .Production("S", alternatives => alternatives.Rule("A", "A", "A", "A"))
                .Production("A", alternatives => alternatives
                    .Rule(aLexerRule)
                    .Rule("E"))
                .Production("E", alternatives => alternatives.Lambda())
                .ToGrammar();

            var engine = new ParseEngine(vulnerableGrammar);
            var inputToken = new Token("a", 0, aLexerRule.TokenType);
            engine.Pulse(inputToken);

            // The chart is a private field, so it is read through reflection.
            var chart = new PrivateObject(engine).GetField("_chart") as Chart;

            Assert.IsNotNull(chart);
            Assert.AreEqual(2, chart.Count);
            Assert.IsTrue(engine.IsAccepted());
        }
Esempio n. 4
0
 /// <summary>
 /// Appends one alternative (a sequence of symbols) to this builder.
 /// Each argument is converted as follows: a <c>char</c> or an
 /// <see cref="ITerminal"/> is wrapped in a <see cref="TerminalLexerRule"/>
 /// whose token type is named after the terminal's string form; an
 /// <see cref="ILexerRule"/> is added as-is; a <c>string</c> becomes a
 /// <see cref="NonTerminal"/>; <c>null</c> entries are skipped.
 /// </summary>
 /// <param name="symbols">The symbols of the alternative; may be null, which adds an empty sequence.</param>
 /// <returns>This builder, so calls can be chained.</returns>
 /// <exception cref="ArgumentException">An entry is of none of the supported types.</exception>
 public IRuleBuilder Rule(params object[] symbols)
 {
     var converted = new List<ISymbol>();

     // A null array is treated like an empty one: an empty sequence is still recorded.
     foreach (var item in symbols ?? new object[0])
     {
         if (item == null)
         {
             continue;
         }

         if (item is char)
         {
             var charTerminal = new Terminal((char)item);
             converted.Add(new TerminalLexerRule(
                 charTerminal,
                 new TokenType(charTerminal.ToString())));
             continue;
         }

         // ITerminal is tested before ILexerRule, so a type implementing both is wrapped.
         if (item is ITerminal)
         {
             var givenTerminal = (ITerminal)item;
             converted.Add(new TerminalLexerRule(
                 givenTerminal,
                 new TokenType(givenTerminal.ToString())));
             continue;
         }

         if (item is ILexerRule)
         {
             converted.Add((ILexerRule)item);
             continue;
         }

         if (item is string)
         {
             converted.Add(new NonTerminal((string)item));
             continue;
         }

         throw new ArgumentException("unrecognized terminal or nonterminal");
     }

     _rules.Add(converted);
     return this;
 }
Esempio n. 5
0
        /// <summary>
        /// Static initializer that describes the EBNF grammar with the
        /// production-expression DSL and stores the result of
        /// <c>GrammarExpression.ToGrammar()</c> in <c>_ebnfGrammar</c>.
        /// Regular expressions written between '/' delimiters are delegated to
        /// a <see cref="RegexGrammar"/> through a production reference;
        /// whitespace and multi-line comments are passed as the third
        /// constructor argument of the <see cref="GrammarExpression"/>.
        /// </summary>
        static EbnfGrammar()
        {
            // Lexer rules referenced by the productions below.
            BaseLexerRule
                settingIdentifier = CreateSettingIdentifierLexerRule(),
                notDoubleQuote = CreateNotDoubleQuoteLexerRule(),
                notSingleQuote = CreateNotSingleQuoteLexerRule(),
                identifier = CreateIdentifierLexerRule(),
                whitespace = CreateWhitespaceLexerRule(),
                multiLineComment = CreateMultiLineCommentLexerRule();

            // One production expression per non-terminal; the right-hand
            // names are members declared elsewhere in the class.
            ProductionExpression
                definition = Definition,
                block = Block,
                rule = Rule,
                setting = Setting,
                lexerRule = LexerRule,
                qualifiedIdentifier = QualifiedIdentifier,
                expression = Expression,
                term = Term,
                factor = Factor,
                literal = Literal,
                grouping = Grouping,
                repetition = Repetition,
                optional = Optional,
                lexerRuleExpression = LexerRuleExpression,
                lexerRuleTerm = LexerRuleTerm,
                lexerRuleFactor = LexerRuleFactor;

            // Embedded regex syntax is parsed by the regex grammar.
            var regexGrammar = new RegexGrammar();
            var regexProductionReference = new ProductionReferenceExpression(regexGrammar);

            definition.Rule =
                block
                | block + definition;

            block.Rule =
                rule
                | setting
                | lexerRule;

            rule.Rule =
                qualifiedIdentifier + '=' + expression + ';';

            setting.Rule = (Expr)
                settingIdentifier + '=' + qualifiedIdentifier + ';';

            lexerRule.Rule =
                qualifiedIdentifier + '~' + lexerRuleExpression + ';';

            expression.Rule =
                term
                | term + '|' + expression;

            term.Rule =
                factor
                | factor + term;

            factor.Rule
                = qualifiedIdentifier
                | literal
                | '/' + regexProductionReference + '/'
                | repetition
                | optional
                | grouping;

            literal.Rule = (Expr)
                '"' + notDoubleQuote + '"'
                | (Expr)"'" + notSingleQuote + "'";

            repetition.Rule = (Expr)
                '{' + expression + '}';

            optional.Rule = (Expr)
                '[' + expression + ']';

            grouping.Rule = (Expr)
                '(' + expression + ')';

            qualifiedIdentifier.Rule =
                identifier
                | (Expr)identifier + '.' + qualifiedIdentifier;

            lexerRuleExpression.Rule =
                lexerRuleTerm
                | lexerRuleTerm + '|' + lexerRuleExpression;

            lexerRuleTerm.Rule =
                lexerRuleFactor
                | lexerRuleFactor + lexerRuleTerm;

            lexerRuleFactor.Rule =
                literal
                | '/' + regexProductionReference + '/';

            // 'definition' is both the start production and the first listed production.
            var grammarExpression = new GrammarExpression(
                definition,
                new[]
                {
                    definition,
                    block,
                    rule,
                    setting,
                    lexerRule,
                    expression,
                    term,
                    factor,
                    literal,
                    repetition,
                    optional,
                    grouping,
                    qualifiedIdentifier,
                    lexerRuleExpression,
                    lexerRuleTerm,
                    lexerRuleFactor
                },
                new[] { new LexerRuleModel(whitespace), new LexerRuleModel(multiLineComment) });
            _ebnfGrammar = grammarExpression.ToGrammar();
        }
Esempio n. 6
0
        /// <summary>
        /// Static initializer that builds the regular-expression grammar with
        /// the fluent <see cref="GrammarBuilder"/> and stores the result of
        /// <c>ToGrammar()</c> in <c>_regexGrammar</c>.
        /// </summary>
        static RegexGrammar()
        {
            /*  Regex                      ->   Expression |
             *                                  '^' Expression |
             *                                  Expression '$' |
             *                                  '^' Expression '$'
             *
             *  Expression                 ->   Term |
             *                                  Term '|' Expression |
             *                                  λ
             *
             *  Term                       ->   Factor |
             *                                  Factor Term
             *
             *  Factor                     ->   Atom |
             *                                  Atom Iterator
             *
             *  Atom                       ->   . |
             *                                  Character |
             *                                  '(' Expression ')' |
             *                                  Set
             *
             *  Iterator                   ->   '*' |
             *                                  '+' |
             *                                  '?'
             *
             *  Set                        ->   PositiveSet |
             *                                  NegativeSet
             *
             *  PositiveSet                ->   '[' CharacterClass ']'
             *
             *  NegativeSet                ->   '[^' CharacterClass ']'
             *
             *  CharacterClass             ->   CharacterRange |
             *                                  CharacterRange CharacterClass
             *
             *  CharacterRange             ->   CharacterClassCharacter |
             *                                  CharacterClassCharacter '-' CharacterClassCharacter
             *
             *  Character                  ->   NotMetaCharacter |
             *                                  '\' AnyCharacter |
             *                                  EscapeSequence
             *
             *  CharacterClassCharacter    ->   NotCloseBracketCharacter |
             *                                  '\' AnyCharacter
             *
             *  Notes on how the builder below differs from the notation above:
             *  - NegativeSet is built from two separate tokens, '[' followed by '^'.
             *  - EscapeSequence has no corresponding production in the builder.
             */

            // Non-terminal names used as keys in the builder calls.
            const string Regex = "Regex";
            const string Expression = "Expression";
            const string Term = "Term";
            const string Factor = "Factor";
            const string Atom = "Atom";
            const string Iterator = "Iterator";
            const string Set = "Set";
            const string PositiveSet = "PositiveSet";
            const string NegativeSet = "NegativeSet";
            const string CharacterClass = "CharacterClass";
            const string Character = "Character";
            const string CharacterRange = "CharacterRange";
            const string CharacterClassCharacter = "CharacterClassCharacter";
            const string NotCloseBracket = "NotCloseBracket";
            const string NotMetaCharacter = "NotMetaCharacter";

            // Single-character terminals for the regex meta characters.
            var caret = new TerminalLexerRule('^');
            var dollar = new TerminalLexerRule('$');
            var pipe = new TerminalLexerRule('|');
            var dot = new TerminalLexerRule('.');
            var openParen = new TerminalLexerRule('(');
            var closeParen = new TerminalLexerRule(')');
            var star = new TerminalLexerRule('*');
            var plus = new TerminalLexerRule('+');
            var question = new TerminalLexerRule('?');
            var openBracket = new TerminalLexerRule('[');
            var closeBracket = new TerminalLexerRule(']');
            // Negation of the ']' terminal, used for characters inside a set.
            var notCloseBracket = new TerminalLexerRule(new NegationTerminal(new Terminal(']')), new TokenType("!]"));
            var dash = new TerminalLexerRule('-');
            var backslash = new TerminalLexerRule('\\');
            // Negation of the listed meta characters.
            // NOTE(review): '|' and '-' are not in this set although they have
            // their own lexer rules above - confirm this is intended.
            var notMeta = new TerminalLexerRule(
                new NegationTerminal(
                    new SetTerminal('.', '^', '$', '(', ')', '[', ']', '+', '*', '?', '\\')),
                new TokenType("not-meta"));
            var any = new TerminalLexerRule(new AnyTerminal(), new TokenType("any"));

            // Productions follow the notation in the header comment; Lambda()
            // corresponds to the λ alternative (presumably the empty production).
            var grammarBuilder = new GrammarBuilder(Regex)
                .Production(Regex, r => r
                    .Rule(Expression)
                    .Rule(caret, Expression)
                    .Rule(Expression, dollar)
                    .Rule(caret, Expression, dollar))
                .Production(Expression, r => r
                    .Rule(Term)
                    .Rule(Term, pipe, Expression)
                    .Lambda())
                .Production(Term, r => r
                    .Rule(Factor)
                    .Rule(Factor, Term))
                .Production(Factor, r => r
                    .Rule(Atom)
                    .Rule(Atom, Iterator))
                .Production(Atom, r => r
                    .Rule(dot)
                    .Rule(Character)
                    .Rule(openParen, Expression, closeParen)
                    .Rule(Set))
                .Production(Iterator, r => r
                    .Rule(star)
                    .Rule(plus)
                    .Rule(question))
                .Production(Set, r => r
                    .Rule(PositiveSet)
                    .Rule(NegativeSet))
                .Production(PositiveSet, r => r
                    .Rule(openBracket, CharacterClass, closeBracket))
                .Production(NegativeSet, r => r
                    .Rule(openBracket, caret, CharacterClass, closeBracket))
                .Production(CharacterClass, r => r
                    .Rule(CharacterRange)
                    .Rule(CharacterRange, CharacterClass))
                .Production(CharacterRange, r => r
                    .Rule(CharacterClassCharacter)
                    .Rule(CharacterClassCharacter, dash, CharacterClassCharacter))
                .Production(Character, r => r
                    .Rule(NotMetaCharacter)
                    .Rule(backslash, any))
                .Production(CharacterClassCharacter, r => r
                    .Rule(NotCloseBracket)
                    .Rule(backslash, any))
                .Production(NotMetaCharacter, r => r
                    .Rule(notMeta))
                .Production(NotCloseBracket, r => r
                    .Rule(notCloseBracket))
                // Every lexer rule used above is also registered with the builder.
                .LexerRule(caret)
                .LexerRule(dollar)
                .LexerRule(pipe)
                .LexerRule(dot)
                .LexerRule(openParen)
                .LexerRule(closeParen)
                .LexerRule(star)
                .LexerRule(plus)
                .LexerRule(question)
                .LexerRule(openBracket)
                .LexerRule(closeBracket)
                .LexerRule(notCloseBracket)
                .LexerRule(dash)
                .LexerRule(backslash)
                .LexerRule(notMeta)
                .LexerRule(any);
            _regexGrammar = grammarBuilder.ToGrammar();
        }
Esempio n. 7
0
 /// <summary>
 /// Wraps <paramref name="terminal"/> in a <see cref="TerminalLexerRule"/>
 /// whose token type is named <paramref name="name"/> and adds it to this
 /// builder's lexer rules.
 /// </summary>
 /// <param name="name">Name given to the new token type.</param>
 /// <param name="terminal">Terminal the lexer rule is built around.</param>
 /// <returns>This builder, so calls can be chained.</returns>
 public IGrammarBuilder LexerRule(string name, ITerminal terminal)
 {
     _lexerRules.Add(
         new TerminalLexerRule(terminal, new TokenType(name)));

     return this;
 }
Esempio n. 8
0
        /// <summary>
        /// Static initializer that assembles the regular-expression grammar
        /// from an explicit production list and stores it in
        /// <c>_regexGrammar</c>. No ignore rules are supplied (the third
        /// <see cref="Grammar"/> constructor argument is <c>null</c>).
        /// </summary>
        static RegexGrammar()
        {
            // Lexer rules supplied by helper factories.
            var notMetaRule = CreateNotMetaLexerRule();
            var notCloseBracketRule = CreateNotCloseBracketLexerRule();
            var escapeRule = CreateEscapeCharacterLexerRule();

            // Single-character terminals for the regex meta characters.
            var caretRule = new TerminalLexerRule('^');
            var dollarRule = new TerminalLexerRule('$');
            var pipeRule = new TerminalLexerRule('|');
            var dotRule = new TerminalLexerRule('.');
            var openParenRule = new TerminalLexerRule('(');
            var closeParenRule = new TerminalLexerRule(')');
            var starRule = new TerminalLexerRule('*');
            var plusRule = new TerminalLexerRule('+');
            var questionRule = new TerminalLexerRule('?');
            var openBracketRule = new TerminalLexerRule('[');
            var closeBracketRule = new TerminalLexerRule(']');
            var dashRule = new TerminalLexerRule('-');

            // Non-terminals, named by members declared elsewhere in the class.
            var regexSymbol = new NonTerminal(Regex);
            var expressionSymbol = new NonTerminal(Expression);
            var termSymbol = new NonTerminal(Term);
            var factorSymbol = new NonTerminal(Factor);
            var atomSymbol = new NonTerminal(Atom);
            var iteratorSymbol = new NonTerminal(Iterator);
            var setSymbol = new NonTerminal(Set);
            var positiveSetSymbol = new NonTerminal(PositiveSet);
            var negativeSetSymbol = new NonTerminal(NegativeSet);
            var characterClassSymbol = new NonTerminal(CharacterClass);
            var characterRangeSymbol = new NonTerminal(CharacterRange);
            var characterSymbol = new NonTerminal(Character);
            var characterClassCharacterSymbol = new NonTerminal(CharacterClassCharacter);

            var productions = new[]
            {
                // Regex: optionally anchored expression.
                new Production(regexSymbol, expressionSymbol),
                new Production(regexSymbol, caretRule, expressionSymbol),
                new Production(regexSymbol, expressionSymbol, dollarRule),
                new Production(regexSymbol, caretRule, expressionSymbol, dollarRule),

                // Expression: '|'-separated terms.
                new Production(expressionSymbol, termSymbol),
                new Production(expressionSymbol, termSymbol, pipeRule, expressionSymbol),

                // Term: one or more factors.
                new Production(termSymbol, factorSymbol),
                new Production(termSymbol, factorSymbol, termSymbol),

                // Factor: atom with an optional iterator.
                new Production(factorSymbol, atomSymbol),
                new Production(factorSymbol, atomSymbol, iteratorSymbol),

                // Atom alternatives.
                new Production(atomSymbol, dotRule),
                new Production(atomSymbol, characterSymbol),
                new Production(atomSymbol, openParenRule, expressionSymbol, closeParenRule),
                new Production(atomSymbol, setSymbol),

                // Iterator: '*', '+' or '?'.
                new Production(iteratorSymbol, starRule),
                new Production(iteratorSymbol, plusRule),
                new Production(iteratorSymbol, questionRule),

                // Sets; the negative form is '[' followed by '^'.
                new Production(setSymbol, positiveSetSymbol),
                new Production(setSymbol, negativeSetSymbol),
                new Production(positiveSetSymbol, openBracketRule, characterClassSymbol, closeBracketRule),
                new Production(negativeSetSymbol, openBracketRule, caretRule, characterClassSymbol, closeBracketRule),

                // Character classes and ranges.
                new Production(characterClassSymbol, characterRangeSymbol),
                new Production(characterClassSymbol, characterRangeSymbol, characterClassSymbol),
                new Production(characterRangeSymbol, characterClassCharacterSymbol),
                new Production(characterRangeSymbol, characterClassCharacterSymbol, dashRule, characterClassCharacterSymbol),

                // Individual characters, outside and inside a set.
                new Production(characterSymbol, notMetaRule),
                new Production(characterSymbol, escapeRule),
                new Production(characterClassCharacterSymbol, notCloseBracketRule),
                new Production(characterClassCharacterSymbol, escapeRule)
            };

            _regexGrammar = new Grammar(regexSymbol, productions, null);
        }