Exemplo n.º 1
0
        // Pulses the regex source text "[a-z][0-9]abc123" through a MarpaParseEngine one
        // character-token at a time and asserts that every token, and the parse as a whole,
        // is accepted by the pre-computed regex grammar.
        public void MarpaParseEngineCanParseRegex()
        {
            var engine = new MarpaParseEngine(new PreComputedGrammar(new RegexGrammar()));

            var input = "[a-z][0-9]abc123";

            // The first ten characters are the two bracketed character classes "[a-z][0-9]";
            // ordinary characters in that range are tokenized differently from those after it.
            const int characterClassLength = 10;

            var openBracketType     = new TokenType("[");
            var notMetaType         = new TokenType("NotMeta");         // maybe make this token type a readonly property on the regex grammar?
            var notCloseBracketType = new TokenType("NotCloseBracket"); // maybe make this token type a readonly property on the regex grammar?
            var closeBracketType    = new TokenType("]");
            var dashType            = new TokenType("-");

            var position = 0;
            foreach (var character in input)
            {
                TokenType type;
                if (character == '[')
                {
                    type = openBracketType;
                }
                else if (character == ']')
                {
                    type = closeBracketType;
                }
                else if (character == '-')
                {
                    type = dashType;
                }
                else if (position < characterClassLength)
                {
                    type = notCloseBracketType;
                }
                else
                {
                    type = notMetaType;
                }

                var accepted = engine.Pulse(new Token(character.ToString(), position, type));
                Assert.IsTrue(accepted, $"Error at position {position}");
                position++;
            }

            Assert.IsTrue(engine.IsAccepted(), "Parse was not accepted");
        }
Exemplo n.º 2
0
        // Parses `regex` with grammar `g`, converts the parse result to a pattern, compiles it
        // with the default pattern compiler and runs it against `s`. Asserts that the run
        // succeeded and that the reported input position is at or past the end of `s`.
        private void Matches(RegexGrammar g, string regex, string s)
        {
            var regexConverter = new RegexConverter();
            var parsedRegex    = g.ParseExpression(regex);
            var converted      = regexConverter.Convert(parsedRegex);

            var pegPattern = new Pattern
            {
                Data = converted
            };
            var outcome = PatternCompiler.Default.Compile(pegPattern).Run(s);

            var matchedWholeInput = outcome.IsSuccessful && outcome.InputPosition >= s.Length;
            Assert.IsTrue(
                matchedWholeInput,
                $"PEG from regex {regex} must match {s}. Matched {outcome.InputPosition} characters. Success: {outcome.IsSuccessful}");
        }
        // Pulses the regex source text "[a-z][0-9]abc123" through a DeterministicParseEngine one
        // character-token at a time and asserts that every token, and the parse as a whole,
        // is accepted by the pre-computed regex grammar.
        public void DeterministicParseEngineCanParseRegex()
        {
            var engine = new DeterministicParseEngine(
                new PreComputedGrammar(new RegexGrammar()));

            var input = "[a-z][0-9]abc123";

            // The first ten characters are the two bracketed character classes "[a-z][0-9]";
            // ordinary characters in that range are tokenized differently from those after it.
            const int characterClassLength = 10;

            var openBracketType     = new TokenType("[");
            var notMetaType         = new TokenType("NotMeta");         // maybe make this token type a readonly property on the regex grammar?
            var notCloseBracketType = new TokenType("NotCloseBracket"); // maybe make this token type a readonly property on the regex grammar?
            var closeBracketType    = new TokenType("]");
            var dashType            = new TokenType("-");

            // Maps a character (and its position in the input) to the token type handed to the engine.
            TokenType Classify(char character, int index) => character switch
            {
                '[' => openBracketType,
                ']' => closeBracketType,
                '-' => dashType,
                _ when index < characterClassLength => notCloseBracketType,
                _ => notMetaType,
            };

            for (var index = 0; index < input.Length; index++)
            {
                var character = input[index];
                var type      = Classify(character, index);
                var accepted  = engine.Pulse(new Token(character.ToString(), index, type));
                Assert.IsTrue(accepted, $"Error at position {index}");
            }

            Assert.IsTrue(engine.IsAccepted(), "Parse was not accepted");
        }
Exemplo n.º 4
0
        // Converts `regex` to a pattern, wraps it as ZeroOrMore(capture-group-0 / Any) and runs
        // it over the characters of `strData`, collecting captures into a list.
        // Returns the number of captures collected when the run is successful, otherwise -1.
        private static int GetNumCaptures(string regex, string strData)
        {
            var input   = strData.ToCharArray();
            var grammar = new RegexGrammar(PatternCompiler.Default);

            var regexConverter = new RegexConverter();
            var converted      = regexConverter.Convert(grammar.ParseExpression(regex));

            // Try the captured regex first; when it does not match, fall back to Any and repeat.
            var captureOrAny = new PrioritizedChoice(new CaptureGroup(0, converted), new Any());
            var scanPattern  = new Pattern(null)
            {
                Data = new ZeroOrMore(captureOrAny)
            };

            var compiled  = PatternCompiler.Default.Compile(scanPattern);
            var collected = new List<Capture>();
            var outcome   = compiled.Run(input, 0, input.Length, collected);

            if (!outcome.IsSuccessful)
            {
                return -1;
            }

            return collected.Count;
        }
Exemplo n.º 5
0
        // Parses the regex source text "[(]\d[)]" with the regex grammar, then walks the resulting
        // parse forest with a LoggingNodeVisitor and asserts that exactly 31 visits were logged.
        public void NodeVisitorShouldWalkSimpleRegex()
        {
            var engine = new ParseEngine(new RegexGrammar());
            var runner = new ParseRunner(engine, @"[(]\d[)]");

            while (!runner.EndOfStream())
            {
                if (runner.Read())
                {
                    continue;
                }

                Assert.Fail($"error parsing input at position {runner.Position}");
            }

            Assert.IsTrue(engine.IsAccepted());

            var disambiguation = new SelectFirstChildDisambiguationAlgorithm();
            var visitor        = new LoggingNodeVisitor(disambiguation);
            var forestRoot     = engine.GetParseForestRootNode();

            forestRoot.Accept(visitor);

            Assert.AreEqual(31, visitor.VisitLog.Count);
        }
Exemplo n.º 6
0
        // Static constructor: builds the PDL grammar once from lexer rules and production
        // expressions and stores the result of GrammarExpression.ToGrammar() in _pdlGrammar.
        static PdlGrammar()
        {
            // Lexer rules. Within this constructor only settingIdentifier, identifier,
            // whitespace and multiLineComment are referenced again; notDoubleQuote,
            // notSingleQuote, any, notCloseBracket and escapeCharacter are assigned but not
            // used below.
            // NOTE(review): confirm the factory calls have no required side effects before
            // removing the unused locals.
            BaseLexerRule
                settingIdentifier = SettingIdentifier(),
                notDoubleQuote    = NotDoubleQuote(),
                notSingleQuote    = NotSingleQuote(),
                identifier        = Identifier(),
                any             = new TerminalLexerRule(new AnyTerminal(), "."),
                notCloseBracket = new TerminalLexerRule(
                new NegationTerminal(new CharacterTerminal(']')), "[^\\]]"),
                escapeCharacter  = EscapeCharacter(),
                whitespace       = Whitespace(),
                multiLineComment = MultiLineComment();

            // One production expression per non-terminal; the right-hand sides
            // (Definition, Block, ...) are members declared outside this constructor.
            ProductionExpression
                definition          = Definition,
                block               = Block,
                rule                = Rule,
                setting             = Setting,
                lexerRule           = LexerRule,
                qualifiedIdentifier = QualifiedIdentifier,
                expression          = Expression,
                term                = Term,
                factor              = Factor,
                literal             = Literal,
                grouping            = Grouping,
                repetition          = Repetition,
                optional            = Optional,
                lexerRuleExpression = LexerRuleExpression,
                lexerRuleTerm       = LexerRuleTerm,
                lexerRuleFactor     = LexerRuleFactor;

            // The regex grammar is referenced from the '/' ... '/' alternatives of
            // factor and lexerRuleFactor below.
            var regexGrammar             = new RegexGrammar();
            var regexProductionReference = new ProductionReferenceExpression(regexGrammar);

            // definition: one or more blocks (right-recursive).
            definition.Rule =
                block
                | block + definition;

            // block: a rule, a setting or a lexer rule.
            block.Rule =
                rule
                | setting
                | lexerRule;

            // rule: qualifiedIdentifier '=' expression ';'
            rule.Rule =
                qualifiedIdentifier + '=' + expression + ';';

            // setting: settingIdentifier '=' qualifiedIdentifier ';'
            // NOTE(review): the (Expr) cast presumably selects the Expr operator overloads
            // when the leftmost operand is not a ProductionExpression — confirm.
            setting.Rule = (Expr)
                           settingIdentifier + '=' + qualifiedIdentifier + ';';

            // lexerRule: qualifiedIdentifier '~' lexerRuleExpression ';'
            lexerRule.Rule =
                qualifiedIdentifier + '~' + lexerRuleExpression + ';';

            // expression: terms separated by '|' (right-recursive).
            expression.Rule =
                term
                | term + '|' + expression;

            // term: one or more factors in sequence (right-recursive).
            term.Rule =
                factor
                | factor + term;

            // factor: identifier, literal, '/' regex '/', repetition, optional or grouping.
            factor.Rule
                = qualifiedIdentifier
                  | literal
                  | '/' + regexProductionReference + '/'
                  | repetition
                  | optional
                  | grouping;

            // literal: a single-quoted or double-quoted string lexer rule.
            literal.Rule = (Expr)
                           new SingleQuoteStringLexerRule()
                           | new DoubleQuoteStringLexerRule();

            // repetition: '{' expression '}'
            repetition.Rule = (Expr)
                              '{' + expression + '}';

            // optional: '[' expression ']'
            optional.Rule = (Expr)
                            '[' + expression + ']';

            // grouping: '(' expression ')'
            grouping.Rule = (Expr)
                            '(' + expression + ')';

            // qualifiedIdentifier: identifiers separated by '.' (right-recursive).
            qualifiedIdentifier.Rule =
                identifier
                | (Expr)identifier + '.' + qualifiedIdentifier;

            // lexerRuleExpression: lexer rule terms separated by '|' (right-recursive).
            lexerRuleExpression.Rule =
                lexerRuleTerm
                | lexerRuleTerm + '|' + lexerRuleExpression;

            // lexerRuleTerm: one or more lexer rule factors in sequence (right-recursive).
            lexerRuleTerm.Rule =
                lexerRuleFactor
                | lexerRuleFactor + lexerRuleTerm;

            // lexerRuleFactor: a literal or '/' regex '/'.
            lexerRuleFactor.Rule =
                literal
                | '/' + regexProductionReference + '/';

            // Assemble the grammar expression: `definition` is passed first (presumably the
            // start production — confirm against GrammarExpression's constructor), followed by
            // every production defined above, then lexer rule models for whitespace and
            // multi-line comments (presumably ignore rules — confirm).
            var grammarExpression = new GrammarExpression(
                definition,
                new[]
            {
                definition,
                block,
                rule,
                setting,
                lexerRule,
                expression,
                term,
                factor,
                literal,
                repetition,
                optional,
                grouping,
                qualifiedIdentifier,
                lexerRuleExpression,
                lexerRuleTerm,
                lexerRuleFactor
            },
                new[] { new LexerRuleModel(whitespace), new LexerRuleModel(multiLineComment) });

            _pdlGrammar = grammarExpression.ToGrammar();
        }