// Feeds the regex "[a-z][0-9]abc123" to a MarpaParseEngine one character at a time,
// asserting that every token is accepted and that the engine accepts the whole input.
public void MarpaParseEngineCanParseRegex()
{
    const string input = "[a-z][0-9]abc123";

    var engine = new MarpaParseEngine(new PreComputedGrammar(new RegexGrammar()));

    var openBracketType = new TokenType("[");
    // maybe make these two token types readonly properties on the regex grammar?
    var notMetaType = new TokenType("NotMeta");
    var notCloseBracketType = new TokenType("NotCloseBracket");
    var closeBracketType = new TokenType("]");
    var dashType = new TokenType("-");

    var position = 0;
    foreach (var symbol in input)
    {
        TokenType type;
        if (symbol == '[')
        {
            type = openBracketType;
        }
        else if (symbol == ']')
        {
            type = closeBracketType;
        }
        else if (symbol == '-')
        {
            type = dashType;
        }
        else
        {
            // The first ten characters of the input are the two bracketed classes;
            // everything from index 10 onwards is the plain "abc123" tail.
            type = position < 10 ? notCloseBracketType : notMetaType;
        }

        var accepted = engine.Pulse(new Token(symbol.ToString(), position, type));
        Assert.IsTrue(accepted, $"Error at position {position}");

        position++;
    }

    Assert.IsTrue(engine.IsAccepted(), "Parse was not accepted");
}
/// <summary>
/// Parses <paramref name="regex"/> with <paramref name="g"/>, converts the result with a
/// <c>RegexConverter</c>, compiles it as a pattern and asserts that running it over
/// <paramref name="s"/> succeeds with an input position at or beyond the end of the string.
/// </summary>
/// <param name="g">Grammar used to parse the regex text.</param>
/// <param name="regex">Regex source text.</param>
/// <param name="s">Input the compiled pattern is run against.</param>
private void Matches(RegexGrammar g, string regex, string s)
{
    var pegData = new RegexConverter().Convert(g.ParseExpression(regex));
    var outcome = PatternCompiler.Default
        .Compile(new Pattern { Data = pegData })
        .Run(s);

    var matchedEverything = outcome.IsSuccessful && outcome.InputPosition >= s.Length;
    var failureMessage =
        $"PEG from regex {regex} must match {s}. Matched {outcome.InputPosition} characters. Success: {outcome.IsSuccessful}";

    Assert.IsTrue(matchedEverything, failureMessage);
}
// Feeds the regex "[a-z][0-9]abc123" to a DeterministicParseEngine one character at a time,
// asserting that every token is accepted and that the engine accepts the whole input.
public void DeterministicParseEngineCanParseRegex()
{
    var grammar = new PreComputedGrammar(new RegexGrammar());
    var engine = new DeterministicParseEngine(grammar);
    var text = "[a-z][0-9]abc123";

    var open = new TokenType("[");
    // maybe make the next two token types readonly properties on the regex grammar?
    var plain = new TokenType("NotMeta");
    var insideClass = new TokenType("NotCloseBracket");
    var close = new TokenType("]");
    var range = new TokenType("-");

    var index = 0;
    while (index < text.Length)
    {
        var current = text[index];

        // Indexes 0-9 cover the two bracketed classes; from index 10 on the
        // characters are the plain "abc123" tail.
        var kind =
            current == '[' ? open
            : current == ']' ? close
            : current == '-' ? range
            : index < 10 ? insideClass
            : plain;

        var pulsed = engine.Pulse(new Token(current.ToString(), index, kind));
        Assert.IsTrue(pulsed, $"Error at position {index}");

        index++;
    }

    Assert.IsTrue(engine.IsAccepted(), "Parse was not accepted");
}
/// <summary>
/// Runs a pattern built from <paramref name="regex"/> over <paramref name="strData"/> and
/// reports how many captures were collected.
/// </summary>
/// <param name="regex">Regex source text, parsed with a <c>RegexGrammar</c>.</param>
/// <param name="strData">Input text; it is converted to a character array before the run.</param>
/// <returns>The number of collected captures, or -1 when the run is not successful.</returns>
private static int GetNumCaptures(string regex, string strData)
{
    var input = strData.ToCharArray();
    var grammar = new RegexGrammar(PatternCompiler.Default);
    var regexOperator = new RegexConverter().Convert(grammar.ParseExpression(regex));

    // Zero-or-more repetitions of a prioritized choice: the regex wrapped in
    // capture group 0 first, Any as the alternative.
    var captureOrAny = new PrioritizedChoice(new CaptureGroup(0, regexOperator), new Any());
    var scanPattern = new Pattern(null) { Data = new ZeroOrMore(captureOrAny) };

    var compiled = PatternCompiler.Default.Compile(scanPattern);
    var collected = new List<Capture>();
    var outcome = compiled.Run(input, 0, input.Length, collected);

    if (!outcome.IsSuccessful)
    {
        return -1;
    }

    return collected.Count;
}
// Parses the text "[(]\d[)]" with the regex grammar, then walks the resulting parse
// forest with a LoggingNodeVisitor and checks the number of logged visits.
public void NodeVisitorShouldWalkSimpleRegex()
{
    var engine = new ParseEngine(new RegexGrammar());
    var runner = new ParseRunner(engine, @"[(]\d[)]");

    // Consume the whole input, failing the test at the first rejected read.
    while (!runner.EndOfStream())
    {
        if (runner.Read())
        {
            continue;
        }

        Assert.Fail($"error parsing input at position {runner.Position}");
    }

    Assert.IsTrue(engine.IsAccepted());

    var visitor = new LoggingNodeVisitor(
        new SelectFirstChildDisambiguationAlgorithm());
    engine.GetParseForestRootNode().Accept(visitor);

    Assert.AreEqual(31, visitor.VisitLog.Count);
}
/// <summary>
/// Static constructor: declares the lexer rules and production expressions, assigns each
/// production's <c>Rule</c>, wraps them in a <c>GrammarExpression</c> and stores the result of
/// <c>ToGrammar()</c> in <c>_pdlGrammar</c>.
/// </summary>
static PdlGrammar()
{
    // Lexer rules, mostly obtained from factory methods declared outside this constructor.
    // NOTE(review): notDoubleQuote, notSingleQuote, any, notCloseBracket and escapeCharacter
    // are assigned here but never referenced again in this constructor.
    BaseLexerRule
        settingIdentifier = SettingIdentifier(),
        notDoubleQuote = NotDoubleQuote(),
        notSingleQuote = NotSingleQuote(),
        identifier = Identifier(),
        any = new TerminalLexerRule(new AnyTerminal(), "."),
        notCloseBracket = new TerminalLexerRule(
            new NegationTerminal(new CharacterTerminal(']')), "[^\\]]"),
        escapeCharacter = EscapeCharacter(),
        whitespace = Whitespace(),
        multiLineComment = MultiLineComment();

    // One production expression per nonterminal, each initialized from the member of the
    // same (capitalized) name, which is declared outside this constructor.
    ProductionExpression
        definition = Definition,
        block = Block,
        rule = Rule,
        setting = Setting,
        lexerRule = LexerRule,
        qualifiedIdentifier = QualifiedIdentifier,
        expression = Expression,
        term = Term,
        factor = Factor,
        literal = Literal,
        grouping = Grouping,
        repetition = Repetition,
        optional = Optional,
        lexerRuleExpression = LexerRuleExpression,
        lexerRuleTerm = LexerRuleTerm,
        lexerRuleFactor = LexerRuleFactor;

    // Reference to the regex grammar; it is placed between '/' characters in the
    // factor and lexerRuleFactor rules below.
    var regexGrammar = new RegexGrammar();
    var regexProductionReference = new ProductionReferenceExpression(regexGrammar);

    // Rule bodies. The `|` and `+` operators are defined on the expression types, which are
    // not shown here; presumably `|` is alternation and `+` is sequence — confirm.
    // NOTE(review): the (Expr) casts sit on leading lexer-rule/char operands; presumably they
    // are needed so the expression operators are selected — confirm before removing any.

    // Top level: definition is defined in terms of block and itself; a block is a rule,
    // a setting or a lexerRule.
    definition.Rule = block | block + definition;
    block.Rule = rule | setting | lexerRule;

    // The three statement forms all end with ';' and differ in their left-hand side
    // and separator ('=' for rule and setting, '~' for lexerRule).
    rule.Rule = qualifiedIdentifier + '=' + expression + ';';
    setting.Rule = (Expr) settingIdentifier + '=' + qualifiedIdentifier + ';';
    lexerRule.Rule = qualifiedIdentifier + '~' + lexerRuleExpression + ';';

    // Right-hand side of a rule: expression / term / factor, each defined recursively.
    expression.Rule = term | term + '|' + expression;
    term.Rule = factor | factor + term;
    factor.Rule = qualifiedIdentifier | literal | '/' + regexProductionReference + '/' | repetition | optional | grouping;

    // Literals come from the single- and double-quote string lexer rules.
    literal.Rule = (Expr) new SingleQuoteStringLexerRule() | new DoubleQuoteStringLexerRule();

    // Bracketed forms wrapping an expression: {...}, [...] and (...).
    repetition.Rule = (Expr) '{' + expression + '}';
    optional.Rule = (Expr) '[' + expression + ']';
    grouping.Rule = (Expr) '(' + expression + ')';

    // Identifiers joined by '.'.
    qualifiedIdentifier.Rule = identifier | (Expr)identifier + '.'
        + qualifiedIdentifier;

    // Right-hand side of a lexer rule: same expression/term/factor shape, but a factor is
    // limited to a literal or a '/'-delimited regex reference.
    lexerRuleExpression.Rule = lexerRuleTerm | lexerRuleTerm + '|' + lexerRuleExpression;
    lexerRuleTerm.Rule = lexerRuleFactor | lexerRuleFactor + lexerRuleTerm;
    lexerRuleFactor.Rule = literal | '/' + regexProductionReference + '/';

    // First argument: definition. Second: every production declared above.
    // Third: lexer rule models for whitespace and multi-line comments — presumably the
    // rules to be ignored while lexing; confirm against GrammarExpression's constructor.
    var grammarExpression = new GrammarExpression(
        definition,
        new[] { definition, block, rule, setting, lexerRule, expression, term, factor, literal, repetition, optional, grouping, qualifiedIdentifier, lexerRuleExpression, lexerRuleTerm, lexerRuleFactor },
        new[] { new LexerRuleModel(whitespace), new LexerRuleModel(multiLineComment) });

    _pdlGrammar = grammarExpression.ToGrammar();
}