// Builds the EBNF definition for R = ['a'], generates a grammar from it and
// checks that it has as many productions as a hand-written grammar expression.
public void EbnfGrammarGeneratorShouldCreateGrammarForOptional()
{
    // R = ['a']
    var literalA = new EbnfFactorLiteral("a");
    var optionalA = new EbnfFactorOptional(
        new EbnfExpression(
            new EbnfTerm(literalA)));
    var rule = new EbnfRule(
        new EbnfQualifiedIdentifier("R"),
        new EbnfExpression(
            new EbnfTerm(optionalA)));
    var definition = new EbnfDefinition(new EbnfBlockRule(rule));

    var grammar = GenerateGrammar(definition);
    Assert.IsNotNull(grammar);
    Assert.IsNotNull(grammar.Start);

    // Hand-written equivalent: R -> [a], [a] -> 'a' | <empty>
    ProductionExpression R = "R";
    ProductionExpression optA = "[a]";
    R.Rule = optA;
    optA.Rule = 'a' | (Expr)null;

    var expectedGrammar = new GrammarExpression(R, new[] { R, optA }).ToGrammar();
    Assert.AreEqual(expectedGrammar.Productions.Count, grammar.Productions.Count);
}
// Parses "abc" with S -> A B C and verifies the resulting tree: the root has
// exactly three internal children, each holding exactly one token child.
public void TreeNodeShouldFlattenIntermediateNodes()
{
    ProductionExpression S = "S", A = "A", B = "B", C = "C";
    S.Rule = A + B + C;
    A.Rule = 'a';
    B.Rule = 'b';
    C.Rule = 'c';
    var grammar = new GrammarExpression(S, new[] { S, A, B, C }).ToGrammar();

    var input = "abc";
    var treeNode = GetTreeNode(grammar, input);

    var childCount = 0;
    foreach (var child in treeNode.Children)
    {
        childCount++;
        Assert.AreEqual(TreeNodeType.Internal, child.NodeType);

        // Fail with an assertion instead of a NullReferenceException if the
        // node reports Internal but does not implement IInternalTreeNode.
        var internalChild = child as IInternalTreeNode;
        Assert.IsNotNull(internalChild);

        var grandChildCount = 0;
        foreach (var grandChild in internalChild.Children)
        {
            grandChildCount++;
            Assert.AreEqual(TreeNodeType.Token, grandChild.NodeType);
        }
        Assert.AreEqual(1, grandChildCount);
    }
    Assert.AreEqual(3, childCount);
}
// Wraps a five-rule grammar in a PreComputedGrammar and checks IsRightRecursive:
// expected true for A, C and D, expected false for B and E.
public void PreComputedGrammarIsRightRecursiveShouldNotContainSymbolsWithoutCycles()
{
    ProductionExpression A = "A";
    ProductionExpression B = "B";
    ProductionExpression C = "C";
    ProductionExpression D = "D";
    ProductionExpression E = "E";

    A.Rule = B + C;
    B.Rule = 'b';
    C.Rule = A | D;
    D.Rule = (E + D) | 'd';
    E.Rule = 'e';

    var preComputedGrammar = new PreComputedGrammar(
        new GrammarExpression(A).ToGrammar());

    foreach (var expression in new[] { A, C, D })
    {
        var symbol = expression.ProductionModel.LeftHandSide.NonTerminal;
        Assert.IsTrue(preComputedGrammar.Grammar.IsRightRecursive(symbol));
    }

    foreach (var expression in new[] { B, E })
    {
        var symbol = expression.ProductionModel.LeftHandSide.NonTerminal;
        Assert.IsFalse(preComputedGrammar.Grammar.IsRightRecursive(symbol));
    }
}
// S has two four-symbol alternatives that both spell "0123"; the helper
// compares the forests produced by the Leo and classic algorithms.
public void ParseEngineShouldProduceSameLeoAndClassicParseForestWhenGivenLongAmbiguousProduction()
{
    ProductionExpression S = "S";
    ProductionExpression A = "A", B = "B", C = "C", D = "D";
    ProductionExpression W = "W", X = "X", Y = "Y", Z = "Z";

    S.Rule = (A + B + C + D) | (W + X + Y + Z);

    // First alternative.
    A.Rule = '0';
    B.Rule = '1';
    C.Rule = '2';
    D.Rule = '3';

    // Second alternative, matching the same characters.
    W.Rule = '0';
    X.Rule = '1';
    Y.Rule = '2';
    Z.Rule = '3';

    var productions = new[] { S, A, B, C, D, W, X, Y, Z };
    var grammar = new GrammarExpression(S, productions).ToGrammar();

    AssertLeoAndClassicParseAlgorithmsCreateSameForest("0123", grammar);
}
// Runs a word grammar that lists the whitespace rule as an ignore rule over
// "aa aa": three reads must succeed and the chart must then hold two Earley sets.
public void ParseRunnerShouldEmitTokenWhenIgnoreCharacterIsEncountered()
{
    const string input = "aa aa";

    ProductionExpression S = "S";
    S.Rule = (_wordRule + S) | _wordRule;

    var ignoreRules = new[] { _whitespaceRule };
    var grammar = new GrammarExpression(S, new[] { S }, ignoreRules, null).ToGrammar();

    var parseEngine = new ParseEngine(grammar);
    var parseRunner = new ParseRunner(parseEngine, input);
    var chart = GetParseEngineChart(parseEngine);

    // Two reads inside the loop ...
    var reads = 0;
    while (reads < 2)
    {
        Assert.IsTrue(parseRunner.Read());
        reads++;
    }

    // ... plus a third read before the chart is inspected.
    Assert.IsTrue(parseRunner.Read());
    Assert.AreEqual(2, chart.EarleySets.Count);
}
// NOTE: the whole body is compiled out by #if false, so this method currently
// does nothing. The disabled code reads "aaaa" three times with a two-character
// grammar lexer rule and expects a chart count of two.
public void ParseRunnerWhenNoLexemesMatchCharacterShouldCreateNewLexeme()
{
#if false
    const string input = "aaaa";

    ProductionExpression A = "A";
    ProductionExpression S = "S";

    A.Rule = (Expr)'a' + 'a';
    var aGrammar = new GrammarExpression(A, new[] { A }).ToGrammar();
    var a = new GrammarLexerRule("a", aGrammar);

    S.Rule = (a + S) | a;
    var grammar = new GrammarExpression(S, new[] { S }).ToGrammar();

    var parseEngine = new ParseEngine(grammar);
    var parseRunner = new ParseRunner(parseEngine, input);
    var chart = GetParseEngineChart(parseEngine);

    for (var read = 0; read < 3; read++)
    {
        Assert.IsTrue(parseRunner.Read());
    }

    Assert.AreEqual(2, chart.Count);
#endif
}
// Two levels of ambiguity (A/B, then D/E) over the input "012"; the helper
// compares the forests produced by the Leo and classic algorithms.
public void ParseEngineAmbiguousNestedChildrenShouldCreateSameLeoAndClassicForest()
{
    const string input = "012";

    ProductionExpression Z = "Z";
    ProductionExpression S = "S";
    ProductionExpression A = "A", B = "B";
    ProductionExpression C = "C";
    ProductionExpression D = "D", E = "E";
    ProductionExpression F = "F";

    // Z gets a rule but is not handed to the grammar below; S is the start.
    Z.Rule = S;
    S.Rule = A | B;
    A.Rule = '0' + C;
    B.Rule = '0' + C;
    C.Rule = D | E;
    D.Rule = '1' + F;
    E.Rule = '1' + F;
    F.Rule = '2';

    var productions = new[] { S, A, B, C, D, E, F };
    var grammar = new GrammarExpression(S, productions).ToGrammar();

    AssertLeoAndClassicParseAlgorithmsCreateSameForest(input, grammar);
}
// S splits into two parallel chains (A-B-C and X-Y-Z) that both match "0123";
// the helper compares the forests produced by the Leo and classic algorithms.
public void ParseEngineDivergentAmbiguousGrammarShouldCreateSameLeoAndClassicParseForest()
{
    const string input = "0123";

    ProductionExpression S = "S";
    ProductionExpression A = "A", B = "B", C = "C";
    ProductionExpression X = "X", Y = "Y", Z = "Z";

    S.Rule = ('0' + A) | ('0' + X);

    // First chain.
    A.Rule = '1' + B;
    B.Rule = '2' + C;
    C.Rule = '3';

    // Second chain, matching the same characters.
    X.Rule = '1' + Y;
    Y.Rule = '2' + Z;
    Z.Rule = '3';

    var productions = new[] { S, A, B, C, X, Y, Z };
    var grammar = new GrammarExpression(S, productions).ToGrammar();

    AssertLeoAndClassicParseAlgorithmsCreateSameForest(input, grammar);
}
// Checks how many rules RulesContainingSymbol returns for each non-terminal:
// two for A, two for B, one for C and none for S.
public void GrammarRulesConainingSymbolShouldReturnAllRulesContainingSymbol()
{
    // Local names double as symbol names through nameof, so they must not be renamed.
    ProductionExpression S = nameof(S);
    ProductionExpression A = nameof(A);
    ProductionExpression B = nameof(B);
    ProductionExpression C = nameof(C);

    S.Rule = A | B;
    A.Rule = A | C | 'a';
    B.Rule = 'b' | B;
    C.Rule = 'c';

    var grammar = new GrammarExpression(S).ToGrammar();

    var symbolA = A.ProductionModel.LeftHandSide.NonTerminal;
    Assert.AreEqual(2, grammar.RulesContainingSymbol(symbolA).Count);

    var symbolB = B.ProductionModel.LeftHandSide.NonTerminal;
    Assert.AreEqual(2, grammar.RulesContainingSymbol(symbolB).Count);

    var symbolC = C.ProductionModel.LeftHandSide.NonTerminal;
    Assert.AreEqual(1, grammar.RulesContainingSymbol(symbolC).Count);

    var symbolS = S.ProductionModel.LeftHandSide.NonTerminal;
    Assert.AreEqual(0, grammar.RulesContainingSymbol(symbolS).Count);
}
// Checks IsTransitiveNullable on an eight-rule grammar: expected true for
// B, C, D, E, F and G, expected false for S and A.
public void GrammarShouldDiscoverEmptyProductionsWithTracibility()
{
    // Local names double as symbol names through nameof, so they must not be renamed.
    ProductionExpression S = nameof(S);
    ProductionExpression A = nameof(A);
    ProductionExpression B = nameof(B);
    ProductionExpression C = nameof(C);
    ProductionExpression D = nameof(D);
    ProductionExpression E = nameof(E);
    ProductionExpression F = nameof(F);
    ProductionExpression G = nameof(G);

    S.Rule = A + B;
    A.Rule = C + 'a';
    C.Rule = E;
    B.Rule = D;
    D.Rule = E + F;
    E.Rule = null;
    F.Rule = G;
    G.Rule = null;

    var allProductions = new[] { S, A, B, C, D, E, F, G };
    var grammar = new GrammarExpression(S, allProductions).ToGrammar();

    foreach (var nullable in new[] { B, C, D, E, F, G })
    {
        var symbol = nullable.ProductionModel.LeftHandSide.NonTerminal;
        Assert.IsTrue(grammar.IsTransitiveNullable(symbol));
    }

    foreach (var notNullable in new[] { S, A })
    {
        var symbol = notNullable.ProductionModel.LeftHandSide.NonTerminal;
        Assert.IsFalse(grammar.IsTransitiveNullable(symbol));
    }
}
// Builds a grammar with a whitespace rule in the third constructor argument and
// a word rule in the fourth, then expects five non-null lexer rules in total.
public void GrammarShouldContainAllLexerRulesInSuppliedProductionsIgnoresAndTrivia()
{
    // Local names double as symbol names through nameof, so they must not be renamed.
    ProductionExpression S = nameof(S);
    ProductionExpression A = nameof(A);
    ProductionExpression B = nameof(B);
    ProductionExpression C = nameof(C);

    S.Rule = A | B;
    A.Rule = A | C | 'a';
    B.Rule = 'b' | B;
    C.Rule = 'c';

    var whitespaceRules = new[] { new WhitespaceLexerRule() };
    var wordRules = new[] { new WordLexerRule() };
    var grammar = new GrammarExpression(S, null, whitespaceRules, wordRules).ToGrammar();

    Assert.IsNotNull(grammar.LexerRules);
    Assert.AreEqual(5, grammar.LexerRules.Count);
    foreach (var lexerRule in grammar.LexerRules)
    {
        Assert.IsNotNull(lexerRule);
    }
}
// Parses "a" with S -> 'a' and walks the forest: a symbol node with one AND
// node, which in turn holds one token node whose value is "a".
public void ParseEngineWhenScanCompletedShouldCreateInternalAndTerminalNodes()
{
    ProductionExpression S = "S";
    S.Rule = (Expr)'a';
    var grammar = new GrammarExpression(S, new[] { S }).ToGrammar();

    var parseEngine = new ParseEngine(grammar);
    ParseInput(parseEngine, Tokenize("a"));

    var root = parseEngine.GetParseForestRootNode();
    Assert.IsNotNull(root);

    // Root: symbol node with a single child.
    var symbolNode = root as ISymbolForestNode;
    Assert.IsNotNull(symbolNode);
    Assert.AreEqual(1, symbolNode.Children.Count);

    // Child: AND node with a single child.
    var andNode = symbolNode.Children[0] as IAndForestNode;
    Assert.IsNotNull(andNode);
    Assert.AreEqual(1, andNode.Children.Count);

    // Leaf: the scanned token.
    var tokenNode = andNode.Children[0] as ITokenForestNode;
    Assert.IsNotNull(tokenNode);
    Assert.AreEqual("a", tokenNode.Token.Value);
}
// Parses "abc" with S -> A B C and verifies the resulting tree: the root has
// exactly three internal children, each holding exactly one token child.
public void TreeNodeShouldFlattenIntermediateNodes()
{
    ProductionExpression S = "S";
    ProductionExpression A = "A";
    ProductionExpression B = "B";
    ProductionExpression C = "C";

    S.Rule = A + B + C;
    A.Rule = 'a';
    B.Rule = 'b';
    C.Rule = 'c';

    var grammar = new GrammarExpression(S, new[] { S, A, B, C }).ToGrammar();
    var root = GetTreeNode(grammar, "abc");

    var children = 0;
    foreach (var child in root.Children)
    {
        children++;

        // Any child that is not an internal node fails the test.
        if (!(child is IInternalTreeNode internalChild))
        {
            Assert.Fail();
            continue;
        }

        var grandChildren = 0;
        foreach (var grandChild in internalChild.Children)
        {
            grandChildren++;
            Assert.IsInstanceOfType(grandChild, typeof(ITokenTreeNode));
        }
        Assert.AreEqual(1, grandChildren);
    }

    Assert.AreEqual(3, children);
}
// Pulses a single "a" token into S -> S | A A A A, A -> "a" | E, then expects
// a chart count of two and an accepted parse.
public void AycockHorspoolAlgorithmShouldAcceptVulnerableGrammar()
{
    var a = new TerminalLexerRule(
        new CharacterTerminal('a'),
        new TokenName("a"));

    ProductionExpression SPrime = "S'";
    ProductionExpression S = "S";
    ProductionExpression A = "A";
    ProductionExpression E = "E";

    SPrime.Rule = S;
    S.Rule = (Expr)S | (A + A + A + A);
    A.Rule = (Expr)"a" | E;

    var grammar = new GrammarExpression(SPrime, new[] { SPrime, S, A, E }).ToGrammar();

    var parseEngine = new ParseEngine(grammar);
    parseEngine.Pulse(new VerbatimToken(a.TokenName, 0, "a"));

    // The chart is read from the engine's Chart property; an earlier version
    // reached the private _chart field through PrivateObject instead.
    var chart = parseEngine.Chart;
    Assert.IsNotNull(chart);
    Assert.AreEqual(2, chart.Count);
    Assert.IsTrue(parseEngine.IsAccepted());
}
// Scans "therefore" with two competing lexemes ("there" and "therefore"),
// keeping only the lexemes that survive each character. All input must be
// consumed and exactly one accepted lexeme must remain.
public void ParseEngineLexemeShouldMatchLongestAcceptableTokenWhenGivenAmbiguity()
{
    var lexemeList = new List<ParseEngineLexeme>();

    ProductionExpression There = "there";
    There.Rule = (Expr)'t' + 'h' + 'e' + 'r' + 'e';
    var thereGrammar = new GrammarExpression(There, new[] { There })
        .ToGrammar();
    var thereLexerRule = new GrammarLexerRule(
        new TokenType(There.ProductionModel.LeftHandSide.NonTerminal.Value),
        thereGrammar);

    ProductionExpression Therefore = "therefore";
    Therefore.Rule = (Expr)'t' + 'h' + 'e' + 'r' + 'e' + 'f' + 'o' + 'r' + 'e';
    var thereforeGrammar = new GrammarExpression(Therefore, new[] { Therefore })
        .ToGrammar();
    var thereforeLexerRule = new GrammarLexerRule(
        new TokenType(Therefore.ProductionModel.LeftHandSide.NonTerminal.Value),
        thereforeGrammar);

    var input = "therefore";

    var thereLexeme = new ParseEngineLexeme(thereLexerRule, input.AsCapture(), 0);
    lexemeList.Add(thereLexeme);

    var thereforeLexeme = new ParseEngineLexeme(thereforeLexerRule, input.AsCapture(), 0);
    lexemeList.Add(thereforeLexeme);

    var i = 0;
    for (; i < input.Length; i++)
    {
        var passedLexemes = lexemeList
            .Where(l => l.Scan())
            .ToList();

        // All existing lexemes have failed: fall back onto the lexemes that
        // existed before this character was read.
        // The list is already materialized, so the Count property is used
        // rather than the LINQ Count() extension.
        if (passedLexemes.Count == 0)
        {
            break;
        }

        lexemeList = passedLexemes;
    }

    // Expected value first, so a failure message reports the values correctly.
    Assert.AreEqual(input.Length, i);
    Assert.AreEqual(1, lexemeList.Count);

    var remainingLexeme = lexemeList[0];
    Assert.IsNotNull(remainingLexeme);
    Assert.IsTrue(remainingLexeme.IsAccepted());
}
// S refers to a RegexGrammar through a ProductionReferenceExpression; the
// resulting grammar is expected to expose fifteen lexer rules.
public void GrammarShouldContainAllLexerRulesInReferencedGrammars()
{
    ProductionExpression S = nameof(S);
    S.Rule = new ProductionReferenceExpression(new RegexGrammar());

    var grammar = new GrammarExpression(S).ToGrammar();

    Assert.IsNotNull(grammar.LexerRules);
    Assert.AreEqual(15, grammar.LexerRules.Count);
}
// Builds a tree node for "a+a+a" from an ambiguous grammar. There are no
// assertions; the test only exercises GetTreeNode.
public void TreeNodeWhenAmbiguousParseShouldReturnFirstParseTree()
{
    const string input = "a+a+a";

    ProductionExpression A = "A";
    A.Rule = (A + '+' + A)
        | (A + '-' + A)
        | 'a';

    var productions = new[] { A };
    var grammar = new GrammarExpression(A, productions).ToGrammar();

    var treeNode = GetTreeNode(grammar, input);
}
// Feeds the tokens of "aaab" through S -> A, A -> 'a' B, B -> A | 'b'.
// There are no explicit assertions; verification is left to ParseInput.
public void ParseEngineGivenIntermediateStepsShouldCreateTransitionItems()
{
    ProductionExpression S = "S";
    ProductionExpression A = "A";
    ProductionExpression B = "B";

    S.Rule = A;
    A.Rule = 'a' + B;
    B.Rule = A | 'b';

    var productions = new[] { S, A, B };
    var grammar = new GrammarExpression(S, productions).ToGrammar();

    var input = Tokenize("aaab");
    ParseInput(new ParseEngine(grammar), input);
}
// Feeds nine 'a' tokens through S -> 'a' S 'a' | 'a'.
// There are no explicit assertions; verification is left to ParseInput.
public void ParseEngineShouldParseUnmarkedMiddleRecursion()
{
    ProductionExpression S = "S";
    S.Rule = ('a' + S + 'a') | 'a';

    var productions = new[] { S };
    var grammar = new GrammarExpression(S, productions).ToGrammar();

    var parseEngine = new ParseEngine(grammar);
    ParseInput(parseEngine, Tokenize("aaaaaaaaa"));
}
// Static constructor: builds the cyclic grammar A -> B | 'a', B -> C | 'b',
// C -> A | 'c' and stores it in the grammar field.
static CycleGrammar()
{
    // Local names double as symbol names through nameof, so they must not be renamed.
    ProductionExpression A = nameof(A);
    ProductionExpression B = nameof(B);
    ProductionExpression C = nameof(C);

    A.Rule = B | 'a';
    B.Rule = C | 'b';
    C.Rule = A | 'c';

    var productions = new[] { A, B, C };
    grammar = new GrammarExpression(A, productions).ToGrammar();
}
// Static constructor: builds the JSON grammar (objects, arrays, strings,
// numbers and the true/false/null literals), passes a whitespace rule as the
// third GrammarExpression argument and stores the result in the grammar field.
static JsonGrammar()
{
    ProductionExpression Json = "Json";
    ProductionExpression Object = "Object";
    ProductionExpression Pair = "Pair";
    ProductionExpression PairRepeat = "PairRepeat";
    ProductionExpression Array = "Array";
    ProductionExpression Value = "Value";
    ProductionExpression ValueRepeat = "ValueRepeat";

    var number = new NumberLexerRule();
    var stringRule = String();

    Json.Rule = Value;

    // Objects: '{' followed by zero or more comma-separated pairs, then '}'.
    Object.Rule = '{' + PairRepeat + '}';
    PairRepeat.Rule = Pair
        | (Pair + ',' + PairRepeat)
        | Expr.Epsilon;
    Pair.Rule = (Expr)stringRule + ':' + Value;

    // Arrays: '[' followed by zero or more comma-separated values, then ']'.
    Array.Rule = '[' + ValueRepeat + ']';
    ValueRepeat.Rule = Value
        | (Value + ',' + ValueRepeat)
        | Expr.Epsilon;

    Value.Rule = (Expr)stringRule
        | number
        | Object
        | Array
        | "true"
        | "false"
        | "null";

    var whitespaceRules = new[] { new WhitespaceLexerRule() };
    grammar = new GrammarExpression(Json, null, whitespaceRules).ToGrammar();
}
// Builds the arithmetic grammar S -> S '+' M | M, M -> M '*' T | T, T -> digit.
private static Grammar CreateExpressionGrammar()
{
    var digit = new TerminalLexerRule(DigitTerminal.Instance, new TokenName("digit"));

    ProductionExpression S = "S";
    ProductionExpression M = "M";
    ProductionExpression T = "T";

    S.Rule = (S + '+' + M) | M;
    M.Rule = (M + '*' + T) | T;
    T.Rule = digit;

    var productions = new[] { S, M, T };
    return new GrammarExpression(S, productions).ToGrammar();
}
// Asks the grammar for the productions of A and expects exactly one, whose
// left-hand side is A.
public void GrammarRulesForWhenProductionMatchesShouldReturnRules()
{
    ProductionExpression B = "B";
    ProductionExpression A = "A";
    ProductionExpression S = "S";

    S.Rule = A | B;
    A.Rule = 'a';
    B.Rule = 'b';

    var productions = new[] { S, A, B };
    var grammar = new GrammarExpression(S, productions).ToGrammar();

    var rules = grammar
        .ProductionsFor(A.ProductionModel.LeftHandSide.NonTerminal)
        .ToList();

    Assert.AreEqual(1, rules.Count);
    Assert.IsTrue(
        A.ProductionModel.LeftHandSide.NonTerminal.Is(rules[0].LeftHandSide));
}
// Parses "aa" with S -> S T | "a", T -> "a" B | "a", B -> <empty> using
// ParseEngineOptions(false), then compares the forest with a hand-built one in
// which T(1,2) has two alternatives.
public void ParseEngineGivenAmbiguousNullableRightRecursionShouldCreateMultipleParsePaths()
{
    // example 1 section 3, Elizabeth Scott
    var tokens = Tokenize("aa");

    ProductionExpression S = "S";
    ProductionExpression T = "T";
    ProductionExpression B = "B";

    S.Rule = (S + T) | "a";
    B.Rule = null;
    T.Rule = ("a" + B) | "a";

    var grammar = new GrammarExpression(S, new[] { S, T, B }).ToGrammar();
    var parseEngine = new ParseEngine(grammar, new ParseEngineOptions(false));
    ParseInput(parseEngine, tokens);

    var actual = parseEngine.GetParseForestRootNode();

    // One token node instance is shared by both alternatives of T(1,2).
    var a_1_2 = new TokenForestNode(MakeToken("a", 1));

    // S(0,1) -> "a"
    var leftS = new SymbolForestNode(
        S.ProductionModel.LeftHandSide.NonTerminal,
        0,
        1,
        new AndForestNode(new TokenForestNode(MakeToken("a", 0))));

    // B(2,2), whose single AND node holds another B(2,2) symbol node.
    var emptyB = new SymbolForestNode(
        B.ProductionModel.LeftHandSide.NonTerminal,
        2,
        2,
        new AndForestNode(
            new SymbolForestNode(B.ProductionModel.LeftHandSide.NonTerminal, 2, 2)));

    // T(1,2) -> "a" | "a" B
    var rightT = new SymbolForestNode(
        T.ProductionModel.LeftHandSide.NonTerminal,
        1,
        2,
        new AndForestNode(a_1_2),
        new AndForestNode(a_1_2, emptyB));

    // S(0,2) -> S(0,1) T(1,2)
    var expected = new SymbolForestNode(
        S.ProductionModel.LeftHandSide.NonTerminal,
        0,
        2,
        new AndForestNode(leftS, rightT));

    AssertForestsAreEqual(expected, actual);
}
// Feeds the tokens of "bc" through A -> B C, B -> 'b', C -> 'c'.
// There are no explicit assertions; verification is left to ParseInput.
public void ParseEngineShouldParseSimpleSubstitutionGrammar()
{
    ProductionExpression A = "A";
    ProductionExpression B = "B";
    ProductionExpression C = "C";

    A.Rule = (Expr)B + C;
    B.Rule = (Expr)'b';
    C.Rule = (Expr)'c';

    var productions = new[] { A, B, C };
    var grammar = new GrammarExpression(A, productions).ToGrammar();

    var parseEngine = new ParseEngine(grammar);
    ParseInput(parseEngine, Tokenize("bc"));
}
// Runs "this is input" through a grammar that accepts any sequence of
// whitespace and word lexemes. Verification is left to RunParse.
public void ParseRunnerShouldParseSimpleWordSentence()
{
    const string input = "this is input";

    ProductionExpression S = "S";
    S.Rule = _whitespaceRule
        | (_whitespaceRule + S)
        | _wordRule
        | (_wordRule + S);

    var productions = new[] { S };
    var grammar = new GrammarExpression(S, productions).ToGrammar();

    RunParse(new ParseEngine(grammar), input);
}
// Pulses the tokens of "[1,2]" through a bracketed, comma-separated number
// list grammar on the DeterministicParseEngine. The test fails on the first
// rejected token, or if the input is not accepted at the end.
public void DeterministicParseEngineShouldParseRepeatingRightRecursiveRule()
{
    var number = new NumberLexerRule();
    var openBracket = new TerminalLexerRule('[');
    var closeBracket = new TerminalLexerRule(']');
    var comma = new TerminalLexerRule(',');

    ProductionExpression A = "A", V = "V", VR = "VR";
    A.Rule = openBracket + VR + closeBracket;
    VR.Rule = V
        | V + comma + VR
        | (Expr)null;
    V.Rule = number;

    var grammar = new GrammarExpression(
        A, new[] { A, V, VR }).ToGrammar();

    var determinisicParseEngine = new DeterministicParseEngine(grammar);

    var tokens = new[]
    {
        new Token("[", 0, openBracket.TokenType),
        new Token("1", 1, number.TokenType),
        new Token(",", 2, comma.TokenType),
        new Token("2", 3, number.TokenType),
        new Token("]", 4, closeBracket.TokenType)
    };

    // The index was only used to fetch the element, so iterate directly.
    foreach (var token in tokens)
    {
        var result = determinisicParseEngine.Pulse(token);
        if (!result)
        {
            Assert.Fail($"Failure parsing at position {determinisicParseEngine.Location}");
        }
    }

    var accepted = determinisicParseEngine.IsAccepted();
    if (!accepted)
    {
        // Plain literal: the message has no placeholders to interpolate.
        Assert.Fail("Input was not accepted.");
    }
}
// Builds the arithmetic grammar S -> S '+' M | M, M -> M '*' T | T, T -> digit.
private static IGrammar CreateExpressionGrammar()
{
    var digit = new TerminalLexerRule(
        new DigitTerminal(),
        new TokenType("digit"));

    ProductionExpression S = "S";
    ProductionExpression M = "M";
    ProductionExpression T = "T";

    S.Rule = (S + '+' + M) | M;
    M.Rule = (M + '*' + T) | T;
    T.Rule = digit;

    var productions = new[] { S, M, T };
    return new GrammarExpression(S, productions).ToGrammar();
}
// Builds a grammar lexer rule named "whitespace" that matches one or more
// WhitespaceTerminal characters (S -> whitespace | whitespace S).
private static GrammarLexerRule CreateWhitespaceRule()
{
    ProductionExpression S = "S";

    // This local must keep its name: nameof(whitespace) supplies the rule name below.
    ProductionExpression whitespace = "whitespace";

    S.Rule = whitespace | (whitespace + S);
    whitespace.Rule = new WhitespaceTerminal();

    var productions = new[] { S, whitespace };
    var grammar = new GrammarExpression(S, productions).ToGrammar();

    return new GrammarLexerRule(nameof(whitespace), grammar);
}
// Enqueues two states built from the same dotted rule and origin into Earley
// set 0 and expects a single entry in that set's predictions.
public void ChartEnqueShouldAvoidDuplication()
{
    var aToZ = new RangeTerminal('a', 'z');

    ProductionExpression L = "L";
    L.Rule = (L + aToZ) | aToZ;

    var productions = new[] { L };
    var grammar = new GrammarExpression(L, productions).ToGrammar();

    var chart = new Chart();
    var dottedRule = new DottedRule(grammar.Productions[0], 0);

    // Two distinct state instances built from identical arguments.
    var original = new NormalState(dottedRule, 1);
    var duplicate = new NormalState(dottedRule, 1);

    chart.Enqueue(0, original);
    chart.Enqueue(0, duplicate);

    Assert.AreEqual(1, chart.EarleySets[0].Predictions.Count);
}