/// <summary>
/// Visitor callback for a <see cref="CapturingGroup"/> expression; concrete visitors must supply the body.
/// NOTE(review): the name suggests this is invoked after the group's inner expression has been visited — confirm against the Accept implementation.
/// </summary>
/// <param name="expression">The capturing group being visited.</param>
public abstract void VisitLeave(CapturingGroup expression);
/// <summary>
/// Parses a <c>\k&lt; label &gt;</c> style back-reference with a hand-built grammar and checks that the
/// resulting tree is Test -> DynamicBackReferencing -> Label, with the label text captured.
/// </summary>
public void CompositeVisitor_CapturingGroup_SandBoxTest_PriorityChoice1()
{
    // Line break alternatives, tried in this order.
    var windowsBreak = new Literal { MatchText = "\r\n" };
    var oldMacBreak = new Literal { MatchText = "\r\r" };
    var linuxBreak = new Literal { MatchText = "\n" };
    PrioritizedChoice lineBreak = new PrioritizedChoice(windowsBreak, oldMacBreak).Or(linuxBreak);

    // "//" followed by anything that is not a line break.
    var restOfLine = new Sequence(new NotPredicate(lineBreak), new AnyCharacter()).Star();
    var lineComment = new Sequence(new Literal { MatchText = "//" }, restOfLine);

    // "/*" followed by anything up to and including the first "*/".
    var blockCommentTail = new Sequence(
            new NotPredicate(new Literal { MatchText = "*/" }),
            new AnyCharacter())
        .Star()
        .Sequence(new Literal { MatchText = "*/" });
    var blockComment = new Sequence(new Literal { MatchText = "/*" }, blockCommentTail);

    var anyComment = new PrioritizedChoice(lineComment, blockComment);
    var blank = new PrioritizedChoice(
        new CharacterClass { ClassExpression = "[ \t\r\n\v]" },
        anyComment);

    // Identifier: first character is a letter or underscore, the rest may also contain digits.
    var labelGroup = new CapturingGroup(
        "Label",
        new Sequence(
            new CharacterClass { ClassExpression = "[a-zA-Z_]" },
            new ZeroOrMore(new CharacterClass { ClassExpression = "[a-zA-Z0-9_]" })));

    // \k< label
    var paddedLabel = new Sequence(new ZeroOrMore(blank), labelGroup).Sequence(new ZeroOrMore(blank));
    var opening = new Sequence(new Literal { MatchText = @"\k<" }, paddedLabel);

    // optional [\i] flag
    var caseFlag = new Optional(
        new Sequence(
            new Sequence(
                new Literal { MatchText = "[" },
                new CapturingGroup("CaseSensitive", new Literal { MatchText = @"\i" })),
            new Literal { MatchText = "]" }));

    // closing >
    var closing = new Sequence(new ZeroOrMore(blank), new Literal { MatchText = ">" });

    var backReferenceGroup = new CapturingGroup(
        "DynamicBackReferencing",
        opening.Sequence(caseFlag).Sequence(closing));

    // The whole input must be consumed: nothing may follow the back-reference.
    var grammar = new CapturingGroup(
        "Test",
        new Sequence(backReferenceGroup, new NotPredicate(new AnyCharacter())));

    var text = @"\k< CapturedLabelVariableName >";
    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
    var parser = new NpegParserVisitor(iterator);
    grammar.Accept(parser);
    Assert.IsTrue(parser.IsMatch);

    AstNode tree = parser.AST;
    Assert.IsTrue(tree.Token.Name == "Test");

    AstNode backReferenceNode = tree.Children[0];
    Assert.IsTrue(backReferenceNode.Token.Name == "DynamicBackReferencing");

    AstNode labelNode = backReferenceNode.Children[0];
    Assert.IsTrue(labelNode.Token.Name == "Label");
    Assert.IsTrue(labelNode.Token.ValueAsString(iterator) == "CapturedLabelVariableName");
}
/// <summary>
/// Visitor callback for a <see cref="CapturingGroup"/> expression; concrete visitors must supply the body.
/// NOTE(review): the name suggests this is invoked before the group's inner expression is visited — confirm against the Accept implementation.
/// </summary>
/// <param name="expression">The capturing group being visited.</param>
public abstract void VisitEnter(CapturingGroup expression);
/// <summary>
/// Visitor callback for a <see cref="CapturingGroup"/> expression; concrete visitors must supply the body.
/// NOTE(review): presumably invoked between VisitEnter and VisitLeave — confirm against the Accept implementation.
/// </summary>
/// <param name="expression">The capturing group being visited.</param>
public abstract void VisitExecute(CapturingGroup expression);
/// <summary>
/// Wraps the match predicate on top of the match stack in a new predicate that, on a successful
/// consuming match, records the matched bytes for back-referencing and attaches an
/// <see cref="AstNode"/> named after the capturing group to the current sandbox.
/// </summary>
/// <param name="expression">The capturing group whose name and flags drive node creation.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when the wrapped predicate runs for a group that requests a custom AST node
/// while no AST node factory is available.
/// </exception>
public override void VisitLeave(CapturingGroup expression)
{
    String name = expression.Name;
    Boolean reduceBySingleChildNode = expression.DoReplaceBySingleChildNode;

    IsMatchPredicate local = _matchStack.Pop();
    _matchStack.Push(
        delegate(IInputIterator iterator)
        {
            if (expression.DoCreateCustomAstNode && _astNodeFactory == null)
            {
                // The single-string constructor treats its argument as the parameter name,
                // so the parameter name and the explanatory message are passed separately.
                throw new ArgumentNullException(
                    "astNodeFactory",
                    "Second constructor overload is required during instantiation. astNodeFactory requires to be set with this parser implementation.");
            }

            Int32 savePosition = iterator.Index;

            // Placeholder node; nested capturing groups attach their nodes to it as children.
            _sandbox.Peek().Push(new AstNode());

            if (local(iterator))
            {
                // predicates being processed should not append ast
                if (_disableCapturingGroup.Count > 0)
                {
                    _sandbox.Peek().Pop();
                    iterator.Index = savePosition;
                    return true;
                }

                if (savePosition >= iterator.Index)
                {
                    // Warn terminal does not consume and ast should not be created for it,
                    // yet it should return that it was successful match.
                    _sandbox.Peek().Pop();
                    iterator.Index = savePosition;
                    return true;
                }

                // Remember what this group matched so it can be looked up by name later.
                Byte[] matchedBytes = iterator.Text(savePosition, iterator.Index - 1);
                Stack<Byte[]> captures;
                if (!_xmlBackReferenceLookup.TryGetValue(name, out captures))
                {
                    captures = new Stack<Byte[]>();
                    _xmlBackReferenceLookup.Add(name, captures);
                }
                captures.Push(matchedBytes);

                AstNode node = _sandbox.Peek().Pop();
                node.Token = new TokenMatch(name, savePosition, iterator.Index - 1);

                if (expression.DoCreateCustomAstNode)
                {
                    // create a custom astnode
                    IAstNodeReplacement nodevisitor = _astNodeFactory.Create(node);
                    nodevisitor.Token = node.Token;
                    nodevisitor.Parent = node.Parent;
                    nodevisitor.Children = node.Children;
                    foreach (AstNode updateparent in nodevisitor.Children)
                    {
                        updateparent.Parent = nodevisitor;
                    }

                    // since the whole tree has not finished completing this.Parent will be null on this run.
                    // logic inside astnodereplacement is to create properties, business names, that internally check Children collection to mine data.
                    // you still will need a top level visitor to process tree after it completely available.
                    node.Accept(nodevisitor);
                    node = nodevisitor;
                }

                if (reduceBySingleChildNode)
                {
                    if (node.Children.Count == 1)
                    {
                        node = node.Children[0];
                    }
                }

                if (_sandbox.Peek().Count > 0)
                {
                    // Attach to the enclosing capturing group's placeholder node.
                    node.Parent = _sandbox.Peek().Peek();
                    _sandbox.Peek().Peek().Children.Add(node);
                }
                else
                {
                    // don't lose the root node
                    // each successful sandbox will have 1 item left in the stack
                    _sandbox.Peek().Push(node);
                }

                return true;
            }

            // No match: discard the placeholder and rewind the input.
            _sandbox.Peek().Pop();
            iterator.Index = savePosition;
            return false;
        });
}
/// <summary>
/// Translates a parsed grammar AST node into the matching expression object, building the
/// expression tree bottom-up on <c>expressionStack</c>.
/// </summary>
/// <remarks>
/// Nodes are only translated after a "NodeDefinition" node has been left; leaving a "Statement"
/// node stores the finished expression in <c>completedStatements</c> and switches translation
/// off again until the next "NodeDefinition".
/// </remarks>
/// <param name="node">The AST node being left.</param>
/// <exception cref="Exception">Thrown for an unrecognised prefix or suffix node.</exception>
/// <exception cref="NotImplementedException">
/// Thrown for a "DynamicBackReferencing" node whose child count is not exactly one.
/// </exception>
public override void VisitLeave(AstNode node)
{
    AExpression left;
    AExpression right;

    if (hasPassedNodeDefinition)
    {
        switch (node.Token.Name)
        {
            case "Statement":
                hasPassedNodeDefinition = false;

                var statement = (StatementAstNode)node;
                if (statement.IsCaptured)
                {
                    var captureStatement = new CapturingGroup(statement.Name, expressionStack.Pop());
                    if (
                        statement.Children[0].Children[0].Children.Any(child => child.Token.Name == "OptionalFlags")
                        &&
                        statement.Children[0].Children[0].Children[1].Children.Any(child => child.Token.Name == "ReplaceBySingleChild")
                        )
                    {
                        captureStatement.DoReplaceBySingleChildNode = true; // default is false
                    }
                    expression = captureStatement;
                }
                else
                {
                    expression = expressionStack.Pop();
                }

                // Assumes Terminals are at the top of the file and
                // final root non terminal expression is at the bottom.
                if (wrapWithRecursionRule.Contains(statement.Name))
                {
                    expression = new RecursionCreate(statement.Name, expression);
                }

                completedStatements.Add(statement.Name, expression);
                break;

            case "Sequence":
                // Pop one operand per child; reversing restores source order (first child on top).
                var reverse = new Stack<AExpression>();
                for (int i = 0; i < node.Children.Count; i++)
                {
                    reverse.Push(expressionStack.Pop());
                }

                // Fold the operands pairwise, left to right, into nested Sequence expressions.
                // A plain integer counter is used; the count is always a whole number.
                for (Int32 remaining = node.Children.Count - 1; remaining > 0; remaining--)
                {
                    left = reverse.Pop();
                    right = reverse.Pop();
                    reverse.Push(
                        new Sequence(left, right)
                        );
                }

                expressionStack.Push(reverse.Pop());
                break;

            case "PrioritizedChoice":
                Int32 cnt = node.Children.Count - 1;
                for (Int32 i = 0; i < cnt; i++)
                {
                    right = expressionStack.Pop();
                    left = expressionStack.Pop();
                    expressionStack.Push(
                        new PrioritizedChoice(left, right)
                        );
                }
                break;

            case "Prefix":
                // The first character of the matched text selects the predicate kind.
                switch (node.Token.ValueAsString(_inputIterator)[0].ToString())
                {
                    case "!":
                        expressionStack.Push(new NotPredicate(expressionStack.Pop()));
                        break;
                    case "&":
                        expressionStack.Push(new AndPredicate(expressionStack.Pop()));
                        break;
                    default:
                        throw new Exception("Unsupported PEG Prefix.");
                }
                break;

            case "Suffix":
                switch (node.Children[0].Token.Name)
                {
                    case "ZeroOrMore":
                        expressionStack.Push(new ZeroOrMore(expressionStack.Pop()));
                        break;
                    case "OneOrMore":
                        expressionStack.Push(new OneOrMore(expressionStack.Pop()));
                        break;
                    case "Optional":
                        expressionStack.Push(new Optional(expressionStack.Pop()));
                        break;
                    case "LimitingRepetition":
                        switch (node.Children[0].Children[1].Token.Name)
                        {
                            case "BETWEEN":
                                expressionStack.Push(new LimitingRepetition(expressionStack.Pop())
                                {
                                    Min = Int32.Parse(
                                        node.Children[0].Children[1].Children[0].
                                            Token.ValueAsString(_inputIterator)),
                                    Max = Int32.Parse(
                                        node.Children[0].Children[1].Children[1].
                                            Token.ValueAsString(_inputIterator))
                                });
                                break;
                            case "ATMOST":
                                expressionStack.Push(new LimitingRepetition(expressionStack.Pop())
                                {
                                    Min = null,
                                    Max = Int32.Parse(node.Children[0].Children[1].Children[0].Token
                                        .ValueAsString(_inputIterator))
                                });
                                break;
                            case "ATLEAST":
                                expressionStack.Push(new LimitingRepetition(expressionStack.Pop())
                                {
                                    Min = Int32.Parse(node.Children[0].Children[1].Children[0].
                                        Token.ValueAsString(_inputIterator)),
                                    Max = null
                                });
                                break;
                            case "EXACT":
                                Int32 exactcount = Int32.Parse(node.Children[0].Children[1].Token.ValueAsString(_inputIterator));
                                expressionStack.Push(new LimitingRepetition(expressionStack.Pop())
                                {
                                    Min = exactcount,
                                    Max = exactcount
                                });
                                break;
                            case "VariableLength":
                                var variableLengthExpression = node.Children[0].Children[1].Token.ValueAsString(_inputIterator);
                                expressionStack.Push(new LimitingRepetition(expressionStack.Pop())
                                {
                                    VariableLengthExpression = variableLengthExpression
                                });
                                break;
                        }
                        break;
                    default:
                        throw new Exception("Unsupported PEG Suffix.");
                }
                break;

            case "CapturingGroup":
                var capture = new CapturingGroup(node.Children[0].Token.ValueAsString(_inputIterator), expressionStack.Pop());
                if (node.Children.Any(child => child.Token.Name == "OptionalFlags"))
                {
                    if (node.Children[1].Children.Any(child => child.Token.Name == "ReplaceBySingleChild"))
                    {
                        capture.DoReplaceBySingleChildNode = true; // default is false
                    }
                    if (node.Children[1].Children.Any(child => child.Token.Name == "ReplacementNode"))
                    {
                        capture.DoCreateCustomAstNode = true; // default is false
                    }
                }
                expressionStack.Push(capture);
                break;

            case "Group":
                break;

            case "AnyCharacter":
                expressionStack.Push(new AnyCharacter());
                break;

            case "Literal":
                // A literal node with exactly two children is treated as case-insensitive.
                Boolean isCaseSensitive = true;
                if (node.Children.Count == 2)
                {
                    isCaseSensitive = false;
                }

                expressionStack.Push(new Literal
                {
                    IsCaseSensitive = isCaseSensitive,
                    // Unescape \" and \' first, then collapse \\ into a single backslash.
                    MatchText = Regex.Replace(
                        Regex.Replace(node.Children[0].Token.ValueAsString(_inputIterator), @"\\(?<quote>""|')", @"${quote}")
                        , @"\\\\", @"\")
                });
                break;

            case "CharacterClass":
                expressionStack.Push(new CharacterClass { ClassExpression = node.Token.ValueAsString(_inputIterator) });
                break;

            case "RecursionCall":
                expressionStack.Push((this)[node.Children[0].Token.ValueAsString(_inputIterator)]);
                break;

            case "CodePoint":
                expressionStack.Push(new CodePoint { Match = "#" + node.Children[0].Token.ValueAsString(_inputIterator) });
                break;

            case "Fatal":
                expressionStack.Push(new Fatal { Message = node.Children[0].Token.ValueAsString(_inputIterator) });
                break;

            case "Warn":
                expressionStack.Push(new Warn { Message = node.Children[0].Token.ValueAsString(_inputIterator) });
                break;

            case "DynamicBackReferencing":
                if (node.Children.Count == 1)
                {
                    // no options specified only tag name.
                    expressionStack.Push(new DynamicBackReference
                    {
                        BackReferenceName = node.Children[0].Token.ValueAsString(_inputIterator)
                    });
                }
                else
                {
                    throw new NotImplementedException(
                        "Add IsCaseSensitive using children[1].Token.Name == IsCasesensitive");
                }
                break;
        }
    }

    // Checked after the switch, so translation starts with the nodes left after this one.
    if (node.Token.Name == "NodeDefinition")
    {
        hasPassedNodeDefinition = true;
    }
}
// name, rule text
/// <summary>
/// Pushes a fresh, empty <see cref="StringBuilder"/> onto the <c>terminal</c> stack for this capturing group.
/// </summary>
/// <param name="expression">The capturing group being entered (not read here).</param>
public override void VisitEnter(CapturingGroup expression)
{
    var ruleBuffer = new StringBuilder();
    terminal.Push(ruleBuffer);
}
/// <summary>
/// Parses "(((&lt;&lt;&lt;123&gt;&gt;&gt;)))" with two nested recursive rules and checks that the tree is a
/// single-child chain: three PENCLOSED nodes, then three LTENCLOSED nodes, the last of which
/// still has exactly one child.
/// </summary>
public void CompositeVisitor_NestedRecursive()
{
    #region Composite

    var digits = new CapturingGroup(
        "DIGITS",
        new OneOrMore(new CharacterClass { ClassExpression = "[0-9]" }));

    // digits, or "<" recursion ">"
    var angleWrapped = new CapturingGroup(
        "LTENCLOSED",
        new Sequence(
                new Literal { MatchText = "<" },
                new RecursionCall("RECURSIONLTENCLOSED"))
            .Sequence(new Literal { MatchText = ">" }));
    var angleRule = new RecursionCreate(
        "RECURSIONLTENCLOSED",
        new PrioritizedChoice(digits, angleWrapped));

    // angle rule, or "(" recursion ")"
    var parenWrapped = new CapturingGroup(
        "PENCLOSED",
        new Sequence(
                new Literal { MatchText = "(" },
                new RecursionCall("RECURSIONPENCLOSED"))
            .Sequence(new Literal { MatchText = ")" }));
    var parenRule = new RecursionCreate(
        "RECURSIONPENCLOSED",
        new PrioritizedChoice(angleRule, parenWrapped));

    AExpression grammar = new CapturingGroup("NESTEDRECURSIONTEST", parenRule);

    #endregion

    var text = "(((<<<123>>>)))";
    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
    var parser = new NpegParserVisitor(iterator);
    grammar.Accept(parser);
    Assert.IsTrue(parser.IsMatch);

    AstNode cursor = parser.AST;
    Assert.IsTrue(cursor.Token.ValueAsString(iterator) == text);
    Assert.IsTrue(cursor.Token.Name == "NESTEDRECURSIONTEST");

    // Walk down the chain: each level has exactly one child carrying the expected name.
    String[] expectedNames =
    {
        "PENCLOSED", "PENCLOSED", "PENCLOSED",
        "LTENCLOSED", "LTENCLOSED", "LTENCLOSED"
    };
    foreach (String expectedName in expectedNames)
    {
        Assert.IsTrue(cursor.Children.Count == 1);
        cursor = cursor.Children[0];
        Assert.IsTrue(cursor.Token.Name == expectedName);
    }

    // The innermost LTENCLOSED node still has a single child.
    Assert.IsTrue(cursor.Children.Count == 1);
}
/// <summary>
/// Matches nested XML-like tags where each closing tag must repeat the text captured by "TAG",
/// using a recursive rule, and checks that the trimmed sample input matches.
/// </summary>
public void Terminal_DynamicBackReference_Recursive()
{
    var tagName = new CapturingGroup(
        "TAG",
        new OneOrMore(new CharacterClass { ClassExpression = "[a-zA-Z0-9]" }));

    // "<" TAG ">"
    var openTag = new CapturingGroup(
        "START_TAG",
        new Sequence(new Literal { MatchText = "<" }, tagName)
            .Sequence(new Literal { MatchText = ">" }));

    // "</" back-reference to TAG ">"
    var closeTag = new CapturingGroup(
        "END_TAG",
        new Sequence(
                new Literal { MatchText = "</" },
                new DynamicBackReference { BackReferenceName = "TAG", IsCaseSensitive = true })
            .Sequence(new Literal { MatchText = ">" }));

    // Nested element, or any character that does not start the closing tag.
    var content = new CapturingGroup(
        "Body",
        new PrioritizedChoice(
            new RecursionCall("MATCHXML"),
            new Sequence(new NotPredicate(closeTag), new AnyCharacter()))
        .Star());

    var grammar = new CapturingGroup(
        "Expression",
        new RecursionCreate(
            "MATCHXML",
            new Sequence(openTag, content)
                .Sequence(closeTag)
                .Plus()));

    String text = @" <test> test data start <test1> test1 data start <test2> text2 data start text2 data end </test2> test1 data end </test1> test data end </test> ";

    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(text.Trim()));
    var parser = new NpegParserVisitor(iterator);
    grammar.Accept(parser);
    Assert.IsTrue(parser.IsMatch);

    // The tree is fetched but not inspected further.
    AstNode tree = parser.AST;
}
/// <summary>
/// Builds a boolean-algebra grammar (variables, inverters, gates, parenthesised sub-expressions)
/// and parses a series of equations with it. The first two inputs have their AST checked node by
/// node; the remaining inputs are only checked for a successful match.
/// </summary>
public void PracticalExample_BooleanAlgebra()
{
    #region Composite

    //AND: */AND
    AExpression AND = new PrioritizedChoice(new Literal { MatchText = "*" }, new Literal { MatchText = "AND" });
    //NAND: ~*/NAND
    AExpression NAND = new PrioritizedChoice(new Literal { MatchText = "~*" }, new Literal { MatchText = "NAND" });

    //OR: +/OR
    AExpression OR = new PrioritizedChoice(new Literal { MatchText = "+" }, new Literal { MatchText = "OR" });
    //NOR: ~+/NOR
    AExpression NOR = new PrioritizedChoice(new Literal { MatchText = "~+" }, new Literal { MatchText = "NOR" });

    //XOR: ^/XOR
    AExpression XOR = new PrioritizedChoice(new Literal { MatchText = "^" }, new Literal { MatchText = "XOR" });
    //XNOR: ~^/XNOR
    AExpression XNOR = new PrioritizedChoice(new Literal { MatchText = "~^" }, new Literal { MatchText = "XNOR" });

    // Any one of the gates above, captured as "GATE".
    AExpression GATE = new CapturingGroup("GATE", new PrioritizedChoice(AND, NAND).Or(OR).Or(NOR).Or(XOR).Or(XNOR));

    // Variable: "[a-zA-Z0-9]+" / '[a-zA-Z0-9]+' / [a-zA-Z]
    AExpression VARIABLE = new PrioritizedChoice(
        new Sequence(
            new Literal { MatchText = "\"" },
            new CapturingGroup("VARIABLE", new OneOrMore(new CharacterClass { ClassExpression = "[a-zA-Z0-9]" }))
            ).Sequence(new Literal { MatchText = "\"" }),
        new Sequence(
            new Literal { MatchText = "'" },
            new CapturingGroup("VARIABLE", new OneOrMore(new CharacterClass { ClassExpression = "[a-zA-Z0-9]" }))
            ).Sequence(new Literal { MatchText = "'" })
        ).Or(
            new CapturingGroup("VARIABLE", new CharacterClass { ClassExpression = "[a-zA-Z]" })
        );

    // Variable: Variable / !Variable
    // VARIABLE is reassigned on purpose: the new value wraps the previous definition.
    VARIABLE = new PrioritizedChoice(
        VARIABLE
        ,
        new CapturingGroup("INVERTOR",
            new Sequence(
                new Literal { MatchText = "!" },
                VARIABLE
                )
            )
        );

    // Variable: Variable / Expression / !Expression
    // Again wraps the previous definition, adding parenthesised (recursive) expressions.
    VARIABLE = new PrioritizedChoice(
        VARIABLE
        ,
        new Sequence(
            new Literal { MatchText = "(" },
            new RecursionCall("RECURSIONEXPRESSION")
            ).Sequence(new Literal { MatchText = ")" })
        ).Or(
            new CapturingGroup("INVERTOR",
                new Sequence(
                    new Literal { MatchText = "!" }
                    ,
                    new Sequence(
                        new Literal { MatchText = "(" },
                        new RecursionCall("RECURSIONEXPRESSION")
                        ).Sequence(new Literal { MatchText = ")" })
                    )
                )
        );

    AExpression Root = new CapturingGroup("BOOLEANEQUATION",
        new Sequence(
            new RecursionCreate("RECURSIONEXPRESSION",
                //Expression: Variable ((AND|NAND|OR|NOR|XOR|XNOR) Variable)*
                new Sequence(VARIABLE, new Sequence(GATE, VARIABLE).Star())
                )
            ,
            // ensure reaches end of file
            new NotPredicate(new AnyCharacter())
            )
        );

    #endregion

    // single variable
    var input = "A*!B+!A*B";
    var bytes = Encoding.UTF8.GetBytes(input);
    var iterator = new ByteInputIterator(bytes);
    var visitor = new NpegParserVisitor(iterator);
    Root.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    AstNode node = visitor.AST;
    // Expected flat child list: VARIABLE GATE INVERTOR GATE INVERTOR GATE VARIABLE.
    Assert.IsTrue(node.Token.Name == "BOOLEANEQUATION");
    Assert.IsTrue(node.Children[0].Token.Name == "VARIABLE");
    Assert.IsTrue(node.Children[0].Token.ValueAsString(iterator) == "A");
    Assert.IsTrue(node.Children[1].Token.Name == "GATE");
    Assert.IsTrue(node.Children[1].Token.ValueAsString(iterator) == "*");
    Assert.IsTrue(node.Children[2].Token.Name == "INVERTOR");
    Assert.IsTrue(node.Children[2].Children[0].Token.Name == "VARIABLE");
    Assert.IsTrue(node.Children[2].Children[0].Token.ValueAsString(iterator) == "B");
    Assert.IsTrue(node.Children[3].Token.Name == "GATE");
    Assert.IsTrue(node.Children[4].Token.Name == "INVERTOR");
    Assert.IsTrue(node.Children[4].Children[0].Token.Name == "VARIABLE");
    Assert.IsTrue(node.Children[4].Children[0].Token.ValueAsString(iterator) == "A");
    Assert.IsTrue(node.Children[5].Token.Name == "GATE");
    Assert.IsTrue(node.Children[6].Token.Name == "VARIABLE");
    Assert.IsTrue(node.Children[6].Token.ValueAsString(iterator) == "B");

    // quoted variable
    input = "'aA'*!'bB'+!'aA'*'bB'";
    bytes = Encoding.UTF8.GetBytes(input);
    iterator = new ByteInputIterator(bytes);
    visitor = new NpegParserVisitor(iterator);
    Root.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    node = visitor.AST;
    // Same shape as above; the captured variable text excludes the surrounding quotes.
    Assert.IsTrue(node.Token.Name == "BOOLEANEQUATION");
    Assert.IsTrue(node.Children[0].Token.Name == "VARIABLE");
    Assert.IsTrue(node.Children[0].Token.ValueAsString(iterator) == "aA");
    Assert.IsTrue(node.Children[1].Token.Name == "GATE");
    Assert.IsTrue(node.Children[1].Token.ValueAsString(iterator) == "*");
    Assert.IsTrue(node.Children[2].Token.Name == "INVERTOR");
    Assert.IsTrue(node.Children[2].Children[0].Token.Name == "VARIABLE");
    Assert.IsTrue(node.Children[2].Children[0].Token.ValueAsString(iterator) == "bB");
    Assert.IsTrue(node.Children[3].Token.Name == "GATE");
    Assert.IsTrue(node.Children[4].Token.Name == "INVERTOR");
    Assert.IsTrue(node.Children[4].Children[0].Token.Name == "VARIABLE");
    Assert.IsTrue(node.Children[4].Children[0].Token.ValueAsString(iterator) == "aA");
    Assert.IsTrue(node.Children[5].Token.Name == "GATE");
    Assert.IsTrue(node.Children[6].Token.Name == "VARIABLE");
    Assert.IsTrue(node.Children[6].Token.ValueAsString(iterator) == "bB");

    // expression + gate + variable .star()
    // From here on only a successful match is asserted; the #warning lines mark
    // value assertions that are currently disabled.
    input = "A*!B*C+!A*B*C";
    bytes = Encoding.UTF8.GetBytes(input);
    iterator = new ByteInputIterator(bytes);
    visitor = new NpegParserVisitor(iterator);
    Root.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    node = visitor.AST;
    #warning Assert.IsTrue(node.Token.Value == input);

    // parenthesis
    input = "((A)*(!B)+(!A)*(B))";
    bytes = Encoding.UTF8.GetBytes(input);
    iterator = new ByteInputIterator(bytes);
    visitor = new NpegParserVisitor(iterator);
    Root.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    node = visitor.AST;
    #warning Assert.IsTrue(node.Token.Value == input);

    input = "((A)*!(B)+!(A)*(B))";
    bytes = Encoding.UTF8.GetBytes(input);
    iterator = new ByteInputIterator(bytes);
    visitor = new NpegParserVisitor(iterator);
    Root.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    node = visitor.AST;
    #warning Assert.IsTrue(node.Token.Value == input);

    input = "((A)*(!(B))+(!(A))*(B))";
    bytes = Encoding.UTF8.GetBytes(input);
    iterator = new ByteInputIterator(bytes);
    visitor = new NpegParserVisitor(iterator);
    Root.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    node = visitor.AST;
    #warning Assert.IsTrue(node.Token.Value == input);

    input = ("(!X*Y*!Z)");
    bytes = Encoding.UTF8.GetBytes(input);
    iterator = new ByteInputIterator(bytes);
    visitor = new NpegParserVisitor(iterator);
    Root.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    node = visitor.AST;
    #warning Assert.IsTrue(node.Token.Value == input);

    input = ("(!X*Y*!Z)+(!X*Y*Z)");
    bytes = Encoding.UTF8.GetBytes(input);
    iterator = new ByteInputIterator(bytes);
    visitor = new NpegParserVisitor(iterator);
    Root.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    node = visitor.AST;
    #warning Assert.IsTrue(node.Token.Value == input);

    input = ("(X*Z)");
    bytes = Encoding.UTF8.GetBytes(input);
    iterator = new ByteInputIterator(bytes);
    visitor = new NpegParserVisitor(iterator);
    Root.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    node = visitor.AST;
    #warning Assert.IsTrue(node.Token.Value == input);

    input = ("(!X*Y*!Z)+(!X*Y*Z)+(X*Z)");
    bytes = Encoding.UTF8.GetBytes(input);
    iterator = new ByteInputIterator(bytes);
    visitor = new NpegParserVisitor(iterator);
    Root.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    node = visitor.AST;
    #warning Assert.IsTrue(node.Token.Value == input);

    input = ("((((!X*Y*Z)+(!X*Y*!Z)+(X*Z))))");
    bytes = Encoding.UTF8.GetBytes(input);
    iterator = new ByteInputIterator(bytes);
    visitor = new NpegParserVisitor(iterator);
    Root.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    node = visitor.AST;
    #warning Assert.IsTrue(node.Token.Value == input);
}
/// <summary>
/// Exercises <c>LimitingRepetition</c> over a digit class with different Min/Max settings against
/// "1234567890", and expects an <see cref="ArgumentException"/> both when neither bound is set
/// and when Max is smaller than Min.
/// </summary>
public void Terminal_LimitingRepetition()
{
    // min
    // min max
    // max
    // math expression using back referencing {(\k<C2> - \k<C1>)+1} - variable length protocols
    AExpression digit = new CharacterClass { ClassExpression = "[0-9]" };

    #region nonterminals

    var atLeastZero = new CapturingGroup("MinTrue", new LimitingRepetition(digit) { Min = 0 });
    var atLeastFortyFour = new CapturingGroup("MinFalse", new LimitingRepetition(digit) { Min = 44 });
    var atLeastFive = new CapturingGroup("MinTrue", new LimitingRepetition(digit) { Min = 5 });
    var atMostFive = new CapturingGroup("MaxTrue", new LimitingRepetition(digit) { Max = 5 });
    var fiveToSix = new CapturingGroup("MinMax", new LimitingRepetition(digit) { Min = 5, Max = 6 });
    var noBounds = new CapturingGroup("ExceptionNoMinMax", new LimitingRepetition(digit));
    var maxBelowMin = new CapturingGroup("ExceptionMaxLessThanMin", new LimitingRepetition(digit) { Min = 5, Max = 0 });

    #endregion

    String text = "1234567890";
    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(text));

    // Min = 0 matches.
    var parser = new NpegParserVisitor(iterator);
    atLeastZero.Accept(parser);
    Assert.IsTrue(parser.IsMatch);
    AstNode tree = parser.AST;

    // Min = 44 cannot be satisfied by ten digits.
    iterator.Index = 0;
    parser = new NpegParserVisitor(iterator);
    atLeastFortyFour.Accept(parser);
    Assert.IsFalse(parser.IsMatch);

    // Min = 5 matches and captures the whole input.
    iterator.Index = 0;
    parser = new NpegParserVisitor(iterator);
    atLeastFive.Accept(parser);
    Assert.IsTrue(parser.IsMatch);
    tree = parser.AST;
    Assert.IsTrue(tree.Token.ValueAsString(iterator) == text);

    // Max = 5 matches.
    iterator.Index = 0;
    parser = new NpegParserVisitor(iterator);
    atMostFive.Accept(parser);
    Assert.IsTrue(parser.IsMatch);
    tree = parser.AST;

    // Min = 5, Max = 6 matches.
    iterator.Index = 0;
    parser = new NpegParserVisitor(iterator);
    fiveToSix.Accept(parser);
    Assert.IsTrue(parser.IsMatch);
    tree = parser.AST;

    // Both invalid configurations must raise ArgumentException.
    Int32 argumentErrors = 0;

    try
    {
        iterator.Index = 0;
        parser = new NpegParserVisitor(iterator);
        noBounds.Accept(parser);
        Assert.IsTrue(parser.IsMatch);
        tree = parser.AST;
    }
    catch (ArgumentException)
    {
        argumentErrors++;
    }

    try
    {
        iterator.Index = 0;
        parser = new NpegParserVisitor(iterator);
        maxBelowMin.Accept(parser);
        Assert.IsTrue(parser.IsMatch);
        tree = parser.AST;
    }
    catch (ArgumentException)
    {
        argumentErrors++;
    }

    Assert.IsTrue(argumentErrors == 2);
}
/// <summary>
/// Exercises binary <c>CodePoint</c> patterns ("#b..."): exact bytes, patterns that are not a
/// multiple of eight bits, don't-care bits (X), a non-matching byte, and empty input.
/// </summary>
public void Terminal_CodePoint_Binary()
{
    Assert.IsTrue((Byte)'a' == 97);
    Assert.IsTrue((Byte)'a' == 0x61);

    // One full byte.
    var text = "a";
    var raw = Encoding.UTF8.GetBytes(text);
    var cursor = new ByteInputIterator(raw);
    var parser = new NpegParserVisitor(cursor);
    var group = new CapturingGroup("Binary", new CodePoint { Match = "#b1100001" });
    group.Accept(parser);
    Assert.IsTrue(parser.IsMatch);
    AstNode tree = parser.AST;
    Assert.IsTrue(tree.Token.Name == "Binary");
    Assert.IsTrue(tree.Token.ValueAsString(cursor) == "a");

    // Two full bytes.
    text = "aa";
    raw = Encoding.UTF8.GetBytes(text);
    cursor = new ByteInputIterator(raw);
    parser = new NpegParserVisitor(cursor);
    group = new CapturingGroup("Binary", new CodePoint { Match = "#b0110000101100001" });
    group.Accept(parser);
    Assert.IsTrue(parser.IsMatch);
    tree = parser.AST;
    Assert.IsTrue(tree.Token.Name == "Binary");
    Assert.IsTrue(tree.Token.ValueAsString(cursor) == "aa");

    // Byte boundary tests
    text = "\0a";
    raw = Encoding.UTF8.GetBytes(text);
    cursor = new ByteInputIterator(raw);
    parser = new NpegParserVisitor(cursor);
    group = new CapturingGroup("Binary", new CodePoint { Match = "#b00001100001" });
    group.Accept(parser);
    Assert.IsTrue(
        parser.IsMatch,
        "During incomplete byte boundaries 0 is expected to prefix input; This would shift input to the right by 4 bits. In this case it complete codepoint should be null and letter a.");
    tree = parser.AST;
    Assert.IsTrue(tree.Token.Name == "Binary");
    Assert.IsTrue(tree.Token.ValueAsString(cursor) == "\0a");

    text = "\0a";
    raw = Encoding.UTF8.GetBytes(text);
    cursor = new ByteInputIterator(raw);
    parser = new NpegParserVisitor(cursor);
    group = new CapturingGroup(
        "Binary",
        new Sequence(
            new CodePoint { Match = "#b000" },
            new CodePoint { Match = "#b01100001" }));
    group.Accept(parser);
    Assert.IsTrue(
        parser.IsMatch,
        "During incomplete byte boundaries 0 is expected to prefix input; This would shift input to the right by 4 bits. In this case it complete codepoint should be null and letter a.");
    tree = parser.AST;
    Assert.IsTrue(tree.Token.Name == "Binary");
    Assert.IsTrue(tree.Token.ValueAsString(cursor) == "\0a");

    // Don't care tests
    text = Encoding.ASCII.GetString(new byte[] { 0x11, 0x01, 0x71, 0x03, 0x00 });
    raw = Encoding.UTF8.GetBytes(text);
    cursor = new ByteInputIterator(raw);
    parser = new NpegParserVisitor(cursor);
    group = new CapturingGroup(
        "Binary",
        new OneOrMore(new CodePoint { Match = "#bXXXX0001" }) // #bXXXX0001
    );
    group.Accept(parser);
    Assert.IsTrue(parser.IsMatch);
    tree = parser.AST;
    Assert.IsTrue(tree.Token.Name == "Binary");
    Assert.IsTrue(tree.Token.ValueAsString(cursor) == Encoding.ASCII.GetString(new byte[] { 0x11, 0x01, 0x71 }));

    // Low nibble is 0, so the pattern must not match.
    text = Encoding.ASCII.GetString(new byte[] { 0x10 });
    raw = Encoding.UTF8.GetBytes(text);
    cursor = new ByteInputIterator(raw);
    parser = new NpegParserVisitor(cursor);
    group = new CapturingGroup("Binary", new CodePoint { Match = "#bXXXX0001" });
    group.Accept(parser);
    Assert.IsFalse(parser.IsMatch);

    // cannot consume character test
    text = "";
    raw = Encoding.UTF8.GetBytes(text);
    cursor = new ByteInputIterator(raw);
    parser = new NpegParserVisitor(cursor);
    group = new CapturingGroup("Binary", new CodePoint { Match = "#bXXXX0001" });
    group.Accept(parser);
    Assert.IsFalse(parser.IsMatch);
}
/// <summary>
/// Exercises hexadecimal <c>CodePoint</c> patterns ("#x..."): an exact byte, patterns with an odd
/// number of hex digits, don't-care digits (X), a non-matching byte, and empty input.
/// </summary>
public void Terminal_CodePoint_Hexadecimal()
{
    Assert.IsTrue((Byte)'a' == 97);
    Assert.IsTrue((Byte)'a' == 0x61);

    // One full byte.
    var text = "a";
    var raw = Encoding.UTF8.GetBytes(text);
    var cursor = new ByteInputIterator(raw);
    var parser = new NpegParserVisitor(cursor);
    var hexGroup = new CapturingGroup("Hexadecimal", new CodePoint { Match = "#x61" });
    hexGroup.Accept(parser);
    Assert.IsTrue(parser.IsMatch);
    AstNode tree = parser.AST;
    Assert.IsTrue(tree.Token.Name == "Hexadecimal");
    Assert.IsTrue(tree.Token.ValueAsString(cursor) == "a");

    // Byte boundary tests
    text = "\na";
    raw = Encoding.UTF8.GetBytes(text);
    cursor = new ByteInputIterator(raw);
    parser = new NpegParserVisitor(cursor);
    hexGroup = new CapturingGroup("Hexadecimal", new CodePoint { Match = "#xA61" });
    hexGroup.Accept(parser);
    Assert.IsTrue(
        parser.IsMatch,
        "During incomplete byte boundaries 0 is expected to prefix input; This would shift input to the right by 4 bits. In this case it complete codepoint should be 0A = \n and letter a.");
    tree = parser.AST;
    Assert.IsTrue(tree.Token.Name == "Hexadecimal");
    Assert.IsTrue(tree.Token.ValueAsString(cursor) == "\na");

    text = "\0a";
    raw = Encoding.UTF8.GetBytes(text);
    cursor = new ByteInputIterator(raw);
    parser = new NpegParserVisitor(cursor);
    hexGroup = new CapturingGroup("Hexadecimal", new CodePoint { Match = "#x061" });
    hexGroup.Accept(parser);
    Assert.IsTrue(
        parser.IsMatch,
        "During incomplete byte boundaries 0 is expected to prefix input; This would shift input to the right by 4 bits. In this case it complete codepoint should be 00 = \0 and letter a.");
    tree = parser.AST;
    Assert.IsTrue(tree.Token.Name == "Hexadecimal");
    Assert.IsTrue(tree.Token.ValueAsString(cursor) == "\0a");

    // Don't care tests
    raw = new byte[] { 0x11, 0x01, 0x71, 0x03, 0x00 };
    cursor = new ByteInputIterator(raw);
    parser = new NpegParserVisitor(cursor);
    hexGroup = new CapturingGroup(
        "Hexadecimal",
        new OneOrMore(new CodePoint { Match = "#xX1" }) // #bXXXX0001
    );
    hexGroup.Accept(parser);
    Assert.IsTrue(parser.IsMatch);
    tree = parser.AST;
    Assert.IsTrue(tree.Token.Name == "Hexadecimal");
    Assert.IsTrue(tree.Token.ValueAsString(cursor) == Encoding.ASCII.GetString(new byte[] { 0x11, 0x01, 0x71 }));

    // Low nibble is 0, so the pattern must not match.
    cursor = new ByteInputIterator(new byte[] { 0x10 });
    parser = new NpegParserVisitor(cursor);
    hexGroup = new CapturingGroup("Hexadecimal", new CodePoint { Match = "#xX1" });
    hexGroup.Accept(parser);
    Assert.IsFalse(parser.IsMatch);

    // cannot consume character test
    text = string.Empty;
    cursor = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
    parser = new NpegParserVisitor(cursor);
    hexGroup = new CapturingGroup("Hexadecimal", new CodePoint { Match = "#xX1" });
    hexGroup.Accept(parser);
    Assert.IsFalse(parser.IsMatch);
}
/// <summary>
/// Parses ".{77,55}" with a grammar whose first alternative (any-character followed by a
/// limiting-repetition block) matches, and checks that the root has exactly the two expected
/// children, "AnyCharacter" and "LimitingRepetition".
/// </summary>
public void CompositeVisitor_CapturingGroup_SandBoxTest_PriorityChoice2()
{
    var blank = new CharacterClass { ClassExpression = "[ \t]" };

    // "{" followed by optional blanks.
    var opening = new Sequence(
        new Literal { MatchText = "{" },
        new ZeroOrMore(blank));

    // Min "," Max
    var between = new CapturingGroup(
        "BETWEEN",
        new Sequence(
                new CapturingGroup("Min", new OneOrMore(new CharacterClass { ClassExpression = "[0-9]" }))
                    .Sequence(new ZeroOrMore(blank)),
                new Literal { MatchText = "," })
            .Sequence(
                new Sequence(
                    new ZeroOrMore(blank),
                    new CapturingGroup("Max", new OneOrMore(new CharacterClass { ClassExpression = "[0-9]" })))));

    // "," Max
    var atMost = new CapturingGroup(
        "ATMOST",
        new Sequence(
            new Literal { MatchText = "," },
            new Sequence(
                new ZeroOrMore(blank),
                new CapturingGroup("Max", new OneOrMore(new CharacterClass { ClassExpression = "[0-9]" })))));

    // Min ","
    var atLeast = new CapturingGroup(
        "ATLEAST",
        new Sequence(
            new Sequence(
                    new ZeroOrMore(blank),
                    new CapturingGroup("Min", new OneOrMore(new CharacterClass { ClassExpression = "[0-9]" })))
                .Sequence(new ZeroOrMore(blank)),
            new Literal { MatchText = "," }));

    // A bare number.
    var exact = new CapturingGroup(
        "EXACT",
        new OneOrMore(new CharacterClass { ClassExpression = "[0-9]" }));

    // Alternatives are tried in this order: BETWEEN, ATMOST, ATLEAST, EXACT.
    var bounds = new PrioritizedChoice(between, atMost)
        .Or(atLeast)
        .Or(exact);

    var repetition = new CapturingGroup(
        "LimitingRepetition",
        new Sequence(opening, bounds)
            .Sequence(new ZeroOrMore(blank))
            .Sequence(new Literal { MatchText = "}" }));

    var dot = new CapturingGroup("AnyCharacter", new Literal { MatchText = "." });

    var grammar = new CapturingGroup(
        "Expression",
        new PrioritizedChoice(
            new Sequence(dot, repetition),
            new Sequence(repetition, dot)));

    var text = ".{77,55}";
    var parser = new NpegParserVisitor(new ByteInputIterator(Encoding.UTF8.GetBytes(text)));
    grammar.Accept(parser);
    Assert.IsTrue(parser.IsMatch);

    AstNode tree = parser.AST;
    Assert.IsTrue(tree.Token.Name == "Expression");
    Assert.IsTrue(tree.Children.Count == 2);
    Assert.IsTrue(tree.Children[0].Token.Name == "AnyCharacter");
    Assert.IsTrue(tree.Children[1].Token.Name == "LimitingRepetition");
}
/// <summary>
/// Visitor callback for a <see cref="CapturingGroup"/>; this override performs no work.
/// </summary>
/// <param name="expression">The capturing group being visited (unused).</param>
public override void VisitExecute(CapturingGroup expression) { }
/// <summary>
/// Parses a small PEG-like snippet with a recursive grammar and checks the resulting tree:
/// Root -> CapturingGroup -> (ReplacementNode, PrioritizedChoice), where the choice holds an
/// AnyCharacter node and a Sequence node of two AnyCharacter nodes.
/// </summary>
public void CompositeVisitor_Recursiveness()
{
    var whitespace = new CharacterClass { ClassExpression = "[ \t\r\n\v]" };

    // "." or "(?<" name ">" Expression ")"
    var terminal = new PrioritizedChoice(
        new CapturingGroup("AnyCharacter", new Literal { MatchText = "." })
        ,
        new CapturingGroup("CapturingGroup",
            new Sequence(
                new Literal { MatchText = "(?<" },
                new CapturingGroup("ReplacementNode",
                    new OneOrMore(
                        new CharacterClass { ClassExpression = "[a-z0-9A-Z]" }
                        )
                    )
                )
                .Sequence(new Literal { MatchText = ">" })
                .Sequence(new RecursionCall("Expression"))
                .Sequence(new Literal { MatchText = ")" })
            )
        );

    // One or more terminals; a sequence with a single child is replaced by that child.
    var sequence = new CapturingGroup(
        "Sequence",
        new Sequence(
            terminal,
            new ZeroOrMore(whitespace)
            ).Plus()
        ) { DoReplaceBySingleChildNode = true };

    // sequence "/" sequence ( "/" sequence )*
    var prioritizedchoice = new CapturingGroup("PrioritizedChoice",
        new Sequence(
            sequence,
            new Literal { MatchText = "/" }
            )
            .Sequence(new ZeroOrMore(whitespace))
            .Sequence(sequence)
            .Sequence(
                new ZeroOrMore(
                    new Sequence(
                        new ZeroOrMore(whitespace),
                        new Literal { MatchText = "/" }
                        )
                        .Sequence(new ZeroOrMore(whitespace))
                        .Sequence(sequence)
                        .Plus()
                    )
            )
        );

    var expression = new CapturingGroup("Root",
        new RecursionCreate("Expression",
            new PrioritizedChoice(prioritizedchoice, sequence)));

    // The whitespace between ".." and ")" is part of the input and is consumed by the grammar.
    var input = @"(?<NPEGNode>./.. 
)";
    var bytes = Encoding.UTF8.GetBytes(input);
    var iterator = new ByteInputIterator(bytes);
    var visitor = new NpegParserVisitor(iterator);
    expression.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);

    AstNode node = visitor.AST;
    Assert.IsTrue(node.Token.Name == "Root");
    // Checked once; the original repeated this identical assertion twice in a row.
    Assert.IsTrue(node.Children.Count == 1);
    Assert.IsTrue(node.Children[0].Token.Name == "CapturingGroup");
    Assert.IsTrue(node.Children[0].Children.Count == 2);
    Assert.IsTrue(node.Children[0].Children[0].Token.Name == "ReplacementNode");
    Assert.IsTrue(node.Children[0].Children[1].Token.Name == "PrioritizedChoice");
    Assert.IsTrue(node.Children[0].Children[1].Children[0].Token.Name == "AnyCharacter");
    Assert.IsTrue(node.Children[0].Children[1].Children[1].Token.Name == "Sequence");
    Assert.IsTrue(node.Children[0].Children[1].Children[1].Children[0].Token.Name == "AnyCharacter");
    Assert.IsTrue(node.Children[0].Children[1].Children[1].Children[1].Token.Name == "AnyCharacter");
}
/// <summary>
/// Finishes writing a capturing group: pops the rule text collected for the group and either
/// records it as a new named statement or appends a reference / inline definition to the
/// enclosing rule text on the <c>terminal</c> stack.
/// </summary>
/// <remarks>
/// Outcomes, in order of precedence:
/// the name is already registered with the same rule (append the name);
/// the name is registered with a different rule (append the full inline group);
/// another name is registered with the same rule (append that other name);
/// otherwise a new statement is added and the name registered.
/// </remarks>
/// <param name="expression">The capturing group being left.</param>
public override void VisitLeave(CapturingGroup expression)
{
    String rule = terminal.Pop().ToString();

    // Header of the group: (?<Name[flags]>
    var nodeText = new StringBuilder();
    nodeText.Append("(?<");
    nodeText.Append(expression.Name);

    if (expression.DoCreateCustomAstNode || expression.DoReplaceBySingleChildNode)
    {
        nodeText.Append("[");

        // Verbatim strings: in a regular string "\r" is a carriage-return escape,
        // which would emit a control character instead of the backslash-prefixed flag.
        if (expression.DoReplaceBySingleChildNode)
        {
            nodeText.Append(@"\rsc");
        }

        if (expression.DoCreateCustomAstNode)
        {
            nodeText.Append(@"\rn");
        }

        nodeText.Append("]");
    }

    nodeText.Append(">");

    String registeredRule;
    if (uniqueCapturedGroup.TryGetValue(expression.Name, out registeredRule))
    {
        if (registeredRule == rule)
        {
            // Same name, same rule: reference the existing statement.
            terminal.Peek().Append(expression.Name);
        }
        else
        {
            //same name but different rule so write inline
            nodeText.Append(rule);
            nodeText.Append(")");
            terminal.Peek().Append(nodeText.ToString());
        }
    }
    else
    {
        if (uniqueCapturedGroup.ContainsValue(rule))
        {
            // different name same rule
            String name = uniqueCapturedGroup.Where(kvp => kvp.Value == rule).Select(kvp => kvp.Key).First();
            terminal.Peek().Append(name);
        }
        else
        {
            // First time this name and rule are seen: emit "(?<Name>): rule;" as a statement.
            nodeText.Append("): ");
            nodeText.Append(rule);
            nodeText.Append(";");
            statements.Add(nodeText.ToString());
            uniqueCapturedGroup.Add(expression.Name, rule);

            // The outermost group has no enclosing rule text to append to.
            if (terminal.Count > 0)
            {
                terminal.Peek().Append(expression.Name);
            }
        }
    }
}
/// <summary>
/// Creates a <c>BalancingGroup</c> with <c>balance</c> set to null.
/// </summary>
public BalancingGroup() { balance = null; }
/// <summary>
/// Visitor callback for a <see cref="CapturingGroup"/>; this override performs no work.
/// </summary>
/// <param name="expression">The capturing group being visited (unused).</param>
public override void VisitEnter(CapturingGroup expression) { }