// Builds an XML-like grammar (start tag, body, end tag) whose end tag uses a
// DynamicBackReference to the capture named "TAG", then checks that the parser
// reports a match for two consecutive elements.
public void Terminal_DynamicBackReference()
{
    #region Composite

    // Tag name: one or more alphanumeric characters, captured as "TAG".
    var tagNameCharacters = new OneOrMore(
        new CharacterClass { ClassExpression = "[a-zA-Z0-9]" });
    AExpression tagName = new CapturingGroup("TAG", tagNameCharacters);

    // "<" TAG ">"
    var startTagSyntax = new Sequence(new Literal { MatchText = "<" }, tagName)
        .Sequence(new Literal { MatchText = ">" });
    AExpression startTag = new CapturingGroup("START_TAG", startTagSyntax);

    // "</" back-reference-to-TAG ">"
    // NOTE(review): presumably the back reference must repeat the text captured as TAG - confirm.
    var tagBackReference = new DynamicBackReference
    {
        BackReferenceName = "TAG",
        IsCaseSensitive = true
    };
    var endTagSyntax = new Sequence(new Literal { MatchText = "</" }, tagBackReference)
        .Sequence(new Literal { MatchText = ">" });
    AExpression endTag = new CapturingGroup("END_TAG", endTagSyntax);

    // Body: any character, as long as an end tag does not start at that position.
    var bodyCharacters = new Sequence(new NotPredicate(endTag), new AnyCharacter()).Star();
    AExpression body = new CapturingGroup("Body", bodyCharacters);

    // One or more complete elements.
    var elements = new Sequence(startTag, body).Sequence(endTag).Plus();
    AExpression document = new CapturingGroup("Expression", elements);

    #endregion

    string input = "<h1>hello</h1><h2>hello</h2>";
    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
    var visitor = new NpegParserVisitor(iterator);

    document.Accept(visitor);

    Assert.IsTrue(visitor.IsMatch);
    AstNode tree = visitor.AST;
    #warning write tree
}
// Grammar with optional prefix ("&", "!") and suffix ("*", "+", "?") operators
// around a "." terminal. Parses the bare terminal "." and checks that the root
// "Expression" node has a single "AnyCharacter" child.
public void CompositeVisitor_CapturingGroup_SandBoxTest_PriorityChoice3()
{
    var prefixOperator = new PrioritizedChoice(
        new CapturingGroup("AndPredicate", new Literal { MatchText = "&" }),
        new CapturingGroup("NotPredicate", new Literal { MatchText = "!" }));

    PrioritizedChoice suffixOperator = new PrioritizedChoice(
            new CapturingGroup("ZeroOrMore", new Literal { MatchText = "*" }),
            new CapturingGroup("OneOrMore", new Literal { MatchText = "+" }))
        .Or(new CapturingGroup("Optional", new Literal { MatchText = "?" }));

    var anyCharacter = new CapturingGroup("AnyCharacter", new Literal { MatchText = "." });

    // Alternatives are tried in order: prefixed terminal, suffixed terminal, bare terminal.
    var prefixedTerminal = prefixOperator.Plus().Sequence(anyCharacter);
    var suffixedTerminal = anyCharacter.Sequence(suffixOperator.Plus());
    var alternatives = new PrioritizedChoice(prefixedTerminal, suffixedTerminal)
        .Or(anyCharacter);

    var expression = new CapturingGroup("Expression", alternatives.Plus());

    var input = ".";
    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
    var visitor = new NpegParserVisitor(iterator);

    expression.Accept(visitor);

    Assert.IsTrue(visitor.IsMatch);

    AstNode root = visitor.AST;
    Assert.IsTrue(root.Children.Count == 1);
    Assert.IsTrue(root.Token.Name == "Expression");
    Assert.IsTrue(root.Token.ValueAsString(iterator) == ".");
    Assert.IsTrue(root.Children[0].Token.Name == "AnyCharacter");
}
// Parses digits wrapped in six levels of parentheses using a recursive rule and
// verifies the resulting tree: six nested "ENCLOSEDDIGITS" nodes, each with
// exactly one child, ending in a childless "DIGITS" leaf.
public void CompositeVisitor_RecursiveParentheses()
{
    #region Composite

    AExpression DIGITS = new CapturingGroup("DIGITS",
        new OneOrMore(new CharacterClass { ClassExpression = "[0-9]" }));

    // ParethesisFunction := DIGITS / "(" ParethesisFunction ")"
    // (the rule name's spelling is kept: create and call must use the same string)
    AExpression ENCLOSEDDIGITS = new RecursionCreate("ParethesisFunction",
        new PrioritizedChoice(
            DIGITS,
            new CapturingGroup("ENCLOSEDDIGITS",
                new Sequence(
                    new Literal { MatchText = "(" },
                    new RecursionCall("ParethesisFunction")
                ).Sequence(new Literal { MatchText = ")" })
            )
        )
    );

    AExpression ROOT = new CapturingGroup("RECURSIONTEST", ENCLOSEDDIGITS);

    #endregion

    var input = Encoding.UTF8.GetBytes("((((((123))))))");
    var visitor = new NpegParserVisitor(new ByteInputIterator(input));
    ROOT.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);

    AstNode node = visitor.AST;
    Assert.IsTrue(node.Token.Name == "RECURSIONTEST");
    Assert.IsTrue(node.Children.Count == 1);

    // The input has six pairs of parentheses, so six nested ENCLOSEDDIGITS nodes
    // are expected. Walking the chain in a loop checks every level exactly once;
    // the hand-unrolled version checked level 2's child count twice and never
    // checked level 3's.
    const int nestingDepth = 6;
    AstNode current = node;
    for (int depth = 1; depth <= nestingDepth; depth++)
    {
        current = current.Children[0];
        Assert.IsTrue(current.Token.Name == "ENCLOSEDDIGITS",
            "Expected ENCLOSEDDIGITS at nesting depth " + depth);
        Assert.IsTrue(current.Children.Count == 1,
            "Expected exactly one child at nesting depth " + depth);
    }

    // Innermost node: the digits themselves, with no children.
    AstNode digits = current.Children[0];
    Assert.IsTrue(digits.Token.Name == "DIGITS");
    Assert.IsTrue(digits.Children.Count == 0);
}
// Parses "123-456-7890" with a hand-built ddd-ddd-dddd grammar and checks the
// captured text of the whole number and of each of its three digit groups.
public void PracticalExample_PhoneNumber()
{
    #region terminals
    AExpression digit = new CharacterClass { ClassExpression = "[0-9]" };
    AExpression dash = new Literal { MatchText = "-" };
    #endregion

    #region nonterminals
    var threeDigits = new Sequence(digit, digit).Sequence(digit);
    AExpression threeDigitCode = new CapturingGroup("ThreeDigitCode", threeDigits);

    var fourDigits = new Sequence(digit, digit).Sequence(digit).Sequence(digit);
    AExpression fourDigitCode = new CapturingGroup("FourDigitCode", fourDigits);

    // ThreeDigitCode "-" ThreeDigitCode "-" FourDigitCode
    var phoneNumberSyntax = new Sequence(threeDigitCode, dash)
        .Sequence(threeDigitCode)
        .Sequence(dash)
        .Sequence(fourDigitCode);
    AExpression phoneNumber = new CapturingGroup("PhoneNumber", phoneNumberSyntax);
    #endregion

    string input = "123-456-7890";

    // Test Manual Composite
    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
    var visitor = new NpegParserVisitor(iterator);
    phoneNumber.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);

    AstNode root = visitor.AST;
    Assert.IsTrue(root.Token.Name == "PhoneNumber");
    Assert.IsTrue(root.Token.ValueAsString(iterator) == "123-456-7890");

    var firstGroup = root.Children[0];
    Assert.IsTrue(firstGroup.Token.Name == "ThreeDigitCode");
    Assert.IsTrue(firstGroup.Token.ValueAsString(iterator) == "123");

    var secondGroup = root.Children[1];
    Assert.IsTrue(secondGroup.Token.Name == "ThreeDigitCode");
    Assert.IsTrue(secondGroup.Token.ValueAsString(iterator) == "456");

    var thirdGroup = root.Children[2];
    Assert.IsTrue(thirdGroup.Token.Name == "FourDigitCode");
    Assert.IsTrue(thirdGroup.Token.ValueAsString(iterator) == "7890");
}
// Checks that a CodePoint written as "#38" matches the input "&" and that the
// capture is named "CodePoint".
public void Terminal_CodePoint_Decimal()
{
    var input = "&";
    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
    var visitor = new NpegParserVisitor(iterator);

    var decimalCodePoint = new CodePoint { Match = "#38" };
    var codepoint = new CapturingGroup("CodePoint", decimalCodePoint);

    codepoint.Accept(visitor);

    Assert.IsTrue(visitor.IsMatch);

    AstNode root = visitor.AST;
    Assert.IsTrue(root.Token.Name == "CodePoint");
    Assert.IsTrue(root.Token.ValueAsString(iterator) == "&");
}
// Nests one recursive rule ("<" ... ">") inside another ("(" ... ")") and checks
// the tree for "(((<<<123>>>)))": three PENCLOSED nodes followed by three
// LTENCLOSED nodes, each holding exactly one child.
public void CompositeVisitor_NestedRecursive()
{
    #region Composite

    var DIGITS = new CapturingGroup("DIGITS",
        new OneOrMore(new CharacterClass { ClassExpression = "[0-9]" }));

    // RECURSIONLTENCLOSED := DIGITS / "<" RECURSIONLTENCLOSED ">"
    var angleEnclosed = new CapturingGroup("LTENCLOSED",
        new Sequence(
            new Literal { MatchText = "<" },
            new RecursionCall("RECURSIONLTENCLOSED")
        ).Sequence(new Literal { MatchText = ">" }));
    var LTENCLOSED = new RecursionCreate("RECURSIONLTENCLOSED",
        new PrioritizedChoice(DIGITS, angleEnclosed));

    // RECURSIONPENCLOSED := RECURSIONLTENCLOSED / "(" RECURSIONPENCLOSED ")"
    var parenEnclosed = new CapturingGroup("PENCLOSED",
        new Sequence(
            new Literal { MatchText = "(" },
            new RecursionCall("RECURSIONPENCLOSED")
        ).Sequence(new Literal { MatchText = ")" }));
    var PENCLOSED = new RecursionCreate("RECURSIONPENCLOSED",
        new PrioritizedChoice(LTENCLOSED, parenEnclosed));

    AExpression ROOT = new CapturingGroup("NESTEDRECURSIONTEST", PENCLOSED);

    #endregion

    var input = "(((<<<123>>>)))";
    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
    var visitor = new NpegParserVisitor(iterator);
    ROOT.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);

    AstNode node = visitor.AST;
    Assert.IsTrue(node.Token.ValueAsString(iterator) == input);
    Assert.IsTrue(node.Token.Name == "NESTEDRECURSIONTEST");
    Assert.IsTrue(node.Children.Count == 1);

    // Expected capture name at each nesting level, outermost first.
    string[] expectedNesting =
    {
        "PENCLOSED", "PENCLOSED", "PENCLOSED",
        "LTENCLOSED", "LTENCLOSED", "LTENCLOSED"
    };

    AstNode current = node;
    foreach (string expectedName in expectedNesting)
    {
        current = current.Children[0];
        Assert.IsTrue(current.Token.Name == expectedName);
        Assert.IsTrue(current.Children.Count == 1);
    }
}
// Builds a small self-describing grammar (terminals, sequences, prioritized
// choices, and a recursive "Expression" rule), parses a capturing-group
// expression with it, and verifies the shape of the resulting tree.
public void CompositeVisitor_Recursiveness()
{
    var whitespace = new CharacterClass { ClassExpression = "[ \t\r\n\v]" };

    // terminal := "." / "(?<" name ">" Expression ")"
    var terminal = new PrioritizedChoice(
        new CapturingGroup("AnyCharacter", new Literal { MatchText = "." }),
        new CapturingGroup("CapturingGroup",
            new Sequence(
                new Literal { MatchText = "(?<" },
                new CapturingGroup("ReplacementNode",
                    new OneOrMore(new CharacterClass { ClassExpression = "[a-z0-9A-Z]" }))
            )
            .Sequence(new Literal { MatchText = ">" })
            .Sequence(new RecursionCall("Expression"))
            .Sequence(new Literal { MatchText = ")" })
        )
    );

    // sequence := (terminal whitespace*)+
    // NOTE(review): DoReplaceBySingleChildNode presumably collapses a "Sequence"
    // node that has only one child into that child - confirm against CapturingGroup.
    var sequence = new CapturingGroup("Sequence",
        new Sequence(terminal, new ZeroOrMore(whitespace)).Plus()
    )
    {
        DoReplaceBySingleChildNode = true
    };

    // prioritizedchoice := sequence "/" whitespace* sequence
    //                      ((whitespace* "/" whitespace* sequence)+)*
    var prioritizedchoice = new CapturingGroup("PrioritizedChoice",
        new Sequence(sequence, new Literal { MatchText = "/" })
        .Sequence(new ZeroOrMore(whitespace))
        .Sequence(sequence)
        .Sequence(
            new ZeroOrMore(
                new Sequence(
                    new ZeroOrMore(whitespace),
                    new Literal { MatchText = "/" }
                )
                .Sequence(new ZeroOrMore(whitespace))
                .Sequence(sequence)
                .Plus()
            )
        )
    );

    var expression = new CapturingGroup("Root",
        new RecursionCreate("Expression",
            new PrioritizedChoice(prioritizedchoice, sequence)));

    // The verbatim literal deliberately spans a line break: the whitespace before
    // the closing parenthesis is part of the input.
    var input = @"(?<NPEGNode>./.. 
)";
    var bytes = Encoding.UTF8.GetBytes(input);
    var iterator = new ByteInputIterator(bytes);
    var visitor = new NpegParserVisitor(iterator);
    expression.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);

    AstNode node = visitor.AST;
    Assert.IsTrue(node.Token.Name == "Root");
    // Checked once; the original repeated this identical assertion back-to-back.
    Assert.IsTrue(node.Children.Count == 1);

    AstNode capturingGroup = node.Children[0];
    Assert.IsTrue(capturingGroup.Token.Name == "CapturingGroup");
    Assert.IsTrue(capturingGroup.Children.Count == 2);
    Assert.IsTrue(capturingGroup.Children[0].Token.Name == "ReplacementNode");

    AstNode choice = capturingGroup.Children[1];
    Assert.IsTrue(choice.Token.Name == "PrioritizedChoice");
    Assert.IsTrue(choice.Children[0].Token.Name == "AnyCharacter");
    Assert.IsTrue(choice.Children[1].Token.Name == "Sequence");
    Assert.IsTrue(choice.Children[1].Children[0].Token.Name == "AnyCharacter");
    Assert.IsTrue(choice.Children[1].Children[1].Token.Name == "AnyCharacter");
}
// Grammar for a "{...}" repetition suffix with four alternatives
// (BETWEEN "min,max", ATMOST ",max", ATLEAST "min,", EXACT "n"), tried in that
// order. Parses ".{77,55}" and checks that the root has an "AnyCharacter" child
// followed by a "LimitingRepetition" child.
public void CompositeVisitor_CapturingGroup_SandBoxTest_PriorityChoice2()
{
    var mSpace = new CharacterClass { ClassExpression = "[ \t]" };

    // "{" space*
    var opening = new Sequence(
        new Literal { MatchText = "{" },
        new ZeroOrMore(mSpace));

    // BETWEEN := Min space* "," space* Max
    var betweenMin = new CapturingGroup("Min",
        new OneOrMore(new CharacterClass { ClassExpression = "[0-9]" }));
    var betweenHead = new Sequence(
        betweenMin.Sequence(new ZeroOrMore(mSpace)),
        new Literal { MatchText = "," });
    var betweenTail = new Sequence(
        new ZeroOrMore(mSpace),
        new CapturingGroup("Max",
            new OneOrMore(new CharacterClass { ClassExpression = "[0-9]" })));
    var between = new CapturingGroup("BETWEEN", betweenHead.Sequence(betweenTail));

    // ATMOST := "," space* Max
    var atMost = new CapturingGroup("ATMOST",
        new Sequence(
            new Literal { MatchText = "," },
            new Sequence(
                new ZeroOrMore(mSpace),
                new CapturingGroup("Max",
                    new OneOrMore(new CharacterClass { ClassExpression = "[0-9]" })))));

    // ATLEAST := space* Min space* ","
    var atLeastHead = new Sequence(
            new ZeroOrMore(mSpace),
            new CapturingGroup("Min",
                new OneOrMore(new CharacterClass { ClassExpression = "[0-9]" })))
        .Sequence(new ZeroOrMore(mSpace));
    var atLeast = new CapturingGroup("ATLEAST",
        new Sequence(atLeastHead, new Literal { MatchText = "," }));

    // EXACT := digits
    var exact = new CapturingGroup("EXACT",
        new OneOrMore(new CharacterClass { ClassExpression = "[0-9]" }));

    var bounds = new PrioritizedChoice(between, atMost)
        .Or(atLeast)
        .Or(exact);

    // opening bounds space* "}"
    var limitingSyntax = new Sequence(opening, bounds)
        .Sequence(new ZeroOrMore(mSpace))
        .Sequence(new Literal { MatchText = "}" });
    var limiting = new CapturingGroup("LimitingRepetition", limitingSyntax);

    var any = new CapturingGroup("AnyCharacter", new Literal { MatchText = "." });

    var expression = new CapturingGroup("Expression",
        new PrioritizedChoice(
            new Sequence(any, limiting),
            new Sequence(limiting, any)));

    var input = ".{77,55}";
    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
    var visitor = new NpegParserVisitor(iterator);

    expression.Accept(visitor);

    Assert.IsTrue(visitor.IsMatch);

    AstNode root = visitor.AST;
    Assert.IsTrue(root.Token.Name == "Expression");
    Assert.IsTrue(root.Children.Count == 2);
    Assert.IsTrue(root.Children[0].Token.Name == "AnyCharacter");
    Assert.IsTrue(root.Children[1].Token.Name == "LimitingRepetition");
}
// Grammar for the textual back-reference syntax \k< label [\i] > with optional
// whitespace and comments around the label. Parses a label padded with spaces
// and checks that the "Label" capture holds only the identifier.
public void CompositeVisitor_CapturingGroup_SandBoxTest_PriorityChoice1()
{
    // NOTE(review): "\r\r" is labelled "old macs" but a classic Mac line ending
    // is usually a single "\r" - confirm whether this literal is intended.
    PrioritizedChoice lineBreak = new PrioritizedChoice(
            new Literal { MatchText = "\r\n" },  // windows
            new Literal { MatchText = "\r\r" })  // old macs
        .Or(new Literal { MatchText = "\n" });   // linux

    // Single line comment: "//" followed by everything up to the line break.
    var lineCommentBody = new Sequence(new NotPredicate(lineBreak), new AnyCharacter()).Star();
    var singleLineComment = new Sequence(new Literal { MatchText = "//" }, lineCommentBody);

    // Multi-line comment: "/*" ... "*/".
    var blockCommentBody = new Sequence(
            new NotPredicate(new Literal { MatchText = "*/" }),
            new AnyCharacter())
        .Star()
        .Sequence(new Literal { MatchText = "*/" });
    var multiLineComment = new Sequence(new Literal { MatchText = "/*" }, blockCommentBody);

    var comment = new PrioritizedChoice(singleLineComment, multiLineComment);

    // Whitespace is either a blank character or a comment.
    var blank = new PrioritizedChoice(
        new CharacterClass { ClassExpression = "[ \t\r\n\v]" },
        comment);

    var label = new CapturingGroup("Label",
        new Sequence(
            // must start with alpha character
            new CharacterClass { ClassExpression = "[a-zA-Z_]" },
            new ZeroOrMore(new CharacterClass { ClassExpression = "[a-zA-Z0-9_]" })));

    // \k< blank* Label blank*
    var paddedLabel = new Sequence(new ZeroOrMore(blank), label)
        .Sequence(new ZeroOrMore(blank));
    var opening = new Sequence(new Literal { MatchText = @"\k<" }, paddedLabel);

    // Optional "[\i]" flag, captured as "CaseSensitive".
    var caseFlag = new Optional(
        new Sequence(
            new Sequence(
                new Literal { MatchText = "[" },
                new CapturingGroup("CaseSensitive", new Literal { MatchText = @"\i" })),
            new Literal { MatchText = "]" }));

    // blank* ">"
    var closing = new Sequence(new ZeroOrMore(blank), new Literal { MatchText = ">" });

    var backreference = new CapturingGroup("DynamicBackReferencing",
        opening.Sequence(caseFlag).Sequence(closing));

    // The trailing NotPredicate(AnyCharacter) requires the whole input to be consumed.
    var root = new CapturingGroup("Test",
        new Sequence(backreference, new NotPredicate(new AnyCharacter())));

    var input = @"\k< CapturedLabelVariableName >";
    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
    var visitor = new NpegParserVisitor(iterator);

    root.Accept(visitor);

    Assert.IsTrue(visitor.IsMatch);

    AstNode tree = visitor.AST;
    Assert.IsTrue(tree.Token.Name == "Test");

    var backReferenceNode = tree.Children[0];
    Assert.IsTrue(backReferenceNode.Token.Name == "DynamicBackReferencing");
    Assert.IsTrue(backReferenceNode.Children[0].Token.Name == "Label");
    Assert.IsTrue(backReferenceNode.Children[0].Token.ValueAsString(iterator) == "CapturedLabelVariableName");
}
// Boolean-algebra grammar: variables (bare letter, or quoted alphanumerics),
// "!" inversion, parenthesised sub-expressions via recursion, and six gate
// operators. The first two inputs are checked node by node; the remaining
// inputs are checked for a match and for the root token spanning the whole input.
public void PracticalExample_BooleanAlgebra()
{
    #region Composite

    //AND: */AND
    AExpression AND = new PrioritizedChoice(new Literal { MatchText = "*" }, new Literal { MatchText = "AND" });
    //NAND: ~*/NAND
    AExpression NAND = new PrioritizedChoice(new Literal { MatchText = "~*" }, new Literal { MatchText = "NAND" });
    //OR: +/OR
    AExpression OR = new PrioritizedChoice(new Literal { MatchText = "+" }, new Literal { MatchText = "OR" });
    //NOR: ~+/NOR
    AExpression NOR = new PrioritizedChoice(new Literal { MatchText = "~+" }, new Literal { MatchText = "NOR" });
    //XOR: ^/XOR
    AExpression XOR = new PrioritizedChoice(new Literal { MatchText = "^" }, new Literal { MatchText = "XOR" });
    //XNOR: ~^/XNOR
    AExpression XNOR = new PrioritizedChoice(new Literal { MatchText = "~^" }, new Literal { MatchText = "XNOR" });

    AExpression GATE = new CapturingGroup("GATE",
        new PrioritizedChoice(AND, NAND).Or(OR).Or(NOR).Or(XOR).Or(XNOR));

    // Variable: "[a-zA-Z0-9]+" / '[a-zA-Z0-9]+' / [a-zA-Z]
    AExpression VARIABLE = new PrioritizedChoice(
        new Sequence(
            new Literal { MatchText = "\"" },
            new CapturingGroup("VARIABLE",
                new OneOrMore(new CharacterClass { ClassExpression = "[a-zA-Z0-9]" }))
        ).Sequence(new Literal { MatchText = "\"" }),
        new Sequence(
            new Literal { MatchText = "'" },
            new CapturingGroup("VARIABLE",
                new OneOrMore(new CharacterClass { ClassExpression = "[a-zA-Z0-9]" }))
        ).Sequence(new Literal { MatchText = "'" })
    ).Or(
        new CapturingGroup("VARIABLE", new CharacterClass { ClassExpression = "[a-zA-Z]" })
    );

    // Variable: Variable / !Variable
    VARIABLE = new PrioritizedChoice(
        VARIABLE,
        new CapturingGroup("INVERTOR",
            new Sequence(
                new Literal { MatchText = "!" },
                VARIABLE
            )
        )
    );

    // Variable: Variable / Expression / !Expression
    VARIABLE = new PrioritizedChoice(
        VARIABLE,
        new Sequence(
            new Literal { MatchText = "(" },
            new RecursionCall("RECURSIONEXPRESSION")
        ).Sequence(new Literal { MatchText = ")" })
    ).Or(
        new CapturingGroup("INVERTOR",
            new Sequence(
                new Literal { MatchText = "!" },
                new Sequence(
                    new Literal { MatchText = "(" },
                    new RecursionCall("RECURSIONEXPRESSION")
                ).Sequence(new Literal { MatchText = ")" })
            )
        )
    );

    AExpression Root = new CapturingGroup("BOOLEANEQUATION",
        new Sequence(
            new RecursionCreate("RECURSIONEXPRESSION",
                //Expression: Variable ((AND|NAND|OR|NOR|XOR|XNOR) Variable)*
                new Sequence(VARIABLE, new Sequence(GATE, VARIABLE).Star())
            ),
            // ensure reaches end of file
            new NotPredicate(new AnyCharacter())
        )
    );

    #endregion

    // single-letter variables
    var input = "A*!B+!A*B";
    var bytes = Encoding.UTF8.GetBytes(input);
    var iterator = new ByteInputIterator(bytes);
    var visitor = new NpegParserVisitor(iterator);
    Root.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);

    AstNode node = visitor.AST;
    Assert.IsTrue(node.Token.Name == "BOOLEANEQUATION");
    Assert.IsTrue(node.Children[0].Token.Name == "VARIABLE");
    Assert.IsTrue(node.Children[0].Token.ValueAsString(iterator) == "A");
    Assert.IsTrue(node.Children[1].Token.Name == "GATE");
    Assert.IsTrue(node.Children[1].Token.ValueAsString(iterator) == "*");
    Assert.IsTrue(node.Children[2].Token.Name == "INVERTOR");
    Assert.IsTrue(node.Children[2].Children[0].Token.Name == "VARIABLE");
    Assert.IsTrue(node.Children[2].Children[0].Token.ValueAsString(iterator) == "B");
    Assert.IsTrue(node.Children[3].Token.Name == "GATE");
    Assert.IsTrue(node.Children[4].Token.Name == "INVERTOR");
    Assert.IsTrue(node.Children[4].Children[0].Token.Name == "VARIABLE");
    Assert.IsTrue(node.Children[4].Children[0].Token.ValueAsString(iterator) == "A");
    Assert.IsTrue(node.Children[5].Token.Name == "GATE");
    Assert.IsTrue(node.Children[6].Token.Name == "VARIABLE");
    Assert.IsTrue(node.Children[6].Token.ValueAsString(iterator) == "B");

    // quoted variable
    input = "'aA'*!'bB'+!'aA'*'bB'";
    bytes = Encoding.UTF8.GetBytes(input);
    iterator = new ByteInputIterator(bytes);
    visitor = new NpegParserVisitor(iterator);
    Root.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);

    node = visitor.AST;
    Assert.IsTrue(node.Token.Name == "BOOLEANEQUATION");
    Assert.IsTrue(node.Children[0].Token.Name == "VARIABLE");
    Assert.IsTrue(node.Children[0].Token.ValueAsString(iterator) == "aA");
    Assert.IsTrue(node.Children[1].Token.Name == "GATE");
    Assert.IsTrue(node.Children[1].Token.ValueAsString(iterator) == "*");
    Assert.IsTrue(node.Children[2].Token.Name == "INVERTOR");
    Assert.IsTrue(node.Children[2].Children[0].Token.Name == "VARIABLE");
    Assert.IsTrue(node.Children[2].Children[0].Token.ValueAsString(iterator) == "bB");
    Assert.IsTrue(node.Children[3].Token.Name == "GATE");
    Assert.IsTrue(node.Children[4].Token.Name == "INVERTOR");
    Assert.IsTrue(node.Children[4].Children[0].Token.Name == "VARIABLE");
    Assert.IsTrue(node.Children[4].Children[0].Token.ValueAsString(iterator) == "aA");
    Assert.IsTrue(node.Children[5].Token.Name == "GATE");
    Assert.IsTrue(node.Children[6].Token.Name == "VARIABLE");
    Assert.IsTrue(node.Children[6].Token.ValueAsString(iterator) == "bB");

    // Remaining inputs: each must match, and the root token must cover the
    // entire input. These checks replace the former
    // "#warning Assert.IsTrue(node.Token.Value == input);" placeholders, which
    // only produced compile warnings and asserted nothing.
    // NOTE(review): assumes the root capture spans the full input, since the
    // grammar ends with NotPredicate(AnyCharacter) - confirm on first run.
    string[] wholeEquationInputs =
    {
        // expression + gate + variable .star()
        "A*!B*C+!A*B*C",
        // parenthesis
        "((A)*(!B)+(!A)*(B))",
        "((A)*!(B)+!(A)*(B))",
        "((A)*(!(B))+(!(A))*(B))",
        "(!X*Y*!Z)",
        "(!X*Y*!Z)+(!X*Y*Z)",
        "(X*Z)",
        "(!X*Y*!Z)+(!X*Y*Z)+(X*Z)",
        "((((!X*Y*Z)+(!X*Y*!Z)+(X*Z))))"
    };

    foreach (string equation in wholeEquationInputs)
    {
        var equationIterator = new ByteInputIterator(Encoding.UTF8.GetBytes(equation));
        var equationVisitor = new NpegParserVisitor(equationIterator);
        Root.Accept(equationVisitor);
        Assert.IsTrue(equationVisitor.IsMatch, "Expected a match for: " + equation);

        AstNode equationRoot = equationVisitor.AST;
        Assert.IsTrue(equationRoot.Token.ValueAsString(equationIterator) == equation,
            "Root token should span the whole input: " + equation);
    }
}
// Recursive variant of the tag grammar: an element body may contain nested
// elements (through the "MATCHXML" recursion) or plain characters. Checks that
// the parser reports a match for three nested elements.
public void Terminal_DynamicBackReference_Recursive()
{
    string input = @" <test> test data start <test1> test1 data start <test2> text2 data start text2 data end </test2> test1 data end </test1> test data end </test> ";

    // Tag name: one or more alphanumeric characters, captured as "TAG".
    var tagName = new CapturingGroup("TAG",
        new OneOrMore(new CharacterClass { ClassExpression = "[a-zA-Z0-9]" }));

    // "<" TAG ">"
    var startTag = new CapturingGroup("START_TAG",
        new Sequence(new Literal { MatchText = "<" }, tagName)
            .Sequence(new Literal { MatchText = ">" }));

    // "</" back-reference-to-TAG ">"
    var tagBackReference = new DynamicBackReference
    {
        BackReferenceName = "TAG",
        IsCaseSensitive = true
    };
    var endTag = new CapturingGroup("END_TAG",
        new Sequence(new Literal { MatchText = "</" }, tagBackReference)
            .Sequence(new Literal { MatchText = ">" }));

    // Body: a nested element is tried first; otherwise any character, as long as
    // an end tag does not start at that position.
    var bodyItem = new PrioritizedChoice(
        new RecursionCall("MATCHXML"),
        new Sequence(new NotPredicate(endTag), new AnyCharacter()));
    var body = new CapturingGroup("Body", bodyItem.Star());

    // MATCHXML := (START_TAG Body END_TAG)+
    var elements = new Sequence(startTag, body).Sequence(endTag).Plus();
    var document = new CapturingGroup("Expression",
        new RecursionCreate("MATCHXML", elements));

    // The surrounding spaces of the literal are trimmed before parsing.
    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input.Trim()));
    var visitor = new NpegParserVisitor(iterator);

    document.Accept(visitor);

    Assert.IsTrue(visitor.IsMatch);
    AstNode tree = visitor.AST;
}
// Exercises LimitingRepetition over a digit class with different Min/Max
// settings against "1234567890", and expects an ArgumentException from the two
// invalid configurations (no bounds, and Max less than Min).
public void Terminal_LimitingRepetition()
{
    // min
    // min max
    // max
    // math expression using back referencing {(\k<C2> - \k<C1>)+1} - variable length protocols
    AExpression digit = new CharacterClass { ClassExpression = "[0-9]" };

    #region nonterminals
    var minZero = new CapturingGroup("MinTrue", new LimitingRepetition(digit) { Min = 0 });
    var minTooLarge = new CapturingGroup("MinFalse", new LimitingRepetition(digit) { Min = 44 });
    var minFive = new CapturingGroup("MinTrue", new LimitingRepetition(digit) { Min = 5 });
    var maxFive = new CapturingGroup("MaxTrue", new LimitingRepetition(digit) { Max = 5 });
    var fiveToSix = new CapturingGroup("MinMax", new LimitingRepetition(digit) { Min = 5, Max = 6 });
    var noBounds = new CapturingGroup("ExceptionNoMinMax", new LimitingRepetition(digit));
    var maxBelowMin = new CapturingGroup("ExceptionMaxLessThanMin", new LimitingRepetition(digit) { Min = 5, Max = 0 });
    #endregion

    string input = "1234567890";
    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));

    // Min = 0 matches.
    var visitor = new NpegParserVisitor(iterator);
    minZero.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    AstNode tree = visitor.AST;

    // Min = 44 cannot be satisfied by ten digits. The same iterator is reused
    // for every case, so its position is reset each time.
    iterator.Index = 0;
    visitor = new NpegParserVisitor(iterator);
    minTooLarge.Accept(visitor);
    Assert.IsFalse(visitor.IsMatch);

    // Min = 5 with no Max captures the whole input.
    iterator.Index = 0;
    visitor = new NpegParserVisitor(iterator);
    minFive.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    tree = visitor.AST;
    Assert.IsTrue(tree.Token.ValueAsString(iterator) == input);

    // Max = 5 matches.
    iterator.Index = 0;
    visitor = new NpegParserVisitor(iterator);
    maxFive.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    tree = visitor.AST;

    // Min = 5, Max = 6 matches.
    iterator.Index = 0;
    visitor = new NpegParserVisitor(iterator);
    fiveToSix.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    tree = visitor.AST;

    // Both invalid configurations are expected to throw ArgumentException while
    // being visited; each caught exception is counted.
    int argumentExceptions = 0;

    try
    {
        iterator.Index = 0;
        visitor = new NpegParserVisitor(iterator);
        noBounds.Accept(visitor);
        Assert.IsTrue(visitor.IsMatch);
        tree = visitor.AST;
    }
    catch (ArgumentException)
    {
        argumentExceptions += 1;
    }

    try
    {
        iterator.Index = 0;
        visitor = new NpegParserVisitor(iterator);
        maxBelowMin.Accept(visitor);
        Assert.IsTrue(visitor.IsMatch);
        tree = visitor.AST;
    }
    catch (ArgumentException)
    {
        argumentExceptions += 1;
    }

    Assert.IsTrue(argumentExceptions == 2);
}
// Exercises CodePoint with binary ("#b...") patterns: exact one- and two-byte
// matches, patterns whose bit count is not a multiple of eight, "X" don't-care
// bits, and inputs that must not match.
public void Terminal_CodePoint_Binary()
{
    // Failure message shared by the two byte-boundary cases.
    const string boundaryMessage = "During incomplete byte boundaries 0 is expected to prefix input; This would shift input to the right by 4 bits. In this case it complete codepoint should be null and letter a.";

    Assert.IsTrue((Byte)'a' == 97);
    Assert.IsTrue((Byte)'a' == 0x61);

    // Single byte: 'a' written as seven bits.
    var input = "a";
    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
    var visitor = new NpegParserVisitor(iterator);
    var binary = new CapturingGroup("Binary", new CodePoint { Match = "#b1100001" });
    binary.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    AstNode tree = visitor.AST;
    Assert.IsTrue(tree.Token.Name == "Binary");
    Assert.IsTrue(tree.Token.ValueAsString(iterator) == "a");

    // Two bytes: "aa" written as sixteen bits.
    input = "aa";
    iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
    visitor = new NpegParserVisitor(iterator);
    binary = new CapturingGroup("Binary", new CodePoint { Match = "#b0110000101100001" });
    binary.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    tree = visitor.AST;
    Assert.IsTrue(tree.Token.Name == "Binary");
    Assert.IsTrue(tree.Token.ValueAsString(iterator) == "aa");

    // Byte boundary tests
    input = "\0a";
    iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
    visitor = new NpegParserVisitor(iterator);
    binary = new CapturingGroup("Binary", new CodePoint { Match = "#b00001100001" });
    binary.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch, boundaryMessage);
    tree = visitor.AST;
    Assert.IsTrue(tree.Token.Name == "Binary");
    Assert.IsTrue(tree.Token.ValueAsString(iterator) == "\0a");

    // Same input, matched by two consecutive code points.
    input = "\0a";
    iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
    visitor = new NpegParserVisitor(iterator);
    binary = new CapturingGroup("Binary",
        new Sequence(
            new CodePoint { Match = "#b000" },
            new CodePoint { Match = "#b01100001" }));
    binary.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch, boundaryMessage);
    tree = visitor.AST;
    Assert.IsTrue(tree.Token.Name == "Binary");
    Assert.IsTrue(tree.Token.ValueAsString(iterator) == "\0a");

    // Don't care tests
    // Only the first three bytes have a low nibble of 0001, so only they are captured.
    input = Encoding.ASCII.GetString(new byte[] { 0x11, 0x01, 0x71, 0x03, 0x00 });
    iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
    visitor = new NpegParserVisitor(iterator);
    binary = new CapturingGroup("Binary",
        new OneOrMore(new CodePoint { Match = "#bXXXX0001" })); // #bXXXX0001
    binary.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    tree = visitor.AST;
    Assert.IsTrue(tree.Token.Name == "Binary");
    Assert.IsTrue(tree.Token.ValueAsString(iterator) == Encoding.ASCII.GetString(new byte[] { 0x11, 0x01, 0x71 }));

    // 0x10 has a low nibble of 0000 and must not match.
    input = Encoding.ASCII.GetString(new byte[] { 0x10 });
    iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
    visitor = new NpegParserVisitor(iterator);
    binary = new CapturingGroup("Binary", new CodePoint { Match = "#bXXXX0001" });
    binary.Accept(visitor);
    Assert.IsFalse(visitor.IsMatch);

    // cannot consume character test
    input = "";
    iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
    visitor = new NpegParserVisitor(iterator);
    binary = new CapturingGroup("Binary", new CodePoint { Match = "#bXXXX0001" });
    binary.Accept(visitor);
    Assert.IsFalse(visitor.IsMatch);
}
// Exercises CodePoint with hexadecimal ("#x...") patterns: an exact one-byte
// match, patterns with an odd number of hex digits, "X" don't-care nibbles, and
// inputs that must not match.
public void Terminal_CodePoint_Hexadecimal()
{
    Assert.IsTrue((Byte)'a' == 97);
    Assert.IsTrue((Byte)'a' == 0x61);

    // Single byte: 'a' is 0x61.
    var input = "a";
    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
    var visitor = new NpegParserVisitor(iterator);
    var hexadecimal = new CapturingGroup("Hexadecimal", new CodePoint { Match = "#x61" });
    hexadecimal.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    AstNode tree = visitor.AST;
    Assert.IsTrue(tree.Token.Name == "Hexadecimal");
    Assert.IsTrue(tree.Token.ValueAsString(iterator) == "a");

    // Byte boundary tests
    input = "\na";
    iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
    visitor = new NpegParserVisitor(iterator);
    hexadecimal = new CapturingGroup("Hexadecimal", new CodePoint { Match = "#xA61" });
    hexadecimal.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch, "During incomplete byte boundaries 0 is expected to prefix input; This would shift input to the right by 4 bits. In this case it complete codepoint should be 0A = \n and letter a.");
    tree = visitor.AST;
    Assert.IsTrue(tree.Token.Name == "Hexadecimal");
    Assert.IsTrue(tree.Token.ValueAsString(iterator) == "\na");

    input = "\0a";
    iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
    visitor = new NpegParserVisitor(iterator);
    hexadecimal = new CapturingGroup("Hexadecimal", new CodePoint { Match = "#x061" });
    hexadecimal.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch, "During incomplete byte boundaries 0 is expected to prefix input; This would shift input to the right by 4 bits. In this case it complete codepoint should be 00 = \0 and letter a.");
    tree = visitor.AST;
    Assert.IsTrue(tree.Token.Name == "Hexadecimal");
    Assert.IsTrue(tree.Token.ValueAsString(iterator) == "\0a");

    // Don't care tests
    // Only the first three bytes have a low nibble of 1, so only they are captured.
    var rawBytes = new byte[] { 0x11, 0x01, 0x71, 0x03, 0x00 };
    iterator = new ByteInputIterator(rawBytes);
    visitor = new NpegParserVisitor(iterator);
    hexadecimal = new CapturingGroup("Hexadecimal",
        new OneOrMore(new CodePoint { Match = "#xX1" })); // #bXXXX0001
    hexadecimal.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    tree = visitor.AST;
    Assert.IsTrue(tree.Token.Name == "Hexadecimal");
    Assert.IsTrue(tree.Token.ValueAsString(iterator) == Encoding.ASCII.GetString(new byte[] { 0x11, 0x01, 0x71 }));

    // 0x10 has a low nibble of 0 and must not match.
    iterator = new ByteInputIterator(new byte[] { 0x10 });
    visitor = new NpegParserVisitor(iterator);
    hexadecimal = new CapturingGroup("Hexadecimal", new CodePoint { Match = "#xX1" });
    hexadecimal.Accept(visitor);
    Assert.IsFalse(visitor.IsMatch);

    // cannot consume character test
    input = string.Empty;
    iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
    visitor = new NpegParserVisitor(iterator);
    hexadecimal = new CapturingGroup("Hexadecimal", new CodePoint { Match = "#xX1" });
    hexadecimal.Accept(visitor);
    Assert.IsFalse(visitor.IsMatch);
}