/// <summary>
/// Builds OneOrMore(GetRule("mComment")), passes it through RequireEndOfInput and
/// WrapInCapturedGroup("Test", ...), and asserts that the result matches one
/// single-line comment and two multi-line comments.
/// </summary>
public void PEGrammarParser_Comment()
{
    AExpression comments = OneOrMore(GetRule("mComment"));
    AExpression grammar = WrapInCapturedGroup("Test", RequireEndOfInput(comments));

    var samples = new[]
    {
        @"//this is a single line comment.",
        @"/*this is a multiline comment.*/",
        @"/* this is a multiline comment. */"
    };

    foreach (var sample in samples)
    {
        // Each sample is parsed with a fresh iterator and visitor.
        var source = new ByteInputIterator(Encoding.UTF8.GetBytes(sample));
        var parser = new NpegParserVisitor(source);
        grammar.Accept(parser);
        Assert.IsTrue(parser.IsMatch);
    }
}
/// <summary>
/// Loads a phone-number grammar written with bounded repetition ({3,3} and {4}),
/// parses "123-456-7890" and checks the name and text of the root node and of its
/// three children.
/// </summary>
public void PEGrammar_LimitingRepetition()
{
    // Adjacent verbatim segments are concatenated at compile time into one
    // single-line grammar string.
    var grammar =
        @" (?<ThreeDigitCode>): [0-9]{3,3};" +
        @" (?<PhoneNumber>): ThreeDigitCode '-' ThreeDigitCode '-' (?<FourDigitCode>[0-9]{4});" +
        @" ";
    var phoneNumberRule = PEGrammar.Load(grammar);

    var input = "123-456-7890";
    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
    var visitor = new NpegParserVisitor(iterator);
    phoneNumberRule.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);

    AstNode root = visitor.AST;
    Assert.IsTrue(root.Token.Name == "PhoneNumber");
    Assert.IsTrue(root.Token.ValueAsString(iterator) == input);

    // Expected capture name and matched text for each child, in order.
    var names = new[] { "ThreeDigitCode", "ThreeDigitCode", "FourDigitCode" };
    var texts = new[] { "123", "456", "7890" };
    for (int i = 0; i < names.Length; i++)
    {
        Assert.IsTrue(root.Children[i].Token.Name == names[i]);
        Assert.IsTrue(root.Children[i].Token.ValueAsString(iterator) == texts[i]);
    }
}
/// <summary>
/// Loads a phone-number grammar made of three named rules, parses "123-456-7890"
/// and checks the name and text of the root node and of its three children.
/// </summary>
public void PEGrammar_PhoneNumber()
{
    var input = "123-456-7890";

    // The rule text is trimmed before loading, exactly as a single literal would be.
    var ruleText = (
        @" (?<ThreeDigitCode>): [0-9] [0-9] [0-9];" +
        @" (?<FourDigitCode>): [0-9] [0-9] [0-9] [0-9];" +
        @" (?<PhoneNumber>): ThreeDigitCode '-' ThreeDigitCode '-' FourDigitCode;" +
        @" ").Trim();
    var phoneNumberRule = PEGrammar.Load(ruleText);

    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
    var visitor = new NpegParserVisitor(iterator);
    phoneNumberRule.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);

    AstNode root = visitor.AST;
    Assert.IsTrue(root.Token.Name == "PhoneNumber");
    Assert.IsTrue(root.Token.ValueAsString(iterator) == input);

    // Expected capture name and matched text for each child, in order.
    var names = new[] { "ThreeDigitCode", "ThreeDigitCode", "FourDigitCode" };
    var texts = new[] { "123", "456", "7890" };
    for (int i = 0; i < names.Length; i++)
    {
        Assert.IsTrue(root.Children[i].Token.Name == names[i]);
        Assert.IsTrue(root.Children[i].Token.ValueAsString(iterator) == texts[i]);
    }
}
/// <summary>
/// Walks a 20-byte input forwards with Next() and then backwards with Previous(),
/// asserting at every step that Index and Current() agree with the source bytes.
/// </summary>
public void Iterator_Index()
{
    var data = Encoding.UTF8.GetBytes("01234567890123456789");
    var iterator = new ByteInputIterator(data);
    int lastIndex = data.Length - 1;

    Assert.IsTrue(iterator.Index == 0);
    Assert.IsTrue(iterator.Length == data.Length);

    // Forward pass: Next() is not called once the last byte is reached.
    for (int position = 0; position <= lastIndex; position++)
    {
        Assert.IsTrue(iterator.Index == position);
        Assert.IsTrue(iterator.Current() == data[position]);
        if (position == lastIndex)
        {
            continue;
        }

        Assert.IsTrue(iterator.Next() == data[position + 1]);
    }

    // Backward pass: Previous() is not called once the first byte is reached.
    for (int position = lastIndex; position >= 0; position--)
    {
        Assert.IsTrue(iterator.Index == position);
        Assert.IsTrue(iterator.Current() == data[position]);
        if (position == 0)
        {
            continue;
        }

        Assert.IsTrue(iterator.Previous() == data[position - 1]);
    }
}
/// <summary>
/// Hand-builds a composite for tag-delimited content in which the end tag refers
/// back to the captured "TAG" through a case-sensitive DynamicBackReference, and
/// asserts that two consecutive elements are matched.
/// </summary>
public void Terminal_DynamicBackReference()
{
    // TAG: one or more alphanumeric characters, captured under the name "TAG".
    AExpression tagName = new CapturingGroup("TAG",
        new OneOrMore(new CharacterClass { ClassExpression = "[a-zA-Z0-9]" }));

    // START_TAG: "<" TAG ">"
    AExpression startTag = new CapturingGroup("START_TAG",
        new Sequence(new Literal { MatchText = "<" }, tagName)
            .Sequence(new Literal { MatchText = ">" }));

    // END_TAG: "</" followed by a back reference to "TAG", then ">"
    AExpression endTag = new CapturingGroup("END_TAG",
        new Sequence(
                new Literal { MatchText = "</" },
                new DynamicBackReference { BackReferenceName = "TAG", IsCaseSensitive = true })
            .Sequence(new Literal { MatchText = ">" }));

    // Body: any characters up to (not including) the end tag.
    AExpression body = new CapturingGroup("Body",
        new Sequence(new NotPredicate(endTag), new AnyCharacter()).Star());

    // Expression: one or more START_TAG Body END_TAG groups.
    AExpression document = new CapturingGroup("Expression",
        new Sequence(startTag, body).Sequence(endTag).Plus());

    String input = "<h1>hello</h1><h2>hello</h2>";
    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
    var visitor = new NpegParserVisitor(iterator);
    document.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);

    // The AST is fetched but not inspected yet; see the warning below.
    AstNode ast = visitor.AST;
#warning write tree
}
/// <summary>
/// Builds an expression that accepts a terminal with prefixes, a terminal with
/// suffixes, or a bare terminal, then checks that the input "." produces an
/// "Expression" node with a single "AnyCharacter" child.
/// </summary>
public void CompositeVisitor_CapturingGroup_SandBoxTest_PriorityChoice3()
{
    var prefix = new PrioritizedChoice(
        new CapturingGroup("AndPredicate", new Literal { MatchText = "&" }),
        new CapturingGroup("NotPredicate", new Literal { MatchText = "!" }));

    PrioritizedChoice suffix = new PrioritizedChoice(
            new CapturingGroup("ZeroOrMore", new Literal { MatchText = "*" }),
            new CapturingGroup("OneOrMore", new Literal { MatchText = "+" }))
        .Or(new CapturingGroup("Optional", new Literal { MatchText = "?" }));

    var terminal = new CapturingGroup("AnyCharacter", new Literal { MatchText = "." });

    // Alternatives are tried in this order: prefixed terminal, suffixed terminal,
    // then the bare terminal.
    var prefixedTerminal = prefix.Plus().Sequence(terminal);
    var suffixedTerminal = terminal.Sequence(suffix.Plus());
    var alternatives = new PrioritizedChoice(prefixedTerminal, suffixedTerminal).Or(terminal);
    var expression = new CapturingGroup("Expression", alternatives.Plus());

    var input = ".";
    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
    var visitor = new NpegParserVisitor(iterator);
    expression.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);

    AstNode root = visitor.AST;
    Assert.IsTrue(root.Children.Count == 1);
    Assert.IsTrue(root.Token.Name == "Expression");
    Assert.IsTrue(root.Token.ValueAsString(iterator) == ".");
    Assert.IsTrue(root.Children[0].Token.Name == "AnyCharacter");
}
/// <summary>
/// Asserts that one or more repetitions of the "mNewLine" rule, passed through
/// RequireEndOfInput and wrapped in a "Test" group, match a mix of newline sequences.
/// </summary>
public void PEGrammarParser_NewLine()
{
    AExpression newLines = OneOrMore(GetRule("mNewLine"));
    AExpression grammar = WrapInCapturedGroup("Test", RequireEndOfInput(newLines));

    // Original author's note: only Linux / Windows / Mac newlines are matched.
    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes("\n\n\r\n\r\r"));
    var visitor = new NpegParserVisitor(iterator);
    grammar.Accept(visitor);

    Assert.IsTrue(visitor.IsMatch);
}
/// <summary>
/// Asserts that one or more repetitions of the "mSpace" rule, passed through
/// RequireEndOfInput and wrapped in a "Test" group, match a run of spaces and tabs.
/// </summary>
public void PEGrammarParser_Space()
{
    AExpression spaces = OneOrMore(GetRule("mSpace"));
    AExpression grammar = WrapInCapturedGroup("Test", RequireEndOfInput(spaces));

    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(" \t \t\t \t"));
    var visitor = new NpegParserVisitor(iterator);
    grammar.Accept(visitor);

    Assert.IsTrue(visitor.IsMatch);
}
/// <summary>
/// Checks that an iterator over empty input reports length 0, returns -1 from
/// Current(), Next() and Previous(), and keeps its index at 0 throughout.
/// </summary>
public void Iterator_OutofRange()
{
    var empty = Encoding.UTF8.GetBytes("");
    var iterator = new ByteInputIterator(empty);

    Assert.IsTrue(iterator.Index == 0);
    Assert.IsTrue(iterator.Length == 0);

    // The calls stay in this order because the iterator is stateful; the index
    // is re-checked after every call.
    Assert.IsTrue(iterator.Current() == -1);
    Assert.IsTrue(iterator.Index == 0);

    Assert.IsTrue(iterator.Next() == -1);
    Assert.IsTrue(iterator.Index == 0);

    Assert.IsTrue(iterator.Previous() == -1);
    Assert.IsTrue(iterator.Index == 0);
}
/// <summary>
/// Hand-builds a phone-number composite (three digits, hyphen, three digits,
/// hyphen, four digits), parses "123-456-7890" and checks the name and text of
/// the root node and of its three children.
/// </summary>
public void PracticalExample_PhoneNumber()
{
    // Terminals
    AExpression digit = new CharacterClass { ClassExpression = "[0-9]" };
    AExpression hyphen = new Literal { MatchText = "-" };

    // Non-terminals
    AExpression threeDigits = new CapturingGroup("ThreeDigitCode",
        new Sequence(digit, digit).Sequence(digit));
    AExpression fourDigits = new CapturingGroup("FourDigitCode",
        new Sequence(digit, digit).Sequence(digit).Sequence(digit));
    AExpression phoneNumber = new CapturingGroup("PhoneNumber",
        new Sequence(threeDigits, hyphen)
            .Sequence(threeDigits)
            .Sequence(hyphen)
            .Sequence(fourDigits));

    String input = "123-456-7890";

    // Run the manually built composite over the input.
    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
    var visitor = new NpegParserVisitor(iterator);
    phoneNumber.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);

    AstNode root = visitor.AST;
    Assert.IsTrue(root.Token.Name == "PhoneNumber");
    Assert.IsTrue(root.Token.ValueAsString(iterator) == "123-456-7890");

    // Expected capture name and matched text for each child, in order.
    var names = new[] { "ThreeDigitCode", "ThreeDigitCode", "FourDigitCode" };
    var texts = new[] { "123", "456", "7890" };
    for (int i = 0; i < names.Length; i++)
    {
        Assert.IsTrue(root.Children[i].Token.Name == names[i]);
        Assert.IsTrue(root.Children[i].Token.ValueAsString(iterator) == texts[i]);
    }
}
/// <summary>
/// Checks the initial state of an iterator over a 20-byte input: index 0,
/// length 20, the first two characters via Current()/Next()/Previous(), and the
/// full input via Text(0, 19).
/// </summary>
public void Iterator_Initialization()
{
    var text = "01234567890123456789";
    var data = Encoding.UTF8.GetBytes(text);
    var iterator = new ByteInputIterator(data);

    // The iterator starts at a zero-based index.
    Assert.IsTrue(iterator.Index == 0);
    Assert.IsTrue(iterator.Length == 20);

    Assert.IsTrue(iterator.Current() == '0');
    Assert.IsTrue(iterator.Next() == '1');
    Assert.IsTrue(iterator.Previous() == '0');

    var wholeInput = iterator.Text(0, 19);
    Assert.IsTrue(data.SequenceEqual(wholeInput), "Text unable to return complete input.");
}
/// <summary>
/// Checks that an and-predicate over one-or-more digits reports a match on an
/// all-digit input while leaving the iterator index at 0.
/// </summary>
public void NonTerminal_Predicate_And()
{
    AExpression digit = new CharacterClass { ClassExpression = "[0-9]" };

    var data = Encoding.UTF8.GetBytes("01234567890123456789");
    var iterator = new ByteInputIterator(data);

    // Regex analogue of the inner expression: \d+
    AExpression lookahead = new OneOrMore(digit).And();
    var visitor = new NpegParserVisitor(iterator);
    lookahead.Accept(visitor);

    Assert.IsTrue(visitor.IsMatch);

    // Predicates should not move the iterator once they have been evaluated.
    Assert.IsTrue(iterator.Index == 0);
}
/// <summary>
/// Parses PEG rule text with the root PEG expression and returns the expression
/// carried by the resulting interpreter AST node.
/// </summary>
/// <param name="rules">Rule text; it is encoded as UTF-8 before being parsed.</param>
/// <returns>The <c>Expression</c> of the <c>InterpreterAstNode</c> produced by the parse.</returns>
/// <exception cref="ArgumentNullException"><paramref name="rules"/> is null.</exception>
/// <exception cref="InvalidRuleException">The rule text is not matched by the root PEG expression.</exception>
public static AExpression Load(String rules)
{
    // Fail fast with a parameter name the caller recognises. Without this guard a
    // null argument surfaces from Encoding.GetBytes (same exception type) only
    // after the root expression has already been built.
    if (rules == null)
    {
        throw new ArgumentNullException("rules");
    }

    var rootExpression = RootPegExpression();
    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(rules));
    var visitor = new NpegParserVisitor(iterator, new PeGrammarAstNodeFactory(iterator));
    rootExpression.Accept(visitor);

    if (!visitor.IsMatch)
    {
        throw new InvalidRuleException();
    }

    // The PeGrammarAstNodeFactory passed above is expected to yield an
    // InterpreterAstNode root; the cast throws if it does not.
    var interpret = (InterpreterAstNode)visitor.AST;
    return interpret.Expression;
}
/// <summary>
/// Checks that "and-predicate over digits, followed by not-any-character" fails
/// on an input that starts with digits, and that the iterator index is 0 afterwards.
/// </summary>
public void NonTerminal_Predicate_Or()
{
    AExpression digit = new CharacterClass { ClassExpression = "[0-9]" };

    var data = Encoding.UTF8.GetBytes("0123456abcdefg");
    var iterator = new ByteInputIterator(data);

    // Original author's note: equivalent to regex '^' '$', i.e. ^\d+$
    AExpression anchored = new OneOrMore(digit)
        .And()
        .Sequence(new NotPredicate(new AnyCharacter()));

    var visitor = new NpegParserVisitor(iterator);
    anchored.Accept(visitor);

    // The expression is expected to fail on this input.
    Assert.IsFalse(visitor.IsMatch);

    // Predicates should not move the iterator once they have been evaluated.
    Assert.IsTrue(iterator.Index == 0);
}
/// <summary>
/// Loads a grammar whose only rule is the code point #x20 and asserts that it
/// matches an input consisting of a single space.
/// </summary>
public void PEGrammar_Interpreter_CodePoint()
{
    var ruleText = @" (?<Value>): #x20; ";
    AExpression rule = PEGrammar.Load(ruleText);

    String input = " ";
    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
    var visitor = new NpegParserVisitor(iterator);
    rule.Accept(visitor);

    Assert.IsTrue(visitor.IsMatch);
}
/// <summary>
/// Exercises literal rules loaded from grammar text: a plain literal against
/// differently-cased input (no match), the same literal with the \i modifier
/// (match), and a backslash literal in double-quoted and single-quoted form.
/// </summary>
public void PEGrammar_Literal()
{
    // 1. Plain literal: the lower-case input must not match.
    AExpression caseSensitive = PEGrammar.Load(@"(?<Expression>): 'Hello World';");
    var input = "hello world";
    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
    var visitor = new NpegParserVisitor(iterator);
    caseSensitive.Accept(visitor);
    Assert.IsFalse(visitor.IsMatch);

    // 2. Literal followed by \i: the same input must match.
    AExpression notCaseSensitive = PEGrammar.Load(@"(?<Expression>): 'Hello World'\i;");
    input = "hello world";
    iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
    visitor = new NpegParserVisitor(iterator);
    notCaseSensitive.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);

    AstNode matched = visitor.AST;
    Assert.IsTrue(matched.Token.Name == "Expression");
    Assert.IsTrue(matched.Token.ValueAsString(iterator) == input);

    // 3. Escaped backslash inside double quotes.
    // (Original author's note: unsure whether verbatim @"" strings are the
    // better way to write the escaping.)
    input = @"\";
    AExpression escape = PEGrammar.Load(@"(?<Literal>): ""\\"";");
    iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
    visitor = new NpegParserVisitor(iterator);
    escape.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    Assert.IsTrue(@"\" == visitor.AST.Token.ValueAsString(iterator));

    // 4. Escaped backslash inside single quotes.
    input = @"\";
    escape = PEGrammar.Load(@"(?<Literal>): '\\';");
    iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
    visitor = new NpegParserVisitor(iterator);
    escape.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    Assert.IsTrue(@"\" == visitor.AST.Token.ValueAsString(iterator));
}
/// <summary>
/// Exercises AnyCharacter: a two-character sequence on 3-byte and 2-byte inputs
/// (index ends at 2), a single AnyCharacter on empty input (no match, index 0),
/// and digits followed by not-any-character on "012345." (no match).
/// </summary>
public void Terminal_Any()
{
    // Two consecutive AnyCharacter terminals; the same expression is reused for
    // the first two inputs.
    AExpression any;
    ByteInputIterator iterator;
    NpegParserVisitor visitor;

    iterator = new ByteInputIterator(Encoding.UTF8.GetBytes("ijk"));
    any = new Sequence(new AnyCharacter(), new AnyCharacter());
    visitor = new NpegParserVisitor(iterator);
    any.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    Assert.IsTrue(iterator.Index == 2,
                  "Expected two characters to be consumed and Iterator updated by 2. 0, 1 .. points to 2");

    iterator = new ByteInputIterator(Encoding.UTF8.GetBytes("ij"));
    visitor = new NpegParserVisitor(iterator);
    any.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    Assert.IsTrue(iterator.Index == 2,
                  "Expected two characters to be consumed and Iterator updated by 2. 0, 1 .. points to 2");

    // A single AnyCharacter cannot match empty input.
    iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(""));
    any = new AnyCharacter();
    visitor = new NpegParserVisitor(iterator);
    any.Accept(visitor);
    Assert.IsFalse(visitor.IsMatch);
    Assert.IsTrue(iterator.Index == 0, "Expected no characters to be consumed and index stay at zero.");

    // Digits must be followed by end of input, so the trailing '.' makes it fail.
    var number = new Sequence(
        new OneOrMore(new CharacterClass { ClassExpression = "[0-9]" }),
        new NotPredicate(new AnyCharacter()));
    iterator = new ByteInputIterator(Encoding.UTF8.GetBytes("012345."));
    visitor = new NpegParserVisitor(iterator);
    number.Accept(visitor);
    Assert.IsFalse(visitor.IsMatch);
}
/// <summary>
/// Checks that an and-predicate over one-or-more digits reports a match on an
/// all-digit input while leaving the iterator index at 0.
/// </summary>
public void NonTerminal_Predicate_And()
{
    AExpression digit = new CharacterClass { ClassExpression = "[0-9]" };

    var data = Encoding.UTF8.GetBytes("01234567890123456789");
    var iterator = new ByteInputIterator(data);

    // Regex analogue of the inner expression: \d+
    AExpression digitsAhead = new OneOrMore(digit).And();

    var visitor = new NpegParserVisitor(iterator);
    digitsAhead.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);

    // Predicates should not move the iterator once they have been evaluated.
    Assert.IsTrue(iterator.Index == 0);
}
/// <summary>
/// Checks that the character class [0-9] matches the input "0" and that
/// one-or-more of it matches "0123456789".
/// </summary>
public void Terminal_CharacterClass()
{
    AExpression digit = new CharacterClass { ClassExpression = "[0-9]" };

    // Single digit.
    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes("0"));
    var visitor = new NpegParserVisitor(iterator);
    digit.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);

    // Run of digits.
    iterator = new ByteInputIterator(Encoding.UTF8.GetBytes("0123456789"));
    visitor = new NpegParserVisitor(iterator);
    AExpression digits = new OneOrMore(digit);
    digits.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
}
/// <summary>
/// Checks Text(start, end) on a 20-byte input: the first byte, the last byte, an
/// inclusive two-byte range, and that a start greater than the end raises
/// IteratorUsageException.
/// </summary>
public void Iterator_GetText_Limit()
{
    var bytes = Encoding.UTF8.GetBytes("01234567890123456789");
    var iterator = new ByteInputIterator(bytes);

    Assert.IsTrue(Encoding.ASCII.GetBytes("0").SequenceEqual(iterator.Text(0, 0)),
                  "Text unable to return first character.");
    Assert.IsTrue(Encoding.ASCII.GetBytes("9").SequenceEqual(iterator.Text(19, 19)),
                  "Text unable to return last character.");
    Assert.IsTrue(Encoding.ASCII.GetBytes("01").SequenceEqual(iterator.Text(0, 1)),
                  "Text unable to return specified start and end characters inclusive.");

    try
    {
        iterator.Text(19, 0);
        Assert.Fail("Start must be <= End");
    }
    catch (IteratorUsageException)
    {
        // Expected: a reversed range is rejected. The exception variable is left
        // out on purpose, because an unused one triggers compiler warning CS0168.
    }
}
/// <summary>
/// Checks that a CodePoint terminal with Match "#38", wrapped in a "CodePoint"
/// capturing group, matches the input "&amp;" and captures it.
/// </summary>
public void Terminal_CodePoint_Decimal()
{
    var text = "&";
    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
    var visitor = new NpegParserVisitor(iterator);

    var ampersand = new CapturingGroup("CodePoint", new CodePoint { Match = "#38" });
    ampersand.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);

    AstNode captured = visitor.AST;
    Assert.IsTrue(captured.Token.Name == "CodePoint");
    Assert.IsTrue(captured.Token.ValueAsString(iterator) == "&");
}
/// <summary>
/// Checks that "and-predicate over digits, followed by not-any-character" fails
/// on an input that starts with digits, and that the iterator index is 0 afterwards.
/// </summary>
public void NonTerminal_Predicate_Or()
{
    AExpression digit = new CharacterClass { ClassExpression = "[0-9]" };

    var data = Encoding.UTF8.GetBytes("0123456abcdefg");
    var iterator = new ByteInputIterator(data);

    // Original author's note: equivalent to regex '^' '$', i.e. ^\d+$
    AExpression digitsThenEnd = new OneOrMore(digit)
        .And()
        .Sequence(new NotPredicate(new AnyCharacter()));

    var visitor = new NpegParserVisitor(iterator);
    digitsThenEnd.Accept(visitor);

    // The expression is expected to fail on this input.
    Assert.IsFalse(visitor.IsMatch);

    // Predicates should not move the iterator once they have been evaluated.
    Assert.IsTrue(iterator.Index == 0);
}
/// <summary>
/// Exercises AnyCharacter: a two-character sequence on 3-byte and 2-byte inputs
/// (index ends at 2), a single AnyCharacter on empty input (no match, index 0),
/// and digits followed by not-any-character on "012345." (no match).
/// </summary>
public void Terminal_Any()
{
    AExpression any;
    ByteInputIterator iterator;
    NpegParserVisitor visitor;

    // Case 1: two AnyCharacter terminals against three bytes.
    iterator = new ByteInputIterator(Encoding.UTF8.GetBytes("ijk"));
    any = new Sequence(new AnyCharacter(), new AnyCharacter());
    visitor = new NpegParserVisitor(iterator);
    any.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    Assert.IsTrue(iterator.Index == 2,
                  "Expected two characters to be consumed and Iterator updated by 2. 0, 1 .. points to 2");

    // Case 2: the same expression against exactly two bytes.
    iterator = new ByteInputIterator(Encoding.UTF8.GetBytes("ij"));
    visitor = new NpegParserVisitor(iterator);
    any.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    Assert.IsTrue(iterator.Index == 2,
                  "Expected two characters to be consumed and Iterator updated by 2. 0, 1 .. points to 2");

    // Case 3: a single AnyCharacter against empty input.
    iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(""));
    any = new AnyCharacter();
    visitor = new NpegParserVisitor(iterator);
    any.Accept(visitor);
    Assert.IsFalse(visitor.IsMatch);
    Assert.IsTrue(iterator.Index == 0, "Expected no characters to be consumed and index stay at zero.");

    // Case 4: digits that must be followed by end of input; the '.' breaks it.
    var number = new Sequence(
        new OneOrMore(new CharacterClass { ClassExpression = "[0-9]" }),
        new NotPredicate(new AnyCharacter()));
    iterator = new ByteInputIterator(Encoding.UTF8.GetBytes("012345."));
    visitor = new NpegParserVisitor(iterator);
    number.Accept(visitor);
    Assert.IsFalse(visitor.IsMatch);
}
/// <summary>
/// Checks that the character class [0-9] matches the input "0" and that
/// one-or-more of it matches "0123456789".
/// </summary>
public void Terminal_CharacterClass()
{
    AExpression digit = new CharacterClass { ClassExpression = "[0-9]" };

    // A lone digit is matched by the class itself.
    var singleDigit = new ByteInputIterator(Encoding.UTF8.GetBytes("0"));
    var visitor = new NpegParserVisitor(singleDigit);
    digit.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);

    // Ten digits are matched by OneOrMore over the class.
    var tenDigits = new ByteInputIterator(Encoding.UTF8.GetBytes("0123456789"));
    visitor = new NpegParserVisitor(tenDigits);
    AExpression repeated = new OneOrMore(digit);
    repeated.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
}
/// <summary>
/// Loads a grammar whose repetition count is the expression (C2 - C1) + 1 over two
/// previously captured bytes, and checks where the match ends for two inputs.
/// </summary>
public void PEGrammar_LimitingRepetition_VariableExpression()
{
    var grammar = @" (?<ESC_AMP_Y>): . . . (?<C1>.) (?<C2>.) ( ((?<X> .) (?<D> .{3})) ){(\k<C2> - \k<C1>)+1}; ";
    var rule = PEGrammar.Load(grammar);

    // Byte layout per the grammar: three leading bytes, C1, C2, then groups of
    // one X byte followed by three D bytes.
    var bytes = new byte[]
    {
        0x00, 0x00, 0x00,       // . . .
        0x01,                   // C1
        0x01,                   // C2
        0x00, 0x00, 0x00, 0x00  // X D D D
    };
    var iterator = new ByteInputIterator(bytes);
    var visitor = new NpegParserVisitor(iterator);
    rule.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);

    AstNode root = visitor.AST;
    Assert.IsTrue(root.Token.Name == "ESC_AMP_Y");
    // End is a zero-based index, so the whole input was consumed.
    Assert.IsTrue(root.Token.End == bytes.Length - 1);

    bytes = new byte[]
    {
        0x00, 0x00, 0x00,       // . . .
        0x01,                   // C1
        0x02,                   // C2
        0x00, 0x00, 0x00, 0x00, // X D D D
        0x00, 0x00, 0x00, 0x00, // X D D D
        0x00                    // extra trailing byte
    };
    iterator = new ByteInputIterator(bytes);
    visitor = new NpegParserVisitor(iterator);
    rule.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);

    root = visitor.AST;
    Assert.IsTrue(root.Token.Name == "ESC_AMP_Y");
    // Zero-based index again; the extra trailing byte is expected to stay unconsumed.
    Assert.IsTrue(root.Token.End == bytes.Length - 2);
}
/// <summary>
/// Loads a recursive arithmetic grammar (Value / Product / Sum / Expr) and checks
/// that a nested, parenthesised formula is matched in full.
/// </summary>
public void PEGrammar_MathematicalFormula_Recursion()
{
    // Adjacent verbatim segments are concatenated at compile time into one
    // single-line grammar string.
    var ruleText =
        @" (?<Value>): [0-9]+ / '(' Expr ')';" +
        @" (?<Product>): Value ((?<Symbol>'*' / '/') Value)*;" +
        @" (?<Sum>): Product ((?<Symbol>'+' / '-') Product)*;" +
        @" (?<Expr>): Sum;" +
        @" ";
    AExpression formula = PEGrammar.Load(ruleText);

    String input = "((((12/3)+5-2*(81/9))+1))";
    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
    var visitor = new NpegParserVisitor(iterator);
    formula.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);

    // Only the overall matched text is verified; the tree shape is not.
    AstNode root = visitor.AST;
    Assert.IsTrue(root.Token.ValueAsString(iterator) == input);
#warning does not specify expected tree
}
/// <summary>
/// Loads a grammar in which digits may be wrapped in any number of parentheses
/// and checks that "((((((123))))))" is matched in full.
/// </summary>
public void PEGrammar_RecursiveParentheses()
{
    var input = "((((((123))))))";
    var data = Encoding.UTF8.GetBytes(input);

    // The rule text is trimmed before loading, exactly as a single literal would be.
    var ruleText = (
        @" (?<DIGITS>): ([0-9])+;" +
        @" (?<ENCLOSEDDIGITS>): '(' ParethesisFunction ')';" +
        @" ParethesisFunction: (DIGITS / ENCLOSEDDIGITS);" +
        @" (?<RECURSIONTEST>): ParethesisFunction;" +
        @" ").Trim();
    AExpression rule = PEGrammar.Load(ruleText);

    var iterator = new ByteInputIterator(data);
    var visitor = new NpegParserVisitor(iterator);
    rule.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);

    AstNode root = visitor.AST;
    Assert.IsTrue(root.Token.ValueAsString(iterator) == input);
}
/// <summary>
/// Checks that a Literal for "Hello World" rejects "hello world" as constructed,
/// and accepts it once IsCaseSensitive is set to false.
/// </summary>
public void Terminal_Literal()
{
    var greeting = new Literal { MatchText = "Hello World" };

    // As constructed, the lower-case input is rejected.
    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes("hello world"));
    var visitor = new NpegParserVisitor(iterator);
    greeting.Accept(visitor);
    Assert.IsFalse(visitor.IsMatch);

    // Not case sensitive: the same input is now accepted.
    greeting.IsCaseSensitive = false;
    iterator = new ByteInputIterator(Encoding.UTF8.GetBytes("hello world"));
    visitor = new NpegParserVisitor(iterator);
    greeting.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
}
/// <summary>
/// Loads an XML-like grammar whose end tag back-references the captured Tag,
/// asserts that nested elements are matched, and then throws
/// NotImplementedException as a deliberate placeholder failure.
/// </summary>
public void PEGrammar_DynamicBackReference_Xml()
{
    // Adjacent verbatim segments are concatenated at compile time into one
    // single-line string, for both the grammar and the input.
    var grammar =
        @" (?<Tag>): [a-zA-Z0-9]+;" +
        @" (?<StartTag>): '<' Tag '>';" +
        @" (?<EndTag>): '</' \k<Tag> '>' ;" +
        @" (?<Body>): (Xml / (!EndTag .))+;" +
        @" (?<Xml>): (StartTag Body EndTag )+;" +
        @" ";

    var input = (
        @" <test>" +
        @" test data start" +
        @" <test1>" +
        @" test1 data start" +
        @" <test2>" +
        @" text2 data start" +
        @" text2 data end" +
        @" </test2>" +
        @" test1 data end" +
        @" </test1>" +
        @" test data end" +
        @" </test>" +
        @" ").Trim();

    var xmlRule = PEGrammar.Load(grammar);
    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
    var visitor = new NpegParserVisitor(iterator);
    xmlRule.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);

    AstNode node = visitor.AST;

    // Intentional placeholder: this test is meant to fail until the
    // back-referencing logic has been reworked.
    throw new NotImplementedException("Refactoring - plan on changing backreferencing logic inside NPEGParser - just placeholder of failing test for now; conserve memory");
}
/// <summary>
/// Loads a boolean-algebra grammar (gates, optionally quoted variables, inversion
/// and parenthesised sub-expressions) and asserts that each sample equation is matched.
/// </summary>
/// <remarks>
/// The samples were eleven copy-pasted stanzas; they are now driven from one list,
/// and a failing assertion reports the input that failed.
/// </remarks>
public void PEGrammar_BooleanAlgebra()
{
    // Adjacent verbatim segments are concatenated at compile time; the joined
    // text is trimmed before loading.
    String grammar = (
        @" S: [\s]+;" +
        @" (?<Gate>): ('*' / 'AND') / ('~*' / 'NAND') / ('+' / 'OR') / ('~+' / 'NOR') / ('^' / 'XOR') / ('~^' / 'XNOR');" +
        @" ValidVariable: '""' (?<Variable>[a-zA-Z0-9]+) '""' / '\'' (?<Variable>[a-zA-Z0-9]+) '\'' / (?<Variable>[a-zA-Z]);" +
        @" VarProjection1: ValidVariable / (?<Invertor>'!' ValidVariable);" +
        @" VarProjection2: VarProjection1 / '(' Expression ')' / (?<Invertor>'!' '(' Expression ')');" +
        @" Expression: S? VarProjection2 S? (Gate S? VarProjection2 S?)*;" +
        @" (?<BooleanEquation>): Expression !.;" +
        @" ").Trim();

    AExpression ROOT = PEGrammar.Load(grammar);

    var inputs = new[]
    {
        "A*!B+!A*B",                        // single variable
        "'aA'*!'bB'+!'aA'*'bB'",            // quoted variable
        "A*!B*C+!A*B*C",                    // expression + gate + variable .star()
        "((A)*(!B)+(!A)*(B))",              // parenthesis
        "((A)*!(B)+!(A)*(B))",
        "((A)*(!(B))+(!(A))*(B))",
        "(!X*Y*!Z)",
        "(!X*Y*!Z)+(!X*Y*Z)",
        "(X*Z)",
        "(!X*Y*!Z)+(!X*Y*Z)+(X*Z)",
        "((((!X*Y*Z)+(!X*Y*!Z)+(X*Z))))"
    };

    foreach (var input in inputs)
    {
        // Every input is parsed with its own iterator and visitor.
        var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
        var visitor = new NpegParserVisitor(iterator);
        ROOT.Accept(visitor);
        Assert.IsTrue(visitor.IsMatch, "Expected a match for input: " + input);

        // The AST is fetched but its value is not verified yet; see the warning below.
        AstNode node = visitor.AST;
#warning Assert.IsTrue(node.Token.Value == input);
    }
}
/// <summary>
/// Loads a grammar whose only rule is Warn&lt;'warning'&gt; and asserts that parsing
/// a single space matches and leaves exactly one entry in the visitor's Warnings.
/// </summary>
public void PEGrammar_Interpreter_Warn()
{
    var ruleText = @" (?<Value>): Warn<'warning'>; ";
    AExpression rule = PEGrammar.Load(ruleText);

    String input = " ";
    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
    var visitor = new NpegParserVisitor(iterator);
    rule.Accept(visitor);

    Assert.IsTrue(visitor.IsMatch);
    Assert.IsTrue(visitor.Warnings.Count == 1);
}
/// <summary>
/// Hand-builds a small self-referential grammar (terminals, sequences and
/// prioritized choices, with capturing groups that recurse into "Expression"),
/// parses a capturing-group input and checks the shape of the resulting AST.
/// </summary>
/// <remarks>
/// The original asserted <c>node.Children.Count == 1</c> twice in a row; the
/// redundant copy has been removed.
/// </remarks>
public void CompositeVisitor_Recursiveness()
{
    var whitespace = new CharacterClass { ClassExpression = "[ \t\r\n\v]" };

    // terminal: "." or a named capturing group that recurses into "Expression".
    var terminal = new PrioritizedChoice(
        new CapturingGroup("AnyCharacter", new Literal { MatchText = "." }),
        new CapturingGroup("CapturingGroup",
            new Sequence(
                    new Literal { MatchText = "(?<" },
                    new CapturingGroup("ReplacementNode",
                        new OneOrMore(new CharacterClass { ClassExpression = "[a-z0-9A-Z]" })))
                .Sequence(new Literal { MatchText = ">" })
                .Sequence(new RecursionCall("Expression"))
                .Sequence(new Literal { MatchText = ")" })));

    // sequence: one or more terminals, each followed by optional whitespace.
    // NOTE(review): DoReplaceBySingleChildNode presumably collapses a "Sequence"
    // node that has exactly one child - confirm against CapturingGroup.
    var sequence = new CapturingGroup("Sequence",
        new Sequence(terminal, new ZeroOrMore(whitespace)).Plus())
    {
        DoReplaceBySingleChildNode = true
    };

    // prioritizedchoice: sequence "/" sequence, followed by further "/" sequence parts.
    var prioritizedchoice = new CapturingGroup("PrioritizedChoice",
        new Sequence(sequence, new Literal { MatchText = "/" })
            .Sequence(new ZeroOrMore(whitespace))
            .Sequence(sequence)
            .Sequence(
                new ZeroOrMore(
                    new Sequence(new ZeroOrMore(whitespace), new Literal { MatchText = "/" })
                        .Sequence(new ZeroOrMore(whitespace))
                        .Sequence(sequence)
                        .Plus())));

    var expression = new CapturingGroup("Root",
        new RecursionCreate("Expression", new PrioritizedChoice(prioritizedchoice, sequence)));

    // The input spans two lines; the verbatim literal must stay exactly as written.
    var input = @"(?<NPEGNode>./.. 
)";
    var bytes = Encoding.UTF8.GetBytes(input);
    var iterator = new ByteInputIterator(bytes);
    var visitor = new NpegParserVisitor(iterator);
    expression.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);

    AstNode node = visitor.AST;
    Assert.IsTrue(node.Token.Name == "Root");
    Assert.IsTrue(node.Children.Count == 1);

    // Root -> CapturingGroup -> [ReplacementNode, PrioritizedChoice]
    Assert.IsTrue(node.Children[0].Token.Name == "CapturingGroup");
    Assert.IsTrue(node.Children[0].Children.Count == 2);
    Assert.IsTrue(node.Children[0].Children[0].Token.Name == "ReplacementNode");
    Assert.IsTrue(node.Children[0].Children[1].Token.Name == "PrioritizedChoice");

    // PrioritizedChoice -> [AnyCharacter, Sequence -> [AnyCharacter, AnyCharacter]]
    Assert.IsTrue(node.Children[0].Children[1].Children[0].Token.Name == "AnyCharacter");
    Assert.IsTrue(node.Children[0].Children[1].Children[1].Token.Name == "Sequence");
    Assert.IsTrue(node.Children[0].Children[1].Children[1].Children[0].Token.Name == "AnyCharacter");
    Assert.IsTrue(node.Children[0].Children[1].Children[1].Children[1].Token.Name == "AnyCharacter");
}
/// <summary>
/// Hand-builds the composite for a dynamic back reference token (\k&lt; label &gt;
/// with optional whitespace or comments and an optional [\i] flag) and checks
/// the AST produced for a padded label.
/// </summary>
public void CompositeVisitor_CapturingGroup_SandBoxTest_PriorityChoice1()
{
    // Newline alternatives, tried in this order.
    // NOTE(review): "\r\r" is labelled "old macs" by the original author; a
    // classic Mac newline is usually a single "\r" - confirm the intent.
    PrioritizedChoice newline = new PrioritizedChoice(
            new Literal { MatchText = "\r\n" },   // windows
            new Literal { MatchText = "\r\r" })   // old macs
        .Or(new Literal { MatchText = "\n" });    // linux

    // Single line comment: "//" followed by anything up to a newline.
    var untilNewline = new Sequence(new NotPredicate(newline), new AnyCharacter()).Star();
    var singleLineComment = new Sequence(new Literal { MatchText = "//" }, untilNewline);

    // Multiline comment: "/*" ... "*/"
    var multiLineComment = new Sequence(
        new Literal { MatchText = "/*" },
        new Sequence(new NotPredicate(new Literal { MatchText = "*/" }), new AnyCharacter())
            .Star()
            .Sequence(new Literal { MatchText = "*/" }));

    var comment = new PrioritizedChoice(singleLineComment, multiLineComment);

    // Whitespace is either a blank character or a comment.
    var whitespace = new PrioritizedChoice(
        new CharacterClass { ClassExpression = "[ \t\r\n\v]" },
        comment);

    // Label: first character from [a-zA-Z_], then any number of [a-zA-Z0-9_].
    var label = new CapturingGroup("Label",
        new Sequence(
            new CharacterClass { ClassExpression = "[a-zA-Z_]" },
            new ZeroOrMore(new CharacterClass { ClassExpression = "[a-zA-Z0-9_]" })));

    // \k< label, with optional whitespace on both sides of the label.
    var opening = new Sequence(
        new Literal { MatchText = @"\k<" },
        new Sequence(new ZeroOrMore(whitespace), label)
            .Sequence(new ZeroOrMore(whitespace)));

    // Optional "[\i]" flag, captured as "CaseSensitive".
    var caseFlag = new Optional(
        new Sequence(
            new Sequence(
                new Literal { MatchText = "[" },
                new CapturingGroup("CaseSensitive", new Literal { MatchText = @"\i" })),
            new Literal { MatchText = "]" }));

    // Optional whitespace, then the closing ">".
    var closing = new Sequence(new ZeroOrMore(whitespace), new Literal { MatchText = ">" });

    var backreference = new CapturingGroup("DynamicBackReferencing",
        opening.Sequence(caseFlag).Sequence(closing));

    // The back reference must be followed by end of input.
    var root = new CapturingGroup("Test",
        new Sequence(backreference, new NotPredicate(new AnyCharacter())));

    var input = @"\k< CapturedLabelVariableName >";
    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
    var visitor = new NpegParserVisitor(iterator);
    root.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);

    AstNode tree = visitor.AST;
    Assert.IsTrue(tree.Token.Name == "Test");
    Assert.IsTrue(tree.Children[0].Token.Name == "DynamicBackReferencing");
    Assert.IsTrue(tree.Children[0].Children[0].Token.Name == "Label");
    Assert.IsTrue(tree.Children[0].Children[0].Token.ValueAsString(iterator) == "CapturedLabelVariableName");
}
/// <summary>
/// Builds two recursive rules, one nested inside the other (parentheses around angle
/// brackets around digits), parses "(((&lt;&lt;&lt;123&gt;&gt;&gt;)))" and checks that the AST is a
/// single chain of three "PENCLOSED" nodes followed by three "LTENCLOSED" nodes.
/// </summary>
public void CompositeVisitor_NestedRecursive()
{
    #region Composite

    CapturingGroup digits = new CapturingGroup("DIGITS",
        new OneOrMore(new CharacterClass { ClassExpression = "[0-9]" }));

    // "<" recursion ">"
    var angleWrapped = new Sequence(
            new Literal { MatchText = "<" },
            new RecursionCall("RECURSIONLTENCLOSED"))
        .Sequence(new Literal { MatchText = ">" });
    RecursionCreate angleRule = new RecursionCreate("RECURSIONLTENCLOSED",
        new PrioritizedChoice(digits, new CapturingGroup("LTENCLOSED", angleWrapped)));

    // "(" recursion ")"
    var parenWrapped = new Sequence(
            new Literal { MatchText = "(" },
            new RecursionCall("RECURSIONPENCLOSED"))
        .Sequence(new Literal { MatchText = ")" });
    RecursionCreate parenRule = new RecursionCreate("RECURSIONPENCLOSED",
        new PrioritizedChoice(angleRule, new CapturingGroup("PENCLOSED", parenWrapped)));

    AExpression grammar = new CapturingGroup("NESTEDRECURSIONTEST", parenRule);

    #endregion

    string text = "(((<<<123>>>)))";
    byte[] data = Encoding.UTF8.GetBytes(text);
    ByteInputIterator iterator = new ByteInputIterator(data);
    NpegParserVisitor visitor = new NpegParserVisitor(iterator);
    grammar.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);

    AstNode ast = visitor.AST;
    Assert.IsTrue(ast.Token.ValueAsString(iterator) == text);
    Assert.IsTrue(ast.Token.Name == "NESTEDRECURSIONTEST");

    // Walk down the chain: every level has exactly one child carrying the expected name.
    string[] expectedChain =
    {
        "PENCLOSED", "PENCLOSED", "PENCLOSED",
        "LTENCLOSED", "LTENCLOSED", "LTENCLOSED"
    };
    AstNode cursor = ast;
    foreach (string expectedName in expectedChain)
    {
        Assert.IsTrue(cursor.Children.Count == 1);
        cursor = cursor.Children[0];
        Assert.IsTrue(cursor.Token.Name == expectedName);
    }

    // The innermost LTENCLOSED node still has one child.
    Assert.IsTrue(cursor.Children.Count == 1);
}
/// <summary>
/// Exercises the hexadecimal form of <c>CodePoint</c>: an exact byte match, patterns with an
/// odd number of hex digits, the "X" don't-care digit, and inputs that must not match.
/// </summary>
public void Terminal_CodePoint_Hexadecimal()
{
    // Sanity check of the byte value used by the patterns below.
    Assert.IsTrue((Byte) 'a' == 97);
    Assert.IsTrue((Byte) 'a' == 0x61);

    // Exact single-byte match.
    {
        string text = "a";
        byte[] data = Encoding.UTF8.GetBytes(text);
        ByteInputIterator iterator = new ByteInputIterator(data);
        NpegParserVisitor visitor = new NpegParserVisitor(iterator);
        CapturingGroup rule = new CapturingGroup("Hexadecimal", new CodePoint {Match = "#x61"});
        rule.Accept(visitor);
        Assert.IsTrue(visitor.IsMatch);
        AstNode ast = visitor.AST;
        Assert.IsTrue(ast.Token.Name == "Hexadecimal");
        Assert.IsTrue(ast.Token.ValueAsString(iterator) == "a");
    }

    // Byte boundary tests: three hex digits against a two-byte input.
    {
        string text = "\na";
        byte[] data = Encoding.UTF8.GetBytes(text);
        ByteInputIterator iterator = new ByteInputIterator(data);
        NpegParserVisitor visitor = new NpegParserVisitor(iterator);
        CapturingGroup rule = new CapturingGroup("Hexadecimal", new CodePoint {Match = "#xA61"});
        rule.Accept(visitor);
        Assert.IsTrue(visitor.IsMatch,
            "During incomplete byte boundaries 0 is expected to prefix input; This would shift input to the right by 4 bits. In this case it complete codepoint should be 0A = \n and letter a.");
        AstNode ast = visitor.AST;
        Assert.IsTrue(ast.Token.Name == "Hexadecimal");
        Assert.IsTrue(ast.Token.ValueAsString(iterator) == "\na");
    }

    {
        string text = "\0a";
        byte[] data = Encoding.UTF8.GetBytes(text);
        ByteInputIterator iterator = new ByteInputIterator(data);
        NpegParserVisitor visitor = new NpegParserVisitor(iterator);
        CapturingGroup rule = new CapturingGroup("Hexadecimal", new CodePoint {Match = "#x061"});
        rule.Accept(visitor);
        Assert.IsTrue(visitor.IsMatch,
            "During incomplete byte boundaries 0 is expected to prefix input; This would shift input to the right by 4 bits. In this case it complete codepoint should be 00 = \0 and letter a.");
        AstNode ast = visitor.AST;
        Assert.IsTrue(ast.Token.Name == "Hexadecimal");
        Assert.IsTrue(ast.Token.ValueAsString(iterator) == "\0a");
    }

    // Don't care tests: the expected capture is the first three bytes of the input.
    {
        byte[] data = new byte[] {0x11, 0x01, 0x71, 0x03, 0x00};
        ByteInputIterator iterator = new ByteInputIterator(data);
        NpegParserVisitor visitor = new NpegParserVisitor(iterator);
        CapturingGroup rule = new CapturingGroup("Hexadecimal",
            new OneOrMore(new CodePoint {Match = "#xX1"}) // #bXXXX0001
        );
        rule.Accept(visitor);
        Assert.IsTrue(visitor.IsMatch);
        AstNode ast = visitor.AST;
        Assert.IsTrue(ast.Token.Name == "Hexadecimal");
        Assert.IsTrue(ast.Token.ValueAsString(iterator) ==
                      Encoding.ASCII.GetString(new byte[] {0x11, 0x01, 0x71}));
    }

    // 0x10 is expected not to match the #xX1 pattern.
    {
        ByteInputIterator iterator = new ByteInputIterator(new byte[] { 0x10 });
        NpegParserVisitor visitor = new NpegParserVisitor(iterator);
        CapturingGroup rule = new CapturingGroup("Hexadecimal", new CodePoint {Match = "#xX1"});
        rule.Accept(visitor);
        Assert.IsFalse(visitor.IsMatch);
    }

    // cannot consume character test
    {
        string text = string.Empty;
        ByteInputIterator iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
        NpegParserVisitor visitor = new NpegParserVisitor(iterator);
        CapturingGroup rule = new CapturingGroup("Hexadecimal", new CodePoint {Match = "#xX1"});
        rule.Accept(visitor);
        Assert.IsFalse(visitor.IsMatch);
    }
}
/// <summary>
/// Verifies hexadecimal <c>CodePoint</c> matching: a plain byte, patterns with an odd number
/// of hex digits, patterns containing the "X" don't-care digit, and two non-matching inputs.
/// </summary>
public void Terminal_CodePoint_Hexadecimal()
{
    Assert.IsTrue((Byte)'a' == 97);
    Assert.IsTrue((Byte)'a' == 0x61);

    // Plain byte.
    string source = "a";
    byte[] raw = Encoding.UTF8.GetBytes(source);
    ByteInputIterator reader = new ByteInputIterator(raw);
    NpegParserVisitor parser = new NpegParserVisitor(reader);
    CapturingGroup hexRule = new CapturingGroup("Hexadecimal",
        new CodePoint { Match = "#x61" });
    hexRule.Accept(parser);
    Assert.IsTrue(parser.IsMatch);
    AstNode tree = parser.AST;
    Assert.IsTrue(tree.Token.Name == "Hexadecimal");
    Assert.IsTrue(tree.Token.ValueAsString(reader) == "a");

    // Byte boundary tests
    source = "\na";
    raw = Encoding.UTF8.GetBytes(source);
    reader = new ByteInputIterator(raw);
    parser = new NpegParserVisitor(reader);
    hexRule = new CapturingGroup("Hexadecimal",
        new CodePoint { Match = "#xA61" });
    hexRule.Accept(parser);
    Assert.IsTrue(parser.IsMatch,
        "During incomplete byte boundaries 0 is expected to prefix input; This would shift input to the right by 4 bits. In this case it complete codepoint should be 0A = \n and letter a.");
    tree = parser.AST;
    Assert.IsTrue(tree.Token.Name == "Hexadecimal");
    Assert.IsTrue(tree.Token.ValueAsString(reader) == "\na");

    source = "\0a";
    raw = Encoding.UTF8.GetBytes(source);
    reader = new ByteInputIterator(raw);
    parser = new NpegParserVisitor(reader);
    hexRule = new CapturingGroup("Hexadecimal",
        new CodePoint { Match = "#x061" });
    hexRule.Accept(parser);
    Assert.IsTrue(parser.IsMatch,
        "During incomplete byte boundaries 0 is expected to prefix input; This would shift input to the right by 4 bits. In this case it complete codepoint should be 00 = \0 and letter a.");
    tree = parser.AST;
    Assert.IsTrue(tree.Token.Name == "Hexadecimal");
    Assert.IsTrue(tree.Token.ValueAsString(reader) == "\0a");

    // Don't care tests
    raw = new byte[] { 0x11, 0x01, 0x71, 0x03, 0x00 };
    reader = new ByteInputIterator(raw);
    parser = new NpegParserVisitor(reader);
    hexRule = new CapturingGroup("Hexadecimal",
        new OneOrMore(new CodePoint { Match = "#xX1" }) // #bXXXX0001
    );
    hexRule.Accept(parser);
    Assert.IsTrue(parser.IsMatch);
    tree = parser.AST;
    Assert.IsTrue(tree.Token.Name == "Hexadecimal");
    byte[] expectedPrefix = new byte[] { 0x11, 0x01, 0x71 };
    Assert.IsTrue(tree.Token.ValueAsString(reader) == Encoding.ASCII.GetString(expectedPrefix));

    // A single 0x10 byte is expected not to match.
    reader = new ByteInputIterator(new byte[] { 0x10 });
    parser = new NpegParserVisitor(reader);
    hexRule = new CapturingGroup("Hexadecimal",
        new CodePoint { Match = "#xX1" });
    hexRule.Accept(parser);
    Assert.IsFalse(parser.IsMatch);

    // cannot consume character test
    source = string.Empty;
    reader = new ByteInputIterator(Encoding.UTF8.GetBytes(source));
    parser = new NpegParserVisitor(reader);
    hexRule = new CapturingGroup("Hexadecimal",
        new CodePoint { Match = "#xX1" });
    hexRule.Accept(parser);
    Assert.IsFalse(parser.IsMatch);
}
/// <summary>
/// Verifies the binary form of <c>CodePoint</c>: whole-byte patterns, patterns whose bit
/// count is not a multiple of eight, the "X" don't-care bit, and non-matching inputs.
/// </summary>
public void Terminal_CodePoint_Binary()
{
    Assert.IsTrue((Byte)'a' == 97);
    Assert.IsTrue((Byte)'a' == 0x61);

    // Seven bits against a single byte.
    string text = "a";
    byte[] data = Encoding.UTF8.GetBytes(text);
    ByteInputIterator iterator = new ByteInputIterator(data);
    NpegParserVisitor visitor = new NpegParserVisitor(iterator);
    CapturingGroup rule = new CapturingGroup("Binary",
        new CodePoint { Match = "#b1100001" });
    rule.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    AstNode result = visitor.AST;
    Assert.IsTrue(result.Token.Name == "Binary");
    Assert.IsTrue(result.Token.ValueAsString(iterator) == "a");

    // Sixteen bits against two bytes.
    text = "aa";
    data = Encoding.UTF8.GetBytes(text);
    iterator = new ByteInputIterator(data);
    visitor = new NpegParserVisitor(iterator);
    rule = new CapturingGroup("Binary",
        new CodePoint { Match = "#b0110000101100001" });
    rule.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    result = visitor.AST;
    Assert.IsTrue(result.Token.Name == "Binary");
    Assert.IsTrue(result.Token.ValueAsString(iterator) == "aa");

    // Byte boundary tests
    text = "\0a";
    data = Encoding.UTF8.GetBytes(text);
    iterator = new ByteInputIterator(data);
    visitor = new NpegParserVisitor(iterator);
    rule = new CapturingGroup("Binary",
        new CodePoint { Match = "#b00001100001" });
    rule.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch,
        "During incomplete byte boundaries 0 is expected to prefix input; This would shift input to the right by 4 bits. In this case it complete codepoint should be null and letter a.");
    result = visitor.AST;
    Assert.IsTrue(result.Token.Name == "Binary");
    Assert.IsTrue(result.Token.ValueAsString(iterator) == "\0a");

    // Same input, matched by two consecutive code points.
    text = "\0a";
    data = Encoding.UTF8.GetBytes(text);
    iterator = new ByteInputIterator(data);
    visitor = new NpegParserVisitor(iterator);
    rule = new CapturingGroup("Binary",
        new Sequence(
            new CodePoint { Match = "#b000" },
            new CodePoint { Match = "#b01100001" }));
    rule.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch,
        "During incomplete byte boundaries 0 is expected to prefix input; This would shift input to the right by 4 bits. In this case it complete codepoint should be null and letter a.");
    result = visitor.AST;
    Assert.IsTrue(result.Token.Name == "Binary");
    Assert.IsTrue(result.Token.ValueAsString(iterator) == "\0a");

    // Don't care tests: the expected capture is the first three bytes of the input.
    text = Encoding.ASCII.GetString(new byte[] { 0x11, 0x01, 0x71, 0x03, 0x00 });
    data = Encoding.UTF8.GetBytes(text);
    iterator = new ByteInputIterator(data);
    visitor = new NpegParserVisitor(iterator);
    rule = new CapturingGroup("Binary",
        new OneOrMore(new CodePoint { Match = "#bXXXX0001" }) // #bXXXX0001
    );
    rule.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    result = visitor.AST;
    Assert.IsTrue(result.Token.Name == "Binary");
    Assert.IsTrue(result.Token.ValueAsString(iterator) ==
                  Encoding.ASCII.GetString(new byte[] { 0x11, 0x01, 0x71 }));

    // A single 0x10 byte is expected not to match.
    text = Encoding.ASCII.GetString(new byte[] { 0x10 });
    data = Encoding.UTF8.GetBytes(text);
    iterator = new ByteInputIterator(data);
    visitor = new NpegParserVisitor(iterator);
    rule = new CapturingGroup("Binary", new CodePoint { Match = "#bXXXX0001" });
    rule.Accept(visitor);
    Assert.IsFalse(visitor.IsMatch);

    // cannot consume character test
    text = "";
    data = Encoding.UTF8.GetBytes(text);
    iterator = new ByteInputIterator(data);
    visitor = new NpegParserVisitor(iterator);
    rule = new CapturingGroup("Binary", new CodePoint { Match = "#bXXXX0001" });
    rule.Accept(visitor);
    Assert.IsFalse(visitor.IsMatch);
}
/// <summary>
/// Runs <c>LimitingRepetition</c> over ten digits with several Min/Max combinations and
/// expects an <see cref="ArgumentException"/> for the two invalid configurations
/// (no bounds at all, and Max smaller than Min).
/// </summary>
public void Terminal_LimitingRepetition()
{
    // min
    // min max
    // max
    // math expression using back referencing {(\k<C2> - \k<C1>)+1} - variable length protocols
    AExpression digit = new CharacterClass { ClassExpression = "[0-9]" };

    #region nonterminals

    CapturingGroup atLeastZero = new CapturingGroup("MinTrue",
        new LimitingRepetition(digit) { Min = 0 });
    CapturingGroup atLeastFortyFour = new CapturingGroup("MinFalse",
        new LimitingRepetition(digit) { Min = 44 });
    CapturingGroup atLeastFive = new CapturingGroup("MinTrue",
        new LimitingRepetition(digit) { Min = 5 });
    CapturingGroup atMostFive = new CapturingGroup("MaxTrue",
        new LimitingRepetition(digit) { Max = 5 });
    CapturingGroup fiveToSix = new CapturingGroup("MinMax",
        new LimitingRepetition(digit) { Min = 5, Max = 6 });
    CapturingGroup unbounded = new CapturingGroup("ExceptionNoMinMax",
        new LimitingRepetition(digit));
    CapturingGroup maxBelowMin = new CapturingGroup("ExceptionMaxLessThanMin",
        new LimitingRepetition(digit) { Min = 5, Max = 0 });

    #endregion

    String text = "1234567890";
    byte[] data = Encoding.UTF8.GetBytes(text);
    ByteInputIterator iterator = new ByteInputIterator(data);

    NpegParserVisitor visitor = new NpegParserVisitor(iterator);
    atLeastZero.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    AstNode ast = visitor.AST;

    // The iterator is shared, so it is rewound before every further run.
    iterator.Index = 0;
    visitor = new NpegParserVisitor(iterator);
    atLeastFortyFour.Accept(visitor);
    Assert.IsFalse(visitor.IsMatch);

    iterator.Index = 0;
    visitor = new NpegParserVisitor(iterator);
    atLeastFive.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    ast = visitor.AST;
    Assert.IsTrue(ast.Token.ValueAsString(iterator) == text);

    iterator.Index = 0;
    visitor = new NpegParserVisitor(iterator);
    atMostFive.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    ast = visitor.AST;

    iterator.Index = 0;
    visitor = new NpegParserVisitor(iterator);
    fiveToSix.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    ast = visitor.AST;

    // Both invalid configurations must raise ArgumentException.
    Int32 argumentErrors = 0;

    try
    {
        iterator.Index = 0;
        visitor = new NpegParserVisitor(iterator);
        unbounded.Accept(visitor);
        Assert.IsTrue(visitor.IsMatch);
        ast = visitor.AST;
    }
    catch (ArgumentException)
    {
        argumentErrors += 1;
    }

    try
    {
        iterator.Index = 0;
        visitor = new NpegParserVisitor(iterator);
        maxBelowMin.Accept(visitor);
        Assert.IsTrue(visitor.IsMatch);
        ast = visitor.AST;
    }
    catch (ArgumentException)
    {
        argumentErrors += 1;
    }

    Assert.IsTrue(argumentErrors == 2);
}
/// <summary>
/// Builds a boolean-algebra grammar (variables, inversion, parentheses and six gate kinds)
/// from expression objects, checks the AST shape for two seven-node equations, and then
/// checks that a set of further equations is matched to the end of the input.
/// </summary>
public void PracticalExample_BooleanAlgebra()
{
    #region Composite

    //AND: */AND
    AExpression andGate = new PrioritizedChoice(
        new Literal { MatchText = "*" }, new Literal { MatchText = "AND" });
    //NAND: ~*/NAND
    AExpression nandGate = new PrioritizedChoice(
        new Literal { MatchText = "~*" }, new Literal { MatchText = "NAND" });
    //OR: +/OR
    AExpression orGate = new PrioritizedChoice(
        new Literal { MatchText = "+" }, new Literal { MatchText = "OR" });
    //NOR: ~+/NOR
    AExpression norGate = new PrioritizedChoice(
        new Literal { MatchText = "~+" }, new Literal { MatchText = "NOR" });
    //XOR: ^/XOR
    AExpression xorGate = new PrioritizedChoice(
        new Literal { MatchText = "^" }, new Literal { MatchText = "XOR" });
    //XNOR: ~^/XNOR
    AExpression xnorGate = new PrioritizedChoice(
        new Literal { MatchText = "~^" }, new Literal { MatchText = "XNOR" });

    AExpression gate = new CapturingGroup("GATE",
        new PrioritizedChoice(andGate, nandGate)
            .Or(orGate)
            .Or(norGate)
            .Or(xorGate)
            .Or(xnorGate));

    // Variable: "[a-zA-Z0-9]+" / '[a-zA-Z0-9]+' / [a-zA-Z]
    AExpression plainVariable = new PrioritizedChoice(
            new Sequence(
                    new Literal { MatchText = "\"" },
                    new CapturingGroup("VARIABLE",
                        new OneOrMore(new CharacterClass { ClassExpression = "[a-zA-Z0-9]" })))
                .Sequence(new Literal { MatchText = "\"" }),
            new Sequence(
                    new Literal { MatchText = "'" },
                    new CapturingGroup("VARIABLE",
                        new OneOrMore(new CharacterClass { ClassExpression = "[a-zA-Z0-9]" })))
                .Sequence(new Literal { MatchText = "'" }))
        .Or(
            new CapturingGroup("VARIABLE", new CharacterClass { ClassExpression = "[a-zA-Z]" }));

    // Variable: Variable / !Variable
    AExpression variableOrInverted = new PrioritizedChoice(
        plainVariable,
        new CapturingGroup("INVERTOR",
            new Sequence(new Literal { MatchText = "!" }, plainVariable)));

    // Variable: Variable / Expression / !Expression
    AExpression operand = new PrioritizedChoice(
            variableOrInverted,
            new Sequence(
                    new Literal { MatchText = "(" },
                    new RecursionCall("RECURSIONEXPRESSION"))
                .Sequence(new Literal { MatchText = ")" }))
        .Or(
            new CapturingGroup("INVERTOR",
                new Sequence(
                    new Literal { MatchText = "!" },
                    new Sequence(
                            new Literal { MatchText = "(" },
                            new RecursionCall("RECURSIONEXPRESSION"))
                        .Sequence(new Literal { MatchText = ")" }))));

    AExpression equation = new CapturingGroup("BOOLEANEQUATION",
        new Sequence(
            new RecursionCreate("RECURSIONEXPRESSION",
                //Expression: Variable ((AND|NAND|OR|NOR|XOR|XNOR) Variable)*
                new Sequence(operand, new Sequence(gate, operand).Star())),
            // ensure reaches end of file
            new NotPredicate(new AnyCharacter())));

    #endregion

    // Equations of the shape  a * !b + !a * b  whose AST is inspected node by node.
    // Each entry is { input, expected text of a, expected text of b }.
    string[][] inspectedCases =
    {
        new[] { "A*!B+!A*B", "A", "B" }, // single variable
        new[] { "'aA'*!'bB'+!'aA'*'bB'", "aA", "bB" } // quoted variable
    };

    foreach (string[] inspected in inspectedCases)
    {
        string text = inspected[0];
        string first = inspected[1];
        string second = inspected[2];

        byte[] data = Encoding.UTF8.GetBytes(text);
        ByteInputIterator iterator = new ByteInputIterator(data);
        NpegParserVisitor visitor = new NpegParserVisitor(iterator);
        equation.Accept(visitor);
        Assert.IsTrue(visitor.IsMatch);

        AstNode ast = visitor.AST;
        Assert.IsTrue(ast.Token.Name == "BOOLEANEQUATION");
        Assert.IsTrue(ast.Children[0].Token.Name == "VARIABLE");
        Assert.IsTrue(ast.Children[0].Token.ValueAsString(iterator) == first);
        Assert.IsTrue(ast.Children[1].Token.Name == "GATE");
        Assert.IsTrue(ast.Children[1].Token.ValueAsString(iterator) == "*");
        Assert.IsTrue(ast.Children[2].Token.Name == "INVERTOR");
        Assert.IsTrue(ast.Children[2].Children[0].Token.Name == "VARIABLE");
        Assert.IsTrue(ast.Children[2].Children[0].Token.ValueAsString(iterator) == second);
        Assert.IsTrue(ast.Children[3].Token.Name == "GATE");
        Assert.IsTrue(ast.Children[4].Token.Name == "INVERTOR");
        Assert.IsTrue(ast.Children[4].Children[0].Token.Name == "VARIABLE");
        Assert.IsTrue(ast.Children[4].Children[0].Token.ValueAsString(iterator) == first);
        Assert.IsTrue(ast.Children[5].Token.Name == "GATE");
        Assert.IsTrue(ast.Children[6].Token.Name == "VARIABLE");
        Assert.IsTrue(ast.Children[6].Token.ValueAsString(iterator) == second);
    }

    // Equations that only have to be matched completely.
    string[] matchOnlyCases =
    {
        "A*!B*C+!A*B*C", // expression + gate + variable .star()
        "((A)*(!B)+(!A)*(B))", // parethesis
        "((A)*!(B)+!(A)*(B))",
        "((A)*(!(B))+(!(A))*(B))",
        ("(!X*Y*!Z)"),
        ("(!X*Y*!Z)+(!X*Y*Z)"),
        ("(X*Z)"),
        ("(!X*Y*!Z)+(!X*Y*Z)+(X*Z)"),
        ("((((!X*Y*Z)+(!X*Y*!Z)+(X*Z))))")
    };

    foreach (string text in matchOnlyCases)
    {
        byte[] data = Encoding.UTF8.GetBytes(text);
        ByteInputIterator iterator = new ByteInputIterator(data);
        NpegParserVisitor visitor = new NpegParserVisitor(iterator);
        equation.Accept(visitor);
        Assert.IsTrue(visitor.IsMatch);
        AstNode ast = visitor.AST;
#warning Assert.IsTrue(node.Token.Value == input);
    }
}
/// <summary>
/// Matches two consecutive tag pairs where each closing tag refers back to the captured
/// "TAG" through a case-sensitive <c>DynamicBackReference</c>. Only the match result is
/// asserted; the AST is read but not inspected.
/// </summary>
public void Terminal_DynamicBackReference()
{
    #region Composite

    AExpression tagName = new CapturingGroup("TAG",
        new OneOrMore(new CharacterClass {ClassExpression = "[a-zA-Z0-9]"}));

    // "<" TAG ">"
    AExpression openingTag = new CapturingGroup("START_TAG",
        new Sequence(new Literal {MatchText = "<"}, tagName)
            .Sequence(new Literal {MatchText = ">"}));

    // "</" (text captured as TAG) ">"
    AExpression closingTag = new CapturingGroup("END_TAG",
        new Sequence(
                new Literal {MatchText = "</"},
                new DynamicBackReference
                {
                    BackReferenceName = "TAG",
                    IsCaseSensitive = true
                })
            .Sequence(new Literal {MatchText = ">"}));

    // Any characters up to the position where the closing tag would match.
    AExpression content = new CapturingGroup("Body",
        new Sequence(new NotPredicate(closingTag), new AnyCharacter()).Star());

    AExpression document = new CapturingGroup("Expression",
        new Sequence(openingTag, content).Sequence(closingTag).Plus());

    #endregion

    String markup = "<h1>hello</h1><h2>hello</h2>";
    byte[] data = Encoding.UTF8.GetBytes(markup);
    ByteInputIterator iterator = new ByteInputIterator(data);
    NpegParserVisitor visitor = new NpegParserVisitor(iterator);
    document.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    AstNode tree = visitor.AST;
#warning write tree
}
/// <summary>
/// Builds a small self-referential grammar (a named capturing group whose body is again an
/// expression) and checks the AST produced for a group that contains a prioritized choice
/// between one and two "any character" terminals.
/// </summary>
public void CompositeVisitor_Recursiveness()
{
    CharacterClass blank = new CharacterClass { ClassExpression = "[ \t\r\n\v]" };

    CapturingGroup anyCharacter = new CapturingGroup("AnyCharacter",
        new Literal { MatchText = "." });

    // "(?<" name ">" Expression ")"
    var namedGroupBody = new Sequence(
            new Literal { MatchText = "(?<" },
            new CapturingGroup("ReplacementNode",
                new OneOrMore(new CharacterClass { ClassExpression = "[a-z0-9A-Z]" })))
        .Sequence(new Literal { MatchText = ">" })
        .Sequence(new RecursionCall("Expression"))
        .Sequence(new Literal { MatchText = ")" });

    PrioritizedChoice terminal = new PrioritizedChoice(
        anyCharacter,
        new CapturingGroup("CapturingGroup", namedGroupBody));

    // One or more terminals, each optionally followed by whitespace.
    CapturingGroup sequence = new CapturingGroup("Sequence",
        new Sequence(terminal, new ZeroOrMore(blank)).Plus())
    {
        DoReplaceBySingleChildNode = true
    };

    // sequence "/" sequence, optionally followed by further "/" sequence groups.
    var furtherAlternatives = new ZeroOrMore(
        new Sequence(
                new ZeroOrMore(blank),
                new Literal { MatchText = "/" })
            .Sequence(new ZeroOrMore(blank))
            .Sequence(sequence)
            .Plus());

    CapturingGroup prioritizedChoice = new CapturingGroup("PrioritizedChoice",
        new Sequence(
                sequence,
                new Literal { MatchText = "/" })
            .Sequence(new ZeroOrMore(blank))
            .Sequence(sequence)
            .Sequence(furtherAlternatives));

    CapturingGroup grammar = new CapturingGroup("Root",
        new RecursionCreate("Expression",
            new PrioritizedChoice(prioritizedChoice, sequence)));

    // The verbatim literal spans two physical lines; the continuation stays at column 0 so
    // that no indentation ends up inside the input.
    string text = @"(?<NPEGNode>./.. 
)";
    byte[] data = Encoding.UTF8.GetBytes(text);
    ByteInputIterator iterator = new ByteInputIterator(data);
    NpegParserVisitor visitor = new NpegParserVisitor(iterator);
    grammar.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);

    AstNode ast = visitor.AST;
    Assert.IsTrue(ast.Token.Name == "Root");
    Assert.IsTrue(ast.Children.Count == 1);
    Assert.IsTrue(ast.Children.Count == 1);

    var group = ast.Children[0];
    Assert.IsTrue(group.Token.Name == "CapturingGroup");
    Assert.IsTrue(group.Children.Count == 2);
    Assert.IsTrue(group.Children[0].Token.Name == "ReplacementNode");

    var choice = group.Children[1];
    Assert.IsTrue(choice.Token.Name == "PrioritizedChoice");
    Assert.IsTrue(choice.Children[0].Token.Name == "AnyCharacter");

    var secondAlternative = choice.Children[1];
    Assert.IsTrue(secondAlternative.Token.Name == "Sequence");
    Assert.IsTrue(secondAlternative.Children[0].Token.Name == "AnyCharacter");
    Assert.IsTrue(secondAlternative.Children[1].Token.Name == "AnyCharacter");
}
/// <summary>
/// Matches nested tag pairs: the body of an element may recurse into the whole rule, and
/// each closing tag refers back to the captured "TAG" through a case-sensitive
/// <c>DynamicBackReference</c>. Only the match result is asserted.
/// </summary>
public void Terminal_DynamicBackReference_Recursive()
{
    String markup = @" <test> test data start <test1> test1 data start <test2> text2 data start text2 data end </test2> test1 data end </test1> test data end </test> ";

    CapturingGroup tagName = new CapturingGroup("TAG",
        new OneOrMore(new CharacterClass {ClassExpression = "[a-zA-Z0-9]"}));

    // "<" TAG ">"
    CapturingGroup openingTag = new CapturingGroup("START_TAG",
        new Sequence(new Literal {MatchText = "<"}, tagName)
            .Sequence(new Literal {MatchText = ">"}));

    // "</" (text captured as TAG) ">"
    CapturingGroup closingTag = new CapturingGroup("END_TAG",
        new Sequence(
                new Literal {MatchText = "</"},
                new DynamicBackReference
                {
                    BackReferenceName = "TAG",
                    IsCaseSensitive = true
                })
            .Sequence(new Literal {MatchText = ">"}));

    // Either a nested element, or one character that does not start the closing tag.
    CapturingGroup content = new CapturingGroup("Body",
        new PrioritizedChoice(
                new RecursionCall("MATCHXML"),
                new Sequence(new NotPredicate(closingTag), new AnyCharacter()))
            .Star());

    CapturingGroup document = new CapturingGroup("Expression",
        new RecursionCreate("MATCHXML",
            new Sequence(openingTag, content)
                .Sequence(closingTag)
                .Plus()));

    // Surrounding whitespace is trimmed before parsing.
    byte[] data = Encoding.UTF8.GetBytes(markup.Trim());
    ByteInputIterator iterator = new ByteInputIterator(data);
    NpegParserVisitor visitor = new NpegParserVisitor(iterator);
    document.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    AstNode tree = visitor.AST;
}
/// <summary>
/// Builds an expression rule that accepts a terminal with prefixes, a terminal with
/// suffixes, or a bare terminal, and checks that the input "." produces an "Expression"
/// node with a single "AnyCharacter" child.
/// </summary>
public void CompositeVisitor_CapturingGroup_SandBoxTest_PriorityChoice3()
{
    PrioritizedChoice prefix = new PrioritizedChoice(
        new CapturingGroup("AndPredicate", new Literal {MatchText = "&"}),
        new CapturingGroup("NotPredicate", new Literal {MatchText = "!"}));

    PrioritizedChoice suffix = new PrioritizedChoice(
            new CapturingGroup("ZeroOrMore", new Literal {MatchText = "*"}),
            new CapturingGroup("OneOrMore", new Literal {MatchText = "+"}))
        .Or(new CapturingGroup("Optional", new Literal {MatchText = "?"}));

    CapturingGroup terminal = new CapturingGroup("AnyCharacter", new Literal {MatchText = "."});

    // match prefixes first
    var prefixedTerminal = prefix.Plus().Sequence(terminal);
    // match suffixes next
    var suffixedTerminal = terminal.Sequence(suffix.Plus());
    // a bare terminal is the last alternative
    var alternatives = new PrioritizedChoice(prefixedTerminal, suffixedTerminal).Or(terminal);

    CapturingGroup expression = new CapturingGroup("Expression", alternatives.Plus());

    string text = ".";
    byte[] data = Encoding.UTF8.GetBytes(text);
    ByteInputIterator iterator = new ByteInputIterator(data);
    NpegParserVisitor visitor = new NpegParserVisitor(iterator);
    expression.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);

    AstNode ast = visitor.AST;
    Assert.IsTrue(ast.Children.Count == 1);
    Assert.IsTrue(ast.Token.Name == "Expression");
    Assert.IsTrue(ast.Token.ValueAsString(iterator) == ".");
    Assert.IsTrue(ast.Children[0].Token.Name == "AnyCharacter");
}
/// <summary>
/// Applies <c>LimitingRepetition</c> with different Min/Max settings to a ten-digit input
/// and expects the two invalid settings (no bounds, Max below Min) to throw
/// <see cref="ArgumentException"/>.
/// </summary>
public void Terminal_LimitingRepetition()
{
    // min
    // min max
    // max
    // math expression using back referencing {(\k<C2> - \k<C1>)+1} - variable length protocols
    AExpression anyDigit = new CharacterClass {ClassExpression = "[0-9]"};

    #region nonterminals

    CapturingGroup minZero = new CapturingGroup("MinTrue",
        new LimitingRepetition(anyDigit) {Min = 0});

    CapturingGroup minTooLarge = new CapturingGroup("MinFalse",
        new LimitingRepetition(anyDigit) {Min = 44});

    CapturingGroup minFive = new CapturingGroup("MinTrue",
        new LimitingRepetition(anyDigit) {Min = 5});

    CapturingGroup maxFive = new CapturingGroup("MaxTrue",
        new LimitingRepetition(anyDigit) {Max = 5});

    CapturingGroup minFiveMaxSix = new CapturingGroup("MinMax",
        new LimitingRepetition(anyDigit) {Min = 5, Max = 6});

    CapturingGroup noBounds = new CapturingGroup("ExceptionNoMinMax",
        new LimitingRepetition(anyDigit));

    CapturingGroup invertedBounds = new CapturingGroup("ExceptionMaxLessThanMin",
        new LimitingRepetition(anyDigit) {Min = 5, Max = 0});

    #endregion

    String digits = "1234567890";
    ByteInputIterator iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(digits));
    NpegParserVisitor visitor = new NpegParserVisitor(iterator);

    minZero.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    AstNode tree = visitor.AST;

    // Every following run rewinds the shared iterator and uses a fresh visitor.
    iterator.Index = 0;
    visitor = new NpegParserVisitor(iterator);
    minTooLarge.Accept(visitor);
    Assert.IsFalse(visitor.IsMatch);

    iterator.Index = 0;
    visitor = new NpegParserVisitor(iterator);
    minFive.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    tree = visitor.AST;
    Assert.IsTrue(tree.Token.ValueAsString(iterator) == digits);

    iterator.Index = 0;
    visitor = new NpegParserVisitor(iterator);
    maxFive.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    tree = visitor.AST;

    iterator.Index = 0;
    visitor = new NpegParserVisitor(iterator);
    minFiveMaxSix.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    tree = visitor.AST;

    // Each invalid rule gets its own try/catch so one failure does not hide the other.
    Int32 thrown = 0;
    foreach (CapturingGroup invalidRule in new[] {noBounds, invertedBounds})
    {
        try
        {
            iterator.Index = 0;
            visitor = new NpegParserVisitor(iterator);
            invalidRule.Accept(visitor);
            Assert.IsTrue(visitor.IsMatch);
            tree = visitor.AST;
        }
        catch (ArgumentException)
        {
            thrown++;
        }
    }

    Assert.IsTrue(thrown == 2);
}
/// <summary>
/// Nests one recursive rule (angle brackets) inside another (parentheses) and checks that
/// "(((&lt;&lt;&lt;123&gt;&gt;&gt;)))" produces three "PENCLOSED" nodes wrapping three "LTENCLOSED"
/// nodes, each with exactly one child.
/// </summary>
public void CompositeVisitor_NestedRecursive()
{
    #region Composite

    CapturingGroup number = new CapturingGroup("DIGITS",
        new OneOrMore(new CharacterClass {ClassExpression = "[0-9]"}));

    RecursionCreate inAngles = new RecursionCreate("RECURSIONLTENCLOSED",
        new PrioritizedChoice(
            number,
            new CapturingGroup("LTENCLOSED",
                new Sequence(
                        new Literal {MatchText = "<"},
                        new RecursionCall("RECURSIONLTENCLOSED"))
                    .Sequence(new Literal {MatchText = ">"}))));

    RecursionCreate inParens = new RecursionCreate("RECURSIONPENCLOSED",
        new PrioritizedChoice(
            inAngles,
            new CapturingGroup("PENCLOSED",
                new Sequence(
                        new Literal {MatchText = "("},
                        new RecursionCall("RECURSIONPENCLOSED"))
                    .Sequence(new Literal {MatchText = ")"}))));

    AExpression rootRule = new CapturingGroup("NESTEDRECURSIONTEST", inParens);

    #endregion

    string text = "(((<<<123>>>)))";
    ByteInputIterator iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
    NpegParserVisitor visitor = new NpegParserVisitor(iterator);
    rootRule.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);

    AstNode top = visitor.AST;
    Assert.IsTrue(top.Token.ValueAsString(iterator) == text);
    Assert.IsTrue(top.Token.Name == "NESTEDRECURSIONTEST");
    Assert.IsTrue(top.Children.Count == 1);

    // Three levels of parentheses.
    var paren1 = top.Children[0];
    Assert.IsTrue(paren1.Token.Name == "PENCLOSED");
    Assert.IsTrue(paren1.Children.Count == 1);

    var paren2 = paren1.Children[0];
    Assert.IsTrue(paren2.Token.Name == "PENCLOSED");
    Assert.IsTrue(paren2.Children.Count == 1);

    var paren3 = paren2.Children[0];
    Assert.IsTrue(paren3.Token.Name == "PENCLOSED");
    Assert.IsTrue(paren3.Children.Count == 1);

    // Three levels of angle brackets.
    var angle1 = paren3.Children[0];
    Assert.IsTrue(angle1.Token.Name == "LTENCLOSED");
    Assert.IsTrue(angle1.Children.Count == 1);

    var angle2 = angle1.Children[0];
    Assert.IsTrue(angle2.Token.Name == "LTENCLOSED");
    Assert.IsTrue(angle2.Children.Count == 1);

    var angle3 = angle2.Children[0];
    Assert.IsTrue(angle3.Token.Name == "LTENCLOSED");
    Assert.IsTrue(angle3.Children.Count == 1);
}
/// <summary>
/// Checks that a <c>Literal</c> rejects input that differs only in letter case, and accepts
/// the same input once <c>IsCaseSensitive</c> is set to false.
/// </summary>
public void Terminal_Literal()
{
    Literal greeting = new Literal {MatchText = "Hello World"};

    // Differs from MatchText only in letter case: expected not to match.
    string lowerCased = "hello world";
    ByteInputIterator iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(lowerCased));
    NpegParserVisitor visitor = new NpegParserVisitor(iterator);
    greeting.Accept(visitor);
    Assert.IsFalse(visitor.IsMatch);

    // Not case sensitive
    greeting.IsCaseSensitive = false;
    lowerCased = "hello world";
    iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(lowerCased));
    visitor = new NpegParserVisitor(iterator);
    greeting.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
}
/// <summary>
/// Loads a textual grammar for nested tag pairs that uses a back reference for the closing
/// tag and asserts that a three-level document matches. The method then always throws
/// <see cref="NotImplementedException"/>: it is kept as a deliberately failing placeholder.
/// </summary>
public void PEGrammar_DynamicBackReference_Xml()
{
    string rules = @" (?<Tag>): [a-zA-Z0-9]+; (?<StartTag>): '<' Tag '>'; (?<EndTag>): '</' \k<Tag> '>' ; (?<Body>): (Xml / (!EndTag .))+; (?<Xml>): (StartTag Body EndTag )+; ";

    string document = @" <test> test data start <test1> test1 data start <test2> text2 data start text2 data end </test2> test1 data end </test1> test data end </test> ".Trim();

    var parserRoot = PEGrammar.Load(rules);
    ByteInputIterator iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(document));
    NpegParserVisitor visitor = new NpegParserVisitor(iterator);
    parserRoot.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);
    AstNode tree = visitor.AST;

    // Intentional: marks the test as failing until the back-referencing logic is reworked.
    throw new NotImplementedException("Refactoring - plan on changing backreferencing logic inside NPEGParser - just placeholder of failing test for now; conserve memory");
}
/// <summary>
/// Hand-builds a recursive XML-like grammar in which the end tag is a
/// <c>DynamicBackReference</c> to the captured "TAG" group, then asserts that a
/// three-level nested document (trimmed before parsing) matches.
/// </summary>
public void Terminal_DynamicBackReference_Recursive()
{
    String document = @" <test> test data start <test1> test1 data start <test2> text2 data start text2 data end </test2> test1 data end </test1> test data end </test> ";

    // TAG: [a-zA-Z0-9]+
    var tagCharacters = new OneOrMore(new CharacterClass { ClassExpression = "[a-zA-Z0-9]" });
    var tag = new CapturingGroup("TAG", tagCharacters);

    // START_TAG: '<' TAG '>'
    var startTag = new CapturingGroup("START_TAG",
        new Sequence(new Literal { MatchText = "<" }, tag)
            .Sequence(new Literal { MatchText = ">" }));

    // END_TAG: '</' \k<TAG> '>'
    var tagBackReference = new DynamicBackReference
    {
        BackReferenceName = "TAG",
        IsCaseSensitive = true
    };
    var endTag = new CapturingGroup("END_TAG",
        new Sequence(new Literal { MatchText = "</" }, tagBackReference)
            .Sequence(new Literal { MatchText = ">" }));

    // Body: (MATCHXML / (!END_TAG .))*
    var anyCharacterButEndTag = new Sequence(new NotPredicate(endTag), new AnyCharacter());
    var body = new CapturingGroup("Body",
        new PrioritizedChoice(new RecursionCall("MATCHXML"), anyCharacterButEndTag)
            .Star());

    // Expression: MATCHXML := (START_TAG Body END_TAG)+
    var element = new Sequence(startTag, body).Sequence(endTag);
    var expression = new CapturingGroup("Expression",
        new RecursionCreate("MATCHXML", element.Plus()));

    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(document.Trim()));
    var visitor = new NpegParserVisitor(iterator);
    expression.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);

    // The AST is read but not inspected further by this test.
    AstNode ast = visitor.AST;
}
/// <summary>
/// Loads a grammar whose repetition count is the expression
/// <c>(\k&lt;C2&gt; - \k&lt;C1&gt;)+1</c> over two captured bytes, and checks where the
/// ESC_AMP_Y token ends for two inputs: one where all nine bytes are covered,
/// and one with a trailing byte that is expected to stay unconsumed.
/// </summary>
public void PEGrammar_LimitingRepetition_VariableExpression()
{
    var grammarText = @" (?<ESC_AMP_Y>): . . . (?<C1>.) (?<C2>.) ( ((?<X> .) (?<D> .{3})) ){(\k<C2> - \k<C1>)+1}; ";
    var root = PEGrammar.Load(grammarText);

    // Case 1: C1 == C2, the token is asserted to end on the last byte.
    var payload = new byte[]
    {
        0x00, 0x00, 0x00, // . . .
        0x01,             // C1
        0x01,             // C2
        0x00,             // X
        0x00, 0x00, 0x00  // D D D
    };
    var iterator = new ByteInputIterator(payload);
    var visitor = new NpegParserVisitor(iterator);
    root.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);

    AstNode ast = visitor.AST;
    Assert.IsTrue(ast.Token.Name == "ESC_AMP_Y");
    Assert.IsTrue(ast.Token.End == payload.Length - 1); // zero index

    // Case 2: C2 - C1 == 1, followed by two X/D groups and one extra byte.
    payload = new byte[]
    {
        0x00, 0x00, 0x00,       // . . .
        0x01,                   // C1
        0x02,                   // C2
        0x00, 0x00, 0x00, 0x00, // X D D D
        0x00, 0x00, 0x00, 0x00, // X D D D
        0x00
    };
    iterator = new ByteInputIterator(payload);
    visitor = new NpegParserVisitor(iterator);
    root.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);

    ast = visitor.AST;
    Assert.IsTrue(ast.Token.Name == "ESC_AMP_Y");
    // zero index - expect additional character to not be consumed
    Assert.IsTrue(ast.Token.End == payload.Length - 2);
}
/// <summary>
/// Hand-builds the expression for the <c>\k&lt; label [\i] &gt;</c> back-reference
/// syntax (with optional whitespace or comments around the label) and checks that
/// <c>\k&lt; CapturedLabelVariableName &gt;</c> parses to
/// Test -> DynamicBackReferencing -> Label with the expected label text.
/// </summary>
public void CompositeVisitor_CapturingGroup_SandBoxTest_PriorityChoice1()
{
    var windowsNewline = new Literal { MatchText = "\r\n" }; // windows
    var oldMacNewline = new Literal { MatchText = "\r\r" };  // old macs
    var linuxNewline = new Literal { MatchText = "\n" };     // linux
    PrioritizedChoice newline = new PrioritizedChoice(windowsNewline, oldMacNewline)
        .Or(linuxNewline);

    // Single Line Comment: '//' (!newline .)*
    var untilNewline = new Sequence(new NotPredicate(newline), new AnyCharacter()).Star();
    var singleLineComment = new Sequence(new Literal { MatchText = "//" }, untilNewline);

    // Multiline Comment: '/*' (!'*/' .)* '*/'
    var untilCommentClose = new Sequence(
            new NotPredicate(new Literal { MatchText = "*/" }),
            new AnyCharacter())
        .Star()
        .Sequence(new Literal { MatchText = "*/" });
    var multiLineComment = new Sequence(new Literal { MatchText = "/*" }, untilCommentClose);

    var comment = new PrioritizedChoice(singleLineComment, multiLineComment);

    var whitespace = new PrioritizedChoice(
        new CharacterClass { ClassExpression = "[ \t\r\n\v]" },
        comment);

    // must start with alpha character
    var labelStart = new CharacterClass { ClassExpression = "[a-zA-Z_]" };
    var labelRest = new ZeroOrMore(new CharacterClass { ClassExpression = "[a-zA-Z0-9_]" });
    var label = new CapturingGroup("Label", new Sequence(labelStart, labelRest));

    // '\k<' whitespace* Label whitespace*
    var paddedLabel = new Sequence(new ZeroOrMore(whitespace), label)
        .Sequence(new ZeroOrMore(whitespace));
    var opening = new Sequence(new Literal { MatchText = @"\k<" }, paddedLabel);

    // ( '[' CaseSensitive:'\i' ']' )?
    var caseFlag = new CapturingGroup("CaseSensitive", new Literal { MatchText = @"\i" });
    var bracketedFlag = new Sequence(
        new Sequence(new Literal { MatchText = "[" }, caseFlag),
        new Literal { MatchText = "]" });

    // whitespace* '>'
    var closing = new Sequence(new ZeroOrMore(whitespace), new Literal { MatchText = ">" });

    var backreference = new CapturingGroup("DynamicBackReferencing",
        opening
            .Sequence(new Optional(bracketedFlag))
            .Sequence(closing));

    // The NotPredicate over AnyCharacter requires the input to be fully consumed.
    var root = new CapturingGroup("Test",
        new Sequence(backreference, new NotPredicate(new AnyCharacter())));

    var input = @"\k< CapturedLabelVariableName >";
    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
    var visitor = new NpegParserVisitor(iterator);
    root.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);

    AstNode ast = visitor.AST;
    Assert.IsTrue(ast.Token.Name == "Test");
    Assert.IsTrue(ast.Children[0].Token.Name == "DynamicBackReferencing");
    Assert.IsTrue(ast.Children[0].Children[0].Token.Name == "Label");
    Assert.IsTrue(ast.Children[0].Children[0].Token.ValueAsString(iterator) == "CapturedLabelVariableName");
}
/// <summary>
/// Hand-builds an arithmetic grammar (value, product, sum, with parenthesised
/// sub-expressions resolved through the "ParethesisFunction" recursion) and
/// checks that "((((12/3)+5-2*(81/9))+1))" matches and that the root token
/// spans the whole input.
/// </summary>
public void PracticalExample_MathematicalFormula()
{
    #region Composite

    // VALUE: [0-9]+ / '(' ParethesisFunction ')'
    var number = new CapturingGroup("VALUE",
        new OneOrMore(new CharacterClass { ClassExpression = "[0-9]" }));
    var parenthesised = new Sequence(
            new Literal { MatchText = "(" },
            new RecursionCall("ParethesisFunction"))
        .Sequence(new Literal { MatchText = ")" });
    var value = new PrioritizedChoice(number, parenthesised);

    // PRODUCT: VALUE (('*' / '/') VALUE)*
    var multiplicativeSymbol = new CapturingGroup("SYMBOL",
        new PrioritizedChoice(
            new Literal { MatchText = "*" },
            new Literal { MatchText = "/" }));
    var product = new Sequence(
        value,
        new Sequence(multiplicativeSymbol, value).Star());

    // SUM: PRODUCT (('+' / '-') PRODUCT)*
    var additiveSymbol = new CapturingGroup("SYMBOL",
        new PrioritizedChoice(
            new Literal { MatchText = "+" },
            new Literal { MatchText = "-" }));
    var sum = new Sequence(
        product,
        new Sequence(additiveSymbol, product).Star());

    AExpression expression = new RecursionCreate("ParethesisFunction",
        new CapturingGroup("EXPRESSION", sum));

    #endregion

    var formula = "((((12/3)+5-2*(81/9))+1))";
    var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(formula));
    var visitor = new NpegParserVisitor(iterator);
    expression.Accept(visitor);
    Assert.IsTrue(visitor.IsMatch);

    AstNode ast = visitor.AST;
    Assert.IsTrue(ast.Token.ValueAsString(iterator) == formula);
}
/// <summary>
/// Exercises <c>CodePoint</c> with binary ("#b…") match strings: a single byte,
/// two bytes, patterns that are not a whole number of bytes, patterns containing
/// 'X' don't-care bits, and an empty input where nothing can be consumed.
/// </summary>
public void Terminal_CodePoint_Binary()
{
    Assert.IsTrue((Byte) 'a' == 97);
    Assert.IsTrue((Byte) 'a' == 0x61);

    // Single byte: 'a'
    {
        var text = "a";
        var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
        var visitor = new NpegParserVisitor(iterator);
        var rule = new CapturingGroup("Binary", new CodePoint { Match = "#b1100001" });
        rule.Accept(visitor);
        Assert.IsTrue(visitor.IsMatch);

        AstNode ast = visitor.AST;
        Assert.IsTrue(ast.Token.Name == "Binary");
        Assert.IsTrue(ast.Token.ValueAsString(iterator) == "a");
    }

    // Two bytes: "aa"
    {
        var text = "aa";
        var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
        var visitor = new NpegParserVisitor(iterator);
        var rule = new CapturingGroup("Binary", new CodePoint { Match = "#b0110000101100001" });
        rule.Accept(visitor);
        Assert.IsTrue(visitor.IsMatch);

        AstNode ast = visitor.AST;
        Assert.IsTrue(ast.Token.Name == "Binary");
        Assert.IsTrue(ast.Token.ValueAsString(iterator) == "aa");
    }

    // Byte boundary tests: one code point whose bit count is not a multiple of 8
    {
        var text = "\0a";
        var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
        var visitor = new NpegParserVisitor(iterator);
        var rule = new CapturingGroup("Binary", new CodePoint { Match = "#b00001100001" });
        rule.Accept(visitor);
        Assert.IsTrue(visitor.IsMatch, "During incomplete byte boundaries 0 is expected to prefix input; This would shift input to the right by 4 bits. In this case it complete codepoint should be null and letter a.");

        AstNode ast = visitor.AST;
        Assert.IsTrue(ast.Token.Name == "Binary");
        Assert.IsTrue(ast.Token.ValueAsString(iterator) == "\0a");
    }

    // Byte boundary tests: a short code point followed by a full-byte one
    {
        var text = "\0a";
        var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
        var visitor = new NpegParserVisitor(iterator);
        var rule = new CapturingGroup("Binary",
            new Sequence(
                new CodePoint { Match = "#b000" },
                new CodePoint { Match = "#b01100001" }));
        rule.Accept(visitor);
        Assert.IsTrue(visitor.IsMatch, "During incomplete byte boundaries 0 is expected to prefix input; This would shift input to the right by 4 bits. In this case it complete codepoint should be null and letter a.");

        AstNode ast = visitor.AST;
        Assert.IsTrue(ast.Token.Name == "Binary");
        Assert.IsTrue(ast.Token.ValueAsString(iterator) == "\0a");
    }

    // Don't care tests: only the first three bytes are expected in the capture
    {
        var text = Encoding.ASCII.GetString(new byte[] { 0x11, 0x01, 0x71, 0x03, 0x00 });
        var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
        var visitor = new NpegParserVisitor(iterator);
        var rule = new CapturingGroup("Binary",
            new OneOrMore(new CodePoint { Match = "#bXXXX0001" }));
        rule.Accept(visitor);
        Assert.IsTrue(visitor.IsMatch);

        AstNode ast = visitor.AST;
        Assert.IsTrue(ast.Token.Name == "Binary");
        Assert.IsTrue(ast.Token.ValueAsString(iterator) == Encoding.ASCII.GetString(new byte[] { 0x11, 0x01, 0x71 }));
    }

    // Don't care tests: 0x10 must not match #bXXXX0001
    {
        var text = Encoding.ASCII.GetString(new byte[] { 0x10 });
        var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
        var visitor = new NpegParserVisitor(iterator);
        var rule = new CapturingGroup("Binary", new CodePoint { Match = "#bXXXX0001" });
        rule.Accept(visitor);
        Assert.IsFalse(visitor.IsMatch);
    }

    // cannot consume character test
    {
        var text = "";
        var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
        var visitor = new NpegParserVisitor(iterator);
        var rule = new CapturingGroup("Binary", new CodePoint { Match = "#bXXXX0001" });
        rule.Accept(visitor);
        Assert.IsFalse(visitor.IsMatch);
    }
}