public void PEGrammarParser_Comment()
		{
			// Expression under test is assembled from the "mComment" rule through the
			// OneOrMore / RequireEndOfInput / WrapInCapturedGroup helpers.
			AExpression commentRule = OneOrMore(GetRule("mComment"));
			AExpression grammar = WrapInCapturedGroup("Test", RequireEndOfInput(commentRule));

			// Single-line, one-line block, and multi-line block comment samples.
			var samples = new[]
			{
				@"//this is a single line comment.",
				@"/*this is a multiline comment.*/",
				@"/*
                        this 
                        is 
                        a   multiline 
                        comment.
                    */"
			};

			// Every sample is parsed with a fresh iterator/visitor pair and must match.
			foreach (var sample in samples)
			{
				var sampleIterator = new ByteInputIterator(Encoding.UTF8.GetBytes(sample));
				var sampleVisitor = new NpegParserVisitor(sampleIterator);
				grammar.Accept(sampleVisitor);
				Assert.IsTrue(sampleVisitor.IsMatch);
			}
		}
        public void PEGrammar_LimitingRepetition()
        {
            // Grammar text uses the {3,3} and {4} repetition forms for the digit groups.
            var phoneGrammar = PEGrammar.Load(
                @"
                                (?<ThreeDigitCode>): [0-9]{3,3};
                                (?<PhoneNumber>): ThreeDigitCode '-' ThreeDigitCode '-' (?<FourDigitCode>[0-9]{4});
                              ");

            var phoneText = "123-456-7890";
            var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(phoneText));
            var visitor = new NpegParserVisitor(iterator);
            phoneGrammar.Accept(visitor);
            Assert.IsTrue(visitor.IsMatch);

            // The root capture is expected to cover the whole input.
            AstNode root = visitor.AST;
            Assert.IsTrue(root.Token.Name == "PhoneNumber");
            Assert.IsTrue(root.Token.ValueAsString(iterator) == phoneText);

            // Child captures are checked in input order: name first, then matched text.
            var childNames = new[] { "ThreeDigitCode", "ThreeDigitCode", "FourDigitCode" };
            var childValues = new[] { "123", "456", "7890" };
            for (int i = 0; i < childNames.Length; i++)
            {
                Assert.IsTrue(root.Children[i].Token.Name == childNames[i]);
                Assert.IsTrue(root.Children[i].Token.ValueAsString(iterator) == childValues[i]);
            }
        }
        public void PEGrammar_PhoneNumber()
        {
            var phoneText = "123-456-7890";

            // Each digit group is spelled out as a sequence of single [0-9] classes.
            var grammarText =
                @"
                        (?<ThreeDigitCode>): [0-9] [0-9] [0-9];
                        (?<FourDigitCode>): [0-9] [0-9] [0-9] [0-9];
                        (?<PhoneNumber>): ThreeDigitCode '-' ThreeDigitCode '-' FourDigitCode;
                    "
                .Trim();
            var phoneGrammar = PEGrammar.Load(grammarText);

            var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(phoneText));
            var visitor = new NpegParserVisitor(iterator);
            phoneGrammar.Accept(visitor);
            Assert.IsTrue(visitor.IsMatch);

            // The root capture is expected to cover the whole input.
            AstNode root = visitor.AST;
            Assert.IsTrue(root.Token.Name == "PhoneNumber");
            Assert.IsTrue(root.Token.ValueAsString(iterator) == phoneText);

            // Child captures are checked in input order: name first, then matched text.
            var childNames = new[] { "ThreeDigitCode", "ThreeDigitCode", "FourDigitCode" };
            var childValues = new[] { "123", "456", "7890" };
            for (int i = 0; i < childNames.Length; i++)
            {
                Assert.IsTrue(root.Children[i].Token.Name == childNames[i]);
                Assert.IsTrue(root.Children[i].Token.ValueAsString(iterator) == childValues[i]);
            }
        }
        public void Iterator_Index()
        {
            var data = Encoding.UTF8.GetBytes("01234567890123456789");
            var cursor = new ByteInputIterator(data);

            // A fresh iterator starts at index 0 and reports the input length.
            Assert.IsTrue(cursor.Index == 0);
            Assert.IsTrue(cursor.Length == data.Length);

            var lastPosition = data.Length - 1;

            // Forward pass: Next() is called on every position except the last one.
            for (int pos = 0; pos <= lastPosition; pos++)
            {
                Assert.IsTrue(cursor.Index == pos);
                Assert.IsTrue(cursor.Current() == data[pos]);
                if (pos != lastPosition)
                {
                    Assert.IsTrue(cursor.Next() == data[pos + 1]);
                }
            }

            // Backward pass: Previous() is called on every position except the first one.
            for (int pos = lastPosition; pos >= 0; pos--)
            {
                Assert.IsTrue(cursor.Index == pos);
                Assert.IsTrue(cursor.Current() == data[pos]);
                if (pos != 0)
                {
                    Assert.IsTrue(cursor.Previous() == data[pos - 1]);
                }
            }
        }
Example #5
0
        public void Iterator_Index()
        {
            var payload = Encoding.UTF8.GetBytes("01234567890123456789");
            var walker = new ByteInputIterator(payload);

            // Initial state: index 0, length equal to the byte count.
            Assert.IsTrue(walker.Index == 0);
            Assert.IsTrue(walker.Length == payload.Length);

            var finalIndex = payload.Length - 1;

            // Step forward through the input, verifying index and current byte each time.
            for (int at = 0; at <= finalIndex; at++)
            {
                Assert.IsTrue(walker.Index == at);
                Assert.IsTrue(walker.Current() == payload[at]);
                if (at != finalIndex)
                {
                    Assert.IsTrue(walker.Next() == payload[at + 1]);
                }
            }

            // Step backward to the start, verifying index and current byte each time.
            for (int at = finalIndex; at >= 0; at--)
            {
                Assert.IsTrue(walker.Index == at);
                Assert.IsTrue(walker.Current() == payload[at]);
                if (at != 0)
                {
                    Assert.IsTrue(walker.Previous() == payload[at - 1]);
                }
            }
        }
        public void Terminal_DynamicBackReference()
        {
            #region Composite

            // TAG: capture of one or more characters from [a-zA-Z0-9].
            AExpression tagName = new CapturingGroup(
                "TAG",
                new OneOrMore(new CharacterClass { ClassExpression = "[a-zA-Z0-9]" }));

            // START_TAG: "<" TAG ">".
            AExpression openTag = new CapturingGroup(
                "START_TAG",
                new Sequence(new Literal { MatchText = "<" }, tagName)
                    .Sequence(new Literal { MatchText = ">" }));

            // END_TAG: "</", a case-sensitive back reference named "TAG", then ">".
            AExpression closeTag = new CapturingGroup(
                "END_TAG",
                new Sequence(
                        new Literal { MatchText = "</" },
                        new DynamicBackReference { BackReferenceName = "TAG", IsCaseSensitive = true })
                    .Sequence(new Literal { MatchText = ">" }));

            // Body: zero or more characters, each one guarded by "not END_TAG".
            AExpression body = new CapturingGroup(
                "Body",
                new Sequence(new NotPredicate(closeTag), new AnyCharacter()).Star());

            // Expression: one or more START_TAG Body END_TAG groups.
            AExpression document = new CapturingGroup(
                "Expression",
                new Sequence(openTag, body).Sequence(closeTag).Plus());

            #endregion

            var markup = "<h1>hello</h1><h2>hello</h2>";
            var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(markup));
            var visitor = new NpegParserVisitor(iterator);

            document.Accept(visitor);
            Assert.IsTrue(visitor.IsMatch);

            // The tree is fetched but not yet verified (see the warning below).
            AstNode tree = visitor.AST;
#warning write tree
        }
Example #7
0
        public void CompositeVisitor_CapturingGroup_SandBoxTest_PriorityChoice3()
        {
            // Prefix operators: "&" or "!".
            var prefixOps = new PrioritizedChoice(
                new CapturingGroup("AndPredicate", new Literal { MatchText = "&" }),
                new CapturingGroup("NotPredicate", new Literal { MatchText = "!" }));

            // Suffix operators: "*", "+" or "?".
            PrioritizedChoice suffixOps = new PrioritizedChoice(
                    new CapturingGroup("ZeroOrMore", new Literal { MatchText = "*" }),
                    new CapturingGroup("OneOrMore", new Literal { MatchText = "+" }))
                .Or(new CapturingGroup("Optional", new Literal { MatchText = "?" }));

            var dot = new CapturingGroup("AnyCharacter", new Literal { MatchText = "." });

            // Alternatives, in priority order: prefixed terminal, suffixed terminal, bare terminal.
            var prefixedTerminal = prefixOps.Plus().Sequence(dot);
            var suffixedTerminal = dot.Sequence(suffixOps.Plus());
            var grammar = new CapturingGroup(
                "Expression",
                new PrioritizedChoice(prefixedTerminal, suffixedTerminal)
                    .Or(dot)
                    .Plus());

            var text = ".";
            var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
            var visitor = new NpegParserVisitor(iterator);
            grammar.Accept(visitor);
            Assert.IsTrue(visitor.IsMatch);

            // A lone "." is expected to produce exactly one AnyCharacter child.
            AstNode root = visitor.AST;
            Assert.IsTrue(root.Children.Count == 1);
            Assert.IsTrue(root.Token.Name == "Expression");
            Assert.IsTrue(root.Token.ValueAsString(iterator) == ".");
            Assert.IsTrue(root.Children[0].Token.Name == "AnyCharacter");
        }
		public void PEGrammarParser_NewLine()
		{
			// Expression under test: the "mNewLine" rule wrapped by the OneOrMore /
			// RequireEndOfInput / WrapInCapturedGroup helpers.
			AExpression newLineRule = OneOrMore(GetRule("mNewLine"));
			AExpression grammar = WrapInCapturedGroup("Test", RequireEndOfInput(newLineRule));

			// The input mixes "\n", "\r\n" and "\r" line endings.
			var lineBreaks = "\n\n\r\n\r\r";
			var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(lineBreaks));
			var visitor = new NpegParserVisitor(iterator);

			grammar.Accept(visitor);
			Assert.IsTrue(visitor.IsMatch);
		}
		public void PEGrammarParser_Space()
		{
			// Expression under test: the "mSpace" rule wrapped by the OneOrMore /
			// RequireEndOfInput / WrapInCapturedGroup helpers.
			AExpression spaceRule = OneOrMore(GetRule("mSpace"));
			AExpression grammar = WrapInCapturedGroup("Test", RequireEndOfInput(spaceRule));

			// The input is a mix of blanks and tab characters only.
			var blanks = " \t         \t\t           \t";
			var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(blanks));
			var visitor = new NpegParserVisitor(iterator);

			grammar.Accept(visitor);
			Assert.IsTrue(visitor.IsMatch);
		}
Example #10
0
        public void Iterator_OutofRange()
        {
            // An iterator over empty input.
            var cursor = new ByteInputIterator(Encoding.UTF8.GetBytes(""));

            Assert.IsTrue(cursor.Index == 0);
            Assert.IsTrue(cursor.Length == 0);

            // Current/Next/Previous are each expected to return -1 and leave Index at 0.
            // The calls are kept in this exact order because Next/Previous act on iterator state.
            Assert.IsTrue(cursor.Current() == -1);
            Assert.IsTrue(cursor.Index == 0);

            Assert.IsTrue(cursor.Next() == -1);
            Assert.IsTrue(cursor.Index == 0);

            Assert.IsTrue(cursor.Previous() == -1);
            Assert.IsTrue(cursor.Index == 0);
        }
        public void PracticalExample_PhoneNumber()
        {
            #region terminals

            AExpression digit = new CharacterClass { ClassExpression = "[0-9]" };
            AExpression hyphen = new Literal { MatchText = "-" };

            #endregion

            #region nonterminals

            // Three and four consecutive digits, each wrapped in its own capture.
            AExpression threeDigits = new CapturingGroup(
                "ThreeDigitCode",
                new Sequence(digit, digit).Sequence(digit));

            AExpression fourDigits = new CapturingGroup(
                "FourDigitCode",
                new Sequence(digit, digit).Sequence(digit).Sequence(digit));

            // ThreeDigitCode '-' ThreeDigitCode '-' FourDigitCode
            AExpression phoneNumber = new CapturingGroup(
                "PhoneNumber",
                new Sequence(threeDigits, hyphen)
                    .Sequence(threeDigits)
                    .Sequence(hyphen)
                    .Sequence(fourDigits));

            #endregion

            var phoneText = "123-456-7890";

            // Parse using the hand-built composite above.
            var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(phoneText));
            var visitor = new NpegParserVisitor(iterator);
            phoneNumber.Accept(visitor);
            Assert.IsTrue(visitor.IsMatch);

            AstNode root = visitor.AST;
            Assert.IsTrue(root.Token.Name == "PhoneNumber");
            Assert.IsTrue(root.Token.ValueAsString(iterator) == "123-456-7890");

            // Child captures are checked in input order: name first, then matched text.
            var childNames = new[] { "ThreeDigitCode", "ThreeDigitCode", "FourDigitCode" };
            var childValues = new[] { "123", "456", "7890" };
            for (int i = 0; i < childNames.Length; i++)
            {
                Assert.IsTrue(root.Children[i].Token.Name == childNames[i]);
                Assert.IsTrue(root.Children[i].Token.ValueAsString(iterator) == childValues[i]);
            }
        }
Example #12
0
        public void Iterator_Initialization()
        {
            var data = Encoding.UTF8.GetBytes("01234567890123456789");
            var cursor = new ByteInputIterator(data);

            // A new iterator sits at the zero-based start of the input.
            Assert.IsTrue(cursor.Index == 0);
            Assert.IsTrue(cursor.Length == 20);

            // One step forward and one step back around the first byte.
            Assert.IsTrue(cursor.Current() == '0');
            Assert.IsTrue(cursor.Next() == '1');
            Assert.IsTrue(cursor.Previous() == '0');

            // Text(0, 19) is expected to return all twenty bytes.
            Assert.IsTrue(data.SequenceEqual(cursor.Text(0, 19)), "Text unable to return complete input.");
        }
        public void Iterator_Initialization()
        {
            var source = "01234567890123456789";
            var payload = Encoding.UTF8.GetBytes(source);
            var walker = new ByteInputIterator(payload);

            // Starting state: zero-based index 0 and a length of 20.
            Assert.IsTrue(walker.Index == 0);
            Assert.IsTrue(walker.Length == 20);

            // Current, then Next, then Previous around the first byte.
            Assert.IsTrue(walker.Current() == '0');
            Assert.IsTrue(walker.Next() == '1');
            Assert.IsTrue(walker.Previous() == '0');

            // The 0..19 range is expected to reproduce the full input.
            var wholeInput = walker.Text(0, 19);
            Assert.IsTrue(payload.SequenceEqual(wholeInput), "Text unable to return complete input.");
        }
Example #14
0
        public void NonTerminal_Predicate_And()
        {
            // Expectation: an and-predicate reports a match but leaves the iterator where it began.
            AExpression digit = new CharacterClass { ClassExpression = "[0-9]" };

            // OneOrMore(digit) corresponds to the regex \d+; And() wraps it in the predicate.
            AExpression lookahead = new OneOrMore(digit).And();

            var cursor = new ByteInputIterator(Encoding.UTF8.GetBytes("01234567890123456789"));
            var parser = new NpegParserVisitor(cursor);
            lookahead.Accept(parser);

            Assert.IsTrue(parser.IsMatch);
            Assert.IsTrue(cursor.Index == 0);
        }
Example #15
0
        public static AExpression Load(String rules)
        {
            // Parse the rule text with the root PEG expression; AST nodes come from PeGrammarAstNodeFactory.
            var grammarRoot = RootPegExpression();
            var ruleBytes = Encoding.UTF8.GetBytes(rules);
            var input = new ByteInputIterator(ruleBytes);
            var parser = new NpegParserVisitor(input, new PeGrammarAstNodeFactory(input));

            grammarRoot.Accept(parser);

            // Rule text that does not match the root expression is reported as InvalidRuleException.
            if (!parser.IsMatch)
            {
                throw new InvalidRuleException();
            }

            // On a match, the root AST node is cast to InterpreterAstNode and its Expression returned.
            return ((InterpreterAstNode)parser.AST).Expression;
        }
Example #16
0
        public void NonTerminal_Predicate_Or()
        {
            // Expectation: predicates do not move the iterator, whether or not the overall match succeeds.
            AExpression digit = new CharacterClass { ClassExpression = "[0-9]" };

            // And-predicate over \d+ followed by "not any character".
            // The original author describes this as the regex ^\d+$.
            AExpression digitsThenEnd = new OneOrMore(digit).And().Sequence(new NotPredicate(new AnyCharacter()));

            var cursor = new ByteInputIterator(Encoding.UTF8.GetBytes("0123456abcdefg"));
            var parser = new NpegParserVisitor(cursor);
            digitsThenEnd.Accept(parser);

            // The match is expected to fail, with the iterator still at the start.
            Assert.IsFalse(parser.IsMatch);
            Assert.IsTrue(cursor.Index == 0);
        }
        public void PEGrammar_Interpreter_CodePoint()
        {
            // Grammar with a single rule written as the code point #x20.
            var grammarText =
                @"
                    (?<Value>): #x20;
                ";
            AExpression grammar = PEGrammar.Load(grammarText);

            // The input is a single space character.
            var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(" "));
            var visitor = new NpegParserVisitor(iterator);

            grammar.Accept(visitor);
            Assert.IsTrue(visitor.IsMatch);
        }
        public void PEGrammar_Literal()
        {
            var lowered = "hello world";

            // Literal without a modifier: the lower-case input is expected not to match.
            AExpression strict = PEGrammar.Load(@"(?<Expression>): 'Hello World';");
            var strictIterator = new ByteInputIterator(Encoding.UTF8.GetBytes(lowered));
            var strictVisitor = new NpegParserVisitor(strictIterator);
            strict.Accept(strictVisitor);
            Assert.IsFalse(strictVisitor.IsMatch);

            // Same literal with the \i modifier: the lower-case input is expected to match in full.
            AExpression relaxed = PEGrammar.Load(@"(?<Expression>): 'Hello World'\i;");
            var relaxedIterator = new ByteInputIterator(Encoding.UTF8.GetBytes(lowered));
            var relaxedVisitor = new NpegParserVisitor(relaxedIterator);
            relaxed.Accept(relaxedVisitor);
            Assert.IsTrue(relaxedVisitor.IsMatch);

            AstNode matched = relaxedVisitor.AST;
            Assert.IsTrue(matched.Token.Name == "Expression");
            Assert.IsTrue(matched.Token.ValueAsString(relaxedIterator) == lowered);

            // An escaped backslash, first inside double quotes and then inside single quotes,
            // is expected to match one literal backslash.
            var backslash = @"\";
            var escapeGrammars = new[]
            {
                @"(?<Literal>): ""\\"";",
                @"(?<Literal>): '\\';"
            };
            foreach (var escapeGrammar in escapeGrammars)
            {
                AExpression escape = PEGrammar.Load(escapeGrammar);
                var escapeIterator = new ByteInputIterator(Encoding.UTF8.GetBytes(backslash));
                var escapeVisitor = new NpegParserVisitor(escapeIterator);
                escape.Accept(escapeVisitor);
                Assert.IsTrue(escapeVisitor.IsMatch);
                Assert.IsTrue(@"\" == escapeVisitor.AST.Token.ValueAsString(escapeIterator));
            }
        }
        public void Terminal_Any()
        {
            // Two AnyCharacter terminals in sequence: exactly two characters are expected to be
            // consumed, whether the input holds three characters or two.
            AExpression twoChars = new Sequence(new AnyCharacter(), new AnyCharacter());
            foreach (var text in new[] { "ijk", "ij" })
            {
                var cursor = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
                var parser = new NpegParserVisitor(cursor);
                twoChars.Accept(parser);
                Assert.IsTrue(parser.IsMatch);
                Assert.IsTrue(cursor.Index == 2,
                              "Expected two characters to be consumed and Iterator updated by 2.  0, 1 .. points to 2");
            }

            // A single AnyCharacter on empty input: no match and no movement.
            AExpression oneChar = new AnyCharacter();
            var emptyCursor = new ByteInputIterator(Encoding.UTF8.GetBytes(""));
            var emptyParser = new NpegParserVisitor(emptyCursor);
            oneChar.Accept(emptyParser);
            Assert.IsFalse(emptyParser.IsMatch);
            Assert.IsTrue(emptyCursor.Index == 0, "Expected no characters to be consumed and index stay at zero.");

            // Digits followed by "not any character": the trailing '.' is expected to make this fail.
            var digitsOnly = new Sequence(
                new OneOrMore(new CharacterClass { ClassExpression = "[0-9]" }),
                new NotPredicate(new AnyCharacter()));

            var numberCursor = new ByteInputIterator(Encoding.UTF8.GetBytes("012345."));
            var numberParser = new NpegParserVisitor(numberCursor);
            digitsOnly.Accept(numberParser);
            Assert.IsFalse(numberParser.IsMatch);
        }
        public void NonTerminal_Predicate_And()
        {
            // Expectation: evaluating an and-predicate must not advance the iterator.
            AExpression digit = new CharacterClass { ClassExpression = "[0-9]" };

            // Predicate over one or more digits (regex: \d+).
            AExpression digitsAhead = new OneOrMore(digit).And();

            var data = Encoding.UTF8.GetBytes("01234567890123456789");
            var cursor = new ByteInputIterator(data);
            var parser = new NpegParserVisitor(cursor);
            digitsAhead.Accept(parser);

            // It matches, yet the index is still 0.
            Assert.IsTrue(parser.IsMatch);
            Assert.IsTrue(cursor.Index == 0);
        }
Example #21
0
        public void Terminal_CharacterClass()
        {
            AExpression digit = new CharacterClass { ClassExpression = "[0-9]" };

            // A single digit is expected to match the class directly.
            var singleCursor = new ByteInputIterator(Encoding.UTF8.GetBytes("0"));
            var singleParser = new NpegParserVisitor(singleCursor);
            digit.Accept(singleParser);
            Assert.IsTrue(singleParser.IsMatch);

            // A run of digits is expected to match OneOrMore over the same class.
            var runCursor = new ByteInputIterator(Encoding.UTF8.GetBytes("0123456789"));
            var runParser = new NpegParserVisitor(runCursor);
            new OneOrMore(digit).Accept(runParser);
            Assert.IsTrue(runParser.IsMatch);
        }
        public void Iterator_GetText_Limit()
        {
            var bytes = Encoding.UTF8.GetBytes("01234567890123456789");
            var iterator = new ByteInputIterator(bytes);

            // Text(start, end) is expected to treat both bounds as inclusive.
            Assert.IsTrue(Encoding.ASCII.GetBytes("0").SequenceEqual(iterator.Text(0, 0)), "Text unable to return first character.");
            Assert.IsTrue(Encoding.ASCII.GetBytes("9").SequenceEqual(iterator.Text(19, 19)), "Text unable to return last character.");
            Assert.IsTrue(Encoding.ASCII.GetBytes("01").SequenceEqual(iterator.Text(0, 1)),
                          "Text unable to return specified start and end characters inclusive.");

            try
            {
                // A start index greater than the end index must be rejected.
                iterator.Text(19, 0);
                Assert.Fail("Start must be <= End");
            }
            catch (IteratorUsageException)
            {
                // Expected outcome: the invalid range was rejected, so the test passes.
                // The exception object itself is not inspected, hence no catch variable.
            }
        }
        public void Terminal_CodePoint_Decimal()
        {
            // Code point written in decimal form ("#38"), wrapped in a capture named "CodePoint".
            var ampersandRule = new CapturingGroup(
                "CodePoint",
                new CodePoint { Match = "#38" });

            var cursor = new ByteInputIterator(Encoding.UTF8.GetBytes("&"));
            var parser = new NpegParserVisitor(cursor);
            ampersandRule.Accept(parser);
            Assert.IsTrue(parser.IsMatch);

            // The capture is expected to hold the "&" character.
            AstNode captured = parser.AST;
            Assert.IsTrue(captured.Token.Name == "CodePoint");
            Assert.IsTrue(captured.Token.ValueAsString(cursor) == "&");
        }
        public void NonTerminal_Predicate_Or()
        {
            // Expectation: the iterator stays put after predicates are evaluated.
            AExpression digit = new CharacterClass { ClassExpression = "[0-9]" };

            // An and-predicate over one or more digits, then a not-predicate over any character.
            // The original author equates this with the regex ^\d+$.
            var digitsAhead = new OneOrMore(digit).And();
            AExpression anchored = digitsAhead.Sequence(new NotPredicate(new AnyCharacter()));

            var data = Encoding.UTF8.GetBytes("0123456abcdefg");
            var cursor = new ByteInputIterator(data);
            var parser = new NpegParserVisitor(cursor);
            anchored.Accept(parser);

            // Expected to fail without consuming any input.
            Assert.IsFalse(parser.IsMatch);
            Assert.IsTrue(cursor.Index == 0);
        }
Example #25
0
        public void Terminal_Any()
        {
            // Case 1 and 2: a pair of AnyCharacter terminals against three- and two-character input.
            // In both cases the iterator is expected to end at index 2.
            AExpression pair = new Sequence(new AnyCharacter(), new AnyCharacter());
            var pairInputs = new[] { "ijk", "ij" };
            for (int i = 0; i < pairInputs.Length; i++)
            {
                var pairIterator = new ByteInputIterator(Encoding.UTF8.GetBytes(pairInputs[i]));
                var pairVisitor = new NpegParserVisitor(pairIterator);
                pair.Accept(pairVisitor);
                Assert.IsTrue(pairVisitor.IsMatch);
                Assert.IsTrue(pairIterator.Index == 2,
                              "Expected two characters to be consumed and Iterator updated by 2.  0, 1 .. points to 2");
            }

            // Case 3: a single AnyCharacter against empty input must fail and stay at index 0.
            AExpression single = new AnyCharacter();
            var blankIterator = new ByteInputIterator(Encoding.UTF8.GetBytes(""));
            var blankVisitor = new NpegParserVisitor(blankIterator);
            single.Accept(blankVisitor);
            Assert.IsFalse(blankVisitor.IsMatch);
            Assert.IsTrue(blankIterator.Index == 0, "Expected no characters to be consumed and index stay at zero.");

            // Case 4: digits that must be followed by end of input; "012345." is expected to fail.
            var wholeNumber = new Sequence(
                new OneOrMore(new CharacterClass { ClassExpression = "[0-9]" }),
                new NotPredicate(new AnyCharacter()));
            var digitIterator = new ByteInputIterator(Encoding.UTF8.GetBytes("012345."));
            var digitVisitor = new NpegParserVisitor(digitIterator);
            wholeNumber.Accept(digitVisitor);
            Assert.IsFalse(digitVisitor.IsMatch);
        }
Example #26
0
        public void Iterator_GetText_Limit()
        {
            var bytes    = Encoding.UTF8.GetBytes("01234567890123456789");
            var iterator = new ByteInputIterator(bytes);

            // Text(start, end) is expected to treat both bounds as inclusive.
            Assert.IsTrue(Encoding.ASCII.GetBytes("0").SequenceEqual(iterator.Text(0, 0)), "Text unable to return first character.");
            Assert.IsTrue(Encoding.ASCII.GetBytes("9").SequenceEqual(iterator.Text(19, 19)), "Text unable to return last character.");
            Assert.IsTrue(Encoding.ASCII.GetBytes("01").SequenceEqual(iterator.Text(0, 1)),
                          "Text unable to return specified start and end characters inclusive.");

            try
            {
                // A start index greater than the end index must be rejected.
                iterator.Text(19, 0);
                Assert.Fail("Start must be <= End");
            }
            catch (IteratorUsageException)
            {
                // Expected outcome: the invalid range was rejected, so the test passes.
                // The exception object itself is not inspected, hence no catch variable.
            }
        }
        public void Terminal_CharacterClass()
        {
            AExpression digitClass = new CharacterClass { ClassExpression = "[0-9]" };

            // The class alone against a one-digit input.
            var oneDigit = Encoding.UTF8.GetBytes("0");
            var oneDigitIterator = new ByteInputIterator(oneDigit);
            var oneDigitVisitor = new NpegParserVisitor(oneDigitIterator);
            digitClass.Accept(oneDigitVisitor);
            Assert.IsTrue(oneDigitVisitor.IsMatch);

            // OneOrMore over the class against a ten-digit input.
            var tenDigits = Encoding.UTF8.GetBytes("0123456789");
            var tenDigitIterator = new ByteInputIterator(tenDigits);
            var tenDigitVisitor = new NpegParserVisitor(tenDigitIterator);
            new OneOrMore(digitClass).Accept(tenDigitVisitor);
            Assert.IsTrue(tenDigitVisitor.IsMatch);
        }
        public void PEGrammar_LimitingRepetition_VariableExpression()
        {
            // As written in the grammar, the repetition count of the (X D{3}) group is the
            // expression (C2 - C1) + 1 over the two captured bytes.
            var root = PEGrammar.Load(
                @"
					(?<ESC_AMP_Y>): . . . (?<C1>.) (?<C2>.) 
					(
						((?<X> .) (?<D> .{3})) 
					){(\k<C2> - \k<C1>)+1};

             ");

            // Case 1: C1 = 1, C2 = 1. Layout: three leading bytes, C1, C2, then one X D D D group.
            var singleGroup = new byte[] { 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00 };
            var singleIterator = new ByteInputIterator(singleGroup);
            var singleVisitor = new NpegParserVisitor(singleIterator);
            root.Accept(singleVisitor);
            Assert.IsTrue(singleVisitor.IsMatch);

            AstNode singleNode = singleVisitor.AST;
            Assert.IsTrue(singleNode.Token.Name == "ESC_AMP_Y");
            Assert.IsTrue(singleNode.Token.End == singleGroup.Length - 1); // End is zero-based: whole input consumed

            // Case 2: C1 = 1, C2 = 2. Two X D D D groups follow, plus one extra trailing byte.
            var doubleGroup = new byte[]
            {
                0x00, 0x00, 0x00, 0x01, 0x02, // three leading bytes, C1, C2
                0x00, 0x00, 0x00, 0x00,       // X D D D
                0x00, 0x00, 0x00, 0x00,       // X D D D
                0x00                          // extra byte
            };
            var doubleIterator = new ByteInputIterator(doubleGroup);
            var doubleVisitor = new NpegParserVisitor(doubleIterator);
            root.Accept(doubleVisitor);
            Assert.IsTrue(doubleVisitor.IsMatch);

            AstNode doubleNode = doubleVisitor.AST;
            Assert.IsTrue(doubleNode.Token.Name == "ESC_AMP_Y");
            Assert.IsTrue(doubleNode.Token.End == doubleGroup.Length - 2); // the extra byte is expected to stay unconsumed
        }
        public void PEGrammar_MathematicalFormula_Recursion()
        {
            // Value refers back to Expr inside parentheses, which makes the grammar recursive.
            var grammarText =
                @"
                    (?<Value>): [0-9]+ / '(' Expr ')';
                    (?<Product>): Value ((?<Symbol>'*' / '/') Value)*;
                    (?<Sum>): Product ((?<Symbol>'+' / '-') Product)*;
                    (?<Expr>): Sum;
                ";
            AExpression formulaGrammar = PEGrammar.Load(grammarText);

            var formula = "((((12/3)+5-2*(81/9))+1))";
            var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(formula));
            var visitor = new NpegParserVisitor(iterator);
            formulaGrammar.Accept(visitor);
            Assert.IsTrue(visitor.IsMatch);

            // Only the root token's span is verified; the tree shape is not (see the warning below).
            AstNode root = visitor.AST;
            Assert.IsTrue(root.Token.ValueAsString(iterator) == formula);
#warning does not specify expected tree
        }
        /// <summary>
        /// Verifies that a grammar whose rules reference each other recursively matches digits
        /// wrapped in several layers of parentheses, and that the root token covers the full input.
        /// </summary>
        public void PEGrammar_RecursiveParentheses()
        {
            const String nested = "((((((123))))))";
            byte[] nestedBytes = Encoding.UTF8.GetBytes(nested);

            // ENCLOSEDDIGITS refers back to ParethesisFunction, which is what allows the nesting.
            String grammarText =
                @"
                        (?<DIGITS>): ([0-9])+;
                        (?<ENCLOSEDDIGITS>): '(' ParethesisFunction ')';
                        ParethesisFunction: (DIGITS / ENCLOSEDDIGITS);
                        (?<RECURSIONTEST>): ParethesisFunction;
                    "
                .Trim();
            AExpression grammarRoot = PEGrammar.Load(grammarText);

            var source = new ByteInputIterator(nestedBytes);
            var parser = new NpegParserVisitor(source);
            grammarRoot.Accept(parser);
            Assert.IsTrue(parser.IsMatch);

            AstNode tree = parser.AST;
            Assert.IsTrue(tree.Token.ValueAsString(source) == nested);
        }
        /// <summary>
        /// Checks a <c>Literal</c> with mixed-case match text against lower-case input: first with
        /// the literal as constructed (no match expected), then with <c>IsCaseSensitive</c> turned
        /// off (match expected).
        /// </summary>
        public void Terminal_Literal()
        {
            const string lowerCaseInput = "hello world";

            var literal = new Literal();
            literal.MatchText = "Hello World";

            // First pass: IsCaseSensitive has not been touched, and the casing differs.
            var firstIterator = new ByteInputIterator(Encoding.UTF8.GetBytes(lowerCaseInput));
            var firstVisitor = new NpegParserVisitor(firstIterator);
            literal.Accept(firstVisitor);
            Assert.IsFalse(firstVisitor.IsMatch);

            // Second pass: same input, but casing is now ignored.
            literal.IsCaseSensitive = false;

            var secondIterator = new ByteInputIterator(Encoding.UTF8.GetBytes(lowerCaseInput));
            var secondVisitor = new NpegParserVisitor(secondIterator);
            literal.Accept(secondVisitor);
            Assert.IsTrue(secondVisitor.IsMatch);
        }
        /// <summary>
        /// Parses nested XML-like tags with a grammar whose end tag uses a <c>\k&lt;Tag&gt;</c>
        /// back reference. After asserting the match, the test always throws
        /// <c>NotImplementedException</c>: it is a deliberate placeholder.
        /// </summary>
        public void PEGrammar_DynamicBackReference_Xml()
        {
            String xmlGrammar =
                @"
					(?<Tag>): [a-zA-Z0-9]+;
					(?<StartTag>): '<' Tag '>';
					(?<EndTag>): '</' \k<Tag> '>' ;
					(?<Body>): (Xml / (!EndTag .))+;
					(?<Xml>): (StartTag Body EndTag )+;
			";

            String document = @"
					<test>
						test data start
						<test1>
							test1 data start
							<test2>
								text2 data start
								text2 data end
							</test2>
							test1 data end
						</test1>
						test data end
					</test>
			".Trim();

            AExpression grammarRoot = PEGrammar.Load(xmlGrammar);
            byte[] documentBytes = Encoding.UTF8.GetBytes(document);
            var source = new ByteInputIterator(documentBytes);
            var parser = new NpegParserVisitor(source);

            grammarRoot.Accept(parser);
            Assert.IsTrue(parser.IsMatch);

            // The tree is fetched but not inspected yet; see the placeholder exception below.
            AstNode tree = parser.AST;

            throw new NotImplementedException("Refactoring - plan on changing backreferencing logic inside NPEGParser - just placeholder of failing test for now; conserve memory");
        }
        /// <summary>
        /// Exercises literal terminals loaded from grammar text: a plain quoted literal against
        /// differently-cased input, the same literal with the <c>\i</c> suffix, and a backslash
        /// literal written with double quotes and with single quotes.
        /// </summary>
        public void PEGrammar_Literal()
        {
            const string lowerCase = "hello world";
            const string backslash = @"\";

            // Plain literal: the lower-case input is expected not to match 'Hello World'.
            AExpression strictRule = PEGrammar.Load(@"(?<Expression>): 'Hello World';");
            var strictIterator = new ByteInputIterator(Encoding.UTF8.GetBytes(lowerCase));
            var strictVisitor = new NpegParserVisitor(strictIterator);
            strictRule.Accept(strictVisitor);
            Assert.IsFalse(strictVisitor.IsMatch);

            // Same literal with the \i suffix: the lower-case input is expected to match.
            AExpression relaxedRule = PEGrammar.Load(@"(?<Expression>): 'Hello World'\i;");
            var relaxedIterator = new ByteInputIterator(Encoding.UTF8.GetBytes(lowerCase));
            var relaxedVisitor = new NpegParserVisitor(relaxedIterator);
            relaxedRule.Accept(relaxedVisitor);
            Assert.IsTrue(relaxedVisitor.IsMatch);
            AstNode relaxedNode = relaxedVisitor.AST;
            Assert.IsTrue(relaxedNode.Token.Name == "Expression");
            Assert.IsTrue(relaxedNode.Token.ValueAsString(relaxedIterator) == lowerCase);

            // Escaped backslash inside a double-quoted grammar literal. The C# verbatim string
            // doubles the quotes, so the grammar text itself reads "\\".
            AExpression doubleQuoted = PEGrammar.Load(@"(?<Literal>): ""\\"";");
            var doubleQuotedIterator = new ByteInputIterator(Encoding.UTF8.GetBytes(backslash));
            var doubleQuotedVisitor = new NpegParserVisitor(doubleQuotedIterator);
            doubleQuoted.Accept(doubleQuotedVisitor);
            Assert.IsTrue(doubleQuotedVisitor.IsMatch);
            Assert.IsTrue(@"\" == doubleQuotedVisitor.AST.Token.ValueAsString(doubleQuotedIterator));

            // Escaped backslash inside a single-quoted grammar literal.
            AExpression singleQuoted = PEGrammar.Load(@"(?<Literal>): '\\';");
            var singleQuotedIterator = new ByteInputIterator(Encoding.UTF8.GetBytes(backslash));
            var singleQuotedVisitor = new NpegParserVisitor(singleQuotedIterator);
            singleQuoted.Accept(singleQuotedVisitor);
            Assert.IsTrue(singleQuotedVisitor.IsMatch);
            Assert.IsTrue(@"\" == singleQuotedVisitor.AST.Token.ValueAsString(singleQuotedIterator));
        }
        /// <summary>
        /// Loads a boolean-algebra grammar (gates, optionally quoted variables, inversion with '!'
        /// and parenthesised sub-expressions) and asserts that each sample equation is matched.
        /// </summary>
        /// <remarks>
        /// The samples are table-driven so every equation goes through the same steps and a failing
        /// assertion names the equation that failed. The grammar ends in <c>!.</c>, so a match
        /// requires the whole input to be consumed.
        /// </remarks>
        public void PEGrammar_BooleanAlgebra()
        {
            String grammar =
                @"
                    S: [\s]+;
                    (?<Gate>): ('*' / 'AND') / ('~*' / 'NAND') / ('+' / 'OR') / ('~+' / 'NOR') / ('^' / 'XOR') / ('~^' / 'XNOR');
                    ValidVariable: '""' (?<Variable>[a-zA-Z0-9]+) '""'  / '\'' (?<Variable>[a-zA-Z0-9]+) '\'' / (?<Variable>[a-zA-Z]);
                    VarProjection1: ValidVariable /  (?<Invertor>'!' ValidVariable);
                    VarProjection2: VarProjection1 / '(' Expression ')' / (?<Invertor>'!' '(' Expression ')');
                    Expression: S? VarProjection2 S? (Gate S? VarProjection2 S?)*;
                    (?<BooleanEquation>): Expression !.;
                "
                    .Trim();

            AExpression ROOT = PEGrammar.Load(grammar);

            String[] equations =
            {
                // single-letter variables
                "A*!B+!A*B",
                // quoted variables
                "'aA'*!'bB'+!'aA'*'bB'",
                // longer gate chains
                "A*!B*C+!A*B*C",
                // parentheses, with inversion inside and outside the brackets
                "((A)*(!B)+(!A)*(B))",
                "((A)*!(B)+!(A)*(B))",
                "((A)*(!(B))+(!(A))*(B))",
                "(!X*Y*!Z)",
                "(!X*Y*!Z)+(!X*Y*Z)",
                "(X*Z)",
                "(!X*Y*!Z)+(!X*Y*Z)+(X*Z)",
                "((((!X*Y*Z)+(!X*Y*!Z)+(X*Z))))"
            };

            foreach (String input in equations)
            {
                // A fresh iterator and visitor per equation; only the loaded grammar is shared.
                var bytes = Encoding.UTF8.GetBytes(input);
                var iterator = new ByteInputIterator(bytes);
                var visitor = new NpegParserVisitor(iterator);
                ROOT.Accept(visitor);
                Assert.IsTrue(visitor.IsMatch, "Expected the grammar to match: " + input);

                // The tree is fetched as before, but comparing the root token's value with the
                // input is still disabled; the directive below keeps that visible at build time.
                AstNode node = visitor.AST;
                #warning Assert.IsTrue(node.Token.Value == input);
            }
        }
        /// <summary>
        /// Loads a grammar consisting of a single <c>Warn&lt;'warning'&gt;</c> rule, runs it over a
        /// one-space input and expects a match with exactly one entry in the visitor's warnings.
        /// </summary>
        public void PEGrammar_Interpreter_Warn()
        {
            const String singleSpace = " ";

            AExpression warnRule = PEGrammar.Load(
                @"
                    (?<Value>): Warn<'warning'>;
                "
                );

            var source = new ByteInputIterator(Encoding.UTF8.GetBytes(singleSpace));
            var parser = new NpegParserVisitor(source);
            warnRule.Accept(parser);

            Assert.IsTrue(parser.IsMatch);
            Assert.IsTrue(parser.Warnings.Count == 1);
        }
        /// <summary>
        /// Builds, by hand, a small expression grammar in which a capturing group can contain the
        /// whole "Expression" rule again (via <c>RecursionCreate</c> / <c>RecursionCall</c>), parses
        /// <c>(?&lt;NPEGNode&gt;./.. )</c> with it and checks the names and child counts of the
        /// resulting tree.
        /// </summary>
        public void CompositeVisitor_Recursiveness()
        {
            var whitespace = new CharacterClass {ClassExpression = "[ \t\r\n\v]"};

            // A terminal is either a literal "." or "(?<name>" Expression ")".
            var terminal = new PrioritizedChoice(
                new CapturingGroup("AnyCharacter", new Literal {MatchText = "."})
                ,
                new CapturingGroup("CapturingGroup",
                    new Sequence(
                        new Literal {MatchText = "(?<"},
                        new CapturingGroup("ReplacementNode",
                            new OneOrMore(
                                new CharacterClass {ClassExpression = "[a-z0-9A-Z]"}
                                )
                            )
                        )
                        .Sequence(new Literal {MatchText = ">"})
                        // Re-enters the rule registered under the name "Expression" below.
                        .Sequence(new RecursionCall("Expression"))
                        .Sequence(new Literal {MatchText = ")"})
                    )
                );

            // One or more terminals, each optionally followed by whitespace.
            var sequence = new CapturingGroup(
                "Sequence",
                new Sequence(
                    terminal,
                    new ZeroOrMore(whitespace)
                    ).Plus()
                ) {DoReplaceBySingleChildNode = true};

            // sequence "/" sequence, followed by any number of further "/" sequence groups.
            var prioritizedchoice = new CapturingGroup("PrioritizedChoice",
                new Sequence(
                    sequence,
                    new Literal {MatchText = "/"}
                    )
                    .Sequence(new ZeroOrMore(whitespace))
                    .Sequence(sequence)
                    .Sequence(
                        new ZeroOrMore(
                            new Sequence(
                                new ZeroOrMore(whitespace),
                                new Literal {MatchText = "/"}
                                )
                                .Sequence(new ZeroOrMore(whitespace))
                                .Sequence(sequence)
                                .Plus()
                            )
                    )
                );

            // Registers the rule under the name the RecursionCall above refers to.
            var expression = new CapturingGroup("Root",
                new RecursionCreate("Expression",
                    new PrioritizedChoice(prioritizedchoice, sequence)));

            var input = @"(?<NPEGNode>./.. )";
            var bytes = Encoding.UTF8.GetBytes(input);
            var iterator = new ByteInputIterator(bytes);
            var visitor = new NpegParserVisitor(iterator);
            expression.Accept(visitor);

            Assert.IsTrue(visitor.IsMatch);
            AstNode node = visitor.AST;
            Assert.IsTrue(node.Token.Name == "Root");
            // This check used to be written twice in a row; once is enough.
            Assert.IsTrue(node.Children.Count == 1);
            Assert.IsTrue(node.Children[0].Token.Name == "CapturingGroup");
            Assert.IsTrue(node.Children[0].Children.Count == 2);
            Assert.IsTrue(node.Children[0].Children[0].Token.Name == "ReplacementNode");
            Assert.IsTrue(node.Children[0].Children[1].Token.Name == "PrioritizedChoice");
            Assert.IsTrue(node.Children[0].Children[1].Children[0].Token.Name == "AnyCharacter");
            Assert.IsTrue(node.Children[0].Children[1].Children[1].Token.Name == "Sequence");
            Assert.IsTrue(node.Children[0].Children[1].Children[1].Children[0].Token.Name == "AnyCharacter");
            Assert.IsTrue(node.Children[0].Children[1].Children[1].Children[1].Token.Name == "AnyCharacter");
        }
        /// <summary>
        /// Hand-builds the expression for a <c>\k&lt; label &gt;</c> back-reference token (label
        /// padded by whitespace or comments, optional <c>[\i]</c> flag), parses
        /// <c>\k&lt; CapturedLabelVariableName &gt;</c> and checks the captured label.
        /// </summary>
        public void CompositeVisitor_CapturingGroup_SandBoxTest_PriorityChoice1()
        {
            // Line terminators, tried in this order: "\r\n", "\r\r", "\n".
            PrioritizedChoice newline =
                new PrioritizedChoice(
                    new Literal {MatchText = "\r\n"},
                    new Literal {MatchText = "\r\r"})
                .Or(new Literal {MatchText = "\n"});

            // "//" followed by any characters that do not begin a line terminator.
            var lineCommentOpen = new Literal {MatchText = "//"};
            var lineCommentBody = new Sequence(new NotPredicate(newline), new AnyCharacter()).Star();
            var singleLineComment = new Sequence(lineCommentOpen, lineCommentBody);

            // "/*", then any characters that do not begin "*/", then the closing "*/".
            var blockCommentOpen = new Literal {MatchText = "/*"};
            var blockCommentRest =
                new Sequence(new NotPredicate(new Literal {MatchText = "*/"}), new AnyCharacter())
                .Star()
                .Sequence(new Literal {MatchText = "*/"});
            var multiLineComment = new Sequence(blockCommentOpen, blockCommentRest);

            var comment = new PrioritizedChoice(singleLineComment, multiLineComment);

            // Comments are treated like whitespace.
            var whitespace = new PrioritizedChoice(
                new CharacterClass {ClassExpression = "[ \t\r\n\v]"},
                comment);

            // A letter or underscore first, then letters, digits or underscores.
            var labelHead = new CharacterClass {ClassExpression = "[a-zA-Z_]"};
            var labelTail = new ZeroOrMore(new CharacterClass {ClassExpression = "[a-zA-Z0-9_]"});
            var label = new CapturingGroup("Label", new Sequence(labelHead, labelTail));

            // \k<  label  with optional padding on both sides
            var backrefOpen = new Literal {MatchText = @"\k<"};
            var paddedLabel =
                new Sequence(new ZeroOrMore(whitespace), label)
                .Sequence(new ZeroOrMore(whitespace));
            var backrefStart = new Sequence(backrefOpen, paddedLabel);

            // optional [\i] flag
            var caseFlag = new Sequence(
                new Sequence(
                    new Literal {MatchText = "["},
                    new CapturingGroup("CaseSensitive", new Literal {MatchText = @"\i"})),
                new Literal {MatchText = "]"});

            // optional padding, then the closing ">"
            var backrefClose = new Sequence(new ZeroOrMore(whitespace), new Literal {MatchText = ">"});

            var backreference = new CapturingGroup(
                "DynamicBackReferencing",
                backrefStart
                    .Sequence(new Optional(caseFlag))
                    .Sequence(backrefClose));

            // The back reference must be followed by end of input.
            var root = new CapturingGroup(
                "Test",
                new Sequence(backreference, new NotPredicate(new AnyCharacter())));

            const string text = @"\k< CapturedLabelVariableName >";
            var source = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
            var parser = new NpegParserVisitor(source);
            root.Accept(parser);
            Assert.IsTrue(parser.IsMatch);

            AstNode tree = parser.AST;
            Assert.IsTrue(tree.Token.Name == "Test");
            Assert.IsTrue(tree.Children[0].Token.Name == "DynamicBackReferencing");
            Assert.IsTrue(tree.Children[0].Children[0].Token.Name == "Label");
            Assert.IsTrue(tree.Children[0].Children[0].Token.ValueAsString(source) == "CapturedLabelVariableName");
        }
        // Example #38
        // 0
        /// <summary>
        /// Nests one recursive rule inside another: digits wrapped in any number of
        /// <c>&lt; &gt;</c> pairs, themselves wrapped in any number of <c>( )</c> pairs. Parses
        /// <c>(((&lt;&lt;&lt;123&gt;&gt;&gt;)))</c> and checks that the tree is a single chain of
        /// three PENCLOSED nodes followed by three LTENCLOSED nodes.
        /// </summary>
        public void CompositeVisitor_NestedRecursive()
        {
            #region Composite

            var digits = new CapturingGroup(
                "DIGITS",
                new OneOrMore(new CharacterClass {ClassExpression = "[0-9]"}));

            // digits, or "<" (this same rule again) ">"
            var angleWrapped = new CapturingGroup(
                "LTENCLOSED",
                new Sequence(
                    new Literal {MatchText = "<"},
                    new RecursionCall("RECURSIONLTENCLOSED"))
                .Sequence(new Literal {MatchText = ">"}));
            var angleRule = new RecursionCreate(
                "RECURSIONLTENCLOSED",
                new PrioritizedChoice(digits, angleWrapped));

            // the angle rule, or "(" (this same rule again) ")"
            var parenWrapped = new CapturingGroup(
                "PENCLOSED",
                new Sequence(
                    new Literal {MatchText = "("},
                    new RecursionCall("RECURSIONPENCLOSED"))
                .Sequence(new Literal {MatchText = ")"}));
            var parenRule = new RecursionCreate(
                "RECURSIONPENCLOSED",
                new PrioritizedChoice(angleRule, parenWrapped));

            AExpression grammarRoot = new CapturingGroup("NESTEDRECURSIONTEST", parenRule);

            #endregion

            const string text = "(((<<<123>>>)))";
            var source = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
            var parser = new NpegParserVisitor(source);
            grammarRoot.Accept(parser);
            Assert.IsTrue(parser.IsMatch);

            AstNode tree = parser.AST;
            Assert.IsTrue(tree.Token.ValueAsString(source) == text);
            Assert.IsTrue(tree.Token.Name == "NESTEDRECURSIONTEST");

            // Walk down the single-child chain: each level has exactly one child with the
            // expected name. The checks run in the same order as a hand-written ladder would.
            String[] expectedChain =
            {
                "PENCLOSED", "PENCLOSED", "PENCLOSED",
                "LTENCLOSED", "LTENCLOSED", "LTENCLOSED"
            };
            AstNode cursor = tree;
            Assert.IsTrue(cursor.Children.Count == 1);
            foreach (String expectedName in expectedChain)
            {
                cursor = cursor.Children[0];
                Assert.IsTrue(cursor.Token.Name == expectedName);
                Assert.IsTrue(cursor.Children.Count == 1);
            }
        }
        // Example #39
        // 0
        /// <summary>
        /// Exercises <c>CodePoint</c> with hexadecimal patterns: an exact byte, patterns with an odd
        /// number of hex digits, the <c>X</c> digit used as a wildcard, and inputs that are
        /// expected not to match.
        /// </summary>
        public void Terminal_CodePoint_Hexadecimal()
        {
            // 'a' is 97 decimal and 0x61 hexadecimal.
            Assert.IsTrue((Byte) 'a' == 97);
            Assert.IsTrue((Byte) 'a' == 0x61);

            // Exact single byte.
            {
                var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes("a"));
                var visitor = new NpegParserVisitor(iterator);
                var rule = new CapturingGroup("Hexadecimal", new CodePoint {Match = "#x61"});
                rule.Accept(visitor);
                Assert.IsTrue(visitor.IsMatch);
                AstNode node = visitor.AST;
                Assert.IsTrue(node.Token.Name == "Hexadecimal");
                Assert.IsTrue(node.Token.ValueAsString(iterator) == "a");
            }

            // Byte boundary: "#xA61" has three hex digits and is expected to match "\n" + "a".
            {
                var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes("\na"));
                var visitor = new NpegParserVisitor(iterator);
                var rule = new CapturingGroup("Hexadecimal", new CodePoint {Match = "#xA61"});
                rule.Accept(visitor);
                Assert.IsTrue(visitor.IsMatch, "During incomplete byte boundaries 0 is expected to prefix input;  This would shift input to the right by 4 bits.  In this case it complete codepoint should be 0A = \n and letter a.");
                AstNode node = visitor.AST;
                Assert.IsTrue(node.Token.Name == "Hexadecimal");
                Assert.IsTrue(node.Token.ValueAsString(iterator) == "\na");
            }

            // Byte boundary: "#x061" is expected to match "\0" + "a".
            {
                var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes("\0a"));
                var visitor = new NpegParserVisitor(iterator);
                var rule = new CapturingGroup("Hexadecimal", new CodePoint {Match = "#x061"});
                rule.Accept(visitor);
                Assert.IsTrue(visitor.IsMatch, "During incomplete byte boundaries 0 is expected to prefix input;  This would shift input to the right by 4 bits.  In this case it complete codepoint should be 00 = \0 and letter a.");
                AstNode node = visitor.AST;
                Assert.IsTrue(node.Token.Name == "Hexadecimal");
                Assert.IsTrue(node.Token.ValueAsString(iterator) == "\0a");
            }

            // Don't-care digit: "#xX1" (#bXXXX0001) repeated; only the first three bytes are
            // expected to be captured.
            {
                var iterator = new ByteInputIterator(new byte[] {0x11, 0x01, 0x71, 0x03, 0x00});
                var visitor = new NpegParserVisitor(iterator);
                var rule = new CapturingGroup("Hexadecimal", new OneOrMore(new CodePoint {Match = "#xX1"}));
                rule.Accept(visitor);
                Assert.IsTrue(visitor.IsMatch);
                AstNode node = visitor.AST;
                Assert.IsTrue(node.Token.Name == "Hexadecimal");
                Assert.IsTrue(node.Token.ValueAsString(iterator) == Encoding.ASCII.GetString(new byte[] {0x11, 0x01, 0x71}));
            }

            // Don't-care digit, but the fixed low digit differs (0x10): no match expected.
            {
                var iterator = new ByteInputIterator(new byte[] {0x10});
                var visitor = new NpegParserVisitor(iterator);
                var rule = new CapturingGroup("Hexadecimal", new CodePoint {Match = "#xX1"});
                rule.Accept(visitor);
                Assert.IsFalse(visitor.IsMatch);
            }

            // Empty input: there is nothing to consume, so no match is expected.
            {
                var iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(string.Empty));
                var visitor = new NpegParserVisitor(iterator);
                var rule = new CapturingGroup("Hexadecimal", new CodePoint {Match = "#xX1"});
                rule.Accept(visitor);
                Assert.IsFalse(visitor.IsMatch);
            }
        }
        /// <summary>
        /// Runs <c>CodePoint</c> against hexadecimal patterns: a full byte, odd-length digit
        /// strings, the <c>X</c> wildcard digit, and two inputs for which no match is expected.
        /// </summary>
        public void Terminal_CodePoint_Hexadecimal()
        {
            // 'a' is 97 decimal and 0x61 hexadecimal.
            Assert.IsTrue((Byte)'a' == 97);
            Assert.IsTrue((Byte)'a' == 0x61);

            // --- exact byte -------------------------------------------------------------------
            var exactIterator = new ByteInputIterator(Encoding.UTF8.GetBytes("a"));
            var exactVisitor = new NpegParserVisitor(exactIterator);
            var exactRule = new CapturingGroup("Hexadecimal", new CodePoint { Match = "#x61" });
            exactRule.Accept(exactVisitor);
            Assert.IsTrue(exactVisitor.IsMatch);
            AstNode exactNode = exactVisitor.AST;
            Assert.IsTrue(exactNode.Token.Name == "Hexadecimal");
            Assert.IsTrue(exactNode.Token.ValueAsString(exactIterator) == "a");

            // --- byte boundary: three hex digits, expected to match "\n" + "a" -----------------
            var newlineIterator = new ByteInputIterator(Encoding.UTF8.GetBytes("\na"));
            var newlineVisitor = new NpegParserVisitor(newlineIterator);
            var newlineRule = new CapturingGroup("Hexadecimal", new CodePoint { Match = "#xA61" });
            newlineRule.Accept(newlineVisitor);
            Assert.IsTrue(newlineVisitor.IsMatch, "During incomplete byte boundaries 0 is expected to prefix input;  This would shift input to the right by 4 bits.  In this case it complete codepoint should be 0A = \n and letter a.");
            AstNode newlineNode = newlineVisitor.AST;
            Assert.IsTrue(newlineNode.Token.Name == "Hexadecimal");
            Assert.IsTrue(newlineNode.Token.ValueAsString(newlineIterator) == "\na");

            // --- byte boundary: three hex digits, expected to match "\0" + "a" -----------------
            var nulIterator = new ByteInputIterator(Encoding.UTF8.GetBytes("\0a"));
            var nulVisitor = new NpegParserVisitor(nulIterator);
            var nulRule = new CapturingGroup("Hexadecimal", new CodePoint { Match = "#x061" });
            nulRule.Accept(nulVisitor);
            Assert.IsTrue(nulVisitor.IsMatch, "During incomplete byte boundaries 0 is expected to prefix input;  This would shift input to the right by 4 bits.  In this case it complete codepoint should be 00 = \0 and letter a.");
            AstNode nulNode = nulVisitor.AST;
            Assert.IsTrue(nulNode.Token.Name == "Hexadecimal");
            Assert.IsTrue(nulNode.Token.ValueAsString(nulIterator) == "\0a");

            // --- don't-care digit: #xX1 (#bXXXX0001), repeated ----------------------------------
            // Only the first three of the five bytes are expected in the captured token.
            var wildcardIterator = new ByteInputIterator(new byte[] { 0x11, 0x01, 0x71, 0x03, 0x00 });
            var wildcardVisitor = new NpegParserVisitor(wildcardIterator);
            var wildcardRule = new CapturingGroup("Hexadecimal", new OneOrMore(new CodePoint { Match = "#xX1" }));
            wildcardRule.Accept(wildcardVisitor);
            Assert.IsTrue(wildcardVisitor.IsMatch);
            AstNode wildcardNode = wildcardVisitor.AST;
            Assert.IsTrue(wildcardNode.Token.Name == "Hexadecimal");
            Assert.IsTrue(wildcardNode.Token.ValueAsString(wildcardIterator) == Encoding.ASCII.GetString(new byte[] { 0x11, 0x01, 0x71 }));

            // --- don't-care digit, fixed low digit differs (0x10): no match expected ------------
            var mismatchIterator = new ByteInputIterator(new byte[] { 0x10 });
            var mismatchVisitor = new NpegParserVisitor(mismatchIterator);
            var mismatchRule = new CapturingGroup("Hexadecimal", new CodePoint { Match = "#xX1" });
            mismatchRule.Accept(mismatchVisitor);
            Assert.IsFalse(mismatchVisitor.IsMatch);

            // --- empty input: nothing to consume, no match expected -----------------------------
            var emptyIterator = new ByteInputIterator(Encoding.UTF8.GetBytes(string.Empty));
            var emptyVisitor = new NpegParserVisitor(emptyIterator);
            var emptyRule = new CapturingGroup("Hexadecimal", new CodePoint { Match = "#xX1" });
            emptyRule.Accept(emptyVisitor);
            Assert.IsFalse(emptyVisitor.IsMatch);
        }
        /// <summary>
        /// Exercises <c>CodePoint</c> terminals whose <c>Match</c> pattern uses binary notation
        /// (<c>#b...</c>): exact patterns, patterns that are not a whole number of bytes long,
        /// patterns containing <c>X</c> bits, and empty input.
        /// </summary>
        public void Terminal_CodePoint_Binary()
        {
            // Failure text shared by the two byte-boundary scenarios.
            const string paddingNote = "During incomplete byte boundaries 0 is expected to prefix input;  This would shift input to the right by 4 bits.  In this case it complete codepoint should be null and letter a.";

            // The patterns below rely on 'a' being byte 97 (0x61, binary 0110 0001).
            Assert.IsTrue((Byte)'a' == 97);
            Assert.IsTrue((Byte)'a' == 0x61);

            // Scenario: a seven-bit pattern against the single byte 'a'.
            var text = "a";
            var source = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
            var parser = new NpegParserVisitor(source);
            var group = new CapturingGroup("Binary", new CodePoint { Match = "#b1100001" });
            group.Accept(parser);
            Assert.IsTrue(parser.IsMatch);

            AstNode tree = parser.AST;
            Assert.IsTrue(tree.Token.Name == "Binary");
            Assert.IsTrue(tree.Token.ValueAsString(source) == "a");

            // Scenario: a sixteen-bit pattern spanning two 'a' bytes.
            text = "aa";
            source = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
            parser = new NpegParserVisitor(source);
            group = new CapturingGroup("Binary", new CodePoint { Match = "#b0110000101100001" });
            group.Accept(parser);
            Assert.IsTrue(parser.IsMatch);

            tree = parser.AST;
            Assert.IsTrue(tree.Token.Name == "Binary");
            Assert.IsTrue(tree.Token.ValueAsString(source) == "aa");

            // Scenario (byte boundary): an eleven-bit pattern against the two bytes 0x00 0x61.
            // Per the assertion message, the pattern is expected to be prefixed with zero bits.
            text = "\0a";
            source = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
            parser = new NpegParserVisitor(source);
            group = new CapturingGroup("Binary", new CodePoint { Match = "#b00001100001" });
            group.Accept(parser);
            Assert.IsTrue(parser.IsMatch, paddingNote);

            tree = parser.AST;
            Assert.IsTrue(tree.Token.Name == "Binary");
            Assert.IsTrue(tree.Token.ValueAsString(source) == "\0a");

            // Scenario (byte boundary): same input, matched by a three-bit zero pattern
            // followed by the full eight-bit pattern for 'a'.
            text = "\0a";
            source = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
            parser = new NpegParserVisitor(source);
            group = new CapturingGroup(
                "Binary",
                new Sequence(
                    new CodePoint { Match = "#b000" },
                    new CodePoint { Match = "#b01100001" }));
            group.Accept(parser);
            Assert.IsTrue(parser.IsMatch, paddingNote);

            tree = parser.AST;
            Assert.IsTrue(tree.Token.Name == "Binary");
            Assert.IsTrue(tree.Token.ValueAsString(source) == "\0a");

            // Scenario (don't care bits): #bXXXX0001 repeated one or more times.
            // 0x11, 0x01 and 0x71 all end in the nibble 0001; 0x03 does not, so the
            // capture is expected to stop after the first three bytes.
            text = Encoding.ASCII.GetString(new byte[] { 0x11, 0x01, 0x71, 0x03, 0x00 });
            source = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
            parser = new NpegParserVisitor(source);
            group = new CapturingGroup(
                "Binary",
                new OneOrMore(new CodePoint { Match = "#bXXXX0001" }));
            group.Accept(parser);
            Assert.IsTrue(parser.IsMatch);

            tree = parser.AST;
            Assert.IsTrue(tree.Token.Name == "Binary");
            Assert.IsTrue(tree.Token.ValueAsString(source) == Encoding.ASCII.GetString(new byte[] { 0x11, 0x01, 0x71 }));

            // Scenario (don't care bits): 0x10 ends in the nibble 0000, so no match is expected.
            text = Encoding.ASCII.GetString(new byte[] { 0x10 });
            source = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
            parser = new NpegParserVisitor(source);
            group = new CapturingGroup("Binary", new CodePoint { Match = "#bXXXX0001" });
            group.Accept(parser);
            Assert.IsFalse(parser.IsMatch);

            // Scenario: nothing to consume - empty input must not match.
            text = string.Empty;
            source = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
            parser = new NpegParserVisitor(source);
            group = new CapturingGroup("Binary", new CodePoint { Match = "#bXXXX0001" });
            group.Accept(parser);
            Assert.IsFalse(parser.IsMatch);
        }
        /// <summary>
        /// Exercises <c>LimitingRepetition</c> over a digit class with a minimum only, a maximum
        /// only, and both bounds, and expects an <see cref="ArgumentException"/> when no bound is
        /// given or when the maximum is below the minimum.
        /// </summary>
        /// <remarks>
        /// The original author's list of cases also mentions a bound computed from back
        /// references, <c>{(\k&lt;C2&gt; - \k&lt;C1&gt;)+1}</c>, for variable length protocols;
        /// that case is not exercised here.
        /// </remarks>
        public void Terminal_LimitingRepetition()
        {
            AExpression digit = new CharacterClass { ClassExpression = "[0-9]" };

            // Rules under test; each wraps the same digit class with different bounds.
            var atLeastZero = new CapturingGroup("MinTrue", new LimitingRepetition(digit) { Min = 0 });
            var atLeastFortyFour = new CapturingGroup("MinFalse", new LimitingRepetition(digit) { Min = 44 });
            var atLeastFive = new CapturingGroup("MinTrue", new LimitingRepetition(digit) { Min = 5 });
            var atMostFive = new CapturingGroup("MaxTrue", new LimitingRepetition(digit) { Max = 5 });
            var fiveToSix = new CapturingGroup("MinMax", new LimitingRepetition(digit) { Min = 5, Max = 6 });
            var withoutBounds = new CapturingGroup("ExceptionNoMinMax", new LimitingRepetition(digit));
            var maxBelowMin = new CapturingGroup("ExceptionMaxLessThanMin", new LimitingRepetition(digit) { Min = 5, Max = 0 });

            // Ten digits; the same iterator is rewound to index 0 before every run.
            String digits = "1234567890";
            var source = new ByteInputIterator(Encoding.UTF8.GetBytes(digits));
            var parser = new NpegParserVisitor(source);

            // Min = 0 matches.
            atLeastZero.Accept(parser);
            Assert.IsTrue(parser.IsMatch);
            AstNode tree = parser.AST;

            // Min = 44 cannot be satisfied by ten digits.
            source.Index = 0;
            parser = new NpegParserVisitor(source);
            atLeastFortyFour.Accept(parser);
            Assert.IsFalse(parser.IsMatch);

            // Min = 5 with no maximum captures the whole input.
            source.Index = 0;
            parser = new NpegParserVisitor(source);
            atLeastFive.Accept(parser);
            Assert.IsTrue(parser.IsMatch);
            tree = parser.AST;
            Assert.IsTrue(tree.Token.ValueAsString(source) == digits);

            // Max = 5 matches.
            source.Index = 0;
            parser = new NpegParserVisitor(source);
            atMostFive.Accept(parser);
            Assert.IsTrue(parser.IsMatch);
            tree = parser.AST;

            // Min = 5, Max = 6 matches.
            source.Index = 0;
            parser = new NpegParserVisitor(source);
            fiveToSix.Accept(parser);
            Assert.IsTrue(parser.IsMatch);
            tree = parser.AST;

            // Both invalid configurations are expected to raise ArgumentException;
            // the tally is checked once at the end.
            var argumentFailures = 0;

            try
            {
                source.Index = 0;
                parser = new NpegParserVisitor(source);
                withoutBounds.Accept(parser);
                Assert.IsTrue(parser.IsMatch);
                tree = parser.AST;
            }
            catch (ArgumentException)
            {
                argumentFailures += 1;
            }

            try
            {
                source.Index = 0;
                parser = new NpegParserVisitor(source);
                maxBelowMin.Accept(parser);
                Assert.IsTrue(parser.IsMatch);
                tree = parser.AST;
            }
            catch (ArgumentException)
            {
                argumentFailures += 1;
            }

            Assert.IsTrue(argumentFailures == 2);
        }
        /// <summary>
        /// Builds a boolean-algebra grammar out of expression objects (gates, quoted or bare
        /// variables, <c>!</c> inversion and parenthesised sub-expressions) and parses a series
        /// of equations with it. The first two equations also have the shape of their AST
        /// checked; the remaining ones are only required to match.
        /// </summary>
        public void PracticalExample_BooleanAlgebra()
        {
            // ---- Gates: each accepts a symbol or the spelled-out keyword. ----
            AExpression andGate = new PrioritizedChoice(
                new Literal { MatchText = "*" },
                new Literal { MatchText = "AND" });
            AExpression nandGate = new PrioritizedChoice(
                new Literal { MatchText = "~*" },
                new Literal { MatchText = "NAND" });
            AExpression orGate = new PrioritizedChoice(
                new Literal { MatchText = "+" },
                new Literal { MatchText = "OR" });
            AExpression norGate = new PrioritizedChoice(
                new Literal { MatchText = "~+" },
                new Literal { MatchText = "NOR" });
            AExpression xorGate = new PrioritizedChoice(
                new Literal { MatchText = "^" },
                new Literal { MatchText = "XOR" });
            AExpression xnorGate = new PrioritizedChoice(
                new Literal { MatchText = "~^" },
                new Literal { MatchText = "XNOR" });

            AExpression gate = new CapturingGroup(
                "GATE",
                new PrioritizedChoice(andGate, nandGate).Or(orGate).Or(norGate).Or(xorGate).Or(xnorGate));

            // ---- Variable names: "name" / 'name' / single bare letter. ----
            // Only the name itself is captured; the surrounding quotes are not.
            var doubleQuotedName = new Sequence(
                    new Literal { MatchText = "\"" },
                    new CapturingGroup("VARIABLE", new OneOrMore(new CharacterClass { ClassExpression = "[a-zA-Z0-9]" })))
                .Sequence(new Literal { MatchText = "\"" });
            var singleQuotedName = new Sequence(
                    new Literal { MatchText = "'" },
                    new CapturingGroup("VARIABLE", new OneOrMore(new CharacterClass { ClassExpression = "[a-zA-Z0-9]" })))
                .Sequence(new Literal { MatchText = "'" });
            var bareLetter = new CapturingGroup("VARIABLE", new CharacterClass { ClassExpression = "[a-zA-Z]" });

            AExpression name = new PrioritizedChoice(doubleQuotedName, singleQuotedName).Or(bareLetter);

            // ---- name / !name ----
            AExpression nameOrInvertedName = new PrioritizedChoice(
                name,
                new CapturingGroup(
                    "INVERTOR",
                    new Sequence(new Literal { MatchText = "!" }, name)));

            // ---- (name / !name) / ( expression ) / !( expression ) ----
            // The parenthesised forms refer, by name, to the rule registered through
            // RecursionCreate further down.
            var parenthesised = new Sequence(
                    new Literal { MatchText = "(" },
                    new RecursionCall("RECURSIONEXPRESSION"))
                .Sequence(new Literal { MatchText = ")" });
            var invertedParenthesised = new CapturingGroup(
                "INVERTOR",
                new Sequence(
                    new Literal { MatchText = "!" },
                    new Sequence(
                            new Literal { MatchText = "(" },
                            new RecursionCall("RECURSIONEXPRESSION"))
                        .Sequence(new Literal { MatchText = ")" })));

            AExpression operand = new PrioritizedChoice(nameOrInvertedName, parenthesised).Or(invertedParenthesised);

            // ---- Root: operand (gate operand)*, then nothing may be left over. ----
            AExpression equation = new CapturingGroup(
                "BOOLEANEQUATION",
                new Sequence(
                    new RecursionCreate(
                        "RECURSIONEXPRESSION",
                        new Sequence(operand, new Sequence(gate, operand).Star())),
                    new NotPredicate(new AnyCharacter())));

            string input;
            ByteInputIterator iterator;
            NpegParserVisitor visitor;
            AstNode node;

            // Equations of the form  a * !b + !a * b  whose tree shape is verified.
            // Each row: input text, expected first variable name, expected second variable name.
            var shapeChecked = new[]
            {
                new[] { "A*!B+!A*B", "A", "B" },                 // single-letter variables
                new[] { "'aA'*!'bB'+!'aA'*'bB'", "aA", "bB" }    // quoted variables
            };

            foreach (var scenario in shapeChecked)
            {
                input = scenario[0];
                var first = scenario[1];
                var second = scenario[2];

                iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
                visitor = new NpegParserVisitor(iterator);
                equation.Accept(visitor);
                Assert.IsTrue(visitor.IsMatch);

                node = visitor.AST;
                Assert.IsTrue(node.Token.Name == "BOOLEANEQUATION");

                // a
                Assert.IsTrue(node.Children[0].Token.Name == "VARIABLE");
                Assert.IsTrue(node.Children[0].Token.ValueAsString(iterator) == first);
                // *
                Assert.IsTrue(node.Children[1].Token.Name == "GATE");
                Assert.IsTrue(node.Children[1].Token.ValueAsString(iterator) == "*");
                // !b
                Assert.IsTrue(node.Children[2].Token.Name == "INVERTOR");
                Assert.IsTrue(node.Children[2].Children[0].Token.Name == "VARIABLE");
                Assert.IsTrue(node.Children[2].Children[0].Token.ValueAsString(iterator) == second);
                // +
                Assert.IsTrue(node.Children[3].Token.Name == "GATE");
                // !a
                Assert.IsTrue(node.Children[4].Token.Name == "INVERTOR");
                Assert.IsTrue(node.Children[4].Children[0].Token.Name == "VARIABLE");
                Assert.IsTrue(node.Children[4].Children[0].Token.ValueAsString(iterator) == first);
                // *
                Assert.IsTrue(node.Children[5].Token.Name == "GATE");
                // b
                Assert.IsTrue(node.Children[6].Token.Name == "VARIABLE");
                Assert.IsTrue(node.Children[6].Token.ValueAsString(iterator) == second);
            }

            // The remaining equations are only required to match. The #warning lines are the
            // original author's reminders of a whole-input assertion that is not enabled.

            // Longer gate chains: operand followed by repeated (gate operand).
            input = "A*!B*C+!A*B*C";
            iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
            visitor = new NpegParserVisitor(iterator);
            equation.Accept(visitor);
            Assert.IsTrue(visitor.IsMatch);
            node = visitor.AST;
#warning             Assert.IsTrue(node.Token.Value == input);

            // Parentheses around operands, inversion inside the parentheses.
            input = "((A)*(!B)+(!A)*(B))";
            iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
            visitor = new NpegParserVisitor(iterator);
            equation.Accept(visitor);
            Assert.IsTrue(visitor.IsMatch);
            node = visitor.AST;
#warning            Assert.IsTrue(node.Token.Value == input);

            // Inversion applied to a parenthesised operand.
            input = "((A)*!(B)+!(A)*(B))";
            iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
            visitor = new NpegParserVisitor(iterator);
            equation.Accept(visitor);
            Assert.IsTrue(visitor.IsMatch);
            node = visitor.AST;
#warning            Assert.IsTrue(node.Token.Value == input);

            // Inverted parenthesised operand wrapped in another pair of parentheses.
            input = "((A)*(!(B))+(!(A))*(B))";
            iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
            visitor = new NpegParserVisitor(iterator);
            equation.Accept(visitor);
            Assert.IsTrue(visitor.IsMatch);
            node = visitor.AST;
#warning            Assert.IsTrue(node.Token.Value == input);

            // A single parenthesised product term.
            input = "(!X*Y*!Z)";
            iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
            visitor = new NpegParserVisitor(iterator);
            equation.Accept(visitor);
            Assert.IsTrue(visitor.IsMatch);
            node = visitor.AST;
#warning             Assert.IsTrue(node.Token.Value == input);

            // Sum of two product terms.
            input = "(!X*Y*!Z)+(!X*Y*Z)";
            iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
            visitor = new NpegParserVisitor(iterator);
            equation.Accept(visitor);
            Assert.IsTrue(visitor.IsMatch);
            node = visitor.AST;
#warning             Assert.IsTrue(node.Token.Value == input);

            // Short product term.
            input = "(X*Z)";
            iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
            visitor = new NpegParserVisitor(iterator);
            equation.Accept(visitor);
            Assert.IsTrue(visitor.IsMatch);
            node = visitor.AST;
#warning             Assert.IsTrue(node.Token.Value == input);

            // Sum of three product terms.
            input = "(!X*Y*!Z)+(!X*Y*Z)+(X*Z)";
            iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
            visitor = new NpegParserVisitor(iterator);
            equation.Accept(visitor);
            Assert.IsTrue(visitor.IsMatch);
            node = visitor.AST;
#warning            Assert.IsTrue(node.Token.Value == input);

            // Deeply nested parentheses around a sum of products.
            input = "((((!X*Y*Z)+(!X*Y*!Z)+(X*Z))))";
            iterator = new ByteInputIterator(Encoding.UTF8.GetBytes(input));
            visitor = new NpegParserVisitor(iterator);
            equation.Accept(visitor);
            Assert.IsTrue(visitor.IsMatch);
            node = visitor.AST;
#warning            Assert.IsTrue(node.Token.Value == input);
        }
Example #44
0
        /// <summary>
        /// Parses two consecutive tag pairs with a grammar whose closing tag is a
        /// <c>DynamicBackReference</c> to the captured <c>TAG</c> name, and requires a match.
        /// </summary>
        public void Terminal_DynamicBackReference()
        {
            // TAG: one or more alphanumerics.
            AExpression tagName = new CapturingGroup(
                "TAG",
                new OneOrMore(new CharacterClass { ClassExpression = "[a-zA-Z0-9]" }));

            // START_TAG: '<' TAG '>'
            AExpression openingTag = new CapturingGroup(
                "START_TAG",
                new Sequence(new Literal { MatchText = "<" }, tagName)
                    .Sequence(new Literal { MatchText = ">" }));

            // END_TAG: '</' (case-sensitive back reference named "TAG") '>'
            var sameNameAsOpened = new DynamicBackReference
            {
                BackReferenceName = "TAG",
                IsCaseSensitive = true
            };
            AExpression closingTag = new CapturingGroup(
                "END_TAG",
                new Sequence(new Literal { MatchText = "</" }, sameNameAsOpened)
                    .Sequence(new Literal { MatchText = ">" }));

            // Body: any characters, as long as END_TAG does not start at the current position.
            AExpression content = new CapturingGroup(
                "Body",
                new Sequence(new NotPredicate(closingTag), new AnyCharacter()).Star());

            // Expression: (START_TAG Body END_TAG)+
            AExpression elements = new CapturingGroup(
                "Expression",
                new Sequence(openingTag, content).Sequence(closingTag).Plus());

            String markup = "<h1>hello</h1><h2>hello</h2>";
            var source = new ByteInputIterator(Encoding.UTF8.GetBytes(markup));
            var parser = new NpegParserVisitor(source);

            elements.Accept(parser);

            Assert.IsTrue(parser.IsMatch);
            AstNode tree = parser.AST;
            #warning write tree
        }
Example #45
0
        /// <summary>
        /// Builds a small recursive grammar (terminals, sequences and '/'-separated choices,
        /// where a capturing group may contain a nested expression) and checks the AST produced
        /// for the input <c>(?&lt;NPEGNode&gt;./.. )</c>.
        /// </summary>
        public void CompositeVisitor_Recursiveness()
        {
            // Space, tab, carriage return, line feed and vertical tab.
            var whitespace = new CharacterClass { ClassExpression = "[ \t\r\n\v]" };

            // terminal: '.'  /  '(?<' name '>' Expression ')'
            // The second alternative re-enters the rule registered as "Expression" below.
            var terminal = new PrioritizedChoice(
                new CapturingGroup("AnyCharacter", new Literal { MatchText = "." }),
                new CapturingGroup(
                    "CapturingGroup",
                    new Sequence(
                            new Literal { MatchText = "(?<" },
                            new CapturingGroup(
                                "ReplacementNode",
                                new OneOrMore(new CharacterClass { ClassExpression = "[a-z0-9A-Z]" })))
                        .Sequence(new Literal { MatchText = ">" })
                        .Sequence(new RecursionCall("Expression"))
                        .Sequence(new Literal { MatchText = ")" })));

            // sequence: (terminal whitespace*)+
            // NOTE(review): DoReplaceBySingleChildNode presumably collapses a "Sequence" node
            // that has exactly one child into that child; the assertions below are consistent
            // with that reading - confirm against CapturingGroup.
            var sequence = new CapturingGroup(
                "Sequence",
                new Sequence(terminal, new ZeroOrMore(whitespace)).Plus())
            {
                DoReplaceBySingleChildNode = true
            };

            // prioritizedchoice: sequence '/' whitespace* sequence ((whitespace* '/' whitespace* sequence)+)*
            var prioritizedchoice = new CapturingGroup(
                "PrioritizedChoice",
                new Sequence(sequence, new Literal { MatchText = "/" })
                    .Sequence(new ZeroOrMore(whitespace))
                    .Sequence(sequence)
                    .Sequence(
                        new ZeroOrMore(
                            new Sequence(
                                    new ZeroOrMore(whitespace),
                                    new Literal { MatchText = "/" })
                                .Sequence(new ZeroOrMore(whitespace))
                                .Sequence(sequence)
                                .Plus())));

            // Root: registers "Expression" as prioritizedchoice / sequence.
            var expression = new CapturingGroup(
                "Root",
                new RecursionCreate("Expression", new PrioritizedChoice(prioritizedchoice, sequence)));

            var input    = @"(?<NPEGNode>./.. )";
            var bytes    = Encoding.UTF8.GetBytes(input);
            var iterator = new ByteInputIterator(bytes);
            var visitor  = new NpegParserVisitor(iterator);

            expression.Accept(visitor);

            Assert.IsTrue(visitor.IsMatch);
            AstNode node = visitor.AST;

            // Expected tree:
            //   Root
            //     CapturingGroup
            //       ReplacementNode
            //       PrioritizedChoice
            //         AnyCharacter                 (first alternative, ".")
            //         Sequence                     (second alternative, "..")
            //           AnyCharacter
            //           AnyCharacter
            Assert.IsTrue(node.Token.Name == "Root");
            // The original asserted this child count twice in a row; once is enough.
            Assert.IsTrue(node.Children.Count == 1);
            Assert.IsTrue(node.Children[0].Token.Name == "CapturingGroup");
            Assert.IsTrue(node.Children[0].Children.Count == 2);
            Assert.IsTrue(node.Children[0].Children[0].Token.Name == "ReplacementNode");
            Assert.IsTrue(node.Children[0].Children[1].Token.Name == "PrioritizedChoice");
            Assert.IsTrue(node.Children[0].Children[1].Children[0].Token.Name == "AnyCharacter");
            Assert.IsTrue(node.Children[0].Children[1].Children[1].Token.Name == "Sequence");
            Assert.IsTrue(node.Children[0].Children[1].Children[1].Children[0].Token.Name == "AnyCharacter");
            Assert.IsTrue(node.Children[0].Children[1].Children[1].Children[1].Token.Name == "AnyCharacter");
        }
Example #46
0
        /// <summary>
        /// Parses three levels of nested tags with a grammar whose body may re-enter the
        /// whole tag rule (registered as "MATCHXML") and whose closing tag is a
        /// <c>DynamicBackReference</c> to the captured <c>TAG</c> name; requires a match.
        /// </summary>
        public void Terminal_DynamicBackReference_Recursive()
        {
            // TAG: one or more alphanumerics.
            var tagName = new CapturingGroup(
                "TAG",
                new OneOrMore(new CharacterClass { ClassExpression = "[a-zA-Z0-9]" }));

            // START_TAG: '<' TAG '>'
            var openingTag = new CapturingGroup(
                "START_TAG",
                new Sequence(new Literal { MatchText = "<" }, tagName)
                    .Sequence(new Literal { MatchText = ">" }));

            // END_TAG: '</' (case-sensitive back reference named "TAG") '>'
            var sameNameAsOpened = new DynamicBackReference
            {
                BackReferenceName = "TAG",
                IsCaseSensitive = true
            };
            var closingTag = new CapturingGroup(
                "END_TAG",
                new Sequence(new Literal { MatchText = "</" }, sameNameAsOpened)
                    .Sequence(new Literal { MatchText = ">" }));

            // Body: (nested element / any character that does not start END_TAG)*
            var content = new CapturingGroup(
                "Body",
                new PrioritizedChoice(
                    new RecursionCall("MATCHXML"),
                    new Sequence(new NotPredicate(closingTag), new AnyCharacter()))
                .Star());

            // Expression: registers "MATCHXML" as (START_TAG Body END_TAG)+
            var document = new CapturingGroup(
                "Expression",
                new RecursionCreate(
                    "MATCHXML",
                    new Sequence(openingTag, content).Sequence(closingTag).Plus()));

            // Surrounding whitespace is trimmed before parsing; inner indentation is kept.
            String markup =
                @"
                    <test>
                        test data start
                        <test1>
                            test1 data start
                            <test2>
                                text2 data start
                                text2 data end
                            </test2>
                            test1 data end
                        </test1>
                        test data end
                    </test>
                ";

            var source = new ByteInputIterator(Encoding.UTF8.GetBytes(markup.Trim()));
            var parser = new NpegParserVisitor(source);

            document.Accept(parser);

            Assert.IsTrue(parser.IsMatch);
            AstNode tree = parser.AST;
        }
        /// <summary>
        /// Parses a lone <c>.</c> with a grammar that tries a prefixed terminal first, then a
        /// suffixed terminal, then the bare terminal, and checks that the resulting
        /// "Expression" node has a single "AnyCharacter" child.
        /// </summary>
        public void CompositeVisitor_CapturingGroup_SandBoxTest_PriorityChoice3()
        {
            // prefix: '&' / '!'
            var prefix = new PrioritizedChoice(
                new CapturingGroup("AndPredicate", new Literal { MatchText = "&" }),
                new CapturingGroup("NotPredicate", new Literal { MatchText = "!" }));

            // suffix: '*' / '+' / '?'
            PrioritizedChoice suffix = new PrioritizedChoice(
                    new CapturingGroup("ZeroOrMore", new Literal { MatchText = "*" }),
                    new CapturingGroup("OneOrMore", new Literal { MatchText = "+" }))
                .Or(new CapturingGroup("Optional", new Literal { MatchText = "?" }));

            var terminal = new CapturingGroup("AnyCharacter", new Literal { MatchText = "." });

            // Alternatives in priority order: prefix+ terminal, terminal suffix+, terminal.
            var prefixedTerminal = prefix.Plus().Sequence(terminal);
            var suffixedTerminal = terminal.Sequence(suffix.Plus());

            var expression = new CapturingGroup(
                "Expression",
                new PrioritizedChoice(prefixedTerminal, suffixedTerminal)
                    .Or(terminal)
                    .Plus());

            var text = ".";
            var source = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
            var parser = new NpegParserVisitor(source);
            expression.Accept(parser);

            Assert.IsTrue(parser.IsMatch);

            AstNode tree = parser.AST;
            Assert.IsTrue(tree.Children.Count == 1);
            Assert.IsTrue(tree.Token.Name == "Expression");
            Assert.IsTrue(tree.Token.ValueAsString(source) == ".");
            Assert.IsTrue(tree.Children[0].Token.Name == "AnyCharacter");
        }
Example #48
0
        /// <summary>
        /// Exercises <c>LimitingRepetition</c> over a digit character class with several Min/Max
        /// settings against the input "1234567890", and expects an
        /// <see cref="ArgumentException"/> both when no bound is set and when Max is below Min.
        /// </summary>
        public void Terminal_LimitingRepetition()
        {
            // Bound styles:
            //   min
            //   min max
            //   max
            //   math expression using back referencing {(\k<C2> - \k<C1>)+1} - variable length protocols
            //   (the last style is not exercised in this method)

            AExpression digit = new CharacterClass {ClassExpression = "[0-9]"};

            #region nonterminals

            var atLeastZero = new CapturingGroup("MinTrue", new LimitingRepetition(digit) {Min = 0});
            var atLeastFortyFour = new CapturingGroup("MinFalse", new LimitingRepetition(digit) {Min = 44});
            var atLeastFive = new CapturingGroup("MinTrue", new LimitingRepetition(digit) {Min = 5});
            var atMostFive = new CapturingGroup("MaxTrue", new LimitingRepetition(digit) {Max = 5});
            var fiveToSix = new CapturingGroup("MinMax", new LimitingRepetition(digit) {Min = 5, Max = 6});

            // The two configurations below are expected to raise ArgumentException when visited.
            var noBounds = new CapturingGroup("ExceptionNoMinMax", new LimitingRepetition(digit));
            var maxBelowMin = new CapturingGroup("ExceptionMaxLessThanMin",
                                                 new LimitingRepetition(digit) {Min = 5, Max = 0});

            #endregion

            string text = "1234567890";
            var source = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
            var parser = new NpegParserVisitor(source);

            // Min = 0
            atLeastZero.Accept(parser);
            Assert.IsTrue(parser.IsMatch);
            AstNode ast = parser.AST;

            // Min = 44 against ten digits: must not match.
            source.Index = 0;
            parser = new NpegParserVisitor(source);
            atLeastFortyFour.Accept(parser);
            Assert.IsFalse(parser.IsMatch);

            // Min = 5 with no Max: the capture is expected to span the whole input.
            source.Index = 0;
            parser = new NpegParserVisitor(source);
            atLeastFive.Accept(parser);
            Assert.IsTrue(parser.IsMatch);
            ast = parser.AST;
            Assert.IsTrue(ast.Token.ValueAsString(source) == text);

            // Max = 5
            source.Index = 0;
            parser = new NpegParserVisitor(source);
            atMostFive.Accept(parser);
            Assert.IsTrue(parser.IsMatch);
            ast = parser.AST;

            // Min = 5, Max = 6
            source.Index = 0;
            parser = new NpegParserVisitor(source);
            fiveToSix.Accept(parser);
            Assert.IsTrue(parser.IsMatch);
            ast = parser.AST;

            // Count how many of the invalid configurations raise ArgumentException.
            int argumentErrors = 0;

            try
            {
                source.Index = 0;
                parser = new NpegParserVisitor(source);
                noBounds.Accept(parser);
                Assert.IsTrue(parser.IsMatch);
                ast = parser.AST;
            }
            catch (ArgumentException)
            {
                argumentErrors++;
            }

            try
            {
                source.Index = 0;
                parser = new NpegParserVisitor(source);
                maxBelowMin.Accept(parser);
                Assert.IsTrue(parser.IsMatch);
                ast = parser.AST;
            }
            catch (ArgumentException)
            {
                argumentErrors++;
            }

            Assert.IsTrue(argumentErrors == 2);
        }
        /// <summary>
        /// Nests one recursive rule ("&lt;" ... "&gt;" around digits) inside another
        /// ("(" ... ")") and checks the resulting tree for "(((&lt;&lt;&lt;123&gt;&gt;&gt;)))":
        /// three PENCLOSED levels followed by three LTENCLOSED levels, each with a single child.
        /// </summary>
        public void CompositeVisitor_NestedRecursive()
        {
            #region Composite

            var digits = new CapturingGroup("DIGITS", new OneOrMore(new CharacterClass {ClassExpression = "[0-9]"}));

            // RECURSIONLTENCLOSED := DIGITS / ("<" RECURSIONLTENCLOSED ">")
            var angleRule = new RecursionCreate(
                "RECURSIONLTENCLOSED",
                new PrioritizedChoice(
                    digits,
                    new CapturingGroup(
                        "LTENCLOSED",
                        new Sequence(new Literal {MatchText = "<"}, new RecursionCall("RECURSIONLTENCLOSED"))
                            .Sequence(new Literal {MatchText = ">"}))));

            // RECURSIONPENCLOSED := RECURSIONLTENCLOSED / ("(" RECURSIONPENCLOSED ")")
            var parenRule = new RecursionCreate(
                "RECURSIONPENCLOSED",
                new PrioritizedChoice(
                    angleRule,
                    new CapturingGroup(
                        "PENCLOSED",
                        new Sequence(new Literal {MatchText = "("}, new RecursionCall("RECURSIONPENCLOSED"))
                            .Sequence(new Literal {MatchText = ")"}))));

            AExpression grammar = new CapturingGroup("NESTEDRECURSIONTEST", parenRule);

            #endregion

            var text = "(((<<<123>>>)))";
            var source = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
            var parser = new NpegParserVisitor(source);
            grammar.Accept(parser);
            Assert.IsTrue(parser.IsMatch);

            AstNode root = parser.AST;
            Assert.IsTrue(root.Token.ValueAsString(source) == text);
            Assert.IsTrue(root.Token.Name == "NESTEDRECURSIONTEST");
            Assert.IsTrue(root.Children.Count == 1);

            // Walk down the single-child chain one level at a time.
            var paren1 = root.Children[0];
            Assert.IsTrue(paren1.Token.Name == "PENCLOSED");
            Assert.IsTrue(paren1.Children.Count == 1);

            var paren2 = paren1.Children[0];
            Assert.IsTrue(paren2.Token.Name == "PENCLOSED");
            Assert.IsTrue(paren2.Children.Count == 1);

            var paren3 = paren2.Children[0];
            Assert.IsTrue(paren3.Token.Name == "PENCLOSED");
            Assert.IsTrue(paren3.Children.Count == 1);

            var angle1 = paren3.Children[0];
            Assert.IsTrue(angle1.Token.Name == "LTENCLOSED");
            Assert.IsTrue(angle1.Children.Count == 1);

            var angle2 = angle1.Children[0];
            Assert.IsTrue(angle2.Token.Name == "LTENCLOSED");
            Assert.IsTrue(angle2.Children.Count == 1);

            // The innermost LTENCLOSED is only checked for having one child; that child's name is not asserted.
            var angle3 = angle2.Children[0];
            Assert.IsTrue(angle3.Token.Name == "LTENCLOSED");
            Assert.IsTrue(angle3.Children.Count == 1);
        }
Example #50
0
        /// <summary>
        /// Checks that the literal "Hello World" does not match "hello world" with its initial
        /// settings, and does match once <c>IsCaseSensitive</c> is set to false.
        /// </summary>
        public void Terminal_Literal()
        {
            var greeting = new Literal {MatchText = "Hello World"};

            // First pass: literal as constructed; differently-cased input must be rejected.
            var text = "hello world";
            var source = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
            var parser = new NpegParserVisitor(source);
            greeting.Accept(parser);
            Assert.IsFalse(parser.IsMatch);

            // Second pass: case-insensitive matching, fresh iterator and visitor over the same text.
            greeting.IsCaseSensitive = false;

            text = "hello world";
            source = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
            parser = new NpegParserVisitor(source);
            greeting.Accept(parser);
            Assert.IsTrue(parser.IsMatch);
        }
        /// <summary>
        /// Loads a textual grammar whose end tag uses a <c>\k&lt;Tag&gt;</c> back reference and
        /// parses a three-level nested XML-like document with it.
        /// The method currently always finishes by throwing <see cref="NotImplementedException"/>
        /// (a deliberate placeholder, per the exception message) once the match assertion passes.
        /// </summary>
        public void PEGrammar_DynamicBackReference_Xml()
        {
            var rules = @"
                    (?<Tag>): [a-zA-Z0-9]+;
                    (?<StartTag>): '<' Tag '>';
                    (?<EndTag>): '</' \k<Tag> '>' ;
                    (?<Body>): (Xml / (!EndTag .))+;
                    (?<Xml>): (StartTag Body EndTag )+;
            ";

            // Leading/trailing whitespace is trimmed so the document starts at "<test>".
            var document = @"
                    <test>
                        test data start
                        <test1>
                            test1 data start
                            <test2>
                                text2 data start
                                text2 data end
                            </test2>
                            test1 data end
                        </test1>
                        test data end
                    </test>
            ".Trim();

            var xml = PEGrammar.Load(rules);
            var source = new ByteInputIterator(Encoding.UTF8.GetBytes(document));
            var parser = new NpegParserVisitor(source);
            xml.Accept(parser);
            Assert.IsTrue(parser.IsMatch);
            AstNode ast = parser.AST;

            // Placeholder failure kept on purpose; see the message.
            throw new NotImplementedException("Refactoring - plan on changing backreferencing logic inside NPEGParser - just placeholder of failing test for now; conserve memory");
        }
        /// <summary>
        /// Loads a three-rule phone number grammar from text and checks that "123-456-7890"
        /// yields a "PhoneNumber" node with children "ThreeDigitCode" (123),
        /// "ThreeDigitCode" (456) and "FourDigitCode" (7890).
        /// </summary>
        public void PEGrammar_PhoneNumber()
        {
            var text = "123-456-7890";

            var rules = @"
                        (?<ThreeDigitCode>): [0-9] [0-9] [0-9];
                        (?<FourDigitCode>): [0-9] [0-9] [0-9] [0-9];
                        (?<PhoneNumber>): ThreeDigitCode '-' ThreeDigitCode '-' FourDigitCode;
                    ".Trim();
            var phoneNumber = PEGrammar.Load(rules);

            var source = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
            var parser = new NpegParserVisitor(source);
            phoneNumber.Accept(parser);

            Assert.IsTrue(parser.IsMatch);

            AstNode root = parser.AST;
            Assert.IsTrue(root.Token.Name == "PhoneNumber");
            Assert.IsTrue(root.Token.ValueAsString(source) == text);

            var first = root.Children[0];
            Assert.IsTrue(first.Token.Name == "ThreeDigitCode");
            Assert.IsTrue(first.Token.ValueAsString(source) == "123");

            var second = root.Children[1];
            Assert.IsTrue(second.Token.Name == "ThreeDigitCode");
            Assert.IsTrue(second.Token.ValueAsString(source) == "456");

            var third = root.Children[2];
            Assert.IsTrue(third.Token.Name == "FourDigitCode");
            Assert.IsTrue(third.Token.ValueAsString(source) == "7890");
        }
        /// <summary>
        /// Same phone number check as the hand-counted variant, but the grammar text uses the
        /// repetition forms <c>{3,3}</c> and <c>{4}</c> and an inline "FourDigitCode" capture.
        /// </summary>
        public void PEGrammar_LimitingRepetition()
        {
            var rules = @"
                                (?<ThreeDigitCode>): [0-9]{3,3};
                                (?<PhoneNumber>): ThreeDigitCode '-' ThreeDigitCode '-' (?<FourDigitCode>[0-9]{4});
                              ";

            var phoneNumber = PEGrammar.Load(rules);

            var text = "123-456-7890";
            var source = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
            var parser = new NpegParserVisitor(source);
            phoneNumber.Accept(parser);
            Assert.IsTrue(parser.IsMatch);
            AstNode root = parser.AST;

            Assert.IsTrue(root.Token.Name == "PhoneNumber");
            Assert.IsTrue(root.Token.ValueAsString(source) == text);

            // Expected children, in order: capture name and captured text.
            string[] expectedNames = {"ThreeDigitCode", "ThreeDigitCode", "FourDigitCode"};
            string[] expectedValues = {"123", "456", "7890"};
            for (var i = 0; i < expectedNames.Length; i++)
            {
                Assert.IsTrue(root.Children[i].Token.Name == expectedNames[i]);
                Assert.IsTrue(root.Children[i].Token.ValueAsString(source) == expectedValues[i]);
            }
        }
        /// <summary>
        /// Hand-builds a recursive XML-like grammar (start tag, body, end tag) in which the end
        /// tag refers back to the "TAG" capture through a <c>DynamicBackReference</c>, and
        /// checks that a three-level nested document matches.
        /// </summary>
        public void Terminal_DynamicBackReference_Recursive()
        {
            string document =
                @"
                    <test>
                        test data start
                        <test1>
                            test1 data start
                            <test2>
                                text2 data start
                                text2 data end
                            </test2>
                            test1 data end
                        </test1>
                        test data end
                    </test>
                ";

            var tagName = new CapturingGroup("TAG", new OneOrMore(new CharacterClass {ClassExpression = "[a-zA-Z0-9]"}));

            // "<" TAG ">"
            var openingTag = new CapturingGroup(
                "START_TAG",
                new Sequence(new Literal {MatchText = "<"}, tagName)
                    .Sequence(new Literal {MatchText = ">"}));

            // "</" <back reference named "TAG", case sensitive> ">"
            var closingTag = new CapturingGroup(
                "END_TAG",
                new Sequence(
                        new Literal {MatchText = "</"},
                        new DynamicBackReference {BackReferenceName = "TAG", IsCaseSensitive = true})
                    .Sequence(new Literal {MatchText = ">"}));

            // Body: zero or more of either a nested MATCHXML call or any character that does not start an END_TAG.
            var nestedOrCharacter = new PrioritizedChoice(
                new RecursionCall("MATCHXML"),
                new Sequence(new NotPredicate(closingTag), new AnyCharacter()));
            var content = new CapturingGroup("Body", nestedOrCharacter.Star());

            // MATCHXML: one or more (START_TAG Body END_TAG).
            var element = new Sequence(openingTag, content).Sequence(closingTag);
            var xml = new CapturingGroup("Expression", new RecursionCreate("MATCHXML", element.Plus()));

            // The surrounding whitespace of the verbatim string is trimmed before parsing.
            var source = new ByteInputIterator(Encoding.UTF8.GetBytes(document.Trim()));
            var parser = new NpegParserVisitor(source);

            xml.Accept(parser);
            Assert.IsTrue(parser.IsMatch);
            AstNode ast = parser.AST;
        }
        /// <summary>
        /// Loads a grammar whose repetition count is written as an expression over two captured
        /// bytes, <c>{(\k&lt;C2&gt; - \k&lt;C1&gt;)+1}</c>, and checks where the "ESC_AMP_Y" token ends
        /// for two binary inputs: one consumed completely, one with a trailing byte left over.
        /// </summary>
        public void PEGrammar_LimitingRepetition_VariableExpression()
        {
            var rules = @"
                    (?<ESC_AMP_Y>): . . . (?<C1>.) (?<C2>.)
                    (
                        ((?<X> .) (?<D> .{3}))
                    ){(\k<C2> - \k<C1>)+1};

             ";

            var escAmpY = PEGrammar.Load(rules);

            // Case 1: C1 = 0x01, C2 = 0x01, followed by a single X D D D group.
            var exactPayload = new byte[]
            {
                0x00, 0x00, 0x00,       // .  .  .
                0x01, 0x01,             // C1 C2
                0x00, 0x00, 0x00, 0x00  // X  D  D  D
            };
            var exactSource = new ByteInputIterator(exactPayload);
            var exactParser = new NpegParserVisitor(exactSource);
            escAmpY.Accept(exactParser);
            Assert.IsTrue(exactParser.IsMatch);
            AstNode exactAst = exactParser.AST;
            Assert.IsTrue(exactAst.Token.Name == "ESC_AMP_Y");
            Assert.IsTrue(exactAst.Token.End == exactPayload.Length - 1); // End is a zero-based index

            // Case 2: C1 = 0x01, C2 = 0x02, two X D D D groups and one extra trailing byte.
            var paddedPayload = new byte[]
            {
                0x00, 0x00, 0x00,        // .  .  .
                0x01, 0x02,              // C1 C2
                0x00, 0x00, 0x00, 0x00,  // X  D  D  D
                0x00, 0x00, 0x00, 0x00,  // X  D  D  D
                0x00                     // extra byte
            };
            var paddedSource = new ByteInputIterator(paddedPayload);
            var paddedParser = new NpegParserVisitor(paddedSource);
            escAmpY.Accept(paddedParser);
            Assert.IsTrue(paddedParser.IsMatch);
            AstNode paddedAst = paddedParser.AST;

            Assert.IsTrue(paddedAst.Token.Name == "ESC_AMP_Y");
            // Zero-based index again; the extra trailing byte is expected to stay unconsumed.
            Assert.IsTrue(paddedAst.Token.End == paddedPayload.Length - 2);
        }
Example #56
0
        /// <summary>
        /// Hand-builds the grammar for a back-reference token of the form
        /// <c>\k&lt; label [\i] &gt;</c> (whitespace and comments allowed around the label, the
        /// "[\i]" part optional) and checks that <c>\k&lt; CapturedLabelVariableName &gt;</c> is
        /// parsed into Test / DynamicBackReferencing / Label.
        /// </summary>
        public void CompositeVisitor_CapturingGroup_SandBoxTest_PriorityChoice1()
        {
            // Line terminators, in priority order.
            // NOTE(review): classic Mac OS used a single "\r"; confirm "\r\r" is intended.
            var windowsOrOldMac = new PrioritizedChoice(
                new Literal {MatchText = "\r\n"}, // windows
                new Literal {MatchText = "\r\r"}  // old macs
            );
            PrioritizedChoice lineBreak = windowsOrOldMac.Or(new Literal {MatchText = "\n"}); // linux

            // Single line comment: "//" then any characters that do not start a line break.
            var lineCommentOpen = new Literal {MatchText = "//"};
            var lineCommentBody = new Sequence(new NotPredicate(lineBreak), new AnyCharacter()).Star();
            var lineComment = new Sequence(lineCommentOpen, lineCommentBody);

            // Multiline comment: "/*" then any characters that do not start "*/", then "*/".
            var blockCommentOpen = new Literal {MatchText = "/*"};
            var blockCommentRest = new Sequence(new NotPredicate(new Literal {MatchText = "*/"}), new AnyCharacter())
                .Star()
                .Sequence(new Literal {MatchText = "*/"});
            var blockComment = new Sequence(blockCommentOpen, blockCommentRest);

            var anyComment = new PrioritizedChoice(lineComment, blockComment);

            // A blank is one whitespace character or a whole comment.
            var blank = new PrioritizedChoice(new CharacterClass {ClassExpression = "[ \t\r\n\v]"}, anyComment);

            // Label: first character from [a-zA-Z_], remaining characters from [a-zA-Z0-9_].
            var identifier = new CapturingGroup(
                "Label",
                new Sequence(
                    new CharacterClass {ClassExpression = "[a-zA-Z_]"},
                    new ZeroOrMore(new CharacterClass {ClassExpression = "[a-zA-Z0-9_]"})));

            // \k< blanks* Label blanks*
            var opening = new Sequence(
                new Literal {MatchText = @"\k<"},
                new Sequence(new ZeroOrMore(blank), identifier).Sequence(new ZeroOrMore(blank)));

            // Optional "[\i]" block; the \i itself is captured as "CaseSensitive".
            var caseOption = new Optional(
                new Sequence(
                    new Sequence(
                        new Literal {MatchText = "["},
                        new CapturingGroup("CaseSensitive", new Literal {MatchText = @"\i"})),
                    new Literal {MatchText = "]"}));
            var openingWithOption = opening.Sequence(caseOption);

            // blanks* ">"
            var closing = new Sequence(new ZeroOrMore(blank), new Literal {MatchText = ">"});

            var backReference = new CapturingGroup("DynamicBackReferencing", openingWithOption.Sequence(closing));

            // The back reference must be followed by end of input (no further character).
            var grammar = new CapturingGroup(
                "Test",
                new Sequence(backReference, new NotPredicate(new AnyCharacter())));

            var text = @"\k< CapturedLabelVariableName >";
            var source = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
            var parser = new NpegParserVisitor(source);

            grammar.Accept(parser);
            Assert.IsTrue(parser.IsMatch);
            AstNode root = parser.AST;

            Assert.IsTrue(root.Token.Name == "Test");

            var backReferenceNode = root.Children[0];
            Assert.IsTrue(backReferenceNode.Token.Name == "DynamicBackReferencing");

            var labelNode = backReferenceNode.Children[0];
            Assert.IsTrue(labelNode.Token.Name == "Label");
            Assert.IsTrue(labelNode.Token.ValueAsString(source) == "CapturedLabelVariableName");
        }
        /// <summary>
        /// Loads an arithmetic grammar (Value / Product / Sum / Expr, with Expr referenced from
        /// inside Value) from text and checks that a deeply parenthesized formula matches and
        /// that the root token spans the whole input. The shape of the tree is not asserted.
        /// </summary>
        public void PEGrammar_MathematicalFormula_Recursion()
        {
            var rules = @"
                    (?<Value>): [0-9]+ / '(' Expr ')';
                    (?<Product>): Value ((?<Symbol>'*' / '/') Value)*;
                    (?<Sum>): Product ((?<Symbol>'+' / '-') Product)*;
                    (?<Expr>): Sum;
                ";
            AExpression formula = PEGrammar.Load(rules);

            string text = "((((12/3)+5-2*(81/9))+1))";
            var source = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
            var parser = new NpegParserVisitor(source);

            formula.Accept(parser);
            Assert.IsTrue(parser.IsMatch);

            AstNode root = parser.AST;
            Assert.IsTrue(root.Token.ValueAsString(source) == text);
            #warning does not specify expected tree
        }
        /// <summary>
        /// Hand-builds an arithmetic grammar (value, product, sum) in which a parenthesized
        /// value calls back into the "ParethesisFunction" recursion, and checks that a deeply
        /// parenthesized formula matches with the root token spanning the whole input.
        /// </summary>
        public void PracticalExample_MathematicalFormula()
        {
            #region Composite

            // operand := VALUE([0-9]+) / "(" ParethesisFunction ")"
            var operand = new PrioritizedChoice(
                new CapturingGroup("VALUE", new OneOrMore(new CharacterClass {ClassExpression = "[0-9]"})),
                new Sequence(new Literal {MatchText = "("}, new RecursionCall("ParethesisFunction"))
                    .Sequence(new Literal {MatchText = ")"}));

            // product := operand (SYMBOL("*" / "/") operand)*
            var product = new Sequence(
                operand,
                new Sequence(
                    new CapturingGroup(
                        "SYMBOL",
                        new PrioritizedChoice(new Literal {MatchText = "*"}, new Literal {MatchText = "/"})),
                    operand).Star());

            // sum := product (SYMBOL("+" / "-") product)*
            var sum = new Sequence(
                product,
                new Sequence(
                    new CapturingGroup(
                        "SYMBOL",
                        new PrioritizedChoice(new Literal {MatchText = "+"}, new Literal {MatchText = "-"})),
                    product).Star());

            // The recursion target that the parenthesized operand calls into.
            AExpression formula = new RecursionCreate("ParethesisFunction", new CapturingGroup("EXPRESSION", sum));

            #endregion

            var text = "((((12/3)+5-2*(81/9))+1))";
            var source = new ByteInputIterator(Encoding.UTF8.GetBytes(text));
            var parser = new NpegParserVisitor(source);

            formula.Accept(parser);
            Assert.IsTrue(parser.IsMatch);

            AstNode root = parser.AST;
            Assert.IsTrue(root.Token.ValueAsString(source) == text);
        }
        /// <summary>
        /// Loads a textual grammar in which "ParethesisFunction" is either digits or itself
        /// wrapped in parentheses, and checks that "((((((123))))))" matches with the root
        /// token spanning the whole input.
        /// </summary>
        public void PEGrammar_RecursiveParentheses()
        {
            var text = "((((((123))))))";
            var payload = Encoding.UTF8.GetBytes(text);

            var rules = @"
                        (?<DIGITS>): ([0-9])+;
                        (?<ENCLOSEDDIGITS>): '(' ParethesisFunction ')';
                        ParethesisFunction: (DIGITS / ENCLOSEDDIGITS);
                        (?<RECURSIONTEST>): ParethesisFunction;
                    ".Trim();
            AExpression grammar = PEGrammar.Load(rules);

            var source = new ByteInputIterator(payload);
            var parser = new NpegParserVisitor(source);
            grammar.Accept(parser);
            Assert.IsTrue(parser.IsMatch);

            AstNode root = parser.AST;
            Assert.IsTrue(root.Token.ValueAsString(source) == text);
        }
Example #60
0
        /// <summary>
        /// Exercises <c>CodePoint</c> with binary ("#b...") patterns: exact single- and two-byte
        /// matches, patterns that are not a whole number of bytes long, patterns containing
        /// "X" placeholder bits, and the no-match cases of a non-matching byte and empty input.
        /// </summary>
        public void Terminal_CodePoint_Binary()
        {
            const string boundaryMessage =
                "During incomplete byte boundaries 0 is expected to prefix input;  This would shift input to the right by 4 bits.  In this case it complete codepoint should be null and letter a.";

            // Sanity check: 'a' is 97 decimal, 0x61 hexadecimal.
            Assert.IsTrue((Byte) 'a' == 97);
            Assert.IsTrue((Byte) 'a' == 0x61);

            // Seven-bit pattern 1100001 (= 97) against "a".
            var text = "a";
            var payload = Encoding.UTF8.GetBytes(text);
            var source = new ByteInputIterator(payload);
            var parser = new NpegParserVisitor(source);
            var pattern = new CapturingGroup("Binary", new CodePoint {Match = "#b1100001"});
            pattern.Accept(parser);
            Assert.IsTrue(parser.IsMatch);
            AstNode captured = parser.AST;
            Assert.IsTrue(captured.Token.Name == "Binary");
            Assert.IsTrue(captured.Token.ValueAsString(source) == "a");

            // Sixteen-bit pattern 01100001 01100001 against "aa".
            text = "aa";
            payload = Encoding.UTF8.GetBytes(text);
            source = new ByteInputIterator(payload);
            parser = new NpegParserVisitor(source);
            pattern = new CapturingGroup("Binary", new CodePoint {Match = "#b0110000101100001"});
            pattern.Accept(parser);
            Assert.IsTrue(parser.IsMatch);
            captured = parser.AST;
            Assert.IsTrue(captured.Token.Name == "Binary");
            Assert.IsTrue(captured.Token.ValueAsString(source) == "aa");

            // Byte boundary tests: an eleven-bit pattern against the two bytes "\0a".
            text = "\0a";
            payload = Encoding.UTF8.GetBytes(text);
            source = new ByteInputIterator(payload);
            parser = new NpegParserVisitor(source);
            pattern = new CapturingGroup("Binary", new CodePoint {Match = "#b00001100001"});
            pattern.Accept(parser);
            Assert.IsTrue(parser.IsMatch, boundaryMessage);
            captured = parser.AST;
            Assert.IsTrue(captured.Token.Name == "Binary");
            Assert.IsTrue(captured.Token.ValueAsString(source) == "\0a");

            // Same input, matched by a three-bit code point followed by an eight-bit one.
            text = "\0a";
            payload = Encoding.UTF8.GetBytes(text);
            source = new ByteInputIterator(payload);
            parser = new NpegParserVisitor(source);
            pattern = new CapturingGroup(
                "Binary",
                new Sequence(new CodePoint {Match = "#b000"}, new CodePoint {Match = "#b01100001"}));
            pattern.Accept(parser);
            Assert.IsTrue(parser.IsMatch, boundaryMessage);
            captured = parser.AST;
            Assert.IsTrue(captured.Token.Name == "Binary");
            Assert.IsTrue(captured.Token.ValueAsString(source) == "\0a");

            // Don't care tests: the first three bytes end in the bits 0001, the fourth (0x03) does not,
            // so the repeated #bXXXX0001 pattern is expected to capture exactly the first three bytes.
            text = Encoding.ASCII.GetString(new byte[] { 0x11, 0x01, 0x71, 0x03, 0x00 });
            payload = Encoding.UTF8.GetBytes(text);
            source = new ByteInputIterator(payload);
            parser = new NpegParserVisitor(source);
            pattern = new CapturingGroup("Binary", new OneOrMore(new CodePoint {Match = "#bXXXX0001"}));
            pattern.Accept(parser);
            Assert.IsTrue(parser.IsMatch);
            captured = parser.AST;
            Assert.IsTrue(captured.Token.Name == "Binary");
            Assert.IsTrue(captured.Token.ValueAsString(source) == Encoding.ASCII.GetString(new byte[] { 0x11, 0x01, 0x71 }));

            // 0x10 ends in the bits 0000, so #bXXXX0001 must not match.
            text = Encoding.ASCII.GetString(new byte[] { 0x10 });
            payload = Encoding.UTF8.GetBytes(text);
            source = new ByteInputIterator(payload);
            parser = new NpegParserVisitor(source);
            pattern = new CapturingGroup("Binary", new CodePoint {Match = "#bXXXX0001"});
            pattern.Accept(parser);
            Assert.IsFalse(parser.IsMatch);

            // Empty input: there is no character to consume, so the pattern must not match.
            text = "";
            payload = Encoding.UTF8.GetBytes(text);
            source = new ByteInputIterator(payload);
            parser = new NpegParserVisitor(source);
            pattern = new CapturingGroup("Binary", new CodePoint {Match = "#bXXXX0001"});
            pattern.Accept(parser);
            Assert.IsFalse(parser.IsMatch);
        }