Esempio n. 1
0
        /// <summary>
        /// Tokenizes "A B C D" with a single-token pattern for "B" listed ahead of a
        /// two-token pattern for "B C", and expects four separate tokens ("A", "B", "C", "D"),
        /// i.e. the two-token pattern is never the one that produces a token.
        /// </summary>
        /// <remarks>
        /// Every pattern allows surrounding whitespace, yet the expected tokens carry none.
        /// </remarks>
        public void SimpleAlphanumericWithEmbeddedWhitespaceInterception()
        {
            var input = "A B C D";
            var patterns = new[]
            {
                "\\s*A\\s*",
                "\\s*B\\s*",      // Should overshadow the "double" match that follows
                "\\s*B\\s*C\\s*", // We "hop over" this one because the single-token pattern above "intercepts" it
                "\\s*C\\s*",
                "\\s*D\\s*"
            };

            var lexer = new GELexer(input, patterns);

            // We get 4 tokens; the double pattern is passed over.
            var first  = lexer.NextToken();
            var second = lexer.NextToken();
            var third  = lexer.NextToken();
            var fourth = lexer.NextToken();

            // Expected value goes first so a failure message labels the values correctly.
            Assert.AreEqual("A", first);
            Assert.AreEqual("B", second);
            Assert.AreEqual("C", third);
            Assert.AreEqual("D", fourth);
        }
Esempio n. 2
0
        /// <summary>
        /// Tokenizes "A B C D" with a two-token pattern for "B C" and no stand-alone pattern
        /// for "B", and expects three tokens: "A", "B C" and "D".
        /// </summary>
        /// <remarks>
        /// The two-token pattern is listed first in the pattern array, ahead of the pattern
        /// for "A"; the test still expects "A" to be returned as the first token.
        /// </remarks>
        public void SimpleAlphanumericWithEmbeddedWhitespaceAlternateOrder()
        {
            var input = "A B C D";
            var patterns = new[]
            {
                "\\s*B\\s*C\\s*", // Should overshadow the single "C" match further down
                "\\s*A\\s*",
                "\\s*C\\s*",      // We "hop over" this one because of the two-part match in the first pattern
                "\\s*D\\s*"
            };

            var lexer = new GELexer(input, patterns);

            // Should only get 3 tokens instead of 4.
            var first  = lexer.NextToken();
            var second = lexer.NextToken();
            var third  = lexer.NextToken();

            // Expected value goes first so a failure message labels the values correctly.
            Assert.AreEqual("A", first);
            Assert.AreEqual("B C", second);
            Assert.AreEqual("D", third);
        }
Esempio n. 3
0
        /// <summary>
        /// Tokenizes "A B C D" with the patterns for "C" and "B" listed in swapped order and
        /// expects the tokens to come back in input order: "A", "B", "C", "D".
        /// </summary>
        public void SimpleAlphanumericWithSurroundingWhitespace()
        {
            var input = "A B C D";
            var patterns = new[]
            {
                "\\s*A\\s*",
                "\\s*C\\s*", // swapped with the pattern below
                "\\s*B\\s*", // swapped with the pattern above
                "\\s*D\\s*"
            };

            var lexer = new GELexer(input, patterns);

            var first  = lexer.NextToken();
            var second = lexer.NextToken();
            var third  = lexer.NextToken();
            var fourth = lexer.NextToken();

            // Expected value goes first so a failure message labels the values correctly.
            Assert.AreEqual("A", first);
            Assert.AreEqual("B", second); // swapped in the pattern array, should still be found in input order
            Assert.AreEqual("C", third);  // swapped in the pattern array, should still be found in input order
            Assert.AreEqual("D", fourth);
        }
Esempio n. 4
0
        /// <summary>
        /// Tokenizes "A B C D" with four literal single-letter patterns that contain no
        /// whitespace handling of their own, and expects the tokens "A", "B", "C", "D" in order.
        /// </summary>
        public void SimpleAlphanumericExplicit()
        {
            var input    = "A B C D";
            var patterns = new[] { "A", "B", "C", "D" };

            var lexer = new GELexer(input, patterns);

            var first  = lexer.NextToken();
            var second = lexer.NextToken();
            var third  = lexer.NextToken();
            var fourth = lexer.NextToken();

            // Expected value goes first so a failure message labels the values correctly.
            Assert.AreEqual("A", first);
            Assert.AreEqual("B", second);
            Assert.AreEqual("C", third);
            Assert.AreEqual("D", fourth);
        }
Esempio n. 5
0
        /// <summary>
        /// Tokenizes each rule of a three-rule lawnmower grammar with DEFAULT_REGEXES and checks
        /// the exact token sequence produced for every rule, followed by a null token once the
        /// rule text is exhausted.
        /// </summary>
        public void TokenizeLawnmowerADF0Grammar()
        {
            var patterns = DEFAULT_REGEXES;

            var rules = new[]
            {
                "<start> ::= <op>",
                "<op> ::= (progn2 <op> <op>) | (v8a <op> <op>)",
                "<op> ::= (mow) | (left) | (ERC)"
            };

            // expectedTokens[r] lists, in order, the tokens expected from rules[r].
            var expectedTokens = new[]
            {
                new[] { "<start>", "::=", "<op>" },
                new[]
                {
                    "<op>", "::=",
                    "(", "progn2", "<op>", "<op>", ")", "|",
                    "(", "v8a", "<op>", "<op>", ")"
                },
                new[]
                {
                    "<op>", "::=",
                    "(", "mow", ")", "|",
                    "(", "left", ")", "|",
                    "(", "ERC", ")"
                }
            };

            for (var r = 0; r < rules.Length; r++)
            {
                // A fresh lexer is created per rule, each rule being tokenized independently.
                var lexer    = new GELexer(rules[r], patterns);
                var expected = expectedTokens[r];

                for (var i = 0; i < expected.Length; i++)
                {
                    // Expected value first; the message pinpoints the failing rule and position.
                    Assert.AreEqual(expected[i], lexer.NextToken(),
                                    "rule " + (r + 1) + ", token " + (i + 1));
                }

                // After the last token the lexer is expected to report end of input with null.
                Assert.IsNull(lexer.NextToken(), "rule " + (r + 1) + ", after last token");
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Tokenizes each line of a regression grammar (a comment line plus four rules) with
        /// DEFAULT_REGEXES and checks the exact token sequence produced for every line,
        /// followed by a null token once the line is exhausted.
        /// </summary>
        /// <remarks>
        /// The rule texts deliberately vary their spacing (no spaces around "::=" or between
        /// adjacent symbols, and padded parentheses); the expected tokens are the same as for
        /// conventionally spaced text. The leading "#" line is expected back as one token.
        /// </remarks>
        public void TokenizeRegressionGrammar()
        {
            var patterns = DEFAULT_REGEXES;

            var rules = new[]
            {
                "# grammar of the regression problem including ERCs",
                "<start> ::= <op>",
                "<op> ::= (+ <op><op>)|(-<op><op>) |(* <op> <op>) | (% <op> <op>)",
                "<op>::=(sin <op>) | (cos <op>) | (exp <op>) | (rlog <op>)",
                "<op>::=(x) | (   ERC     )"
            };

            // expectedTokens[r] lists, in order, the tokens expected from rules[r].
            var expectedTokens = new[]
            {
                new[] { "# grammar of the regression problem including ERCs" },
                new[] { "<start>", "::=", "<op>" },
                new[]
                {
                    "<op>", "::=",
                    "(", "+", "<op>", "<op>", ")", "|",
                    "(", "-", "<op>", "<op>", ")", "|",
                    "(", "*", "<op>", "<op>", ")", "|",
                    "(", "%", "<op>", "<op>", ")"
                },
                new[]
                {
                    "<op>", "::=",
                    "(", "sin", "<op>", ")", "|",
                    "(", "cos", "<op>", ")", "|",
                    "(", "exp", "<op>", ")", "|",
                    "(", "rlog", "<op>", ")"
                },
                new[]
                {
                    "<op>", "::=",
                    "(", "x", ")", "|",
                    "(", "ERC", ")"
                }
            };

            for (var r = 0; r < rules.Length; r++)
            {
                // A fresh lexer is created per rule, each rule being tokenized independently.
                var lexer    = new GELexer(rules[r], patterns);
                var expected = expectedTokens[r];

                for (var i = 0; i < expected.Length; i++)
                {
                    // Expected value first; the message pinpoints the failing rule and position.
                    Assert.AreEqual(expected[i], lexer.NextToken(),
                                    "rule " + (r + 1) + ", token " + (i + 1));
                }

                // After the last token the lexer is expected to report end of input with null.
                Assert.IsNull(lexer.NextToken(), "rule " + (r + 1) + ", after last token");
            }
        }
Esempio n. 7
0
        /// <summary>
        /// Tokenizes each rule of a five-rule ant grammar with DEFAULT_REGEXES and checks the
        /// exact token sequence produced for every rule, followed by a null token once the
        /// rule text is exhausted.
        /// </summary>
        /// <remarks>
        /// The hyphenated name "if-food-ahead" is expected back as a single token.
        /// </remarks>
        public void TokenizeAntGrammar()
        {
            var patterns = DEFAULT_REGEXES;

            var rules = new[]
            {
                "<prog> ::= <op>",
                "<op> ::= (if-food-ahead <op> <op>)",
                "<op> ::=  (progn2 <op> <op>)",
                "<op> ::= (progn3 <op> <op> <op>)",
                "<op> ::= (left) | (right) | (move)"
            };

            // expectedTokens[r] lists, in order, the tokens expected from rules[r].
            var expectedTokens = new[]
            {
                new[] { "<prog>", "::=", "<op>" },
                new[] { "<op>", "::=", "(", "if-food-ahead", "<op>", "<op>", ")" },
                new[] { "<op>", "::=", "(", "progn2", "<op>", "<op>", ")" },
                new[] { "<op>", "::=", "(", "progn3", "<op>", "<op>", "<op>", ")" },
                new[]
                {
                    "<op>", "::=",
                    "(", "left", ")", "|",
                    "(", "right", ")", "|",
                    "(", "move", ")"
                }
            };

            for (var r = 0; r < rules.Length; r++)
            {
                // A fresh lexer is created per rule, each rule being tokenized independently.
                var lexer    = new GELexer(rules[r], patterns);
                var expected = expectedTokens[r];

                for (var i = 0; i < expected.Length; i++)
                {
                    // Expected value first; the message pinpoints the failing rule and position.
                    Assert.AreEqual(expected[i], lexer.NextToken(),
                                    "rule " + (r + 1) + ", token " + (i + 1));
                }

                // After the last token the lexer is expected to report end of input with null.
                Assert.IsNull(lexer.NextToken(), "rule " + (r + 1) + ", after last token");
            }
        }