/// <summary>
/// Verifies that a single-token pattern registered before a longer two-token
/// pattern "intercepts" the match: the lexer emits all four single tokens and
/// the overlapping double pattern (p3) is never consumed.
/// </summary>
public void SimpleAlphanumericWithEmbeddedWhitespaceInterception()
{
    var input = "A B C D";
    var p1 = "\\s*A\\s*";
    var p2 = "\\s*B\\s*";      // This should overshadow the "double" match that follows
    var p3 = "\\s*B\\s*C\\s*"; // We "hop over" this one because we intercept with the single token above
    var p4 = "\\s*C\\s*";
    var p5 = "\\s*D\\s*";
    var patterns = new List<string> { p1, p2, p3, p4, p5 };
    var lexer = new GELexer(input, patterns.ToArray());

    // We get 4 tokens, but the double pattern is passed over
    var t1 = lexer.NextToken();
    var t2 = lexer.NextToken();
    var t3 = lexer.NextToken();
    var t4 = lexer.NextToken();

    // Assert.AreEqual takes (expected, actual) — expected value goes first
    // so that failure messages report the right direction.
    Assert.AreEqual("A", t1);
    Assert.AreEqual("B", t2);
    Assert.AreEqual("C", t3);
    Assert.AreEqual("D", t4);
}
/// <summary>
/// Verifies that when the two-token pattern is registered first, it wins:
/// "B C" is matched as a single token, so only three tokens come out of the
/// four-letter input.
/// </summary>
public void SimpleAlphanumericWithEmbeddedWhitespaceAlternateOrder()
{
    var input = "A B C D";
    // This should overshadow the single match in p3
    var p1 = "\\s*B\\s*C\\s*";
    var p2 = "\\s*A\\s*";
    var p3 = "\\s*C\\s*"; // We "hop over" this one because of the previous two part match in p1
    var p4 = "\\s*D\\s*";
    var patterns = new List<string> { p1, p2, p3, p4 };
    var lexer = new GELexer(input, patterns.ToArray());

    // should only get 3 tokens instead of 4
    var t1 = lexer.NextToken();
    var t2 = lexer.NextToken();
    var t3 = lexer.NextToken();

    // Assert.AreEqual takes (expected, actual) — expected value goes first.
    Assert.AreEqual("A", t1);
    Assert.AreEqual("B C", t2);
    Assert.AreEqual("D", t3);
}
/// <summary>
/// Verifies that pattern registration order does not affect token order:
/// even with the B and C patterns swapped in the pattern array, tokens are
/// emitted in input order.
/// </summary>
public void SimpleAlphanumericWithSurroundingWhitespace()
{
    var input = "A B C D";
    var p1 = "\\s*A\\s*";
    var p2 = "\\s*C\\s*"; // swapped
    var p3 = "\\s*B\\s*"; // swapped
    var p4 = "\\s*D\\s*";
    var patterns = new List<string> { p1, p2, p3, p4 };
    var lexer = new GELexer(input, patterns.ToArray());

    var t1 = lexer.NextToken();
    var t2 = lexer.NextToken();
    var t3 = lexer.NextToken();
    var t4 = lexer.NextToken();

    // Assert.AreEqual takes (expected, actual) — expected value goes first.
    Assert.AreEqual("A", t1);
    Assert.AreEqual("B", t2); // swapped in regex array, should still find it in correct order
    Assert.AreEqual("C", t3); // swapped in regex array, should still find it in correct order
    Assert.AreEqual("D", t4);
}
/// <summary>
/// Verifies basic tokenization with bare literal patterns (no explicit
/// whitespace handling in the regexes).
/// </summary>
public void SimpleAlphanumericExplicit()
{
    var input = "A B C D";
    var p1 = "A";
    var p2 = "B";
    var p3 = "C";
    var p4 = "D";
    var patterns = new List<string> { p1, p2, p3, p4 };
    var lexer = new GELexer(input, patterns.ToArray());

    var t1 = lexer.NextToken();
    var t2 = lexer.NextToken();
    var t3 = lexer.NextToken();
    var t4 = lexer.NextToken();

    // Assert.AreEqual takes (expected, actual) — expected value goes first.
    Assert.AreEqual("A", t1);
    Assert.AreEqual("B", t2);
    Assert.AreEqual("C", t3);
    Assert.AreEqual("D", t4);
}
/// <summary>
/// Tokenizes each rule of the lawnmower ADF0 grammar with the default
/// regexes and verifies the exact token stream, including the terminating
/// null once a rule is exhausted.
/// </summary>
public void TokenizeLawnmowerADF0Grammar()
{
    var strings = new[]
    {
        "<start> ::= <op>",
        "<op> ::= (progn2 <op> <op>) | (v8a <op> <op>)",
        "<op> ::= (mow) | (left) | (ERC)"
    };
    var patterns = DEFAULT_REGEXES;
    var rule1 = strings[0];
    var rule2 = strings[1];
    var rule3 = strings[2];

    // Assert.AreEqual takes (expected, actual) — expected value goes first.

    // Rule 1
    var lexer = new GELexer(rule1, patterns);
    Assert.AreEqual("<start>", lexer.NextToken());
    Assert.AreEqual("::=", lexer.NextToken());
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.IsNull(lexer.NextToken());

    // Rule 2
    lexer = new GELexer(rule2, patterns);
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.AreEqual("::=", lexer.NextToken());
    Assert.AreEqual("(", lexer.NextToken());
    Assert.AreEqual("progn2", lexer.NextToken());
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.AreEqual(")", lexer.NextToken());
    Assert.AreEqual("|", lexer.NextToken());
    Assert.AreEqual("(", lexer.NextToken());
    Assert.AreEqual("v8a", lexer.NextToken());
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.AreEqual(")", lexer.NextToken());
    Assert.IsNull(lexer.NextToken());

    // Rule 3
    lexer = new GELexer(rule3, patterns);
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.AreEqual("::=", lexer.NextToken());
    Assert.AreEqual("(", lexer.NextToken());
    Assert.AreEqual("mow", lexer.NextToken());
    Assert.AreEqual(")", lexer.NextToken());
    Assert.AreEqual("|", lexer.NextToken());
    Assert.AreEqual("(", lexer.NextToken());
    Assert.AreEqual("left", lexer.NextToken());
    Assert.AreEqual(")", lexer.NextToken());
    Assert.AreEqual("|", lexer.NextToken());
    Assert.AreEqual("(", lexer.NextToken());
    Assert.AreEqual("ERC", lexer.NextToken());
    Assert.AreEqual(")", lexer.NextToken());
    Assert.IsNull(lexer.NextToken());
}
/// <summary>
/// Tokenizes each rule of the symbolic-regression grammar (including a
/// comment line, irregular spacing around "::=", and adjacent tokens like
/// "&lt;op&gt;&lt;op&gt;") and verifies the exact token stream per rule.
/// </summary>
public void TokenizeRegressionGrammar()
{
    var strings = new[]
    {
        "# grammar of the regression problem including ERCs",
        "<start> ::= <op>",
        "<op> ::= (+ <op><op>)|(-<op><op>) |(* <op> <op>) | (% <op> <op>)",
        "<op>::=(sin <op>) | (cos <op>) | (exp <op>) | (rlog <op>)",
        "<op>::=(x) | ( ERC )"
    };
    var patterns = DEFAULT_REGEXES;
    var rule1 = strings[0];
    var rule2 = strings[1];
    var rule3 = strings[2];
    var rule4 = strings[3];
    var rule5 = strings[4];

    // Assert.AreEqual takes (expected, actual) — expected value goes first.

    // Rule 1: a comment line is returned as a single token
    var lexer = new GELexer(rule1, patterns);
    Assert.AreEqual("# grammar of the regression problem including ERCs", lexer.NextToken());
    Assert.IsNull(lexer.NextToken());

    // Rule 2
    lexer = new GELexer(rule2, patterns);
    Assert.AreEqual("<start>", lexer.NextToken());
    Assert.AreEqual("::=", lexer.NextToken());
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.IsNull(lexer.NextToken());

    // Rule 3: tokens separate correctly even with no whitespace between them
    lexer = new GELexer(rule3, patterns);
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.AreEqual("::=", lexer.NextToken());
    Assert.AreEqual("(", lexer.NextToken());
    Assert.AreEqual("+", lexer.NextToken());
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.AreEqual(")", lexer.NextToken());
    Assert.AreEqual("|", lexer.NextToken());
    Assert.AreEqual("(", lexer.NextToken());
    Assert.AreEqual("-", lexer.NextToken());
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.AreEqual(")", lexer.NextToken());
    Assert.AreEqual("|", lexer.NextToken());
    Assert.AreEqual("(", lexer.NextToken());
    Assert.AreEqual("*", lexer.NextToken());
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.AreEqual(")", lexer.NextToken());
    Assert.AreEqual("|", lexer.NextToken());
    Assert.AreEqual("(", lexer.NextToken());
    Assert.AreEqual("%", lexer.NextToken());
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.AreEqual(")", lexer.NextToken());
    Assert.IsNull(lexer.NextToken());

    // Rule 4
    lexer = new GELexer(rule4, patterns);
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.AreEqual("::=", lexer.NextToken());
    Assert.AreEqual("(", lexer.NextToken());
    Assert.AreEqual("sin", lexer.NextToken());
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.AreEqual(")", lexer.NextToken());
    Assert.AreEqual("|", lexer.NextToken());
    Assert.AreEqual("(", lexer.NextToken());
    Assert.AreEqual("cos", lexer.NextToken());
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.AreEqual(")", lexer.NextToken());
    Assert.AreEqual("|", lexer.NextToken());
    Assert.AreEqual("(", lexer.NextToken());
    Assert.AreEqual("exp", lexer.NextToken());
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.AreEqual(")", lexer.NextToken());
    Assert.AreEqual("|", lexer.NextToken());
    Assert.AreEqual("(", lexer.NextToken());
    Assert.AreEqual("rlog", lexer.NextToken());
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.AreEqual(")", lexer.NextToken());
    Assert.IsNull(lexer.NextToken());

    // Rule 5: extra whitespace inside "( ERC )" is absorbed
    lexer = new GELexer(rule5, patterns);
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.AreEqual("::=", lexer.NextToken());
    Assert.AreEqual("(", lexer.NextToken());
    Assert.AreEqual("x", lexer.NextToken());
    Assert.AreEqual(")", lexer.NextToken());
    Assert.AreEqual("|", lexer.NextToken());
    Assert.AreEqual("(", lexer.NextToken());
    Assert.AreEqual("ERC", lexer.NextToken());
    Assert.AreEqual(")", lexer.NextToken());
    Assert.IsNull(lexer.NextToken());
}
/// <summary>
/// Tokenizes each rule of the artificial-ant grammar with the default
/// regexes and verifies the exact token stream, including the terminating
/// null once a rule is exhausted.
/// </summary>
public void TokenizeAntGrammar()
{
    var strings = new[]
    {
        "<prog> ::= <op>",
        "<op> ::= (if-food-ahead <op> <op>)",
        "<op> ::= (progn2 <op> <op>)",
        "<op> ::= (progn3 <op> <op> <op>)",
        "<op> ::= (left) | (right) | (move)"
    };
    var patterns = DEFAULT_REGEXES;
    var rule1 = strings[0];
    var rule2 = strings[1];
    var rule3 = strings[2];
    var rule4 = strings[3];
    var rule5 = strings[4];

    // Assert.AreEqual takes (expected, actual) — expected value goes first.

    // Rule 1
    var lexer = new GELexer(rule1, patterns);
    Assert.AreEqual("<prog>", lexer.NextToken());
    Assert.AreEqual("::=", lexer.NextToken());
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.IsNull(lexer.NextToken());

    // Rule 2: a hyphenated identifier stays one token
    lexer = new GELexer(rule2, patterns);
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.AreEqual("::=", lexer.NextToken());
    Assert.AreEqual("(", lexer.NextToken());
    Assert.AreEqual("if-food-ahead", lexer.NextToken());
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.AreEqual(")", lexer.NextToken());
    Assert.IsNull(lexer.NextToken());

    // Rule 3
    lexer = new GELexer(rule3, patterns);
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.AreEqual("::=", lexer.NextToken());
    Assert.AreEqual("(", lexer.NextToken());
    Assert.AreEqual("progn2", lexer.NextToken());
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.AreEqual(")", lexer.NextToken());
    Assert.IsNull(lexer.NextToken());

    // Rule 4
    lexer = new GELexer(rule4, patterns);
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.AreEqual("::=", lexer.NextToken());
    Assert.AreEqual("(", lexer.NextToken());
    Assert.AreEqual("progn3", lexer.NextToken());
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.AreEqual(")", lexer.NextToken());
    Assert.IsNull(lexer.NextToken());

    // Rule 5
    lexer = new GELexer(rule5, patterns);
    Assert.AreEqual("<op>", lexer.NextToken());
    Assert.AreEqual("::=", lexer.NextToken());
    Assert.AreEqual("(", lexer.NextToken());
    Assert.AreEqual("left", lexer.NextToken());
    Assert.AreEqual(")", lexer.NextToken());
    Assert.AreEqual("|", lexer.NextToken());
    Assert.AreEqual("(", lexer.NextToken());
    Assert.AreEqual("right", lexer.NextToken());
    Assert.AreEqual(")", lexer.NextToken());
    Assert.AreEqual("|", lexer.NextToken());
    Assert.AreEqual("(", lexer.NextToken());
    Assert.AreEqual("move", lexer.NextToken());
    Assert.AreEqual(")", lexer.NextToken());
    Assert.IsNull(lexer.NextToken());
}