public void DoesTokenizationWorksAtAll(Encoding encoding)
{
    // Indices:              01234567890123456
    const string SOURCE = @"lexem123ley,6.7&#";
    var buffer = encoding.GetBytes(SOURCE);
    var lexemes = new[] { "lexem", "123", "ley", ",", "6", ".", "7", "&", "#" };

    int[] tokensIndices = new int[SOURCE.Length + 1];
    int[] tokensClasses = new int[SOURCE.Length];
    int[] tokenLengths = new int[SOURCE.Length];
    var tokenizer = new RegExpTokenizer()
    {
        TokensClasses = tokensClasses,
        TokensIndices = tokensIndices,
        TokensLengths = tokenLengths
    };
    tokenizer.SetTransitionFunction(new TableDrivenTransitionFunction());

    // Register each expected lexeme as a literal terminal.
    Array.ForEach(lexemes, (s) => tokenizer.UseTerminal(RegExp.Literal(s, encoding)));
    tokenizer.BuildTransitions();

    var lastTokenIndex = tokenizer.Tokenize(buffer, 0, buffer.Length);

    Assert.That(lastTokenIndex + 1 == lexemes.Length); // correct #tokens?

    // Check whether each token has been recognized correctly.
    // FIX: loop previously stopped at i < lastTokenIndex and never verified
    // the final token ("#"); there are lastTokenIndex + 1 tokens to check.
    for (int i = 0; i <= lastTokenIndex; i++)
    {
        var tokenValue = encoding.GetString(buffer, tokensIndices[i], tokenLengths[i]);
        Assert.AreEqual(lexemes[i], tokenValue);
    }
}
public ParsingTableGenerationTests()
{
    // Nonterminals of the classic arithmetic-expression grammar.
    E = new NonTerminal("E");
    T = new NonTerminal("T");
    F = new NonTerminal("F");

    tokenizer = new RegExpTokenizer();
    tokenizer.SetTransitionFunction(new TableDrivenTransitionFunction());

    // Terminals shared by every parsing-table test in this fixture.
    id = tokenizer.UseTerminal(RegExp.GetNumberRegExp());
    plus = tokenizer.UseTerminal(RegExp.Literal('+'));
    mult = tokenizer.UseTerminal(RegExp.Literal('*'));
    leftBrace = tokenizer.UseTerminal(RegExp.Literal('('));
    rightBrace = tokenizer.UseTerminal(RegExp.Literal(')'));

    grammar = new AugmentedGrammar()
    {
        E --> E & plus & T,
        E --> T,
        T --> T & mult & F,
        T --> F,
        F --> leftBrace & E & rightBrace,
        F --> id
    };
}
public static Lexer GetLexer()
{
    // Builds a lexer backed by a table-driven regexp tokenizer.
    var regExpTokenizer = new RegExpTokenizer();
    regExpTokenizer.SetTransitionFunction(new TableDrivenTransitionFunction());
    return new Lexer(regExpTokenizer);
}
public void AmbiguousGrammarParseTest()
{
    var E = new NonTerminal("E");
    var L = new NonTerminal("L");
    var R = new NonTerminal("R");

    // FIX: the tokenizer was instantiated twice in a row and the first
    // instance was discarded immediately; a single instantiation suffices.
    var tokenizer = new RegExpTokenizer();
    tokenizer.SetTransitionFunction(new TableDrivenTransitionFunction());
    var id = tokenizer.UseTerminal(RegExp.GetNumberRegExp());
    var assign = tokenizer.UseTerminal(RegExp.Literal("="));
    var deref = tokenizer.UseTerminal(RegExp.Literal("*"));
    tokenizer.BuildTransitions();

    var grammar = new AugmentedGrammar()
    {
        E --> L & assign & R,
        E --> R,
        L --> deref & R,
        L --> id,
        R --> L
    };

    // This grammar is not SLR(1): table construction must fail.
    var ptBuilder = new SLRParsingTableBuilder();
    ptBuilder.SetGrammar(grammar);
    Assert.Throws(typeof(ParserException), ptBuilder.ConstructParsingTable);
}
public void IgnoreTokenLazyQuantificationTest(Encoding encoding)
{
    var tokenizer = new RegExpTokenizer();
    tokenizer.SetTransitionFunction(new TableDrivenTransitionFunction());

    // Terminals: runs of digits, runs of whitespace.
    var number = tokenizer.UseTerminal(RegExp.AtLeastOneOf(RegExp.Range('0', '9', encoding)));
    var whitespace = tokenizer.UseTerminal(RegExp.AtLeastOneOf(RegExp.Choice(
        RegExp.Literal(' ', encoding),
        RegExp.Literal('\t', encoding),
        RegExp.Literal('\n', encoding))));

    // C-style comments are matched but never reported as tokens.
    tokenizer.IgnoreTerminal(RegExp.Sequence(RegExp.Literal("/*", encoding),
        RegExp.AnyNumberOf(RegExp.Range((char)0, (char)255, encoding)),
        RegExp.Literal("*/", encoding)));
    tokenizer.BuildTransitions();

    // Number of tokens:    1   23  45        67 89      01
    // Indices:             012345678901234567890123456789
    const string input = "123 456 /*cdnp*/ 87 /*ae*/ 789";
    int bufferLength = encoding.GetByteCount(input);
    int[] tokenClasses = new int[bufferLength];
    int[] tokenIndices = new int[bufferLength];
    int[] tokenLengths = new int[bufferLength];

    int numClass = number.TokenClassID;
    int wsClass = whitespace.TokenClassID;
    int[] expectedTokenClasses = new[]
    {
        numClass, wsClass, numClass, wsClass, wsClass,
        numClass, wsClass, wsClass, numClass
    };

    // Compute the byte offset of every lexeme (comments included), then drop
    // the offsets belonging to the ignored comments.
    // For a single-byte encoding this yields { 0, 3, 4, 7, 16, 17, 19, 26, 27 }.
    var tokens = new[] { "123", " ", "456", " ", "/*cdnp*/", " ", "87", " ", "/*ae*/", " ", "789" };
    var expectedTokenIndices = new List<int>(15);
    int offset = 0;
    foreach (string token in tokens)
    {
        expectedTokenIndices.Add(offset);
        offset += encoding.GetByteCount(token);
    }
    expectedTokenIndices.Add(offset); // end-of-input offset, never compared

    // Delete ignored tokens
    expectedTokenIndices.RemoveAt(8);
    expectedTokenIndices.RemoveAt(4);

    var rawInput = encoding.GetBytes(input);
    tokenizer.TokensClasses = tokenClasses;
    tokenizer.TokensIndices = tokenIndices;
    tokenizer.TokensLengths = tokenLengths;

    int tokensNum = tokenizer.Tokenize(rawInput, 0, rawInput.Length) + 1;

    Assert.That(tokensNum, Is.EqualTo(expectedTokenClasses.Length));
    for (int i = 0; i < tokensNum; i++)
    {
        Assert.That(tokenClasses[i], Is.EqualTo(expectedTokenClasses[i]),
            "Error On token class comparison: " + i);
        Assert.That(tokenIndices[i], Is.EqualTo(expectedTokenIndices[i]),
            "Error On token index comparison: " + i);
    }
}
public void UnicodeTest()
{
    // Tokenizes a mixed Hebrew/Russian/English sentence in UTF-16 (Encoding.Unicode).
    ITokenizer tokenizer = new RegExpTokenizer();
    tokenizer.SetTransitionFunction(new TableDrivenTransitionFunction());
    var hebrewWord = tokenizer.UseTerminal(RegExp.Literal("עברית", Encoding.Unicode)); // 1
    var russianWord = tokenizer.UseTerminal(RegExp.Literal("русский", Encoding.Unicode)); // 2
    var englishWord = tokenizer.UseTerminal(RegExp.Literal("english", Encoding.Unicode)); // 3
    var whitespace = tokenizer.UseTerminal(RegExp.AtLeastOneOf(RegExp.Choice(
        RegExp.Literal(' ', Encoding.Unicode),
        RegExp.Literal('\t', Encoding.Unicode),
        RegExp.Literal('\n', Encoding.Unicode))));
    tokenizer.BuildTransitions();

    // FIX: the token-number/index guides were declared as unused local string
    // constants ("tokens", "indices"); they are documentation, kept as comments.
    // Tokens:  "1 23 45 67 89 01 23 "
    // Indices: "01234567890123456789012345678901234567890123456789"
    const string input = "עברית русский english עברית english русский עברית";
    int[] tokenClasses = new int[input.Length];
    int[] tokenIndices = new int[input.Length];
    int[] tokenLengths = new int[input.Length];

    int hebClass = hebrewWord.TokenClassID;
    int engClass = englishWord.TokenClassID;
    int rusClass = russianWord.TokenClassID;
    int wsClass = whitespace.TokenClassID;
    int[] expectedTokenClasses = new[]
    {
        hebClass, wsClass, rusClass, wsClass, engClass, wsClass, hebClass,
        wsClass, engClass, wsClass, rusClass, wsClass, hebClass
    };
    int[] expectedTokenIndices = new[] { 0, 5, 6, 13, 14, 21, 22, 27, 28, 35, 36, 43, 44 };

    var rawInput = Encoding.Unicode.GetBytes(input);
    tokenizer.TokensClasses = tokenClasses;
    tokenizer.TokensIndices = tokenIndices;
    tokenizer.TokensLengths = tokenLengths;

    int tokensNum = tokenizer.Tokenize(rawInput, 0, rawInput.Length) + 1;

    Assert.That(tokensNum, Is.EqualTo(expectedTokenClasses.Length));
    for (int i = 0; i < tokensNum; i++)
    {
        Assert.That(tokenClasses[i], Is.EqualTo(expectedTokenClasses[i]));
        Assert.That(tokenIndices[i], Is.EqualTo(expectedTokenIndices[i] * 2)); // Each symbol takes 2 bytes
    }
}
public void GeneralTest()
{
    // Tokenizes whitespace-separated digit runs and checks classes and offsets.
    ITokenizer tokenizer = new RegExpTokenizer();
    tokenizer.SetTransitionFunction(new TableDrivenTransitionFunction());

    var number = tokenizer.UseTerminal(RegExp.AtLeastOneOf(RegExp.Choice(
        RegExp.Literal('0'), RegExp.Literal('1'), RegExp.Literal('2'),
        RegExp.Literal('3'), RegExp.Literal('4'), RegExp.Literal('5'),
        RegExp.Literal('6'), RegExp.Literal('7'), RegExp.Literal('8'),
        RegExp.Literal('9'))));
    var whitespace = tokenizer.UseTerminal(RegExp.AtLeastOneOf(RegExp.Choice(
        RegExp.Literal(' '), RegExp.Literal('\t'), RegExp.Literal('\n'))));
    tokenizer.BuildTransitions();

    // Number of tokens:    1   23  45  67  890123     45
    // Indices:             01234567890123456789012345678901
    const string input = "123 456 789 02348 0 3 452 55555";
    int[] tokenClasses = new int[input.Length];
    int[] tokenIndices = new int[input.Length];
    int[] tokenLengths = new int[input.Length];

    int numClass = number.TokenClassID;
    int wsClass = whitespace.TokenClassID;
    int[] expectedTokenClasses = new[]
    {
        numClass, wsClass, numClass, wsClass, numClass, wsClass, numClass, wsClass,
        numClass, wsClass, numClass, wsClass, numClass, wsClass, numClass
    };
    int[] expectedTokenIndices = new[] { 0, 3, 4, 7, 8, 11, 12, 17, 18, 19, 20, 21, 22, 25, 26, 31 };

    var rawInput = Encoding.ASCII.GetBytes(input);
    tokenizer.TokensClasses = tokenClasses;
    tokenizer.TokensIndices = tokenIndices;
    tokenizer.TokensLengths = tokenLengths;

    int tokensNum = tokenizer.Tokenize(rawInput, 0, rawInput.Length) + 1;

    Assert.That(tokensNum, Is.EqualTo(expectedTokenClasses.Length));
    for (int i = 0; i < tokensNum; i++)
    {
        Assert.That(tokenClasses[i], Is.EqualTo(expectedTokenClasses[i]));
        Assert.That(tokenIndices[i], Is.EqualTo(expectedTokenIndices[i]));
    }
}
public void ArithmeticStatementFullyAutonomousParseTest()
{
    // End-to-end test: lexer + SLR table builder + LR parser evaluate "2*(3+4)".
    const string INPUT = @"2*(3+4)";
    string fileName = Path.GetTempFileName();
    File.WriteAllText(fileName, INPUT);

    RegExpTokenizer tokenizer = new RegExpTokenizer();
    tokenizer.SetTransitionFunction(new TableDrivenTransitionFunction());
    ILexer lexer = new Lexer(tokenizer);
    Stream fileStream = File.OpenRead(fileName);
    try
    {
        lexer.SetDataSource(fileStream);

        var stack = new Stack<int>();
        var parser = new LRParser();
        var E = new NonTerminal("E");
        var T = new NonTerminal("T");
        var F = new NonTerminal("F");

        var id = tokenizer.UseTerminal(RegExp.AtLeastOneOf(RegExp.Choice(
            RegExp.Literal('0'), RegExp.Literal('1'), RegExp.Literal('2'),
            RegExp.Literal('3'), RegExp.Literal('4'), RegExp.Literal('5'),
            RegExp.Literal('6'), RegExp.Literal('7'), RegExp.Literal('8'),
            RegExp.Literal('9'))));
        var plus = tokenizer.UseTerminal(RegExp.Literal('+'));
        var mult = tokenizer.UseTerminal(RegExp.Literal('*'));
        var leftBrace = tokenizer.UseTerminal(RegExp.Literal('('));
        var rightBrace = tokenizer.UseTerminal(RegExp.Literal(')'));
        tokenizer.BuildTransitions();

        // Semantic actions evaluate the expression on an explicit stack.
        var grammar = new AugmentedGrammar()
        {
            E --> E & plus & T ^ (v => stack.Push(stack.Pop() + stack.Pop())),
            E --> T,
            T --> T & mult & F ^ (v => stack.Push(stack.Pop() * stack.Pop())),
            T --> F,
            F --> leftBrace & E & rightBrace,
            F --> id ^ (v => stack.Push(v[id].AsInt()))
        };

        Console.WriteLine("Grammar is being tested: \n{0}", grammar);
        Console.WriteLine("Input is being parsed: {0}\n", INPUT);
        Console.WriteLine("Parsing process:\n");

        parser.Grammar = grammar;
        parser.Lexer = lexer;

        var ptBuilder = new SLRParsingTableBuilder();
        ptBuilder.SetGrammar(grammar);
        ptBuilder.ConstructParsingTable();
        parser.ParsingTable = ptBuilder.GetTable();

        parser.InputAccepted += (sender, eventArgs) => Console.WriteLine("Accepted!");
        parser.ParseInput();

        Assert.That(stack.Pop(), Is.EqualTo(14)); // 2*(3+4) == 14
        Assert.That(stack.Count, Is.EqualTo(0));
    }
    finally
    {
        // FIX: cleanup previously ran only on success, leaking the stream and
        // the temp file whenever an assertion or the parser threw.
        fileStream.Close();
        File.Delete(fileName);
    }
}
public void ArithmeticStatementParseTest()
{
    // Parses "2*(3+4)" against a hand-written SLR action/goto table and checks
    // that the semantic actions compute 14.
    // FIX 1: removed ~80 lines of commented-out dead code (an obsolete version
    //        of the same tables with a different column numbering).
    // FIX 2: stream close / temp-file delete now run in a finally block; they
    //        previously leaked whenever an assertion or the parser threw.
    const string INPUT = @"2*(3+4)";

    string fileName = Path.GetTempFileName();
    File.WriteAllText(fileName, INPUT);

    var tokenizer = new RegExpTokenizer();
    tokenizer.SetTransitionFunction(new TableDrivenTransitionFunction());
    ILexer lexer = new Lexer(tokenizer);
    Stream fileStream = File.OpenRead(fileName);
    try
    {
        lexer.SetDataSource(fileStream);

        // Action table layout: [state, terminal class]; column 0 is end-of-input.
        var actionTable = new ActionTableEntry[12, 6];
        #region Populating Action Table
        actionTable[0, 1].Action = ParserAction.Shift; actionTable[0, 1].Destination = 5;
        actionTable[0, 4].Action = ParserAction.Shift; actionTable[0, 4].Destination = 4;
        actionTable[1, 0].Action = ParserAction.Accept; // <-- !!!!
        actionTable[1, 2].Action = ParserAction.Shift; actionTable[1, 2].Destination = 6;
        actionTable[2, 0].Action = ParserAction.Reduce; actionTable[2, 0].Destination = 2;
        actionTable[2, 2].Action = ParserAction.Reduce; actionTable[2, 2].Destination = 2;
        actionTable[2, 3].Action = ParserAction.Shift; actionTable[2, 3].Destination = 7;
        actionTable[2, 5].Action = ParserAction.Reduce; actionTable[2, 5].Destination = 2;
        actionTable[3, 0].Action = ParserAction.Reduce; actionTable[3, 0].Destination = 4;
        actionTable[3, 2].Action = ParserAction.Reduce; actionTable[3, 2].Destination = 4;
        actionTable[3, 3].Action = ParserAction.Reduce; actionTable[3, 3].Destination = 4;
        actionTable[3, 5].Action = ParserAction.Reduce; actionTable[3, 5].Destination = 4;
        actionTable[4, 1].Action = ParserAction.Shift; actionTable[4, 1].Destination = 5;
        actionTable[4, 4].Action = ParserAction.Shift; actionTable[4, 4].Destination = 4;
        actionTable[5, 0].Action = ParserAction.Reduce; actionTable[5, 0].Destination = 6;
        actionTable[5, 2].Action = ParserAction.Reduce; actionTable[5, 2].Destination = 6;
        actionTable[5, 3].Action = ParserAction.Reduce; actionTable[5, 3].Destination = 6;
        actionTable[5, 5].Action = ParserAction.Reduce; actionTable[5, 5].Destination = 6;
        actionTable[6, 1].Action = ParserAction.Shift; actionTable[6, 1].Destination = 5;
        actionTable[6, 4].Action = ParserAction.Shift; actionTable[6, 4].Destination = 4;
        actionTable[7, 1].Action = ParserAction.Shift; actionTable[7, 1].Destination = 5;
        actionTable[7, 4].Action = ParserAction.Shift; actionTable[7, 4].Destination = 4;
        actionTable[8, 2].Action = ParserAction.Shift; actionTable[8, 2].Destination = 6;
        actionTable[8, 5].Action = ParserAction.Shift; actionTable[8, 5].Destination = 11;
        actionTable[9, 0].Action = ParserAction.Reduce; actionTable[9, 0].Destination = 1;
        actionTable[9, 2].Action = ParserAction.Reduce; actionTable[9, 2].Destination = 1;
        actionTable[9, 3].Action = ParserAction.Shift; actionTable[9, 3].Destination = 7;
        actionTable[9, 5].Action = ParserAction.Reduce; actionTable[9, 5].Destination = 1;
        actionTable[10, 0].Action = ParserAction.Reduce; actionTable[10, 0].Destination = 3;
        actionTable[10, 2].Action = ParserAction.Reduce; actionTable[10, 2].Destination = 3;
        actionTable[10, 3].Action = ParserAction.Reduce; actionTable[10, 3].Destination = 3;
        actionTable[10, 5].Action = ParserAction.Reduce; actionTable[10, 5].Destination = 3;
        actionTable[11, 0].Action = ParserAction.Reduce; actionTable[11, 0].Destination = 5;
        actionTable[11, 2].Action = ParserAction.Reduce; actionTable[11, 2].Destination = 5;
        actionTable[11, 3].Action = ParserAction.Reduce; actionTable[11, 3].Destination = 5;
        actionTable[11, 5].Action = ParserAction.Reduce; actionTable[11, 5].Destination = 5;
        #endregion

        // Goto table layout: [state, nonterminal] -> next state (0 = none).
        var gotoTable = new int[12, 3];
        #region Populating Goto Table
        gotoTable[0, 0] = 1;
        gotoTable[0, 1] = 2;
        gotoTable[0, 2] = 3;
        gotoTable[4, 0] = 8;
        gotoTable[4, 1] = 2;
        gotoTable[4, 2] = 3;
        gotoTable[6, 1] = 9;
        gotoTable[6, 2] = 3;
        gotoTable[7, 2] = 10;
        #endregion

        var stack = new Stack<int>();
        var parser = new LRParser();
        var E = new NonTerminal("E");
        var T = new NonTerminal("T");
        var F = new NonTerminal("F");

        var id = tokenizer.UseTerminal(RegExp.AtLeastOneOf(RegExp.Choice(
            RegExp.Literal('0'), RegExp.Literal('1'), RegExp.Literal('2'),
            RegExp.Literal('3'), RegExp.Literal('4'), RegExp.Literal('5'),
            RegExp.Literal('6'), RegExp.Literal('7'), RegExp.Literal('8'),
            RegExp.Literal('9'))));
        var plus = tokenizer.UseTerminal(RegExp.Literal('+'));
        var mult = tokenizer.UseTerminal(RegExp.Literal('*'));
        var leftBrace = tokenizer.UseTerminal(RegExp.Literal('('));
        var rightBrace = tokenizer.UseTerminal(RegExp.Literal(')'));
        tokenizer.BuildTransitions();

        // Semantic actions evaluate the expression on an explicit stack.
        var grammar = new AugmentedGrammar()
        {
            E --> E & plus & T ^ (v => stack.Push(stack.Pop() + stack.Pop())),
            E --> T,
            T --> T & mult & F ^ (v => stack.Push(stack.Pop() * stack.Pop())),
            T --> F,
            F --> leftBrace & E & rightBrace,
            F --> id ^ (v => stack.Push(v[id].AsInt()))
        };

        Console.WriteLine("Grammar is being tested: \n{0}", grammar);
        Console.WriteLine("Input is being parsed: {0}\n", INPUT);
        Console.WriteLine("Parsing process:\n");

        parser.Grammar = grammar;
        parser.Lexer = lexer;
        parser.ParsingTable = new LRParsingTable(actionTable, gotoTable);
        parser.InputAccepted += (sender, eventArgs) => Console.WriteLine("Accepted!");
        parser.ParseInput();

        Assert.That(stack.Pop(), Is.EqualTo(14)); // 2*(3+4) == 14
        Assert.That(stack.Count, Is.EqualTo(0));
    }
    finally
    {
        fileStream.Close();
        File.Delete(fileName);
    }
}
public void Temptest(string encodingStr)
{
    // Exploratory test of comment-skipping via a negated-literal terminator.
    // FIX: removed the commented-out experimental variants (AnyNumberOf /
    // Choice combinations) and the unused tokensNum local.
    var encoding = CommonTestRoutines.GetEncoding(encodingStr);
    var tokenizer = new RegExpTokenizer();
    tokenizer.SetTransitionFunction(new TableDrivenTransitionFunction());

    // A comment starts with "/*" and runs up to the first "*/".
    tokenizer.IgnoreTerminal(RegExp.Sequence(
        RegExp.Literal("/*", encoding),
        RegExp.Not(RegExp.Literal("*/", encoding), false)
    ));
    tokenizer.UseTerminal(RegExp.AtLeastOneOf(RegExp.Range('0', '9', encoding)));
    tokenizer.UseTerminal(RegExp.AtLeastOneOf(RegExp.Literal(' ', encoding)));
    tokenizer.BuildTransitions();

    const string input = "/*111*/ 222 /*333*/ 444";
    int bufferLength = encoding.GetByteCount(input);
    tokenizer.TokensClasses = new int[bufferLength];
    tokenizer.TokensIndices = new int[bufferLength];
    tokenizer.TokensLengths = new int[bufferLength];

    var rawInput = encoding.GetBytes(input);

    // NOTE(review): this test only exercises Tokenize and asserts nothing on
    // its output yet — add expectations once the behavior is pinned down.
    tokenizer.Tokenize(rawInput, 0, rawInput.Length);
}
public void LexicalActionTest()
{
    // Classes:               2  13 14 15
    // Indices:               01234567890
    const string SOURCE = @"aa bb cc dd";
    var buffer = Encoding.ASCII.GetBytes(SOURCE);

    int[] tokensIndices = new int[SOURCE.Length];
    int[] tokensClasses = new int[SOURCE.Length];
    int[] tokenLengths = new int[SOURCE.Length];
    var tokenizer = new RegExpTokenizer()
    {
        TokensClasses = tokensClasses,
        TokensIndices = tokensIndices,
        TokensLengths = tokenLengths
    };
    tokenizer.SetTransitionFunction(new TableDrivenTransitionFunction());

    var lexicalActionExecuted = false;
    Token tokenBB = null, tokenCC = null;

    tokenizer.UseTerminal(RegExp.Literal(" "));  // class 1
    tokenizer.UseTerminal(RegExp.Literal("aa")); // class 2
    tokenizer.UseTerminal(RegExp.Literal("bb"), (t) =>
    {
        tokenBB = t;
        lexicalActionExecuted = true;
        return true; // Pass this token to parser
    });
    tokenizer.UseTerminal(RegExp.Literal("cc"), (t) =>
    {
        tokenCC = t;
        lexicalActionExecuted &= true;
        return false; // Ignore token
    });
    tokenizer.UseTerminal(RegExp.Literal("dd"));
    tokenizer.BuildTransitions();

    var tokensCount = tokenizer.Tokenize(buffer, 0, SOURCE.Length) + 1;

    // "bb" was captured by its lexical action and forwarded to the parser.
    Assert.That(tokenBB.Buffer == buffer);
    Assert.That(tokenBB.Offset, Is.EqualTo(3));
    Assert.That(tokenBB.Class, Is.EqualTo(3));
    Assert.That(tokenBB.Length, Is.EqualTo(2));

    // "cc" was captured as well, but its action told the tokenizer to drop it.
    Assert.That(tokenCC.Buffer == buffer);
    Assert.That(tokenCC.Offset, Is.EqualTo(6));
    Assert.That(tokenCC.Class, Is.EqualTo(4));
    Assert.That(tokenCC.Length, Is.EqualTo(2));

    Assert.True(lexicalActionExecuted, "The lexical actions were not executed partially or at all");

    // "cc" is ignored, so only 6 of the 7 lexemes are reported.
    Assert.That(tokensCount, Is.EqualTo(6));
    var expectedClasses = new[] { 2, 1, 3, 1, 1, 5 };
    for (int i = 0; i < expectedClasses.Length; i++)
    {
        Assert.That(tokensClasses[i], Is.EqualTo(expectedClasses[i]));
    }
}
public void UnendedTokenAtTheEndTest()
{
    // Indices:              00000000001111111111222222222
    //                       01234567890123456789012345678
    const string SAMPLE = "windows.bugsNum=long.Max;Linu";
    var tokensClasses = new int[SAMPLE.Length];
    var tokensIndices = new int[SAMPLE.Length];
    var tokensLengths = new int[SAMPLE.Length];
    var tokenizer = new RegExpTokenizer()
    {
        TokensClasses = tokensClasses,
        TokensIndices = tokensIndices,
        TokensLengths = tokensLengths
    };
    tokenizer.SetTransitionFunction(new TableDrivenTransitionFunction());

    tokenizer.UseTerminal(RegExp.Literal("windows")); // class: 1
    tokenizer.UseTerminal(RegExp.Literal("."));       // class: 2
    tokenizer.UseTerminal(RegExp.Literal("bugsNum")); // class: 3
    tokenizer.UseTerminal(RegExp.Literal("="));       // class: 4
    tokenizer.UseTerminal(RegExp.Literal("long"));    // class: 5
    tokenizer.UseTerminal(RegExp.Literal("Max"));     // class: 6
    tokenizer.UseTerminal(RegExp.Literal(";"));       // class: 7
    tokenizer.UseTerminal(RegExp.Literal("Linux"));   // class: 8
    tokenizer.BuildTransitions();

    var tokensNum = tokenizer.Tokenize(Encoding.ASCII.GetBytes(SAMPLE), 0, SAMPLE.Length) + 1;

    // The trailing "Linu" is still reported as a ninth token.
    Assert.That(tokensNum == 9);

    var expectedIndices = new[] { 0, 7, 8, 15, 16, 20, 21, 24, 25 };
    for (int i = 0; i < expectedIndices.Length; i++)
    {
        Assert.That(tokensIndices[i] == expectedIndices[i]);
    }

    // The ninth token's class is deliberately unchecked: "Linu" never completed.
    var expectedClasses = new[] { 1, 2, 3, 4, 5, 2, 6, 7 };
    for (int i = 0; i < expectedClasses.Length; i++)
    {
        Assert.That(tokensClasses[i] == expectedClasses[i]);
    }
}
public void TokenizerClassTest()
{
    // Maximal-munch check: input should split into aa, aab, abc.
    const string SAMPLE = "aaaababc";
    var tokensClasses = new int[SAMPLE.Length];
    var tokensIndices = new int[SAMPLE.Length];
    var tokensLengths = new int[SAMPLE.Length];
    var tokenizer = new RegExpTokenizer()
    {
        TokensClasses = tokensClasses,
        TokensIndices = tokensIndices,
        TokensLengths = tokensLengths
    };
    tokenizer.SetTransitionFunction(new TableDrivenTransitionFunction());

    tokenizer.UseTerminal(RegExp.Literal("aab")); // class 1
    tokenizer.UseTerminal(RegExp.Literal("acb")); // class 2
    tokenizer.UseTerminal(RegExp.Literal("abc")); // class 3
    tokenizer.UseTerminal(RegExp.Literal("aa"));  // class 4
    tokenizer.BuildTransitions();

    var tokensNum = tokenizer.Tokenize(Encoding.ASCII.GetBytes(SAMPLE), 0, SAMPLE.Length) + 1;

    Assert.That(tokensNum == 3);
    Assert.That(tokensClasses[0] == 4); // "aa"
    Assert.That(tokensClasses[1] == 1); // "aab"
    Assert.That(tokensClasses[2] == 3); // "abc"
    Assert.That(tokensIndices[0] == 0);
    Assert.That(tokensIndices[1] == 2);
    Assert.That(tokensIndices[2] == 5);
}