public void Test()
{
    // Feed a fixed sequence of fragments through a PositionCounter and
    // record the start/end coordinates it reports for each fragment.
    string[] fragments = { "es", " ", "\n", "eju" };
    List<Token> tokens = new List<Token>();
    PositionCounter counter = new PositionCounter();

    foreach (string fragment in fragments)
    {
        Token token = new Token
        {
            Text = fragment,
            Position = counter.Position,
            Line = counter.Line,
            LinePosition = counter.LinePosition,
        };
        counter.Add(fragment);
        token.PositionEnd = counter.Position;
        token.LineEnd = counter.Line;
        token.LinePositionEnd = counter.LinePosition;
        tokens.Add(token);
    }

    Assert.AreEqual(4, tokens.Count);

    // Expected coordinates per token; the "\n" fragment advances Line
    // and resets LinePosition for everything that follows it.
    var expected = new[]
    {
        new { Pos = 0, PosEnd = 2, Line = 0, LineEnd = 0, LinePos = 0, LinePosEnd = 2 },
        new { Pos = 2, PosEnd = 3, Line = 0, LineEnd = 0, LinePos = 2, LinePosEnd = 3 },
        new { Pos = 3, PosEnd = 4, Line = 0, LineEnd = 1, LinePos = 3, LinePosEnd = 0 },
        new { Pos = 4, PosEnd = 7, Line = 1, LineEnd = 1, LinePos = 0, LinePosEnd = 3 },
    };

    for (int i = 0; i < expected.Length; i++)
    {
        Assert.AreEqual(expected[i].Pos, tokens[i].Position);
        Assert.AreEqual(expected[i].PosEnd, tokens[i].PositionEnd);
        Assert.AreEqual(expected[i].Line, tokens[i].Line);
        Assert.AreEqual(expected[i].LineEnd, tokens[i].LineEnd);
        Assert.AreEqual(expected[i].LinePos, tokens[i].LinePosition);
        Assert.AreEqual(expected[i].LinePosEnd, tokens[i].LinePositionEnd);
    }
}
/// <summary>
/// Builds a token covering the span between <paramref name="start"/> and
/// <paramref name="end"/>. When at least one token type matched, the token
/// instance comes from the creator registered for the first matched type ID;
/// otherwise a plain <see cref="Token"/> is used.
/// </summary>
/// <param name="reader">Source of the matched text.</param>
/// <param name="start">Counter snapshot at the start of the match.</param>
/// <param name="end">Counter snapshot at the end of the match.</param>
/// <param name="tokenTypeIDs">Matched token type IDs, or null/empty when none.</param>
/// <returns>The populated token.</returns>
private Token CreateToken(CharReader reader, PositionCounter start, PositionCounter end, int[] tokenTypeIDs)
{
    Token token;
    if (tokenTypeIDs == null || tokenTypeIDs.Length == 0)
    {
        token = new Token();
    }
    else
    {
        token = tokenCreators[tokenTypeIDs[0]]();
    }

    token.Position = start.Position;
    token.PositionEnd = end.Position;
    token.Line = start.Line;
    token.LineEnd = end.Line;
    token.LinePosition = start.LinePosition;
    token.LinePositionEnd = end.LinePosition;
    token.Text = reader.Substring(start.Position, end.Position);
    return token;
}
/// <summary>
/// Lazily splits <paramref name="reader"/> into tokens by longest-match
/// scanning over the compiled DFA: consume characters while transitions
/// exist, remember the most recent final state, and when the DFA dead-ends
/// emit a token up to that final state and rewind the reader to resume
/// right after it.
/// </summary>
/// <param name="reader">Character source to tokenize.</param>
/// <returns>The token stream, produced lazily.</returns>
internal virtual IEnumerable <Token> Tokenize(CharReader reader)
{
    Compile();
    if (dfa.Start == null)
    {
        // Nothing compiled — no token can ever match.
        yield break;
    }

    PositionCounter start = new PositionCounter(); // where the current candidate token began
    PositionCounter end = null;                    // snapshot at the last final state seen; null = none yet
    int[] tokenTypeIDs = null;                     // token type IDs matched at 'end'
    DFA.State current = dfa.Start;

    while (!reader.IsEnd)
    {
        char c = reader.Read();
        current = current[c];

        // reached the end, nowhere else to go
        // return the match until the prev final state
        // and go back to the next char right after the prev final state
        if (current == null)
        {
            // NOTE(review): if the very first characters dead-end before any
            // final state is reached, 'end' is still null here and
            // end.Position below would throw — presumably the grammar is
            // expected to match every input prefix; confirm.
            yield return(CreateToken(reader, start, end, tokenTypeIDs));
            reader.MoveBack(end.Position);
            reader.Release();
            // Restart matching from scratch at the rewound position.
            start = reader.PositionCounter;
            end = null;
            tokenTypeIDs = null;
            current = dfa.Start;
            continue;
        }

        // remember this position in case we need to come back
        if (current.IsFinal)
        {
            end = reader.PositionCounter;
            tokenTypeIDs = current.Values;
        }
    }

    if (end != null)
    {
        // Input exhausted with a pending match: emit it, and if characters
        // remain beyond the match end, rewind and tokenize the tail
        // recursively.
        yield return(CreateToken(reader, start, end, tokenTypeIDs));
        if (end.Position != reader.Position)
        {
            // NOTE(review): uses MoveBack(end.Position - 1) here versus
            // MoveBack(end.Position) in the dead-end branch above — depends
            // on MoveBack/Read cursor semantics not visible in this file;
            // verify the off-by-one is intentional.
            reader.MoveBack(end.Position - 1);
            foreach (Token token in Tokenize(reader))
            {
                yield return(token);
            }
        }
    }
    else
    {
        // Input ended without ever reaching a final state: emit the whole
        // unmatched remainder as a single token (tokenTypeIDs is null, so
        // CreateToken falls back to a plain Token).
        yield return(CreateToken(reader, start, reader.PositionCounter, tokenTypeIDs));
    }
}