/// <summary>
/// Runs the whitespace tokenizer at the reader's current position and reports
/// how many characters the resulting token covers.
/// </summary>
/// <param name="reader">Reader positioned where whitespace may start.</param>
/// <returns>
/// Length of the token produced by the whitespace tokenizer, or <c>0</c> when no token was produced.
/// </returns>
private int SkipWhitespaces(LexerStringReader reader)
{
    // The tokenizer receives its own runtime info built from the reader.
    var runtimeInfo = new LexerRuntimeInfo(reader);
    var whitespaceToken = _whitespaceTokenizer.Tokenize(ref runtimeInfo).Token;

    return whitespaceToken?.Length ?? 0;
}
/// <summary>
/// Looks up a value starting at the reader's current position.
/// </summary>
/// <param name="reader">Reader positioned at the first character to match.</param>
/// <param name="value">Value of the matched node, or <c>null</c> when nothing matched.</param>
/// <param name="length">Match length as reported by <c>TryFindNode</c>.</param>
/// <returns><c>true</c> when a node was found; otherwise <c>false</c>.</returns>
public bool TryFind(LexerStringReader reader, out T value, out int length)
{
    if (!TryFindNode(ref reader, out var matchedNode, out length))
    {
        value = null;
        return false;
    }

    value = matchedNode.Value;
    return true;
}
/// <summary>
/// Builds a position provider by scanning the reader line by line and recording
/// the reader position reached after each successful move to the next line.
/// </summary>
/// <param name="reader">Reader over the text to index; it is rewound to position 0 first.</param>
/// <returns>A provider initialised with the collected positions (the first entry is always 0).</returns>
public static LexerStringPositionProvider Create(LexerStringReader reader)
{
    reader.MoveTo(new LexerStringReader.Position(0));

    // TODO: heap alloc :(
    var lineStartPositions = new List<int>(10);
    lineStartPositions.Add(0);

    while (true)
    {
        if (!reader.MoveToNextLine())
        {
            break;
        }

        lineStartPositions.Add(reader.CurrentPosition);
    }

    return new LexerStringPositionProvider(lineStartPositions);
}
/// <summary>
/// Splits <paramref name="query"/> into tokens using the configured tokenizers.
/// </summary>
/// <remarks>
/// On every iteration leading whitespace is skipped, then the tokenizers from
/// <c>_languageConfig.TokenizersSource.OrderedTokenizers</c> are tried in order; the first one
/// reporting success (or partial success) contributes its token. When none matches, the text up to
/// the end of the current word is recorded as an <c>UnknownToken</c>.
/// </remarks>
/// <param name="query">Text to tokenize.</param>
/// <returns>A <c>LexerResult</c> whose <c>Tokens</c> holds the tokens in the order they were found.</returns>
/// <exception cref="ArgumentException">
/// <paramref name="query"/> is <c>null</c>, empty, or consists only of whitespace.
/// </exception>
public LexerResult Tokenize(string query)
{
    if (string.IsNullOrWhiteSpace(query))
    {
        throw new ArgumentException("Query must not be null, empty, or consist only of whitespace.", nameof(query));
    }

    var finalResult = new LexerResult();
    var reader = new LexerStringReader(query, 0);

    while (!reader.IsEndOfQuery())
    {
        reader.MoveBy(SkipWhitespaces(reader));

        var tokenFound = false;
        foreach (var tokenizer in _languageConfig.TokenizersSource.OrderedTokenizers)
        {
            // Each tokenizer gets fresh runtime info built from the current reader.
            var runtimeInfo = new LexerRuntimeInfo(reader);
            var result = tokenizer.Tokenize(ref runtimeInfo);

            if (result.Success || result.IsPartiallySuccessful)
            {
                finalResult.Tokens.Add(result.Token);
                reader.ForceMoveBy(result.Token.Length);
                tokenFound = true;
                break;
            }
        }

        if (!tokenFound)
        {
            // No tokenizer recognised the input here: record the word as unknown.
            // NOTE(review): progress relies on ReadTillEndOfWord advancing the reader - confirm.
            var startPosition = reader.CurrentPosition;
            var word = reader.ReadTillEndOfWord();
            finalResult.Tokens.Add(new UnknownToken(startPosition, word.Length));
        }
    }

    return finalResult;
}
/// <summary>
/// Creates runtime info that exposes the given reader through <c>Reader</c>.
/// </summary>
/// <param name="reader">Reader to store.</param>
public LexerRuntimeInfo(LexerStringReader reader) => Reader = reader;
/// <summary>
/// Walks the node tree from <c>_root</c>, following at each step the first child whose key matches
/// the reader's current character, and reports the last visited node that carries a value.
/// </summary>
/// <remarks>
/// A child with a whitespace key matches any whitespace character and consumes the whole whitespace
/// run. Other keys are compared exactly, or case-insensitively when <c>_caseSensitive</c> is false.
/// The case-insensitive comparison uses the invariant culture so that matching does not depend on
/// the current thread culture. The reader is advanced while matching and is not rewound here.
/// </remarks>
/// <param name="reader">Reader positioned at the first character to match; advanced as characters match.</param>
/// <param name="node">Last matched node with a non-null value, or <c>null</c> when none was reached.</param>
/// <param name="length">Length of the match, or <c>0</c> when nothing matched.</param>
/// <returns><c>true</c> when a node with a value was reached; otherwise <c>false</c>.</returns>
private bool TryFindNode(ref LexerStringReader reader, out Node node, out int length)
{
    var initialPosition = reader.CurrentPosition;
    var currentNode = _root;
    Node lastSuccessfulNode = null;
    var lastSuccessfulLength = 0;
    var endOfQuery = false;

    while (currentNode != null)
    {
        var progressed = false;
        var currentChar = reader.CurrentChar;

        // PERF NOTE: bisection could be implemented here, but since number of children should remain small
        // then overhead could be larger than actual gains
        foreach (var child in currentNode.Children)
        {
            var isWhitespace = false;
            if (char.IsWhiteSpace(child.Key) && char.IsWhiteSpace(currentChar))
            {
                isWhitespace = true;
            }
            else if (currentChar != child.Key
                     && (_caseSensitive || char.ToLowerInvariant(currentChar) != char.ToLowerInvariant(child.Key)))
            {
                // TODO: PERF: use zero cost extension point trick to implement case sensivity flag (see Performance.md)
                continue;
            }

            // check if not the end (must be sampled before the reader is advanced)
            if (reader.IsEndOfQuery())
            {
                endOfQuery = true;
            }

            // read characters
            if (isWhitespace)
            {
                reader.ReadTillEndOfWhitespace(); // TODO: multi whitespace read opt in (perhaps use extension point trick?)
            }
            else
            {
                reader.MoveBy(1);
            }

            // assign results
            currentNode = child;
            if (currentNode.Value != null)
            {
                lastSuccessfulNode = currentNode;
                lastSuccessfulLength = reader.CurrentPosition - initialPosition;
            }

            // break if nothing more to be processed
            if (endOfQuery)
            {
                break;
            }

            progressed = true;
            break;
        }

        if (endOfQuery)
        {
            break;
        }

        if (!progressed)
        {
            break;
        }
    }

    if (lastSuccessfulNode != null)
    {
        length = lastSuccessfulLength;
        if (endOfQuery)
        {
            // NOTE(review): presumably compensates for the reader not advancing past the final
            // character, so the position delta is one short - confirm against LexerStringReader.
            length++;
        }

        node = lastSuccessfulNode;
        return true;
    }

    node = null;
    length = 0;
    return false;
}
/// <summary>
/// Sets the reader's current position to the value stored in <c>_capturedPosition</c>.
/// </summary>
/// <param name="reader">Reader whose position is overwritten.</param>
public void RestorePosition(ref LexerStringReader reader) => reader.CurrentPosition = _capturedPosition;
/// <summary>
/// Stores the reader's current position in <c>_capturedPosition</c>.
/// </summary>
/// <param name="reader">Reader whose current position is recorded.</param>
public void CaptureCurrentPosition(ref LexerStringReader reader) => _capturedPosition = reader.CurrentPosition;