/// <summary>
/// Splits <paramref name="query"/> into tokens by trying the configured tokenizers
/// at each reader position, in the order given by <c>OrderedTokenizers</c>.
/// </summary>
/// <param name="query">Query text to tokenize; must contain at least one non-whitespace character.</param>
/// <returns>A <see cref="LexerResult"/> whose <c>Tokens</c> were added in the order they were found.</returns>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="query"/> is null, empty or consists only of whitespace.
/// </exception>
public LexerResult Tokenize(string query)
{
    if (string.IsNullOrWhiteSpace(query))
    {
        // Name the offending parameter so the caller can tell what was rejected.
        throw new ArgumentException("Query must not be null, empty or consist only of whitespace.", nameof(query));
    }

    var finalResult = new LexerResult();
    var reader = new LexerStringReader(query, 0);

    while (!reader.IsEndOfQuery())
    {
        // Whitespace between tokens is skipped and never produces a token.
        reader.MoveBy(SkipWhitespaces(reader));

        var tokenFound = false;
        foreach (var tokenizer in _languageConfig.TokenizersSource.OrderedTokenizers)
        {
            // Each tokenizer gets its own runtime info built from the current reader state.
            var runtimeInfo = new LexerRuntimeInfo(reader);
            var result = tokenizer.Tokenize(ref runtimeInfo);

            // The first tokenizer reporting full or partial success wins; the rest are not tried.
            if (result.Success || result.IsPartiallySuccessful)
            {
                finalResult.Tokens.Add(result.Token);
                reader.ForceMoveBy(result.Token.Length);
                tokenFound = true;
                break;
            }
        }

        if (!tokenFound)
        {
            // No tokenizer recognised the input here: read up to the end of the word
            // and record it as an unknown token.
            var startPosition = reader.CurrentPosition;
            var word = reader.ReadTillEndOfWord();
            var unknownToken = new UnknownToken(startPosition, word.Length);
            finalResult.Tokens.Add(unknownToken);
        }
    }

    return finalResult;
}
/// <summary>
/// Walks the node tree starting at <c>_root</c>, consuming characters from
/// <paramref name="reader"/>, and reports the last visited node that carries a value.
/// </summary>
/// <param name="reader">
/// Reader positioned at the first character to match. It is advanced while matching
/// and is not moved back by this method.
/// </param>
/// <param name="node">
/// The last visited node whose <c>Value</c> is not null, or <c>null</c> when no such node was reached.
/// </param>
/// <param name="length">
/// Distance the reader had moved from its initial position when <paramref name="node"/> was recorded,
/// plus one when the end of the query was reached; 0 when no node was found.
/// </param>
/// <returns><c>true</c> when a node with a value was reached; otherwise <c>false</c>.</returns>
private bool TryFindNode(ref LexerStringReader reader, out Node node, out int length)
{
    var initialPosition = reader.CurrentPosition;
    var currentNode = _root;
    Node lastSuccessfulNode = null;
    var lastSuccessfulLength = 0;
    var endOfQuery = false;

    while (currentNode != null)
    {
        var progressed = false;
        var currentChar = reader.CurrentChar;

        // PERF NOTE: bisection could be implemented here, but since number of children should remain small
        // then overhead could be larger than actual gains
        foreach (var child in currentNode.Children)
        {
            var isWhitespace = false;
            if (char.IsWhiteSpace(child.Key) && char.IsWhiteSpace(currentChar))
            {
                // Any whitespace character in the input matches a whitespace key.
                isWhitespace = true;
            }
            else if (currentChar != child.Key
                && (_caseSensitive || char.ToLowerInvariant(currentChar) != char.ToLowerInvariant(child.Key)))
            {
                // Invariant lowering keeps case-insensitive matching independent of the
                // current culture (e.g. the Turkish dotted/dotless 'i').
                // TODO: PERF: use zero cost extension point trick to implement case sensivity flag (see Performance.md)
                continue;
            }

            // check if not the end
            if (reader.IsEndOfQuery())
            {
                endOfQuery = true;
            }

            // read characters
            if (isWhitespace)
            {
                reader.ReadTillEndOfWhitespace(); // TODO: multi whitespace read opt in (perhaps use extension point trick?)
            }
            else
            {
                reader.MoveBy(1);
            }

            // assign results
            currentNode = child;
            if (currentNode.Value != null)
            {
                lastSuccessfulNode = currentNode;
                lastSuccessfulLength = reader.CurrentPosition - initialPosition;
            }

            // break if nothing more to be processed
            if (endOfQuery)
            {
                break;
            }

            progressed = true;
            break;
        }

        // Stop when the query is exhausted or no child matched the current character.
        if (endOfQuery || !progressed)
        {
            break;
        }
    }

    if (lastSuccessfulNode != null)
    {
        length = lastSuccessfulLength;
        if (endOfQuery)
        {
            // NOTE(review): presumably compensates for the reader not advancing past the final
            // character - confirm against LexerStringReader. The adjustment is also applied when
            // the recorded node was matched before the end was reached; verify that is intended.
            length++;
        }

        node = lastSuccessfulNode;
        return true;
    }

    node = null;
    length = 0;
    return false;
}