/// <inheritdoc />
public IReadOnlyList<Token> Process(ReadOnlySpan<char> text)
{
    // Scratch state handed to the private overload: a builder for the token being
    // assembled, a running token counter, and the store that collects the results.
    var builder = new StringBuilder();
    var store = new TokenStore();
    var nextTokenIndex = 0;

    // The literal 0 is presumably the starting offset of the text — confirm against the helper.
    this.Process(text, ref nextTokenIndex, 0, store, builder);

    return store.ToList();
}
/// <summary>
/// Runs the shared processing helper over a single span of text and returns
/// whatever the token store collected.
/// </summary>
/// <param name="input">The text to process.</param>
/// <returns>The contents of the token store once the helper has run.</returns>
public IEnumerable<Token> Process(ReadOnlySpan<char> input)
{
    var store = new TokenStore();
    var builder = new StringBuilder();

    // Both counters are passed by reference, so the helper can advance them.
    var wordNumber = 0;
    var wordStart = 0;

    // A single span has no preceding text, hence the literal 0 in the fourth slot
    // (presumably an offset — confirm against the helper).
    Process(store, ref wordNumber, ref wordStart, 0, builder, input);

    return store.ToList();
}
/// <summary>
/// Scans <paramref name="input"/> character by character, cutting it into words at
/// split characters and capturing each non-empty word into a token store.
/// </summary>
/// <param name="input">The text to scan.</param>
/// <returns>The contents of the token store after the whole input has been scanned.</returns>
public IEnumerable<Token> Process(ReadOnlySpan<char> input)
{
    var store = new TokenStore(); // TODO Pool?
    var pending = new StringBuilder();
    var pendingHash = new TokenHash();
    var wordNumber = 0;
    var wordStart = 0;

    for (var position = 0; position < input.Length; position++)
    {
        var character = input[position];

        if (!this.IsWordSplitCharacter(character))
        {
            // Ordinary characters are routed through the preprocessor pipeline; every
            // item it yields is appended to the pending word and folded into its hash.
            foreach (var emitted in this.inputPreprocessorPipeline.Process(character))
            {
                pending.Append(emitted);
                pendingHash = pendingHash.Combine(emitted);
            }

            continue;
        }

        // Split character: flush the pending word (if any) and reset the per-word state.
        if (pending.Length > 0)
        {
            CaptureWord(store, pendingHash, wordNumber, wordStart, position, pending);
            wordNumber++;
            pending.Length = 0;
            pendingHash = new TokenHash();
        }

        // The next word can begin no earlier than the character after this separator.
        wordStart = position + 1;
    }

    // Input that does not end on a split character leaves one last word to capture.
    if (pending.Length > 0)
    {
        CaptureWord(store, pendingHash, wordNumber, wordStart, input.Length, pending);
    }

    return store.ToList();
}
/// <summary>
/// Runs the shared processing helper over each string in <paramref name="inputs"/>,
/// sharing one token store, word counter and builder across all of them.
/// </summary>
/// <param name="inputs">
/// The strings to process, in order. A null sequence yields an empty result;
/// null entries inside the sequence are skipped.
/// </param>
/// <returns>The contents of the token store once every input has been handled.</returns>
public IEnumerable<Token> Process(IEnumerable<string> inputs)
{
    if (inputs is null)
    {
        return Enumerable.Empty<Token>();
    }

    var processedWords = new TokenStore();
    var wordIndex = 0;
    var start = 0;
    var wordBuilder = new StringBuilder();

    // Combined length of the inputs handled so far; passed to the helper so that
    // later strings are positioned after earlier ones (presumably as an offset — confirm).
    var endOffset = 0;

    foreach (var input in inputs)
    {
        // AsSpan() tolerates a null string (it returns an empty span) but input.Length
        // does not, so a null entry would otherwise raise NullReferenceException.
        // A null entry contributes no text, so it is skipped.
        if (input is null)
        {
            continue;
        }

        Process(processedWords, ref wordIndex, ref start, endOffset, wordBuilder, input.AsSpan());
        endOffset += input.Length;
    }

    return processedWords.ToList();
}
/// <inheritdoc />
public IReadOnlyList<Token> Process(IEnumerable<DocumentTextFragment> document)
{
    // No document means no tokens; hand back a shared empty array.
    if (document is null)
    {
        return Array.Empty<Token>();
    }

    // One store, counter and builder are shared by every fragment.
    var builder = new StringBuilder();
    var store = new TokenStore();
    var nextTokenIndex = 0;

    foreach (var fragment in document)
    {
        // Each fragment supplies its own text and its own offset to the private overload.
        var fragmentText = fragment.Text.Span;
        this.Process(fragmentText, ref nextTokenIndex, fragment.Offset, store, builder);
    }

    return store.ToList();
}