/// <summary>
/// Lexes the complete text of a document into word segments.
/// </summary>
/// <param name="document">The document whose text is lexed; checked for null via <c>Ensure</c> before use.</param>
/// <returns>
/// The segments produced by a <c>StringWordLexer</c> over the document's full text,
/// already materialized into an array.
/// </returns>
public IEnumerable<WordSegment> LexWords(IDocument document)
{
    Ensure.That(() => document).IsNotNull();

    var text = document.GetAllText();
    using (var wordLexer = new StringWordLexer(text))
    {
        // Materialize inside the using block: the lexer is disposed on exit,
        // so the result must not be left as a lazily evaluated sequence.
        var segments = wordLexer.LexWords().ToArray();
        return segments;
    }
}
/// <summary>
/// Analyzes a document and returns its full text together with the words that
/// pass the diacritics filter, de-duplicated by their <c>Word</c> value.
/// </summary>
/// <param name="document">The document to analyze; must not be null.</param>
/// <returns>
/// An <c>AnalyzedDocument</c> built from the document's full text and the
/// filtered, transformed, distinct word segments.
/// </returns>
/// <exception cref="System.ArgumentNullException">
/// Thrown when <paramref name="document"/> is null.
/// </exception>
public IAnalyzedDocument Analyze(IDocument document)
{
    // Fail fast with a descriptive exception rather than a NullReferenceException
    // from the GetAllText() call below.
    if (document == null)
    {
        throw new System.ArgumentNullException("document");
    }

    var allText = document.GetAllText();
    var allWords = lexerFactory().LexWords(document);

    var distinctWords = allWords
        // Keep only segments whose word the word processor reports as having diacritics.
        .Where(segment => wordProcessor.HasDiacritics(segment.Word))
        // NOTE(review): ToLower() presumably lower-cases the segment's word so that
        // case variants collapse in the DistinctBy step below; confirm against WordSegment.
        .Select(segment => segment.ToLower())
        .DistinctBy(segment => segment.Word)
        .ToArray();

    return new AnalyzedDocument(allText, distinctWords);
}