/// <summary>
/// Builds the analysis chain for a field: a <c>LetterOrDigitTokenizer</c> over
/// <paramref name="reader"/>, optionally wrapped by an <c>ASCIIFoldingFilter</c>
/// and then by a <c>LowerCaseFilter</c>, depending on the analyzer's settings.
/// </summary>
/// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
/// <param name="reader">Source of the text to tokenize.</param>
/// <returns>The outermost stream of the configured chain.</returns>
public override TokenStream TokenStream(string fieldName, TextReader reader)
{
    TokenStream chain = new LetterOrDigitTokenizer(reader);

    // Accent folding is applied first so the lower-casing filter (if any) wraps it.
    if (_ignoreLanguageAccents)
    {
        chain = new ASCIIFoldingFilter(chain);
    }

    return _caseInsensitive ? new LowerCaseFilter(chain) : chain;
}
/// <summary>
/// Loads every token of <c>TestDocs/warandpeace.txt</c> (resolved against the test
/// directory) into a <c>Trie</c> in lower case, then checks that two wildcard
/// queries return exactly the expected word sets.
/// </summary>
public void WarAndPeace()
{
    var trie = new Trie();
    var filePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "TestDocs", "warandpeace.txt");
    var tokenizer = new LetterOrDigitTokenizer();

    using (var reader = new StreamReader(File.OpenRead(filePath)))
    {
        foreach (var token in tokenizer.Tokenize(reader))
        {
            // Invariant lower-casing keeps the indexed words independent of the
            // culture of the machine running the test (e.g. Turkish 'I' -> 'ı').
            trie.Add(token.ToLowerInvariant());
        }
    }

    // NOTE(review): presumably '?' matches one character and '*' any run of
    // characters - confirm against Trie.WildcardSearch.
    var found = trie.WildcardSearch("при?ет");
    CollectionAssert.AreEquivalent(new[] { "придет", "примет" }, found);

    found = trie.WildcardSearch("здр*в?й*");
    CollectionAssert.AreEquivalent(new[] { "здравый", "здравствуй", "здравствуйте" }, found);
}