Beispiel #1
0
        /// <summary>
        /// Builds the token stream used to analyze text supplied by <paramref name="reader"/>.
        /// The text is first split by a <c>LetterOrDigitTokenizer</c>; the stream is then wrapped in an
        /// <c>ASCIIFoldingFilter</c> when <c>_ignoreLanguageAccents</c> is set, and finally in a
        /// <c>LowerCaseFilter</c> when <c>_caseInsensitive</c> is set.
        /// </summary>
        /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
        /// <param name="reader">Source of the text to tokenize.</param>
        /// <returns>The tokenizer, wrapped by whichever optional filters are enabled.</returns>
        public override TokenStream TokenStream(string fieldName, TextReader reader)
        {
            TokenStream tokens = new LetterOrDigitTokenizer(reader);

            // Accent folding, when enabled, is applied before lower-casing.
            TokenStream folded = _ignoreLanguageAccents
                ? new ASCIIFoldingFilter(tokens)
                : tokens;

            return _caseInsensitive
                ? new LowerCaseFilter(folded)
                : folded;
        }
Beispiel #2
0
        /// <summary>
        /// Tokenizes the <c>TestDocs/warandpeace.txt</c> fixture, adds every lower-cased token to a
        /// <c>Trie</c>, and verifies the results of two wildcard searches against that trie.
        /// </summary>
        public void WarAndPeace()
        {
            var trie      = new Trie();
            var filePath  = Path.Combine(TestContext.CurrentContext.TestDirectory, "TestDocs", "warandpeace.txt");
            var tokenizer = new LetterOrDigitTokenizer();

            using (var reader = new StreamReader(File.OpenRead(filePath)))
            {
                foreach (var token in tokenizer.Tokenize(reader))
                {
                    // Lower-case with the invariant culture so the indexed keys do not depend on
                    // the culture of the machine running the test (e.g. Turkish casing rules).
                    trie.Add(token.ToLowerInvariant());
                }
            }

            var found = trie.WildcardSearch("при?ет");

            CollectionAssert.AreEquivalent(new[] { "придет", "примет" }, found);

            found = trie.WildcardSearch("здр*в?й*");
            CollectionAssert.AreEquivalent(new[] { "здравый", "здравствуй", "здравствуйте" }, found);
        }