A TokenFilter that uses Hunspell affix rules and words to stem tokens. Since Hunspell supports a word having multiple stems, this filter can emit multiple tokens for each consumed token.
Inheritance: TokenFilter
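As an illustration of the multiple-stem behavior, here is a minimal sketch against the Lucene.NET 4.8 API. The class and method names, the en_US.aff/en_US.dic file names, and the choice of WhitespaceTokenizer are assumptions made for the example; extra stems of the same input token arrive with a position increment of 0.

using System;
using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Core;
using Lucene.Net.Analysis.Hunspell;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Util;

public static class HunspellStemFilterSketch
{
    public static void PrintStems()
    {
        // Hypothetical dictionary files; any Hunspell .aff/.dic pair will do.
        using (Stream affix = File.OpenRead("en_US.aff"))
        using (Stream words = File.OpenRead("en_US.dic"))
        {
            var dictionary = new Dictionary(affix, words);

            TokenStream stream = new WhitespaceTokenizer(LuceneVersion.LUCENE_48, new StringReader("lucene is awesome"));
            stream = new HunspellStemFilter(stream, dictionary); // dedup defaults to true

            var term = stream.AddAttribute<ICharTermAttribute>();
            var posIncr = stream.AddAttribute<IPositionIncrementAttribute>();

            stream.Reset();
            while (stream.IncrementToken())
            {
                // Additional stems of the same word are emitted with PositionIncrement == 0.
                Console.WriteLine($"{term} (posIncr={posIncr.PositionIncrement})");
            }
            stream.End();
            stream.Dispose();
        }
    }
}

The unit tests below exercise the same behavior.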
public virtual void TestLongestOnly()
{
    MockTokenizer tokenizer = new MockTokenizer(new StringReader("lucene is awesome"));
    tokenizer.EnableChecks = true;

    // dedup = true, longestOnly = true: only the longest stem of each token is emitted.
    HunspellStemFilter filter = new HunspellStemFilter(tokenizer, dictionary, true, true);

    AssertTokenStreamContents(filter, new string[] { "lucene", "is", "awesome" }, new int[] { 1, 1, 1 });
}
public virtual void TestKeywordAttribute()
{
    MockTokenizer tokenizer = new MockTokenizer(new StringReader("lucene is awesome"));
    tokenizer.EnableChecks = true;
    HunspellStemFilter filter = new HunspellStemFilter(tokenizer, dictionary);
    AssertTokenStreamContents(filter, new string[] { "lucene", "lucen", "is", "awesome" }, new int[] { 1, 0, 1, 1 });

    // assert with keyword marker
    tokenizer = new MockTokenizer(new StringReader("lucene is awesome"));
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.AsList("Lucene"), true);
    filter = new HunspellStemFilter(new SetKeywordMarkerFilter(tokenizer, set), dictionary);
    AssertTokenStreamContents(filter, new string[] { "lucene", "is", "awesome" }, new int[] { 1, 1, 1 });
}
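With the two-argument constructor (dedup on, longestOnly off), "lucene" yields both the surface form and the shorter stem "lucen", the latter at position increment 0. Marking "Lucene" as a keyword through SetKeywordMarkerFilter makes the stemmer pass it through unchanged, as the second half of the test asserts.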
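The filter can also be wired into a custom Analyzer. The following older-style (Lucene.Net 3.x) TokenStream override chains a StandardTokenizer, a LowerCaseFilter, and a HunspellStemFilter over a previously loaded _dictionary: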
public override TokenStream TokenStream(string fieldName, TextReader reader)
{
    // Tokenize, lowercase, then stem against the Hunspell dictionary.
    TokenStream stream = new StandardTokenizer(Version.LUCENE_29, reader);
    stream = new LowerCaseFilter(stream);
    stream = new HunspellStemFilter(stream, _dictionary);
    return stream;
}