A TokenFilter that applies ArabicStemmer to stem Arabic words.

To prevent terms from being stemmed use an instance of SetKeywordMarkerFilter or a custom TokenFilter that sets the KeywordAttribute before this TokenStream.

Inheritance: TokenFilter
Example #1
0
        /// <summary>
        /// Tokenizes <paramref name="input"/> with an ArabicLetterTokenizer, wraps the tokenizer in an
        /// ArabicStemFilter, and passes the filter to AssertTokenStreamContents with
        /// <paramref name="expected"/> as the only expected term.
        /// </summary>
        /// <param name="input">Text fed to the tokenizer.</param>
        /// <param name="expected">The single term the filtered stream is compared against.</param>
        private void Check(string input, string expected)
        {
            var tokenizer = new ArabicLetterTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
            var stemmed = new ArabicStemFilter(tokenizer);

            AssertTokenStreamContents(stemmed, new[] { expected });
        }
        /// <summary>
        /// Builds an ArabicLetterTokenizer over <paramref name="input"/>, wraps it in an
        /// ArabicStemFilter, and checks the filter with AssertTokenStreamContents against the
        /// single term <paramref name="expected"/>.
        /// </summary>
        private void Check(string input, string expected)
        {
            // Warnings 612/618 (use of obsolete members) are switched off only while the
            // tokenizer is being constructed, then restored.
#pragma warning disable 612, 618
            var stemFilter = new ArabicStemFilter(
                new ArabicLetterTokenizer(TEST_VERSION_CURRENT, new StringReader(input)));
#pragma warning restore 612, 618

            AssertTokenStreamContents(stemFilter, new[] { expected });
        }
Example #3
0
        /// <summary>
        /// Registers a term in a CharArraySet, routes the same term through a
        /// SetKeywordMarkerFilter placed in front of the ArabicStemFilter, and asserts via
        /// AssertTokenStreamContents that the term is emitted unchanged.
        /// </summary>
        public virtual void TestWithKeywordAttribute()
        {
            // The same literal is used as the keyword entry, the tokenizer input and the expected output.
            const string keyword = "ساهدهات";

            var keywords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
            keywords.add(keyword);

            var tokenizer = new ArabicLetterTokenizer(TEST_VERSION_CURRENT, new StringReader(keyword));
            var marked = new SetKeywordMarkerFilter(tokenizer, keywords);
            var stemmed = new ArabicStemFilter(marked);

            AssertTokenStreamContents(stemmed, new[] { keyword });
        }
        /// <summary>
        /// Helper that stems <paramref name="input"/> through an ArabicLetterTokenizer followed by an
        /// ArabicStemFilter and hands the result to AssertTokenStreamContents, expecting exactly
        /// the one term given in <paramref name="expected"/>.
        /// </summary>
        /// <param name="input">Text to tokenize.</param>
        /// <param name="expected">The only term listed as expected output.</param>
        private void Check(string input, string expected)
        {
            // The pragma pair silences warnings 612/618 (obsolete member usage) for the tokenizer line only.
#pragma warning disable 612, 618
            var letters = new ArabicLetterTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
#pragma warning restore 612, 618
            var stems = new ArabicStemFilter(letters);
            string[] expectedTerms = { expected };

            AssertTokenStreamContents(stems, expectedTerms);
        }
        /// <summary>
        /// Adds a term to a CharArraySet, marks it with a SetKeywordMarkerFilter ahead of the
        /// ArabicStemFilter, and asserts via AssertTokenStreamContents that the filter outputs
        /// the term exactly as it was supplied.
        /// </summary>
        public virtual void TestWithKeywordAttribute()
        {
            // One term serves as set entry, tokenizer input and expected output.
            string term = "ساهدهات";

            CharArraySet protectedTerms = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
            protectedTerms.add(term);

            // Warnings 612/618 (obsolete member usage) are disabled only for the tokenizer construction.
#pragma warning disable 612, 618
            ArabicLetterTokenizer letters = new ArabicLetterTokenizer(TEST_VERSION_CURRENT, new StringReader(term));
#pragma warning restore 612, 618

            SetKeywordMarkerFilter keywordMarker = new SetKeywordMarkerFilter(letters, protectedTerms);
            ArabicStemFilter stems = new ArabicStemFilter(keywordMarker);

            AssertTokenStreamContents(stems, new string[] { term });
        }