Analyzer for the Dutch language. Supports an external list of stopwords (words that will not be indexed at all), an external list of exclusions (words that will not be stemmed, but will still be indexed) and an external list of word-stem pairs that overrule the algorithm (dictionary stemming). A default set of stopwords is used unless an alternative list is specified; the exclusion list is empty by default. The German Analyzer was used as the starting point for this Analyzer. The stemming algorithm implemented can be found at [reference missing from this text]. Version: $Id: DutchAnalyzer.java,v 1.1 2004/03/09 14:55:08 otis Exp $
Inheritance: Analyzer
Example #1
0
        /// <summary>
        /// Analyzes the capitalised word "Zelf" with a <c>LUCENE_31</c> analyzer and
        /// asserts that the expected token list is empty.
        /// </summary>
        public virtual void TestStopwordsCasing()
        {
            // The expected output is an empty token array.
            string[] noTokens = new string[0];

#pragma warning disable 612, 618
            DutchAnalyzer analyzer = new DutchAnalyzer(LuceneVersion.LUCENE_31);
#pragma warning restore 612, 618

            AssertAnalyzesTo(analyzer, "Zelf", noTokens);
        }
Example #2
0
        /// <summary>
        /// Checks the stems produced by a <c>LuceneVersion.LUCENE_30</c> analyzer for three
        /// forms of "opheffen"; each is expected to be "ophef" (the inline notes record
        /// that snowball yields "opheff" instead).
        /// </summary>
        public virtual void TestOldBuggyStemmer()
        {
            // LUCENE_30 is a legacy version constant used on purpose here; suppress the
            // obsolete-member warnings (CS0612/CS0618) for just this statement.
#pragma warning disable 612, 618
            Analyzer a = new DutchAnalyzer(LuceneVersion.LUCENE_30);
#pragma warning restore 612, 618

            CheckOneTerm(a, "opheffen", "ophef");   // versus snowball 'opheff'
            CheckOneTerm(a, "opheffende", "ophef"); // versus snowball 'opheff'
            CheckOneTerm(a, "opheffing", "ophef");  // versus snowball 'opheff'
        }
Example #3
0
        /// <summary>
        /// Checks that an analyzer built with <c>TEST_VERSION_CURRENT</c> reduces three
        /// forms of "opheffen" to the single term "opheff".
        /// </summary>
        public virtual void TestSnowballCorrectness()
        {
            const string expectedStem = "opheff";
            Analyzer analyzer = new DutchAnalyzer(TEST_VERSION_CURRENT);

            // Same order as the individual checks: infinitive, participle, noun.
            foreach (string word in new string[] { "opheffen", "opheffende", "opheffing" })
            {
                CheckOneTerm(analyzer, word, expectedStem);
            }
        }
Example #4
0
        /// <summary>
        /// Checks how "fiets" is analyzed under <c>LuceneVersion.LUCENE_30</c>: the
        /// single-argument constructor is expected to leave it as "fiets", while the
        /// constructor that is handed an empty stop set is expected to give "fiet".
        /// </summary>
        public virtual void Test30StemOverrides()
        {
            // LUCENE_30 is a legacy version constant used on purpose here; suppress the
            // obsolete-member warnings (CS0612/CS0618) around both constructions.
#pragma warning disable 612, 618
            DutchAnalyzer a = new DutchAnalyzer(LuceneVersion.LUCENE_30);
#pragma warning restore 612, 618

            CheckOneTerm(a, "fiets", "fiets");

#pragma warning disable 612, 618
            a = new DutchAnalyzer(LuceneVersion.LUCENE_30, CharArraySet.EMPTY_SET);
#pragma warning restore 612, 618
            CheckOneTerm(a, "fiets", "fiet"); // only the default ctor populates the dict
        }
Example #5
0
        /// <summary>
        /// Runs four single-term checks against one analyzer instance built with
        /// <c>TEST_VERSION_CURRENT</c>: "lichaamsziek" is expected unchanged, and the
        /// three "lichamelijk" forms are each expected to become "licham".
        /// </summary>
        public virtual void TestReusableTokenStream()
        {
            Analyzer analyzer = new DutchAnalyzer(TEST_VERSION_CURRENT);

            CheckOneTerm(analyzer, "lichaamsziek", "lichaamsziek");

            // The remaining inputs all share the same expected term.
            string[] lichamForms = { "lichamelijk", "lichamelijke", "lichamelijkheden" };
            for (int i = 0; i < lichamForms.Length; i++)
            {
                CheckOneTerm(analyzer, lichamForms[i], "licham");
            }
        }
Example #6
0
        /// <summary>
        /// Passes a set containing "lichamelijk" as the third constructor argument and
        /// asserts that "lichamelijk lichamelijke" analyzes to "lichamelijk", "licham".
        /// The construct-and-assert step is performed twice with identical arguments.
        /// </summary>
        public virtual void TestExclusionTableViaCtor()
        {
#pragma warning disable 612, 618
            CharArraySet exclusions = new CharArraySet(LuceneVersion.LUCENE_30, 1, true);
#pragma warning restore 612, 618
            exclusions.add("lichamelijk");

            // Two identical rounds, each with a freshly constructed analyzer.
            for (int round = 0; round < 2; round++)
            {
                DutchAnalyzer analyzer = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, exclusions);
                AssertAnalyzesTo(analyzer, "lichamelijk lichamelijke", new string[] { "lichamelijk", "licham" });
            }
        }
Example #7
0
        /// <summary>
        /// Analyzes the capitalised word "Zelf" with a <c>LuceneVersion.LUCENE_30</c>
        /// analyzer and asserts that the single token "zelf" is produced.
        /// </summary>
        public virtual void TestBuggyStopwordsCasing()
        {
            // LUCENE_30 is a legacy version constant used on purpose here; suppress the
            // obsolete-member warnings (CS0612/CS0618) for just this statement.
#pragma warning disable 612, 618
            DutchAnalyzer a = new DutchAnalyzer(LuceneVersion.LUCENE_30);
#pragma warning restore 612, 618

            AssertAnalyzesTo(a, "Zelf", new string[] { "zelf" });
        }
Example #8
0
        /// <summary>
        /// Builds a <c>LuceneVersion.LUCENE_35</c> analyzer with an empty stop set and
        /// checks that "fiets" is analyzed to "fiet".
        /// </summary>
        public virtual void TestBuggyStemOverrides()
        {
            // LUCENE_35 is a legacy version constant used on purpose here; suppress the
            // obsolete-member warnings (CS0612/CS0618) for just this statement.
#pragma warning disable 612, 618
            DutchAnalyzer a = new DutchAnalyzer(LuceneVersion.LUCENE_35, CharArraySet.EMPTY_SET);
#pragma warning restore 612, 618

            CheckOneTerm(a, "fiets", "fiet");
        }
Example #9
0
        /// <summary>
        /// Builds an analyzer with an empty stop set, an empty exclusion set and an
        /// empty <c>CharArrayMap&lt;string&gt;</c>, then checks that "fiets" is analyzed
        /// to "fiet".
        /// </summary>
        public virtual void TestEmptyStemDictionary()
        {
            // Fourth constructor argument: a map with no entries.
            var emptyMap = CharArrayMap<string>.EmptyMap();

            DutchAnalyzer analyzer = new DutchAnalyzer(
                TEST_VERSION_CURRENT,
                CharArraySet.EMPTY_SET,
                CharArraySet.EMPTY_SET,
                emptyMap);

            CheckOneTerm(analyzer, "fiets", "fiet");
        }
Example #10
0
        /// <summary>
        /// Builds an analyzer with <c>TEST_VERSION_CURRENT</c> and an empty stop set,
        /// then checks that "fiets" comes back as "fiets".
        /// </summary>
        public virtual void TestStemOverrides()
        {
            const string word = "fiets";
            DutchAnalyzer analyzer = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);

            // The expected term equals the input.
            CheckOneTerm(analyzer, word, word);
        }
 /// <summary>
 /// Builds an analyzer with <c>TEST_VERSION_CURRENT</c> and an empty stop set,
 /// then checks that "fiets" comes back as "fiets".
 /// </summary>
 public virtual void TestStemOverrides()
 {
     const string input = "fiets";
     const string expected = "fiets";

     DutchAnalyzer analyzer = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
     CheckOneTerm(analyzer, input, expected);
 }
 /// <summary>
 /// Checks that an analyzer built with <c>TEST_VERSION_CURRENT</c> reduces three
 /// forms of "opheffen" to the single term "opheff".
 /// </summary>
 public virtual void TestSnowballCorrectness()
 {
     Analyzer analyzer = new DutchAnalyzer(TEST_VERSION_CURRENT);
     string[] inputs = { "opheffen", "opheffende", "opheffing" };

     // Every input is expected to yield the same term.
     for (int i = 0; i < inputs.Length; i++)
     {
         CheckOneTerm(analyzer, inputs[i], "opheff");
     }
 }
Example #13
0
 /// <summary>
 /// Checks "lichamelijk" twice on the same analyzer instance: first expecting
 /// "licham", then, after <c>SetStemExclusionTable</c> is called with that word,
 /// expecting the word unchanged.
 /// </summary>
 public void TestExclusionTableReuse()
 {
     const string term = "lichamelijk";
     DutchAnalyzer analyzer = new DutchAnalyzer(Version.LUCENE_CURRENT);

     // Before the exclusion table is set.
     CheckOneTermReuse(analyzer, term, "licham");

     // After the exclusion table is set on the same instance.
     String[] excluded = new String[] { term };
     analyzer.SetStemExclusionTable(excluded);
     CheckOneTermReuse(analyzer, term, term);
 }
        /// <summary>
        /// Analyzes the capitalised word "Zelf" with a <c>LUCENE_31</c> analyzer and
        /// asserts that the expected token list is empty.
        /// </summary>
        public virtual void TestStopwordsCasing()
        {
            const string input = "Zelf";

#pragma warning disable 612, 618
            DutchAnalyzer analyzer = new DutchAnalyzer(LuceneVersion.LUCENE_31);
#pragma warning restore 612, 618

            // Zero-length array: no tokens are expected.
            string[] expectedTokens = new string[0];
            AssertAnalyzesTo(analyzer, input, expectedTokens);
        }
 /// <summary>
 /// Analyzes the capitalised word "Zelf" with a <c>LuceneVersion.LUCENE_30</c>
 /// analyzer and asserts that the single token "zelf" is produced.
 /// </summary>
 public virtual void TestBuggyStopwordsCasing()
 {
     // LUCENE_30 is a legacy version constant used on purpose here; suppress the
     // obsolete-member warnings (CS0612/CS0618) for just this statement.
#pragma warning disable 612, 618
     DutchAnalyzer a = new DutchAnalyzer(LuceneVersion.LUCENE_30);
#pragma warning restore 612, 618
     AssertAnalyzesTo(a, "Zelf", new string[] { "zelf" });
 }
 /// <summary>
 /// Builds a <c>LuceneVersion.LUCENE_35</c> analyzer with an empty stop set and
 /// checks that "fiets" is analyzed to "fiet".
 /// </summary>
 public virtual void TestBuggyStemOverrides()
 {
     // LUCENE_35 is a legacy version constant used on purpose here; suppress the
     // obsolete-member warnings (CS0612/CS0618) for just this statement.
#pragma warning disable 612, 618
     DutchAnalyzer a = new DutchAnalyzer(LuceneVersion.LUCENE_35, CharArraySet.EMPTY_SET);
#pragma warning restore 612, 618
     CheckOneTerm(a, "fiets", "fiet");
 }
 /// <summary>
 /// Builds an analyzer with an empty stop set, an empty exclusion set and an
 /// empty <c>CharArrayMap&lt;string&gt;</c>, then checks that "fiets" is analyzed
 /// to "fiet".
 /// </summary>
 public virtual void TestEmptyStemDictionary()
 {
     // Constructor arguments are evaluated in the same left-to-right order as before.
     DutchAnalyzer analyzer = new DutchAnalyzer(
         TEST_VERSION_CURRENT,
         CharArraySet.EMPTY_SET,
         CharArraySet.EMPTY_SET,
         CharArrayMap<string>.EmptyMap());

     CheckOneTerm(analyzer, "fiets", "fiet");
 }
 /// <summary>
 /// Checks how "fiets" is analyzed under <c>LuceneVersion.LUCENE_30</c>: the
 /// single-argument constructor is expected to leave it as "fiets", while the
 /// constructor that is handed an empty stop set is expected to give "fiet".
 /// </summary>
 public virtual void Test30StemOverrides()
 {
     // LUCENE_30 is a legacy version constant used on purpose here; suppress the
     // obsolete-member warnings (CS0612/CS0618) around both constructions.
#pragma warning disable 612, 618
     DutchAnalyzer a = new DutchAnalyzer(LuceneVersion.LUCENE_30);
#pragma warning restore 612, 618
     CheckOneTerm(a, "fiets", "fiets");
#pragma warning disable 612, 618
     a = new DutchAnalyzer(LuceneVersion.LUCENE_30, CharArraySet.EMPTY_SET);
#pragma warning restore 612, 618
     CheckOneTerm(a, "fiets", "fiet"); // only the default ctor populates the dict
 }
 /// <summary>
 /// Runs four single-term checks against one analyzer instance built with
 /// <c>TEST_VERSION_CURRENT</c>: "lichaamsziek" is expected unchanged, and the
 /// three "lichamelijk" forms are each expected to become "licham".
 /// </summary>
 public virtual void TestReusableTokenStream()
 {
     Analyzer analyzer = new DutchAnalyzer(TEST_VERSION_CURRENT);

     // Word that is expected to pass through untouched.
     CheckOneTerm(analyzer, "lichaamsziek", "lichaamsziek");

     // Words that are all expected to collapse to the same term.
     foreach (string form in new string[] { "lichamelijk", "lichamelijke", "lichamelijkheden" })
     {
         CheckOneTerm(analyzer, form, "licham");
     }
 }
Example #20
0
 /// <summary>
 /// Runs four <c>CheckOneTermReuse</c> checks against one analyzer instance built
 /// with <c>Version.LUCENE_CURRENT</c>: "lichaamsziek" is expected unchanged, and
 /// the three "lichamelijk" forms are each expected to become "licham".
 /// </summary>
 public void TestReusableTokenStream()
 {
     Analyzer analyzer = new DutchAnalyzer(Version.LUCENE_CURRENT);

     CheckOneTermReuse(analyzer, "lichaamsziek", "lichaamsziek");

     // The remaining inputs share one expected term.
     const string sharedStem = "licham";
     string[] forms = { "lichamelijk", "lichamelijke", "lichamelijkheden" };
     for (int i = 0; i < forms.Length; i++)
     {
         CheckOneTermReuse(analyzer, forms[i], sharedStem);
     }
 }
        /// <summary>
        /// Passes a set containing "lichamelijk" as the third constructor argument and
        /// asserts that "lichamelijk lichamelijke" analyzes to "lichamelijk", "licham".
        /// The construct-and-assert step is performed twice with identical arguments.
        /// </summary>
        public virtual void TestExclusionTableViaCtor()
        {
            const string text = "lichamelijk lichamelijke";

#pragma warning disable 612, 618
            CharArraySet stemExclusions = new CharArraySet(LuceneVersion.LUCENE_30, 1, true);
#pragma warning restore 612, 618
            stemExclusions.add("lichamelijk");

            // First analyzer instance.
            DutchAnalyzer first = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, stemExclusions);
            AssertAnalyzesTo(first, text, new string[] { "lichamelijk", "licham" });

            // Second instance, constructed with the very same arguments.
            DutchAnalyzer second = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, stemExclusions);
            AssertAnalyzesTo(second, text, new string[] { "lichamelijk", "licham" });
        }
Example #22
0
 /// <summary>
 /// Checks "lichamelijk" twice on the same analyzer instance: first expecting
 /// "licham", then, after <c>SetStemDictionary(customDictFile)</c> is called,
 /// expecting "somethingentirelydifferent".
 /// </summary>
 public void TestStemDictionaryReuse()
 {
     const string term = "lichamelijk";
     DutchAnalyzer analyzer = new DutchAnalyzer(Version.LUCENE_CURRENT);

     // Before the dictionary is set.
     CheckOneTermReuse(analyzer, term, "licham");

     // After the dictionary is set on the same instance.
     analyzer.SetStemDictionary(customDictFile);
     CheckOneTermReuse(analyzer, term, "somethingentirelydifferent");
 }
 /// <summary>
 /// Checks the stems produced by a <c>LuceneVersion.LUCENE_30</c> analyzer for three
 /// forms of "opheffen"; each is expected to be "ophef" (the inline notes record
 /// that snowball yields "opheff" instead).
 /// </summary>
 public virtual void TestOldBuggyStemmer()
 {
     // LUCENE_30 is a legacy version constant used on purpose here; suppress the
     // obsolete-member warnings (CS0612/CS0618) for just this statement.
#pragma warning disable 612, 618
     Analyzer a = new DutchAnalyzer(LuceneVersion.LUCENE_30);
#pragma warning restore 612, 618
     CheckOneTerm(a, "opheffen", "ophef"); // versus snowball 'opheff'
     CheckOneTerm(a, "opheffende", "ophef"); // versus snowball 'opheff'
     CheckOneTerm(a, "opheffing", "ophef"); // versus snowball 'opheff'
 }