Beispiel #1
0
 /// <summary>
 /// Builds an analyzer from the given stop words, stem exclusion set and stem override dictionary.
 /// Unmodifiable copies of the stop word set and the exclusion set are stored. If the override
 /// dictionary is empty, or <paramref name="matchVersion"/> is not on or after
 /// <c>Version.LUCENE_31</c>, an unmodifiable copy of the dictionary is kept in <c>origStemdict</c>
 /// and <c>stemdict</c> is left null. Otherwise the entries are fed to a
 /// <c>StemmerOverrideFilter.Builder</c> and the built result is stored in <c>stemdict</c>.
 /// </summary>
 /// <param name="matchVersion"> version passed to the copy helpers and used for the LUCENE_31 check </param>
 /// <param name="stopwords"> stop word set to copy </param>
 /// <param name="stemExclusionTable"> stem exclusion set to copy </param>
 /// <param name="stemOverrideDict"> stem override entries </param>
 /// <exception cref="Exception"> wraps an IOException raised while building the stem dictionary </exception>
 public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap <string> stemOverrideDict)
 {
     this.matchVersion = matchVersion;

     CharArraySet stopCopy = CharArraySet.copy(matchVersion, stopwords);
     this.stoptable = CharArraySet.unmodifiableSet(stopCopy);

     CharArraySet exclusionCopy = CharArraySet.copy(matchVersion, stemExclusionTable);
     this.excltable = CharArraySet.unmodifiableSet(exclusionCopy);

     // Same short-circuit order as "Empty || !onOrAfter": emptiness is checked first.
     bool compileOverrides = !stemOverrideDict.Empty && matchVersion.onOrAfter(Version.LUCENE_31);
     if (!compileOverrides)
     {
         // Keep the raw dictionary only; no compiled stem dictionary in this mode.
         this.stemdict     = null;
         this.origStemdict = CharArrayMap.unmodifiableMap(CharArrayMap.copy(matchVersion, stemOverrideDict));
         return;
     }

     this.origStemdict = null;

     // Builder is created with "false": per the original note, ignoring case is
     // unnecessary because this analyzer lowercases anyway.
     StemmerOverrideFilter.Builder overrides = new StemmerOverrideFilter.Builder(false);
     CharArrayMap <string> .EntryIterator entries = stemOverrideDict.entrySet().GetEnumerator();
     CharsRef scratch = new CharsRef();
     while (entries.hasNext())
     {
         char[] keyChars = entries.nextKey();
         scratch.copyChars(keyChars, 0, keyChars.Length);
         overrides.add(scratch, entries.currentValue());
     }

     try
     {
         this.stemdict = overrides.build();
     }
     catch (IOException ex)
     {
         throw new Exception("can not build stem dict", ex);
     }
 }
Beispiel #2
0
        /// <summary>
        /// Checks the string form of a copied CharArraySet holding one entry and then two
        /// entries, once for TEST_VERSION_CURRENT and once for Version.LUCENE_30.
        /// </summary>
        public virtual void testToString()
        {
            // Same sequence of checks for each version, in the original order.
            foreach (Version version in new Version[] { TEST_VERSION_CURRENT, Version.LUCENE_30 })
            {
                CharArraySet words = CharArraySet.copy(version, Collections.singleton("test"));
                assertEquals("[test]", words.ToString());

                words.add("test2");
                string rendered = words.ToString();
                assertTrue(rendered.Contains(", "));
            }
        }
Beispiel #3
0
        /// <summary>
        /// Test the static #copy() function with a CharArraySet as a source. Two source sets are
        /// used, constructed with <c>true</c> and <c>false</c> as the third constructor argument
        /// (named ignore-case and case-sensitive here), and each copy is checked for size,
        /// membership and independence from its source.
        /// </summary>
        public virtual void testCopyCharArraySet()
        {
            CharArraySet setIgnoreCase    = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
            CharArraySet setCaseSensitive = new CharArraySet(TEST_VERSION_CURRENT, 10, false);

            IList <string> stopwords      = TEST_STOP_WORDS;
            IList <string> stopwordsUpper = new List <string>();

            // Upper-cased variants are used to probe the case handling of each copy.
            foreach (string @string in stopwords)
            {
                stopwordsUpper.Add(@string.ToUpper(Locale.ROOT));
            }
            setIgnoreCase.addAll(TEST_STOP_WORDS);
            setIgnoreCase.add(Convert.ToInt32(1));
            setCaseSensitive.addAll(TEST_STOP_WORDS);
            setCaseSensitive.add(Convert.ToInt32(1));

            CharArraySet copy         = CharArraySet.copy(TEST_VERSION_CURRENT, setIgnoreCase);
            CharArraySet copyCaseSens = CharArraySet.copy(TEST_VERSION_CURRENT, setCaseSensitive);

            // Each copy must have the size of its own source set. The case-sensitive
            // source was previously compared against the ignore-case copy, so the
            // size of copyCaseSens was never verified.
            assertEquals(setIgnoreCase.size(), copy.size());
            assertEquals(setCaseSensitive.size(), copyCaseSens.size());

            assertTrue(copy.containsAll(stopwords));
            assertTrue(copy.containsAll(stopwordsUpper));
            assertTrue(copyCaseSens.containsAll(stopwords));
            foreach (string @string in stopwordsUpper)
            {
                assertFalse(copyCaseSens.contains(@string));
            }
            // test adding terms to the copy
            IList <string> newWords = new List <string>();

            foreach (string @string in stopwords)
            {
                newWords.Add(@string + "_1");
            }
            copy.addAll(newWords);

            assertTrue(copy.containsAll(stopwords));
            assertTrue(copy.containsAll(stopwordsUpper));
            assertTrue(copy.containsAll(newWords));
            // new added terms are not in the source set
            foreach (string @string in newWords)
            {
                assertFalse(setIgnoreCase.contains(@string));
                assertFalse(setCaseSensitive.contains(@string));
            }
        }
Beispiel #4
0
        /// <summary>
        /// Test the static #copy() function with a plain <c>ISet&lt;string&gt;</c> (a HashSet) as
        /// a source: the copy must have the same size, contain the original words but not their
        /// upper-cased variants, and accept new words without affecting the source set.
        /// </summary>
        public virtual void testCopyJDKSet()
        {
            ISet <string> set = new HashSet <string>();

            IList <string> stopwords      = TEST_STOP_WORDS;
            IList <string> stopwordsUpper = new List <string>();

            // Upper-cased variants are expected to be absent from the copy.
            foreach (string @string in stopwords)
            {
                stopwordsUpper.Add(@string.ToUpper(Locale.ROOT));
            }
            set.addAll(TEST_STOP_WORDS);

            CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, set);

            // A single size check suffices; the original repeated this exact assertion twice.
            assertEquals(set.Count, copy.size());

            assertTrue(copy.containsAll(stopwords));
            foreach (string @string in stopwordsUpper)
            {
                assertFalse(copy.contains(@string));
            }

            // test adding terms to the copy
            IList <string> newWords = new List <string>();

            foreach (string @string in stopwords)
            {
                newWords.Add(@string + "_1");
            }
            copy.addAll(newWords);

            assertTrue(copy.containsAll(stopwords));
            assertTrue(copy.containsAll(newWords));
            // new added terms are not in the source set
            foreach (string @string in newWords)
            {
                assertFalse(set.Contains(@string));
            }
        }
Beispiel #5
0
 /// <summary>
 /// Creates an analyzer that uses the supplied stop words. When the stem exclusion set is
 /// non-empty, this analyzer adds a <seealso cref="SetKeywordMarkerFilter"/> ahead of
 /// stemming.
 /// </summary>
 /// <param name="matchVersion"> lucene compatibility version </param>
 /// <param name="stopwords"> a stopword set </param>
 /// <param name="stemExclusionSet"> a set of terms not to be stemmed </param>
 public IrishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
 {
     // Store an unmodifiable copy of the supplied set.
     CharArraySet exclusionCopy = CharArraySet.copy(matchVersion, stemExclusionSet);
     this.stemExclusionSet = CharArraySet.unmodifiableSet(exclusionCopy);
 }
Beispiel #6
0
 /// <summary>
 /// Creates the named analyzer and stores an unmodifiable copy of the given stop words.
 /// </summary>
 /// <param name="matchVersion"> version passed to the two-argument constructor and to the copy </param>
 /// <param name="name"> analyzer name, forwarded to the two-argument constructor </param>
 /// <param name="stopWords"> stop word set to copy </param>
 public SnowballAnalyzer(Version matchVersion, string name, CharArraySet stopWords) : this(matchVersion, name)
 {
     CharArraySet stopCopy = CharArraySet.copy(matchVersion, stopWords);
     stopSet = CharArraySet.unmodifiableSet(stopCopy);
 }
Beispiel #7
0
 /// <summary>
 /// Creates an analyzer with the supplied stop words and stores an unmodifiable copy of
 /// the stemming exclusion set.
 /// </summary>
 /// <param name="matchVersion"> lucene compatibility version </param>
 /// <param name="stopwords"> a stopword set </param>
 /// <param name="stemExclutionSet"> a stemming exclusion set </param>
 public FrenchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclutionSet) : base(matchVersion, stopwords)
 {
     CharArraySet exclusionCopy = CharArraySet.copy(matchVersion, stemExclutionSet);
     this.excltable = CharArraySet.unmodifiableSet(exclusionCopy);
 }
Beispiel #8
0
 /// <summary>
 /// Creates an analyzer with the supplied stop words and a set of words to be
 /// excluded from the <seealso cref="CzechStemFilter"/>.
 /// </summary>
 /// <param name="matchVersion"> Lucene version to match (the original comment refers to a
 ///          "version" section of the class documentation) </param>
 /// <param name="stopwords"> a stopword set </param>
 /// <param name="stemExclusionTable"> a stemming exclusion set </param>
 public CzechAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable) : base(matchVersion, stopwords)
 {
     // The field receives an unmodifiable copy of the parameter of the same name.
     CharArraySet exclusionCopy = CharArraySet.copy(matchVersion, stemExclusionTable);
     this.stemExclusionTable = CharArraySet.unmodifiableSet(exclusionCopy);
 }
Beispiel #9
0
 /// <summary>
 /// Creates an analyzer with the supplied stop words and stemming exclusion words.
 /// </summary>
 /// <param name="matchVersion"> lucene compatibility version </param>
 /// <param name="stopwords"> a stopword set </param>
 /// <param name="stemExclusionSet"> stemming exclusion words; an unmodifiable copy is stored </param>
 public BrazilianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : this(matchVersion, stopwords)
 {
     CharArraySet exclusionCopy = CharArraySet.copy(matchVersion, stemExclusionSet);
     excltable = CharArraySet.unmodifiableSet(exclusionCopy);
 }
Beispiel #10
0
 /// <summary>
 /// Covers the special case of <c>CharArraySet.copy</c> where the set being copied is
 /// <c>CharArraySet.EMPTY_SET</c>: the call must hand back that very same instance.
 /// </summary>
 public virtual void testCopyEmptySet()
 {
     CharArraySet copied = CharArraySet.copy(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
     assertSame(CharArraySet.EMPTY_SET, copied);
 }