public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap <string> stemOverrideDict) { this.matchVersion = matchVersion; this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords)); this.excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable)); if (stemOverrideDict.Empty || !matchVersion.onOrAfter(Version.LUCENE_31)) { this.stemdict = null; this.origStemdict = CharArrayMap.unmodifiableMap(CharArrayMap.copy(matchVersion, stemOverrideDict)); } else { this.origStemdict = null; // we don't need to ignore case here since we lowercase in this analyzer anyway StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(false); CharArrayMap <string> .EntryIterator iter = stemOverrideDict.entrySet().GetEnumerator(); CharsRef spare = new CharsRef(); while (iter.hasNext()) { char[] nextKey = iter.nextKey(); spare.copyChars(nextKey, 0, nextKey.Length); builder.add(spare, iter.currentValue()); } try { this.stemdict = builder.build(); } catch (IOException ex) { throw new Exception("can not build stem dict", ex); } } }
public virtual void testToString() { CharArraySet set = CharArraySet.copy(TEST_VERSION_CURRENT, Collections.singleton("test")); assertEquals("[test]", set.ToString()); set.add("test2"); assertTrue(set.ToString().Contains(", ")); set = CharArraySet.copy(Version.LUCENE_30, Collections.singleton("test")); assertEquals("[test]", set.ToString()); set.add("test2"); assertTrue(set.ToString().Contains(", ")); }
/// <summary> /// Test the static #copy() function with a CharArraySet as a source /// </summary> public virtual void testCopyCharArraySet() { CharArraySet setIngoreCase = new CharArraySet(TEST_VERSION_CURRENT, 10, true); CharArraySet setCaseSensitive = new CharArraySet(TEST_VERSION_CURRENT, 10, false); IList <string> stopwords = TEST_STOP_WORDS; IList <string> stopwordsUpper = new List <string>(); foreach (string @string in stopwords) { stopwordsUpper.Add(@string.ToUpper(Locale.ROOT)); } setIngoreCase.addAll(TEST_STOP_WORDS); setIngoreCase.add(Convert.ToInt32(1)); setCaseSensitive.addAll(TEST_STOP_WORDS); setCaseSensitive.add(Convert.ToInt32(1)); CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, setIngoreCase); CharArraySet copyCaseSens = CharArraySet.copy(TEST_VERSION_CURRENT, setCaseSensitive); assertEquals(setIngoreCase.size(), copy.size()); assertEquals(setCaseSensitive.size(), copy.size()); assertTrue(copy.containsAll(stopwords)); assertTrue(copy.containsAll(stopwordsUpper)); assertTrue(copyCaseSens.containsAll(stopwords)); foreach (string @string in stopwordsUpper) { assertFalse(copyCaseSens.contains(@string)); } // test adding terms to the copy IList <string> newWords = new List <string>(); foreach (string @string in stopwords) { newWords.Add(@string + "_1"); } copy.addAll(newWords); assertTrue(copy.containsAll(stopwords)); assertTrue(copy.containsAll(stopwordsUpper)); assertTrue(copy.containsAll(newWords)); // new added terms are not in the source set foreach (string @string in newWords) { assertFalse(setIngoreCase.contains(@string)); assertFalse(setCaseSensitive.contains(@string)); } }
/// <summary> /// Test the static #copy() function with a JDK <seealso cref="Set"/> as a source /// </summary> public virtual void testCopyJDKSet() { ISet <string> set = new HashSet <string>(); IList <string> stopwords = TEST_STOP_WORDS; IList <string> stopwordsUpper = new List <string>(); foreach (string @string in stopwords) { stopwordsUpper.Add(@string.ToUpper(Locale.ROOT)); } set.addAll(TEST_STOP_WORDS); CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, set); assertEquals(set.Count, copy.size()); assertEquals(set.Count, copy.size()); assertTrue(copy.containsAll(stopwords)); foreach (string @string in stopwordsUpper) { assertFalse(copy.contains(@string)); } IList <string> newWords = new List <string>(); foreach (string @string in stopwords) { newWords.Add(@string + "_1"); } copy.addAll(newWords); assertTrue(copy.containsAll(stopwords)); assertTrue(copy.containsAll(newWords)); // new added terms are not in the source set foreach (string @string in newWords) { assertFalse(set.Contains(@string)); } }
/// <summary> /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before /// stemming. /// </summary> /// <param name="matchVersion"> lucene compatibility version </param> /// <param name="stopwords"> a stopword set </param> /// <param name="stemExclusionSet"> a set of terms not to be stemmed </param> public IrishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords) { this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet)); }
/// <summary> /// Builds the named analyzer with the given stop words. </summary> public SnowballAnalyzer(Version matchVersion, string name, CharArraySet stopWords) : this(matchVersion, name) { stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopWords)); }
/// <summary> /// Builds an analyzer with the given stop words /// </summary> /// <param name="matchVersion"> /// lucene compatibility version </param> /// <param name="stopwords"> /// a stopword set </param> /// <param name="stemExclutionSet"> /// a stemming exclusion set </param> public FrenchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclutionSet) : base(matchVersion, stopwords) { this.excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclutionSet)); }
/// <summary> /// Builds an analyzer with the given stop words and a set of work to be /// excluded from the <seealso cref="CzechStemFilter"/>. /// </summary> /// <param name="matchVersion"> Lucene version to match See /// <seealso cref="<a href="#version">above</a>"/> </param> /// <param name="stopwords"> a stopword set </param> /// <param name="stemExclusionTable"> a stemming exclusion set </param> public CzechAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable) : base(matchVersion, stopwords) { this.stemExclusionTable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable)); }
/// <summary> /// Builds an analyzer with the given stop words and stemming exclusion words /// </summary> /// <param name="matchVersion"> /// lucene compatibility version </param> /// <param name="stopwords"> /// a stopword set </param> public BrazilianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : this(matchVersion, stopwords) { excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet)); }
/// <summary> /// Tests a special case of <seealso cref="CharArraySet#copy(Version, Set)"/> where the /// set to copy is the <seealso cref="CharArraySet#EMPTY_SET"/> /// </summary> public virtual void testCopyEmptySet() { assertSame(CharArraySet.EMPTY_SET, CharArraySet.copy(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET)); }