/// <summary>
/// Creates a Dutch analyzer from a stop word set, a stem exclusion set and a
/// stem override dictionary.
/// <para>
/// The stop words and the exclusion set are copied and stored as unmodifiable sets.
/// When <paramref name="stemOverrideDict"/> is empty, or <paramref name="matchVersion"/>
/// is older than <c>LUCENE_31</c>, an unmodifiable copy of the dictionary is kept in
/// <c>origStemdict</c> and <c>stemdict</c> stays <c>null</c>. Otherwise <c>stemdict</c>
/// is built with a <c>StemmerOverrideFilter.Builder</c> and <c>origStemdict</c> is
/// set to <c>null</c>.
/// </para>
/// </summary>
/// <param name="matchVersion">version used for the copies and for choosing the override representation</param>
/// <param name="stopwords">stop word set to copy</param>
/// <param name="stemExclusionTable">stem exclusion set to copy</param>
/// <param name="stemOverrideDict">dictionary of stem overrides</param>
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap<string> stemOverrideDict)
{
    this.matchVersion = matchVersion;

    var stopwordsCopy = CharArraySet.copy(matchVersion, stopwords);
    this.stoptable = CharArraySet.unmodifiableSet(stopwordsCopy);

    var exclusionsCopy = CharArraySet.copy(matchVersion, stemExclusionTable);
    this.excltable = CharArraySet.unmodifiableSet(exclusionsCopy);

    bool buildOverrideMap = !stemOverrideDict.Empty && matchVersion.onOrAfter(Version.LUCENE_31);
    if (!buildOverrideMap)
    {
        // Legacy path: keep the plain dictionary and leave the built map unset.
        this.stemdict = null;
        var dictCopy = CharArrayMap.copy(matchVersion, stemOverrideDict);
        this.origStemdict = CharArrayMap.unmodifiableMap(dictCopy);
        return;
    }

    this.origStemdict = null;

    // Case-insensitive matching is unnecessary: this analyzer lowercases its input anyway.
    StemmerOverrideFilter.Builder overrideBuilder = new StemmerOverrideFilter.Builder(false);
    CharArrayMap<string>.EntryIterator entries = stemOverrideDict.entrySet().GetEnumerator();
    CharsRef scratch = new CharsRef();
    while (entries.hasNext())
    {
        char[] key = entries.nextKey();
        scratch.copyChars(key, 0, key.Length);
        overrideBuilder.add(scratch, entries.currentValue());
    }

    try
    {
        this.stemdict = overrideBuilder.build();
    }
    catch (IOException ex)
    {
        throw new Exception("can not build stem dict", ex);
    }
}
/// <summary>
/// Initializes a <see cref="StemmerOverrideFilter"/> over <paramref name="input"/> that
/// stems terms by looking them up in <paramref name="stemmerOverrideMap"/>.
/// <para>
/// Terms stemmed through the dictionary are flagged with <see cref="KeywordAttribute"/>,
/// which keeps stemmers further down the chain from stemming them again.
/// </para>
/// </summary>
/// <param name="input">the token stream to wrap</param>
/// <param name="stemmerOverrideMap">the override dictionary; its bytes reader is obtained here</param>
public StemmerOverrideFilter(TokenStream input, StemmerOverrideMap stemmerOverrideMap)
    : base(input)
{
    this.stemmerOverrideMap = stemmerOverrideMap;
    this.fstReader = stemmerOverrideMap.GetBytesReader();

    // Register the attributes only after the reader lookup, as in the original order.
    this.termAtt = this.AddAttribute<ICharTermAttribute>();
    this.keywordAtt = this.AddAttribute<IKeywordAttribute>();
}
/// <summary>
/// Initializes a <see cref="StemmerOverrideFilter"/> over <paramref name="input"/> that
/// stems terms by looking them up in <paramref name="stemmerOverrideMap"/>.
/// <para>
/// Terms stemmed through the dictionary are flagged with <see cref="KeywordAttribute"/>,
/// which keeps stemmers further down the chain from stemming them again.
/// </para>
/// </summary>
/// <param name="input">the token stream to wrap</param>
/// <param name="stemmerOverrideMap">the override dictionary; its <c>BytesReader</c> is read here</param>
public StemmerOverrideFilter(TokenStream input, StemmerOverrideMap stemmerOverrideMap)
    : base(input)
{
    this.stemmerOverrideMap = stemmerOverrideMap;
    this.fstReader = stemmerOverrideMap.BytesReader;

    // Register the attributes only after the reader lookup, as in the original order.
    this.termAtt = this.AddAttribute<ICharTermAttribute>();
    this.keywordAtt = this.AddAttribute<IKeywordAttribute>();
}
/// <summary>
/// Randomized test: fills a dictionary with random realistic Unicode keys mapped to
/// random simple strings, builds a <c>StemmerOverrideMap</c> from it, and for a random
/// subset of entries asserts that a keyword tokenizer over the key, passed through
/// <c>StemmerOverrideFilter</c> and then <c>PorterStemFilter</c>, emits exactly the
/// mapped value.
/// </summary>
/// <remarks>
/// The Java original declared <c>throws java.io.IOException</c>; .NET has no
/// equivalent clause.
/// </remarks>
public virtual void testRandomRealisticKeyword()
{
    IDictionary<string, string> overrides = new Dictionary<string, string>();
    int termCount = atLeast(50);
    for (int n = 0; n < termCount; n++)
    {
        string term = TestUtil.randomRealisticUnicodeString(random());
        if (term.Length == 0)
        {
            // Empty keys are skipped without drawing a value.
            continue;
        }

        string stem = TestUtil.randomSimpleString(random());
        if (stem.Length == 0)
        {
            stem = "a";
        }
        overrides[term] = stem;
    }

    // Make sure there is at least one entry to exercise.
    if (overrides.Count == 0)
    {
        overrides["booked"] = "books";
    }

    bool ignoreCase = random().nextBoolean();
    StemmerOverrideFilter.Builder mapBuilder = new StemmerOverrideFilter.Builder(ignoreCase);
    ISet<KeyValuePair<string, string>> pairs = overrides.SetOfKeyValuePairs();
    foreach (KeyValuePair<string, string> pair in pairs)
    {
        mapBuilder.add(pair.Key, pair.Value);
    }
    StemmerOverrideMap overrideMap = mapBuilder.build();

    foreach (KeyValuePair<string, string> pair in pairs)
    {
        if (!random().nextBoolean())
        {
            continue;
        }

        Tokenizer source = new KeywordTokenizer(new StringReader(pair.Key));
        TokenStream filtered = new PorterStemFilter(new StemmerOverrideFilter(source, overrideMap));
        assertTokenStreamContents(filtered, new string[] { pair.Value });
    }
}
/// <summary>
/// Initializes a <see cref="StemmerOverrideFilter"/> over <paramref name="input"/> that
/// stems terms by looking them up in <paramref name="stemmerOverrideMap"/>.
/// <para>
/// Terms stemmed through the dictionary are flagged with <see cref="KeywordAttribute"/>,
/// which keeps stemmers further down the chain from stemming them again.
/// </para>
/// </summary>
/// <param name="input">the token stream to wrap</param>
/// <param name="stemmerOverrideMap">the override dictionary; its <c>BytesReader</c> is read here</param>
public StemmerOverrideFilter(TokenStream input, StemmerOverrideMap stemmerOverrideMap)
    : base(input)
{
    this.stemmerOverrideMap = stemmerOverrideMap;
    this.fstReader = stemmerOverrideMap.BytesReader;
}
/// <summary>
/// Initializes a <see cref="StemmerOverrideFilter"/> over <paramref name="input"/> that
/// stems terms by looking them up in <paramref name="stemmerOverrideMap"/>.
/// <para>
/// Terms stemmed through the dictionary are flagged with <see cref="KeywordAttribute"/>,
/// which keeps stemmers further down the chain from stemming them again.
/// </para>
/// </summary>
/// <remarks>
/// Both parameters were declared <c>final</c> in the Java original; .NET has no
/// equivalent for parameters.
/// </remarks>
/// <param name="input">the token stream to wrap</param>
/// <param name="stemmerOverrideMap">the override dictionary; its <c>BytesReader</c> is read here</param>
public StemmerOverrideFilter(TokenStream input, StemmerOverrideMap stemmerOverrideMap)
    : base(input)
{
    this.stemmerOverrideMap = stemmerOverrideMap;
    this.fstReader = stemmerOverrideMap.BytesReader;
}