/// <summary>
/// Builds the analysis chain that tokenizes all the text read from
/// <paramref name="aReader"/>.
/// </summary>
/// <param name="fieldName"> Name of the field being analyzed; not read by this method. </param>
/// <param name="aReader"> The <see cref="TextReader"/> supplying the text to tokenize. </param>
/// <returns>
/// A <see cref="TokenStreamComponents"/> whose source is a <see cref="StandardTokenizer"/>.
/// When <c>matchVersion</c> is on or after <see cref="LuceneVersion.LUCENE_31"/> the tokenizer is
/// filtered with <see cref="StandardFilter"/>, <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/>,
/// <see cref="SetKeywordMarkerFilter"/> (only if the stem exclusion set is non-empty),
/// <see cref="StemmerOverrideFilter"/> (only if <c>stemdict</c> is not null) and <see cref="SnowballFilter"/>.
/// Otherwise it is filtered with <see cref="StandardFilter"/>, <see cref="StopFilter"/>,
/// <see cref="SetKeywordMarkerFilter"/> (only if the stem exclusion set is non-empty) and
/// <see cref="DutchStemFilter"/>.
/// </returns>
public override TokenStreamComponents CreateComponents(string fieldName, TextReader aReader)
{
    bool useSnowballChain = matchVersion.OnOrAfter(LuceneVersion.LUCENE_31);

    // Both chains start with the same tokenizer and StandardFilter.
    Tokenizer tokenizer = new StandardTokenizer(matchVersion, aReader);
    TokenStream chain = new StandardFilter(matchVersion, tokenizer);

    // Lower-casing is only part of the 3.1+ chain.
    if (useSnowballChain)
    {
        chain = new LowerCaseFilter(matchVersion, chain);
    }

    chain = new StopFilter(matchVersion, chain, stoptable);

    // Only add the keyword marker when there is something to exclude from stemming.
    if (excltable.Count > 0)
    {
        chain = new SetKeywordMarkerFilter(chain, excltable);
    }

    if (useSnowballChain)
    {
        if (stemdict != null)
        {
            chain = new StemmerOverrideFilter(chain, stemdict);
        }

        chain = new SnowballFilter(chain, new Tartarus.Snowball.Ext.DutchStemmer());
    }
    else
    {
        chain = new DutchStemFilter(chain, origStemdict);
    }

    return new TokenStreamComponents(tokenizer, chain);
}
/// <summary>
/// Creates a TokenStream which tokenizes all the text in the provided TextReader.
/// </summary>
/// <param name="fieldName">Name of the field being analyzed; not read by this method.</param>
/// <param name="reader">The TextReader supplying the text to tokenize.</param>
/// <returns>A TokenStream built from a StandardTokenizer filtered with StandardFilter, StopFilter and DutchStemFilter.</returns>
public override TokenStream TokenStream(String fieldName, TextReader reader)
{
    // Each stage wraps the previous one; the last stage is what callers consume.
    TokenStream tokenizer = new StandardTokenizer(reader);
    TokenStream standardized = new StandardFilter(tokenizer);
    TokenStream withoutStopWords = new StopFilter(standardized, stoptable);

    return new DutchStemFilter(withoutStopWords, excltable, _stemdict);
}
/// <summary>
/// Creates a TokenStream which tokenizes all the text in the provided TextReader.
/// </summary>
/// <param name="fieldName">Name of the field being analyzed; not read by this method.</param>
/// <param name="reader">The TextReader supplying the text to tokenize.</param>
/// <returns>
/// A TokenStream built from a StandardTokenizer filtered with StandardFilter,
/// StopFilter and DutchStemFilter.
/// </returns>
public override TokenStream TokenStream(String fieldName, TextReader reader)
{
    // Each stage wraps the previous one; the last stage is what callers consume.
    TokenStream tokenizer = new StandardTokenizer(matchVersion, reader);
    TokenStream standardized = new StandardFilter(tokenizer);

    // The first StopFilter argument is taken from the version-dependent default for matchVersion.
    TokenStream withoutStopWords = new StopFilter(
        StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
        standardized,
        stoptable);

    return new DutchStemFilter(withoutStopWords, excltable, stemdict);
}
/// <summary>
/// Creates a TokenStream which tokenizes all the text in the provided TextReader.
/// </summary>
/// <param name="fieldName">Name of the field being analyzed; not read by this method.</param>
/// <param name="reader">The TextReader supplying the text to tokenize.</param>
/// <returns>A TokenStream built from a StandardTokenizer filtered with StandardFilter, StopFilter and DutchStemFilter.</returns>
public override TokenStream TokenStream(String fieldName, TextReader reader)
{
    // Read inside-out: tokenizer -> StandardFilter -> StopFilter -> DutchStemFilter.
    return new DutchStemFilter(
        new StopFilter(
            new StandardFilter(
                new StandardTokenizer(reader)),
            stoptable),
        excltable,
        _stemdict);
}
/// <summary>
/// Creates a TokenStream which tokenizes all the text in the provided TextReader.
/// </summary>
/// <param name="fieldName">Name of the field being analyzed; not read by this method.</param>
/// <param name="reader">The TextReader supplying the text to tokenize.</param>
/// <returns>
/// A TokenStream built from a StandardTokenizer filtered with StandardFilter,
/// StopFilter and DutchStemFilter.
/// </returns>
public override TokenStream TokenStream(String fieldName, TextReader reader)
{
    // Read inside-out: tokenizer -> StandardFilter -> StopFilter -> DutchStemFilter.
    // The first StopFilter argument is taken from the version-dependent default for matchVersion.
    return new DutchStemFilter(
        new StopFilter(
            StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
            new StandardFilter(
                new StandardTokenizer(matchVersion, reader)),
            stoptable),
        excltable,
        stemdict);
}
/// <summary>
/// Builds the analysis chain that tokenizes all the text read from
/// <paramref name="aReader"/>.
/// </summary>
/// <param name="fieldName"> Name of the field being analyzed; not read by this method. </param>
/// <param name="aReader"> The <see cref="TextReader"/> supplying the text to tokenize. </param>
/// <returns>
/// A <see cref="TokenStreamComponents"/> whose source is a <see cref="StandardTokenizer"/>.
/// When <c>matchVersion</c> is on or after <see cref="LuceneVersion.LUCENE_31"/> the tokenizer is
/// filtered with <see cref="StandardFilter"/>, <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/>,
/// <see cref="SetKeywordMarkerFilter"/> (only if the stem exclusion set is non-empty),
/// <see cref="StemmerOverrideFilter"/> (only if <c>stemdict</c> is not null) and <see cref="SnowballFilter"/>.
/// Otherwise it is filtered with <see cref="StandardFilter"/>, <see cref="StopFilter"/>,
/// <see cref="SetKeywordMarkerFilter"/> (only if the stem exclusion set is non-empty) and
/// <see cref="DutchStemFilter"/>.
/// </returns>
public override TokenStreamComponents CreateComponents(string fieldName, TextReader aReader)
{
    // 612/618 are the obsolete-member warnings; they are suppressed only around this check.
#pragma warning disable 612, 618
    bool isLegacyVersion = !matchVersion.OnOrAfter(LuceneVersion.LUCENE_31);
#pragma warning restore 612, 618

    // Both chains start with the same tokenizer and StandardFilter.
    Tokenizer tokenizer = new StandardTokenizer(matchVersion, aReader);
    TokenStream stream = new StandardFilter(matchVersion, tokenizer);

    if (isLegacyVersion)
    {
        // Pre-3.1 chain: no lower-casing, and stemming is done by DutchStemFilter.
        stream = new StopFilter(matchVersion, stream, stoptable);

        if (excltable.Count > 0)
        {
            stream = new SetKeywordMarkerFilter(stream, excltable);
        }

#pragma warning disable 612, 618
        stream = new DutchStemFilter(stream, origStemdict);
#pragma warning restore 612, 618

        return new TokenStreamComponents(tokenizer, stream);
    }

    // 3.1+ chain: lower-case, remove stop words, then stem with the Snowball Dutch stemmer.
    stream = new LowerCaseFilter(matchVersion, stream);
    stream = new StopFilter(matchVersion, stream, stoptable);

    // Only add the keyword marker when there is something to exclude from stemming.
    if (excltable.Count > 0)
    {
        stream = new SetKeywordMarkerFilter(stream, excltable);
    }

    if (stemdict != null)
    {
        stream = new StemmerOverrideFilter(stream, stemdict);
    }

    stream = new SnowballFilter(stream, new Tartarus.Snowball.Ext.DutchStemmer());

    return new TokenStreamComponents(tokenizer, stream);
}