/// <summary> /// Creates a /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/> /// which tokenizes all the text in the provided <seealso cref="Reader"/>. /// </summary> /// <returns> A /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/> /// built from an <seealso cref="StandardTokenizer"/> filtered with /// <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/> /// , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is /// provided and <seealso cref="SpanishLightStemFilter"/>. </returns> public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader) { Tokenizer source = new StandardTokenizer(matchVersion, reader); TokenStream result = new StandardFilter(matchVersion, source); result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if (stemExclusionSet.Count > 0) { result = new SetKeywordMarkerFilter(result, stemExclusionSet); } if (matchVersion.OnOrAfter(LuceneVersion.LUCENE_36)) { result = new SpanishLightStemFilter(result); } else { result = new SnowballFilter(result, new SpanishStemmer()); } return(new TokenStreamComponents(source, result)); }
/// <summary> /// Creates a /// <see cref="TokenStreamComponents"/> /// which tokenizes all the text in the provided <see cref="TextReader"/>. /// </summary> /// <returns> A /// <see cref="TokenStreamComponents"/> /// built from an <see cref="StandardTokenizer"/> filtered with /// <see cref="StandardFilter"/>, <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/>, /// <see cref="SetKeywordMarkerFilter"/> if a stem exclusion set is /// provided and <see cref="SpanishLightStemFilter"/>. </returns> protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader) { Tokenizer source = new StandardTokenizer(m_matchVersion, reader); TokenStream result = new StandardFilter(m_matchVersion, source); result = new LowerCaseFilter(m_matchVersion, result); result = new StopFilter(m_matchVersion, result, m_stopwords); if (stemExclusionSet.Count > 0) { result = new SetKeywordMarkerFilter(result, stemExclusionSet); } #pragma warning disable 612, 618 if (m_matchVersion.OnOrAfter(LuceneVersion.LUCENE_36)) #pragma warning restore 612, 618 { result = new SpanishLightStemFilter(result); } else { result = new SnowballFilter(result, new SpanishStemmer()); } return(new TokenStreamComponents(source, result)); }
/// <summary> /// Creates a /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/> /// which tokenizes all the text in the provided <seealso cref="Reader"/>. /// </summary> /// <returns> A /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/> /// built from an <seealso cref="StandardTokenizer"/> filtered with /// <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/> /// , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is /// provided and <seealso cref="SpanishLightStemFilter"/>. </returns> public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader) { Tokenizer source = new StandardTokenizer(matchVersion, reader); TokenStream result = new StandardFilter(matchVersion, source); result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if (stemExclusionSet.Count > 0) { result = new SetKeywordMarkerFilter(result, stemExclusionSet); } #pragma warning disable 612, 618 if (matchVersion.OnOrAfter(LuceneVersion.LUCENE_36)) #pragma warning restore 612, 618 { result = new SpanishLightStemFilter(result); } else { result = new SnowballFilter(result, new SpanishStemmer()); } return new TokenStreamComponents(source, result); }