A filter that stems Russian words. The implementation was inspired by GermanStemFilter. The input should be filtered by RussianLowerCaseFilter before being passed to RussianStemFilter, because RussianStemFilter only works with the lowercase part of any "russian" charset.
Inheritance: Lucene.Net.Analysis.TokenFilter
コード例 #1
0
        /// <summary>
        /// Builds the analysis chain for the text supplied by <paramref name="reader"/>.
        /// </summary>
        /// <param name="fieldName">Name of the field being analyzed; not referenced by this method.</param>
        /// <param name="reader">Source of the text to tokenize.</param>
        /// <returns>
        ///     A TokenStream built from a RussianLetterTokenizer wrapped, in order, by
        ///     RussianLowerCaseFilter, StopFilter, and RussianStemFilter.
        /// </returns>
        public override TokenStream TokenStream(String fieldName, TextReader reader)
        {
            // Each stage wraps the previous one; lower-casing happens before
            // stop-word removal and stemming.
            TokenStream letters = new RussianLetterTokenizer(reader, charset);
            TokenStream lowerCased = new RussianLowerCaseFilter(letters, charset);
            TokenStream withoutStopWords = new StopFilter(lowerCased, stoptable);

            return new RussianStemFilter(withoutStopWords, charset);
        }
コード例 #2
0
ファイル: RussianAnalyzer.cs プロジェクト: mundher/lucene.net
        /*
         * Builds the analysis chain for the text supplied by the given reader.
         *
         * The fieldName argument is not referenced by this method.
         *
         * Returns a TokenStream built from a RussianLetterTokenizer wrapped,
         * in order, by LowerCaseFilter, StopFilter, and RussianStemFilter.
         */
        public override TokenStream TokenStream(String fieldName, TextReader reader)
        {
            TokenStream letters = new RussianLetterTokenizer(reader);
            TokenStream lowerCased = new LowerCaseFilter(letters);

            // The position-increment setting handed to StopFilter is derived from matchVersion.
            var enablePositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion);
            TokenStream withoutStopWords = new StopFilter(enablePositionIncrements, lowerCased, stopSet);

            return new RussianStemFilter(withoutStopWords);
        }
コード例 #3
0
ファイル: RussianAnalyzer.cs プロジェクト: raol/lucene.net
 /*
  * Assembles the token pipeline for the text read from the given reader.
  *
  * The fieldName argument is not referenced by this method.
  *
  * Returns a TokenStream built from a RussianLetterTokenizer that is
  * passed through LowerCaseFilter, then StopFilter, then RussianStemFilter.
  */
 public override TokenStream TokenStream(String fieldName, TextReader reader)
 {
     TokenStream tokenizer = new RussianLetterTokenizer(reader);
     TokenStream lowered = new LowerCaseFilter(tokenizer);

     // StopFilter's first argument is the matchVersion-dependent default.
     var positionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion);
     TokenStream stopped = new StopFilter(positionIncrements, lowered, stopSet);

     TokenStream stemmed = new RussianStemFilter(stopped);
     return stemmed;
 }
コード例 #4
0
		/// <summary>
		/// Assembles the token pipeline for the text read from <paramref name="reader"/>.
		/// </summary>
		/// <param name="fieldName">Name of the field being analyzed; not referenced by this method.</param>
		/// <param name="reader">Source of the text to tokenize.</param>
		/// <returns>
		///		A TokenStream built from a RussianLetterTokenizer that is passed through
		///		RussianLowerCaseFilter, then StopFilter, then RussianStemFilter.
		/// </returns>
		public override TokenStream TokenStream(String fieldName, TextReader reader)
		{
			// The same charset member is handed to the tokenizer, the
			// lower-case filter, and the stemmer.
			TokenStream tokenizer = new RussianLetterTokenizer(reader, charset);
			TokenStream lowered = new RussianLowerCaseFilter(tokenizer, charset);
			TokenStream stopped = new StopFilter(lowered, stoptable);
			TokenStream stemmed = new RussianStemFilter(stopped, charset);

			return stemmed;
		}