public virtual void TestWithKeywordAttribute()
 {
     CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
     set.add("fischen");
     GermanStemFilter filter = new GermanStemFilter(new SetKeywordMarkerFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader("Fischen Trinken")), set));
     AssertTokenStreamContents(filter, new string[] { "fischen", "trink" });
 }
Пример #2
0
        /// <summary>
        /// Creates
        /// <see cref="TokenStreamComponents"/>
        /// used to tokenize all the text in the provided <see cref="TextReader"/>.
        /// </summary>
        /// <returns> <see cref="TokenStreamComponents"/>
        ///         built from a <see cref="StandardTokenizer"/> filtered with
        ///         <see cref="StandardFilter"/>, <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/>,
        ///         <see cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
        ///         provided, <see cref="GermanNormalizationFilter"/> and <see cref="GermanLightStemFilter"/> </returns>
        protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
        {
            Tokenizer   source = new StandardTokenizer(m_matchVersion, reader);
            TokenStream result = new StandardFilter(m_matchVersion, source);

            result = new LowerCaseFilter(m_matchVersion, result);
            result = new StopFilter(m_matchVersion, result, m_stopwords);
            result = new SetKeywordMarkerFilter(result, exclusionSet);
#pragma warning disable 612, 618
            if (m_matchVersion.OnOrAfter(LuceneVersion.LUCENE_36))
            {
                result = new GermanNormalizationFilter(result);
                result = new GermanLightStemFilter(result);
            }
            else if (m_matchVersion.OnOrAfter(LuceneVersion.LUCENE_31))
#pragma warning restore 612, 618
            {
                result = new SnowballFilter(result, new German2Stemmer());
            }
            else
            {
                result = new GermanStemFilter(result);
            }
            return(new TokenStreamComponents(source, result));
        }
Пример #3
0
        public virtual void TestWithKeywordAttribute()
        {
            CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);

            set.add("fischen");
            GermanStemFilter filter = new GermanStemFilter(new SetKeywordMarkerFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader("Fischen Trinken")), set));

            AssertTokenStreamContents(filter, new string[] { "fischen", "trink" });
        }
Пример #4
0
        /// <summary>
        /// Creates a TokenStream which tokenizes all the text in the provided TextReader.
        /// </summary>
        /// <param name="fieldName"></param>
        /// <param name="reader"></param>
        /// <returns>A TokenStream build from a StandardTokenizer filtered with StandardFilter, StopFilter, GermanStemFilter</returns>
        public override TokenStream TokenStream(String fieldName, TextReader reader)
        {
            TokenStream result = new StandardTokenizer(matchVersion, reader);

            result = new StandardFilter(result);
            result = new LowerCaseFilter(result);
            result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion), result, stopSet);
            result = new GermanStemFilter(result, exclusionSet, _normalizeDin2);
            return(result);
        }
Пример #5
0
        /// <summary>
        /// Creates a TokenStream which tokenizes all the text in the provided TextReader.
        /// </summary>
        /// <param name="fieldName"></param>
        /// <param name="reader"></param>
        /// <returns>A TokenStream build from a StandardTokenizer filtered with StandardFilter, StopFilter, GermanStemFilter</returns>
        public override TokenStream TokenStream(String fieldName, TextReader reader)
        {
            TokenStream result = new StandardTokenizer(reader);

            result = new StandardFilter(result);
            result = new LowerCaseFilter(result);
            result = new StopFilter(result, stoptable);
            result = new GermanStemFilter(result, excltable);
            return(result);
        }
Пример #6
0
        /// <summary>
        /// Creates
        /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
        /// used to tokenize all the text in the provided <seealso cref="Reader"/>.
        /// </summary>
        /// <returns> <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
        ///         built from a <seealso cref="StandardTokenizer"/> filtered with
        ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>
        ///         , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
        ///         provided, <seealso cref="GermanNormalizationFilter"/> and <seealso cref="GermanLightStemFilter"/> </returns>
        public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
        {
            Tokenizer source = new StandardTokenizer(matchVersion, reader);
            TokenStream result = new StandardFilter(matchVersion, source);
            result = new LowerCaseFilter(matchVersion, result);
            result = new StopFilter(matchVersion, result, stopwords);
            result = new SetKeywordMarkerFilter(result, exclusionSet);
#pragma warning disable 612, 618
            if (matchVersion.OnOrAfter(LuceneVersion.LUCENE_36))
            {
                result = new GermanNormalizationFilter(result);
                result = new GermanLightStemFilter(result);
            }
            else if (matchVersion.OnOrAfter(LuceneVersion.LUCENE_31))
#pragma warning restore 612, 618
            {
                result = new SnowballFilter(result, new German2Stemmer());
            }
            else
            {
                result = new GermanStemFilter(result);
            }
            return new TokenStreamComponents(source, result);
        }
Пример #7
0
		/// <summary>
		/// Creates a TokenStream which tokenizes all the text in the provided TextReader. 
		/// </summary>
		/// <param name="fieldName"></param>
		/// <param name="reader"></param>
		/// <returns>A TokenStream build from a StandardTokenizer filtered with StandardFilter, StopFilter, GermanStemFilter</returns>
		public override TokenStream TokenStream(String fieldName, TextReader reader)
		{
			TokenStream result = new StandardTokenizer( reader );
			result = new StandardFilter( result );
			result = new LowerCaseFilter(result);
			result = new StopFilter( result, stoptable );
			result = new GermanStemFilter( result, excltable );
			return result;
		}
Пример #8
0
 /// <summary>
 /// Creates a TokenStream which tokenizes all the text in the provided TextReader. 
 /// </summary>
 /// <param name="fieldName"></param>
 /// <param name="reader"></param>
 /// <returns>A TokenStream build from a StandardTokenizer filtered with StandardFilter, StopFilter, GermanStemFilter</returns>
 public override TokenStream TokenStream(String fieldName, TextReader reader)
 {
     TokenStream result = new StandardTokenizer(matchVersion, reader);
     result = new StandardFilter(result);
     result = new LowerCaseFilter(result);
     result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion), result, stopSet);
     result = new GermanStemFilter(result, exclusionSet, _normalizeDin2);
     return result;
 }