Normalizes tokens extracted with {@link StandardTokenizer}.
Inheritance: Lucene.Net.Analysis.TokenFilter
 public override TokenStream TokenStream(string fieldName, TextReader reader)
 {
     var tokenizer = new StandardTokenizer(Version.LUCENE_30, reader);
     TokenStream filterStream = new StandardFilter(tokenizer);
     TokenStream stream = new StopFilter(true, filterStream, _stopWords, true);
     return stream;
 }
 /// <summary>
 /// Constructs a <seealso cref="StandardTokenizer"/> filtered by a {@link
 ///    StandardFilter}, a <seealso cref="LowerCaseFilter"/>, a <seealso cref="StopFilter"/>,
 ///    and a <seealso cref="SnowballFilter"/> 
 /// </summary>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer tokenizer = new StandardTokenizer(matchVersion, reader);
     TokenStream result = new StandardFilter(matchVersion, tokenizer);
     // remove the possessive 's for english stemmers
     if (matchVersion.OnOrAfter(LuceneVersion.LUCENE_31) && (name.Equals("English") || name.Equals("Porter") || name.Equals("Lovins")))
     {
         result = new EnglishPossessiveFilter(result);
     }
     // Use a special lowercase filter for turkish, the stemmer expects it.
     if (matchVersion.OnOrAfter(LuceneVersion.LUCENE_31) && name.Equals("Turkish"))
     {
         result = new TurkishLowerCaseFilter(result);
     }
     else
     {
         result = new LowerCaseFilter(matchVersion, result);
     }
     if (stopSet != null)
     {
         result = new StopFilter(matchVersion, result, stopSet);
     }
     result = new SnowballFilter(result, name);
     return new TokenStreamComponents(tokenizer, result);
 }
		/// <summary>Constructs a {@link StandardTokenizer} filtered by a {@link
		/// StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}. 
		/// </summary>
		public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
		{
			TokenStream result = new StandardTokenizer(reader);
			result = new StandardFilter(result);
			result = new LowerCaseFilter(result);
			result = new StopFilter(result, stopSet);
			return result;
		}
 /// <summary>Constructs a {@link StandardTokenizer} filtered by a {@link
 /// StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}. 
 /// </summary>
 public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
 {
     TokenStream result = new StandardTokenizer(reader);
     result = new StandardFilter(result);
     result = new LowerCaseFilter(result);
     result = new StopFilter(result, stopSet);
     return result;
 }
Example #5
0
 public override TokenStream TokenStream(string fieldName, TextReader reader)
 {
     TokenStream result = new StandardTokenizer(reader);
     result = new StandardFilter(result);
     result = new LowerCaseFilter(result);
     result = new RuSnowballFilter(result);
     return result;
 }
Example #6
0
 public override TokenStream TokenStream(string fieldName, TextReader reader)
 {
     var tokenizer = new StandardTokenizer(Version.LUCENE_29, reader);
     tokenizer.MaxTokenLength = 255;
     TokenStream filter = new StandardFilter(tokenizer);
     filter = new LowerCaseFilter(filter);
     filter = new StopFilter(false, filter, StandardAnalyzer.STOP_WORDS_SET);
     return new NGramTokenFilter(filter, 2, 6);
 }
Example #7
0
 public override TokenStream TokenStream(String fieldName, TextReader reader)
 {
     TokenStream ts = new StandardTokenizer(matchVersion, reader);
     ts = new StandardFilter(ts);
     ts = new ThaiWordFilter(ts);
     ts = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                         ts, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
     return ts;
 }
Example #8
0
 /** Constructs a {@link StandardTokenizer} filtered by a {@link
  * StandardFilter}, a {@link LowerCaseFilter}, a {@link StopFilter}
  * and a {@link SpanishStemFilter}. */
 public override TokenStream TokenStream(string fieldName, TextReader reader)
 {
     TokenStream result = new StandardTokenizer(Version.LUCENE_24,reader);
     result = new StandardFilter(result);
     result = new LowerCaseFilter(result);
     result = new StopFilter(true,result, stopTable);
     result = new SpanishStemFilter(result);
     return result;
 }
Example #9
0
 public override TokenStream TokenStream(String fieldName, TextReader reader)
 {
     Tokenizer source = new StandardTokenizer(matchVersion, reader);
     TokenStream sink = new StandardFilter(source);
     sink = new LowerCaseFilter(sink);
     //sink = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion), sink, stopSet);
     sink = new CroatianStemFilter(sink, stemmer);
     return sink;
 }
        public override TokenStream TokenStream(string fieldName, TextReader reader)
        {
            TokenStream result = new StandardTokenizer(matchVersion, reader);
            result = new StandardFilter(result);
            result = new LowerCaseFilter(result);
            result = new StopFilter(this.enableStopPositionIncrements, result, stoptable);
            result = new BulgarianStemFilter(result);

            return result;
        }
Example #11
0
        public override TokenStream TokenStream(string fieldName, TextReader reader)
        {
            var tokenizer = new StandardTokenizer(Version.LUCENE_30, reader);
            tokenizer.MaxTokenLength = 255;
            TokenStream filter = new StandardFilter(tokenizer);
            filter = new LowerCaseFilter(filter);
            filter = new NGramTokenFilter(filter, 2, 255);

            return filter;
        }
Example #12
0
        public override TokenStreamComponents CreateComponents(string fieldName, Reader reader)
        {
            UAX29URLEmailTokenizer src = new UAX29URLEmailTokenizer(matchVersion, reader);

            src.MaxTokenLength = maxTokenLength;
            TokenStream tok = new StandardFilter(matchVersion, src);

            tok = new LowerCaseFilter(matchVersion, tok);
            tok = new StopFilter(matchVersion, tok, stopwords);
            return(new TokenStreamComponentsAnonymousInnerClassHelper(this, src, tok, reader));
        }
Example #13
0
        /// <summary>Constructs a <see cref="StandardTokenizer" /> filtered by a <see cref="StandardFilter" />
        ///, a <see cref="LowerCaseFilter" /> and a <see cref="StopFilter" />.
        /// </summary>
        public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
        {
            StandardTokenizer tokenStream = new StandardTokenizer(matchVersion, reader);

            tokenStream.MaxTokenLength = maxTokenLength;
            TokenStream result = new StandardFilter(tokenStream);

            result = new LowerCaseFilter(result);
            result = new StopFilter(enableStopPositionIncrements, result, stopSet);
            return(result);
        }
Example #14
0
 /// <summary>Constructs a <see cref="StandardTokenizer"/> filtered by a {@link
 /// StandardFilter}, a <see cref="LowerCaseFilter"/> and a <see cref="StopFilter"/>. 
 /// </summary>
 public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
 {
     TokenStream result = new StandardTokenizer(matchVersion, reader);
     result = new StandardFilter(result);
     result = new LowerCaseFilter(result);
     if (stopSet != null)
         result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                                 result, stopSet);
     result = new SnowballFilter(result, name);
     return result;
 }
Example #15
0
        protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
        {
            var src = new StandardTokenizer(m_matchVersion, reader);

            src.MaxTokenLength = maxTokenLength;
            TokenStream tok = new StandardFilter(m_matchVersion, src);

            tok = new LowerCaseFilter(m_matchVersion, tok);
            tok = new StopFilter(m_matchVersion, tok, m_stopwords);
            return(new TokenStreamComponentsAnonymousInnerClassHelper(this, src, tok, reader));
        }
        /// <summary>Constructs a {@link StandardTokenizer} filtered by a {@link
        /// StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}.
        /// </summary>
        public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
        {
            StandardTokenizer tokenStream = new StandardTokenizer(reader, replaceInvalidAcronym);

            tokenStream.SetMaxTokenLength(maxTokenLength);
            TokenStream result = new StandardFilter(tokenStream);

            result = new LowerCaseFilter(result);
            result = new StopFilter(result, stopSet);
            return(result);
        }
 public override TokenStream TokenStream(string fieldName, TextReader reader) {
     TokenStream result = new StandardTokenizer(this._luceneVersion, reader);
     result = new StandardFilter(result);
     result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(this._luceneVersion),
         result,
         CharArraySet.UnmodifiableSet(new CharArraySet((IEnumerable<string>)FRENCH_STOP_WORDS, false))
     );
     result = new FrenchStemFilter(result, CharArraySet.EMPTY_SET);
     // Convert to lowercase after stemming!
     result = new LowerCaseFilter(result);
     result = new ASCIIFoldingFilter(result);
     return result;
 }
        public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
        {
            TokenStream result = new StandardTokenizer(kLuceneVersion, reader);

            result = new StandardFilter(result);
            result = new LowerCaseFilter(result);
            result = new ASCIIFoldingFilter(result);
            result = new StopFilter(false, result, StopFilter.MakeStopSet(kEnglishStopWords));
            result = new EdgeNGramTokenFilter(
                result, Lucene.Net.Analysis.NGram.EdgeNGramTokenFilter.Side.FRONT, 1, 20);

            return result;
        }
Example #19
0
        /// <summary>
        /// 
        /// </summary>
        /// <remarks></remarks>
        /// <seealso cref=""/>
        /// <param name="fieldName"></param>
        /// <param name="reader"></param>
        /// <returns></returns>
        public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
        {
            //create the tokenizer
            TokenStream result = new StandardTokenizer(Lucene.Net.Util.Version.LUCENE_30, reader);

            //add in filters
            result = new StandardFilter(result); // first normalize the StandardTokenizer
            result = new LowerCaseFilter(result);// makes sure everything is lower case
            result = new StopFilter(true, result, StopAnalyzer.ENGLISH_STOP_WORDS_SET); // use the default list of Stop Words, provided by the StopAnalyzer class.
            result = new SynonymFilter(result, SynonymEngine); // injects the synonyms.

            //return the built token stream.
            return result;
        }
Example #20
0
        /// <summary>Constructs a {@link StandardTokenizer} filtered by a {@link
        /// StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}.
        /// </summary>
        public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
        {
            StandardTokenizer tokenStream = new StandardTokenizer(reader, replaceInvalidAcronym);

            tokenStream.SetMaxTokenLength(maxTokenLength);
            TokenStream result = new StandardFilter(tokenStream);

            result = new LowerCaseFilter(result);
            if (useDefaultStopPositionIncrements)
            {
                result = new StopFilter(result, stopSet);
            }
            else
            {
                result = new StopFilter(enableStopPositionIncrements, result, stopSet);
            }
            return(result);
        }
Example #21
0
        /// <summary>Constructs a {@link StandardTokenizer} filtered by a {@link
        /// StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}.
        /// </summary>
        public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
        {
            TokenStream result;

            if (tagsMode)
            {
                result = new TagsTokenizer(reader);
            }
            else
            {
                result = new StandardTokenizer(reader);
            }

            result = new StandardFilter(result);
            result = new LowerCaseFilter(result);
            result = new StopFilter(result, stopSet);
            return(result);
        }
Example #22
0
        public override TokenStream TokenStream(string fieldName, TextReader reader)
        {
            Tokenizer src = new StandardTokenizer(Version.LUCENE_30, reader);
            TokenStream filter = new StandardFilter(src);
            //var stoplist = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
            HashSet<string> newWords = new HashSet<string>()
            {
                "said", "have", "the", "more", "from", "who", "he", "than", "it", "were", "use", "has", "also",
                "been", "we", "which", "had", "you", "us", "them", "so", "in", "i", "our", "his", "to", "of", "a",
                "st", "ad", "co", "re", "ve", "1", "2", "3", "4", "5", "6", "7", "8", "9", "0", "f", "g", "it"
            };
            foreach (var var in StopAnalyzer.ENGLISH_STOP_WORDS_SET)
            {
                newWords.Add(var);
            }

            TokenStream result = new StandardTokenizer(Version.LUCENE_30, reader);
            result = new SnowballFilter(result, new EnglishStemmer());
            result = new LowerCaseFilter(result);
            result = new ASCIIFoldingFilter(result);
            result = new StopFilter(true, result, newWords);
            return result;
        }
 /// <summary>
 /// Creates a
 /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
 /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
 /// </summary>
 /// <returns> A
 ///         <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
 ///         built from an <seealso cref="StandardTokenizer"/> filtered with
 ///         <seealso cref="StandardFilter"/>, <seealso cref="TurkishLowerCaseFilter"/>,
 ///         <seealso cref="StopFilter"/>, <seealso cref="SetKeywordMarkerFilter"/> if a stem
 ///         exclusion set is provided and <seealso cref="SnowballFilter"/>. </returns>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new StandardTokenizer(matchVersion, reader);
     TokenStream result = new StandardFilter(matchVersion, source);
     if (matchVersion.OnOrAfter(LuceneVersion.LUCENE_48))
     {
         result = new ApostropheFilter(result);
     }
     result = new TurkishLowerCaseFilter(result);
     result = new StopFilter(matchVersion, result, stopwords);
     if (stemExclusionSet.Any())
     {
         result = new SetKeywordMarkerFilter(result, stemExclusionSet);
     }
     result = new SnowballFilter(result, new TurkishStemmer());
     return new TokenStreamComponents(source, result);
 }
Example #24
0
		/// <summary>
		/// Creates a TokenStream which tokenizes all the text in the provided TextReader. 
		/// </summary>
		/// <param name="fieldName"></param>
		/// <param name="reader"></param>
		/// <returns>A TokenStream build from a StandardTokenizer filtered with StandardFilter, StopFilter, GermanStemFilter</returns>
		public override TokenStream TokenStream(String fieldName, TextReader reader)
		{
			TokenStream result = new StandardTokenizer( reader );
			result = new StandardFilter( result );
			result = new StopFilter( result, stoptable );
			result = new DutchStemFilter( result, excltable, _stemdict);
			return result;
		}
Example #25
0
	/*
	 * Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}.
	 *
	 * @return  A {@link TokenStream} built from a {@link StandardTokenizer} filtered with
	 * 			{@link StandardFilter}, {@link LowerCaseFilter}, and {@link StopFilter}
	 */
	public override sealed TokenStream TokenStream( String fieldName, TextReader reader ) {
                TokenStream result = new StandardTokenizer( matchVersion, reader );
		result = new StandardFilter( result );
		result = new LowerCaseFilter( result );
		result = new StopFilter( StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                                         result, stoptable );
		return result;
	}
Example #26
0
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     var src = new StandardTokenizer(matchVersion, reader);
     src.MaxTokenLength = maxTokenLength;
     TokenStream tok = new StandardFilter(matchVersion, src);
     tok = new LowerCaseFilter(matchVersion, tok);
     tok = new StopFilter(matchVersion, tok, stopwords);
     return new TokenStreamComponentsAnonymousInnerClassHelper(this, src, tok, reader);
 }
 /**
  * Creates a TokenStream which tokenizes all the text in the provided Reader.
  *
  * @return  A TokenStream build from a StandardTokenizer filtered with
  * 			StandardFilter, StopFilter, GermanStemFilter and LowerCaseFilter.
  */
 public override TokenStream TokenStream(string fieldName, TextReader reader)
 {
     TokenStream result = new StandardTokenizer(reader);
     result = new LowerCaseFilter(result);
     result = new StandardFilter(result);
     result = new StopFilter(result, stoptable);
     result = new BrazilianStemFilter(result, excltable);
     return result;
 }
        /// <summary>
        /// Creates
        /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
        /// used to tokenize all the text in the provided <seealso cref="Reader"/>.
        /// </summary>
        /// <returns> <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
        ///         built from a <seealso cref="StandardTokenizer"/> filtered with
        ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>
        ///         , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
        ///         provided, <seealso cref="GermanNormalizationFilter"/> and <seealso cref="GermanLightStemFilter"/> </returns>
        public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
        {
            Tokenizer source = new StandardTokenizer(matchVersion, reader);
            TokenStream result = new StandardFilter(matchVersion, source);
            result = new LowerCaseFilter(matchVersion, result);
            result = new StopFilter(matchVersion, result, stopwords);
            result = new SetKeywordMarkerFilter(result, exclusionSet);
#pragma warning disable 612, 618
            if (matchVersion.OnOrAfter(LuceneVersion.LUCENE_36))
            {
                result = new GermanNormalizationFilter(result);
                result = new GermanLightStemFilter(result);
            }
            else if (matchVersion.OnOrAfter(LuceneVersion.LUCENE_31))
#pragma warning restore 612, 618
            {
                result = new SnowballFilter(result, new German2Stemmer());
            }
            else
            {
                result = new GermanStemFilter(result);
            }
            return new TokenStreamComponents(source, result);
        }
        /// <summary>
        /// Creates
        /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
        /// used to tokenize all the text in the provided <seealso cref="Reader"/>.
        /// </summary>
        /// <returns> <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
        ///         built from a <seealso cref="StandardTokenizer"/> filtered with
        ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>
        ///         , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
        ///         provided, and <seealso cref="SnowballFilter"/> </returns>
        public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
        {
#pragma warning disable 612, 618
            if (matchVersion.OnOrAfter(LuceneVersion.LUCENE_31))
#pragma warning restore 612, 618
            {
                Tokenizer source = new StandardTokenizer(matchVersion, reader);
                TokenStream result = new StandardFilter(matchVersion, source);
                result = new LowerCaseFilter(matchVersion, result);
                result = new StopFilter(matchVersion, result, stopwords);
                if (stemExclusionSet.Count > 0)
                {
                    result = new SetKeywordMarkerFilter(result, stemExclusionSet);
                }
                result = new SnowballFilter(result, new Tartarus.Snowball.Ext.RussianStemmer());
                return new TokenStreamComponents(source, result);
            }
            else
            {
#pragma warning disable 612, 618
                Tokenizer source = new RussianLetterTokenizer(matchVersion, reader);
#pragma warning restore 612, 618
                TokenStream result = new LowerCaseFilter(matchVersion, source);
                result = new StopFilter(matchVersion, result, stopwords);
                if (stemExclusionSet.Count > 0)
                {
                    result = new SetKeywordMarkerFilter(result, stemExclusionSet);
                }
                result = new SnowballFilter(result, new Tartarus.Snowball.Ext.RussianStemmer());
                return new TokenStreamComponents(source, result);
            }
        }
		/// <summary>Constructs a {@link StandardTokenizer} filtered by a {@link
		/// StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}. 
		/// </summary>
		public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
		{
			StandardTokenizer tokenStream = new StandardTokenizer(reader, replaceInvalidAcronym);
			tokenStream.SetMaxTokenLength(maxTokenLength);
			TokenStream result = new StandardFilter(tokenStream);
			result = new LowerCaseFilter(result);
			if (useDefaultStopPositionIncrements)
			{
				result = new StopFilter(result, stopSet);
			}
			else
			{
				result = new StopFilter(enableStopPositionIncrements, result, stopSet);
			}
			return result;
		}
 /// <summary>
 /// Creates a
 /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
 /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
 /// </summary>
 /// <returns> A
 ///         <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
 ///         built from an <seealso cref="StandardTokenizer"/> filtered with
 ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>
 ///         , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
 ///         provided and <seealso cref="SnowballFilter"/>. </returns>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new StandardTokenizer(matchVersion, reader);
     TokenStream result = new StandardFilter(matchVersion, source);
     result = new LowerCaseFilter(matchVersion, result);
     result = new StopFilter(matchVersion, result, stopwords);
     if (stemExclusionSet.Count > 0)
     {
         result = new SetKeywordMarkerFilter(result, stemExclusionSet);
     }
     result = new SnowballFilter(result, new DanishStemmer());
     return new TokenStreamComponents(source, result);
 }
 /// <summary>
 /// Creates a
 /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
 /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
 /// </summary>
 /// <returns> A
 ///         <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
 ///         built from an <seealso cref="StandardTokenizer"/> filtered with
 ///         <seealso cref="StandardFilter"/>, <seealso cref="IrishLowerCaseFilter"/>, <seealso cref="StopFilter"/>
 ///         , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
 ///         provided and <seealso cref="SnowballFilter"/>. </returns>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new StandardTokenizer(matchVersion, reader);
     TokenStream result = new StandardFilter(matchVersion, source);
     StopFilter s = new StopFilter(matchVersion, result, HYPHENATIONS);
     #pragma warning disable 612, 618
     if (!matchVersion.OnOrAfter(LuceneVersion.LUCENE_44))
     #pragma warning restore 612, 618
     {
         s.EnablePositionIncrements = false;
     }
     result = s;
     result = new ElisionFilter(result, DEFAULT_ARTICLES);
     result = new IrishLowerCaseFilter(result);
     result = new StopFilter(matchVersion, result, stopwords);
     if (stemExclusionSet.Count > 0)
     {
         result = new SetKeywordMarkerFilter(result, stemExclusionSet);
     }
     result = new SnowballFilter(result, new IrishStemmer());
     return new TokenStreamComponents(source, result);
 }
 /// <summary>
 /// Creates
 /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
 /// used to tokenize all the text in the provided <seealso cref="Reader"/>.
 /// </summary>
 /// <returns> <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
 ///         built from a <seealso cref="StandardTokenizer"/> filtered with
 ///         <seealso cref="LowerCaseFilter"/>, <seealso cref="StandardFilter"/>, <seealso cref="StopFilter"/>
 ///         , and <seealso cref="BrazilianStemFilter"/>. </returns>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new StandardTokenizer(matchVersion, reader);
     TokenStream result = new LowerCaseFilter(matchVersion, source);
     result = new StandardFilter(matchVersion, result);
     result = new StopFilter(matchVersion, result, stopwords);
     if (excltable != null && excltable.Count > 0)
     {
         result = new SetKeywordMarkerFilter(result, excltable);
     }
     return new TokenStreamComponents(source, new BrazilianStemFilter(result));
 }
Example #34
0
 /// <summary>
 /// Creates a TokenStream which tokenizes all the text in the provided TextReader. 
 /// </summary>
 /// <param name="fieldName"></param>
 /// <param name="reader"></param>
 /// <returns>A TokenStream build from a StandardTokenizer filtered with StandardFilter, StopFilter, GermanStemFilter</returns>
 public override TokenStream TokenStream(String fieldName, TextReader reader)
 {
     TokenStream result = new StandardTokenizer(matchVersion, reader);
     result = new StandardFilter(result);
     result = new LowerCaseFilter(result);
     result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion), result, stopSet);
     result = new GermanStemFilter(result, exclusionSet, _normalizeDin2);
     return result;
 }
		/// <summary>Constructs a <see cref="StandardTokenizer" /> filtered by a <see cref="StandardFilter" />
		///, a <see cref="LowerCaseFilter" /> and a <see cref="StopFilter" />. 
		/// </summary>
		public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
		{
			StandardTokenizer tokenStream = new StandardTokenizer(matchVersion, reader);
			tokenStream.MaxTokenLength = maxTokenLength;
			TokenStream result = new StandardFilter(tokenStream);
			result = new LowerCaseFilter(result);
			result = new StopFilter(enableStopPositionIncrements, result, stopSet);
			return result;
		}
        /// <summary>
        /// Creates
        /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
        /// used to tokenize all the text in the provided <seealso cref="Reader"/>.
        /// </summary>
        /// <returns> <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
        ///         built from a <seealso cref="StandardTokenizer"/> filtered with
        ///         <seealso cref="GreekLowerCaseFilter"/>, <seealso cref="StandardFilter"/>,
        ///         <seealso cref="StopFilter"/>, and <seealso cref="GreekStemFilter"/> </returns>
        public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
        {
            Tokenizer source = new StandardTokenizer(matchVersion, reader);
            TokenStream result = new GreekLowerCaseFilter(matchVersion, source);
#pragma warning disable 612, 618
            if (matchVersion.OnOrAfter(LuceneVersion.LUCENE_31))
            {
                result = new StandardFilter(matchVersion, result);
            }
            result = new StopFilter(matchVersion, result, stopwords);
            if (matchVersion.OnOrAfter(LuceneVersion.LUCENE_31))
#pragma warning restore 612, 618
            {
                result = new GreekStemFilter(result);
            }
            return new TokenStreamComponents(source, result);
        }
Example #37
0
        public override TokenStream TokenStream(string fieldName, TextReader reader)
        {
            TokenStream result = new StandardTokenizer(this.matchVersion, reader);
            result = new LowerCaseFilter(result);
            result = new StandardFilter(result);
            result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(this.matchVersion), result, this.stoptable);
            result = new BrazilianStemFilterCustom(result, this.excltable);

            return result;
        }