Normalizes tokens extracted with StandardTokenizer.
Inheritance: TokenFilter
コード例 #1
0
 /// <summary>
 /// Builds the analysis chain for a field: a <see cref="PathTokenizer"/> whose output is passed
 /// through a <see cref="StandardFilter"/>, a <see cref="LowerCaseFilter"/> and a
 /// <see cref="StopFilter"/> configured with <c>StandardAnalyzer.STOP_WORDS_SET</c>.
 /// </summary>
 /// <param name="field">Name of the field being analyzed; not referenced by this method.</param>
 /// <param name="reader">Reader handed to the tokenizer as its text source.</param>
 /// <returns>Components pairing the tokenizer with the last filter of the chain.</returns>
 protected override TokenStreamComponents createComponents(string field, java.io.Reader reader)
 {
     var source = new PathTokenizer(reader);

     // Each stage wraps the previous one; the order is standard -> lowercase -> stop words.
     TokenStream normalized = new StandardFilter(source);
     TokenStream lowered = new LowerCaseFilter(normalized);
     TokenStream withoutStopWords = new StopFilter(lowered, StandardAnalyzer.STOP_WORDS_SET);

     return new TokenStreamComponents(source, withoutStopWords);
 }
コード例 #2
0
//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
//ORIGINAL LINE: @Override protected TokenStreamComponents createComponents(final String fieldName, final java.io.Reader reader)
        /// <summary>
        /// Builds the analysis chain for a field: a <see cref="UAX29URLEmailTokenizer"/> followed by a
        /// <see cref="StandardFilter"/>, a <see cref="LowerCaseFilter"/> and a <see cref="StopFilter"/>
        /// using this analyzer's <c>stopwords</c>.
        /// </summary>
        /// <param name="fieldName">Name of the field being analyzed; not referenced by this method.</param>
        /// <param name="reader">Reader handed to the tokenizer and to the returned components helper.</param>
        /// <returns>
        /// A <c>TokenStreamComponentsAnonymousInnerClassHelper</c> wrapping the tokenizer and the end of
        /// the filter chain (the helper itself is defined elsewhere).
        /// </returns>
        protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
        {
            // The tokenizer is created with this analyzer's configured maximum token length.
            var tokenizer = new UAX29URLEmailTokenizer(matchVersion, reader)
            {
                MaxTokenLength = maxTokenLength
            };

            // Filter order: standard -> lowercase -> stop words.
            TokenStream normalized = new StandardFilter(matchVersion, tokenizer);
            TokenStream lowered = new LowerCaseFilter(matchVersion, normalized);
            TokenStream withoutStopWords = new StopFilter(matchVersion, lowered, stopwords);

            return new TokenStreamComponentsAnonymousInnerClassHelper(this, tokenizer, withoutStopWords, reader);
        }
コード例 #3
0
 /// <summary>
 /// Constructs a <see cref="StandardTokenizer"/> filtered by a <see cref="StandardFilter"/>,
 /// a lowercase filter (<see cref="TurkishLowerCaseFilter"/> or <see cref="LowerCaseFilter"/>),
 /// an optional <see cref="StopFilter"/>, and finally a <see cref="SnowballFilter"/>.
 /// </summary>
 /// <param name="fieldName">Name of the field being analyzed; not referenced by this method.</param>
 /// <param name="reader">Reader handed to the tokenizer as its text source.</param>
 /// <returns>Components pairing the tokenizer with the Snowball filter that ends the chain.</returns>
 public override TokenStreamComponents createComponents(string fieldName, Reader reader)
 {
     Tokenizer source = new StandardTokenizer(matchVersion, reader);
     TokenStream stream = new StandardFilter(matchVersion, source);

     // English stemmers want the possessive 's stripped before stemming (LUCENE_31 and later only).
     if (matchVersion.onOrAfter(Version.LUCENE_31)
         && (name.Equals("English") || name.Equals("Porter") || name.Equals("Lovins")))
     {
         stream = new EnglishPossessiveFilter(stream);
     }

     // The Turkish stemmer expects its own lowercase filter; every other case uses the generic one.
     bool useTurkishLowerCase = matchVersion.onOrAfter(Version.LUCENE_31) && name.Equals("Turkish");
     stream = useTurkishLowerCase
         ? (TokenStream)new TurkishLowerCaseFilter(stream)
         : new LowerCaseFilter(matchVersion, stream);

     // Stop-word filtering is applied only when a stop set is present.
     if (stopSet != null)
     {
         stream = new StopFilter(matchVersion, stream, stopSet);
     }

     TokenStream stemmed = new SnowballFilter(stream, name);
     return new TokenStreamComponents(source, stemmed);
 }