Example #1
        /// <summary>
        /// Creates <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
        /// used to tokenize all the text in the provided <seealso cref="Reader"/>.
        /// </summary>
        /// <returns> <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
        ///         built from a <seealso cref="StandardTokenizer"/> filtered with
        ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>,
        ///         <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
        ///         provided, and <seealso cref="SnowballFilter"/> </returns>
        protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
        {
            if (matchVersion.onOrAfter(Version.LUCENE_31))
            {
                // Lucene 3.1+: Unicode-aware StandardTokenizer
                Tokenizer source = new StandardTokenizer(matchVersion, reader);
                TokenStream result = new StandardFilter(matchVersion, source);
                result = new LowerCaseFilter(matchVersion, result);
                result = new StopFilter(matchVersion, result, stopwords);
                if (!stemExclusionSet.Empty)
                {
                    result = new SetKeywordMarkerFilter(result, stemExclusionSet);
                }
                result = new SnowballFilter(result, new org.tartarus.snowball.ext.RussianStemmer());
                return new TokenStreamComponents(source, result);
            }
            else
            {
                // Pre-3.1: keep the legacy RussianLetterTokenizer for index backward compatibility
                Tokenizer source = new RussianLetterTokenizer(matchVersion, reader);
                TokenStream result = new LowerCaseFilter(matchVersion, source);
                result = new StopFilter(matchVersion, result, stopwords);
                if (!stemExclusionSet.Empty)
                {
                    result = new SetKeywordMarkerFilter(result, stemExclusionSet);
                }
                result = new SnowballFilter(result, new org.tartarus.snowball.ext.RussianStemmer());
                return new TokenStreamComponents(source, result);
            }
        }
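For reference, a minimal consumption sketch of an analyzer built this way (assuming the enclosing class is Lucene.NET's RussianAnalyzer). It is written against the released Lucene.NET 4.8 API, so names such as LuceneVersion, GetTokenStream and ICharTermAttribute differ from the intermediate converter output above; the field name "content" and the sample text are arbitrary.

using System;
using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Ru;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Util;

public static class RussianAnalyzerDemo
{
    public static void Main()
    {
        Analyzer analyzer = new RussianAnalyzer(LuceneVersion.LUCENE_48);
        // Run the full chain (tokenizer + filters) over a small piece of text.
        using (TokenStream ts = analyzer.GetTokenStream("content", new StringReader("Вместе весело шагать")))
        {
            ICharTermAttribute term = ts.AddAttribute<ICharTermAttribute>();
            ts.Reset();
            while (ts.IncrementToken())
            {
                Console.WriteLine(term.ToString()); // lower-cased, stop-filtered, stemmed tokens
            }
            ts.End();
        }
    }
}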
Example #2
        /// <summary>
        /// Backward-compatibility check: with <c>Version.LUCENE_30</c> the tokenizer keeps digits,
        /// Cyrillic and Latin letters, but does not treat the supplementary character \ud801\udc1c
        /// as a token character, so it is dropped from the final token.
        /// </summary>
        public virtual void testRussianLetterTokenizerBWCompat()
        {
            StringReader           reader    = new StringReader("1234567890 Вместе \ud801\udc1ctest");
            RussianLetterTokenizer tokenizer = new RussianLetterTokenizer(Version.LUCENE_30, reader);

            assertTokenStreamContents(tokenizer, new string[] { "1234567890", "Вместе", "test" });
        }
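For contrast, a hedged companion sketch that is not part of the snippet above: from Lucene 3.1 on, CharTokenizer-based tokenizers such as RussianLetterTokenizer operate on Unicode code points, so the supplementary character \ud801\udc1c is expected to remain attached to the final token. The method name below is made up, and the expected tokens follow the corresponding upstream Lucene test.

        public virtual void testRussianLetterTokenizerCodePoints()
        {
            StringReader           reader    = new StringReader("1234567890 Вместе \ud801\udc1ctest");
            RussianLetterTokenizer tokenizer = new RussianLetterTokenizer(Version.LUCENE_31, reader);

            // Unlike the LUCENE_30 case above, the supplementary character is kept in the token.
            assertTokenStreamContents(tokenizer, new string[] { "1234567890", "Вместе", "\ud801\udc1ctest" });
        }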
Example #4
 /// <summary>
 /// Creates <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
 /// used to tokenize all the text in the provided <seealso cref="Reader"/>.
 /// </summary>
 /// <returns> <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
 ///         built from a <seealso cref="StandardTokenizer"/> filtered with
 ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>,
 ///         <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
 ///         provided, and <seealso cref="SnowballFilter"/> </returns>
 protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
 {
     if (matchVersion.onOrAfter(Version.LUCENE_31))
     {
         Tokenizer source = new StandardTokenizer(matchVersion, reader);
         TokenStream result = new StandardFilter(matchVersion, source);
         result = new LowerCaseFilter(matchVersion, result);
         result = new StopFilter(matchVersion, result, stopwords);
         if (!stemExclusionSet.Empty)
         {
             result = new SetKeywordMarkerFilter(result, stemExclusionSet);
         }
         result = new SnowballFilter(result, new org.tartarus.snowball.ext.RussianStemmer());
         return new TokenStreamComponents(source, result);
     }
     else
     {
         Tokenizer source = new RussianLetterTokenizer(matchVersion, reader);
         TokenStream result = new LowerCaseFilter(matchVersion, source);
         result = new StopFilter(matchVersion, result, stopwords);
         if (!stemExclusionSet.Empty)
         {
             result = new SetKeywordMarkerFilter(result, stemExclusionSet);
         }
         result = new SnowballFilter(result, new org.tartarus.snowball.ext.RussianStemmer());
         return new TokenStreamComponents(source, result);
     }
 }