Esempio n. 1
0
        /// <summary>
        /// Assembles the <see cref="TokenStreamComponents"/> that tokenize the text
        /// supplied by <paramref name="reader"/>.
        /// </summary>
        /// <param name="fieldName"> name of the field being analyzed; not consulted by this method </param>
        /// <param name="reader"> source of the text handed to the tokenizer </param>
        /// <returns> <see cref="TokenStreamComponents"/> whose tokenizer is a
        ///         <see cref="StandardTokenizer"/> when the match version is
        ///         <c>LUCENE_36</c> or later and an <see cref="IndicTokenizer"/> otherwise,
        ///         wrapped, in this order, by <see cref="LowerCaseFilter"/>,
        ///         <see cref="SetKeywordMarkerFilter"/> (only when the stem exclusion set
        ///         is not empty), <see cref="IndicNormalizationFilter"/>,
        ///         <see cref="HindiNormalizationFilter"/>, <see cref="StopFilter"/> with the
        ///         analyzer's stop words, and <see cref="HindiStemFilter"/> </returns>
        protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
        {
            // The tokenizer implementation depends on the requested compatibility version.
            // The cast gives both branches of the conditional a common type.
            Tokenizer tokenizer = matchVersion.onOrAfter(Version.LUCENE_36)
                ? (Tokenizer) new StandardTokenizer(matchVersion, reader)
                : new IndicTokenizer(matchVersion, reader);

            TokenStream chain = new LowerCaseFilter(matchVersion, tokenizer);

            // The keyword marker is inserted ahead of the remaining filters, and only
            // when there is at least one term in the exclusion set.
            bool hasStemExclusions = !stemExclusionSet.Empty;
            if (hasStemExclusions)
            {
                chain = new SetKeywordMarkerFilter(chain, stemExclusionSet);
            }

            chain = new IndicNormalizationFilter(chain);
            chain = new HindiNormalizationFilter(chain);
            chain = new StopFilter(matchVersion, chain, stopwords);
            chain = new HindiStemFilter(chain);

            return new TokenStreamComponents(tokenizer, chain);
        }
Esempio n. 2
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: private void check(String input, String output) throws java.io.IOException
        /// <summary>
        /// Feeds <paramref name="input"/> through a <see cref="MockTokenizer"/> (created with
        /// <c>MockTokenizer.WHITESPACE</c> and <c>false</c>) and a <see cref="HindiStemFilter"/>,
        /// then passes the filtered stream to <c>assertTokenStreamContents</c> together with a
        /// one-element array holding <paramref name="output"/>.
        /// </summary>
        /// <param name="input"> text wrapped in a <see cref="StringReader"/> for the tokenizer </param>
        /// <param name="output"> the single string placed in the array given to the assertion helper </param>
        private void check(string input, string output)
        {
            StringReader text = new StringReader(input);
            Tokenizer words = new MockTokenizer(text, MockTokenizer.WHITESPACE, false);
            TokenFilter stemmed = new HindiStemFilter(words);

            string[] expected = { output };
            assertTokenStreamContents(stemmed, expected);
        }
Esempio n. 3
0
 //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
 //ORIGINAL LINE: private void check(String input, String output) throws java.io.IOException
 /// <summary>
 /// Builds a <see cref="HindiStemFilter"/> over a <see cref="MockTokenizer"/> that reads
 /// <paramref name="input"/>, and hands it to <c>assertTokenStreamContents</c> with
 /// <paramref name="output"/> as the only element of the string array argument.
 /// </summary>
 /// <param name="input"> text supplied to the tokenizer through a <see cref="StringReader"/> </param>
 /// <param name="output"> sole entry of the array passed to the assertion helper </param>
 private void check(string input, string output)
 {
     // Tokenizer arguments: the reader, MockTokenizer.WHITESPACE, and false.
     Tokenizer mockTokens = new MockTokenizer(
         new StringReader(input),
         MockTokenizer.WHITESPACE,
         false);

     TokenFilter stemFilter = new HindiStemFilter(mockTokens);

     assertTokenStreamContents(stemFilter, new string[] { output });
 }
Esempio n. 4
0
 /// <summary>
 /// Builds the <see cref="TokenStreamComponents"/> used to tokenize everything read
 /// from <paramref name="reader"/>.
 /// </summary>
 /// <param name="fieldName"> name of the field being analyzed; this method does not read it </param>
 /// <param name="reader"> text source given to the tokenizer </param>
 /// <returns> <see cref="TokenStreamComponents"/> pairing the tokenizer
 ///         (<see cref="IndicTokenizer"/> for match versions before <c>LUCENE_36</c>,
 ///         <see cref="StandardTokenizer"/> otherwise) with a chain made of
 ///         <see cref="LowerCaseFilter"/>, an optional <see cref="SetKeywordMarkerFilter"/>
 ///         (present when the stem exclusion set is not empty),
 ///         <see cref="IndicNormalizationFilter"/>, <see cref="HindiNormalizationFilter"/>,
 ///         <see cref="StopFilter"/> with the analyzer's stop words, and
 ///         <see cref="HindiStemFilter"/>, applied in that order </returns>
 protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
 {
     Tokenizer tokenizer;
     if (!matchVersion.onOrAfter(Version.LUCENE_36))
     {
         // Match versions earlier than LUCENE_36 use the Indic tokenizer.
         tokenizer = new IndicTokenizer(matchVersion, reader);
     }
     else
     {
         tokenizer = new StandardTokenizer(matchVersion, reader);
     }

     TokenStream lowered = new LowerCaseFilter(matchVersion, tokenizer);

     // Insert the keyword marker only when there are terms in the exclusion set.
     TokenStream marked = lowered;
     if (stemExclusionSet.Empty == false)
     {
         marked = new SetKeywordMarkerFilter(lowered, stemExclusionSet);
     }

     TokenStream indicNormalized = new IndicNormalizationFilter(marked);
     TokenStream hindiNormalized = new HindiNormalizationFilter(indicNormalized);
     TokenStream stopFiltered = new StopFilter(matchVersion, hindiNormalized, stopwords);
     TokenStream stemmed = new HindiStemFilter(stopFiltered);

     return new TokenStreamComponents(tokenizer, stemmed);
 }