A TokenFilter that applies GreekStemmer to stem Greek words.

To prevent terms from being stemmed, use an instance of SetKeywordMarkerFilter, or a custom TokenFilter that sets the KeywordAttribute, before this TokenStream.

NOTE: Input is expected to be casefolded for Greek (including folding of final sigma to sigma), and with diacritics removed. This can be achieved by using either GreekLowerCaseFilter or ICUFoldingFilter before GreekStemFilter. This API is experimental (@lucene.experimental).

Inheritance: TokenFilter
Esempio n. 1
0
        /// <summary>
        /// Assembles the Greek analysis chain that tokenizes the text supplied by
        /// <paramref name="reader"/>.
        /// </summary>
        /// <param name="fieldName">Name of the field being analyzed; not consulted by this method.</param>
        /// <param name="reader">Source of the text handed to the <see cref="StandardTokenizer"/>.</param>
        /// <returns>
        /// A <see cref="TokenStreamComponents"/> built from a <see cref="StandardTokenizer"/> wrapped, in order,
        /// by <see cref="GreekLowerCaseFilter"/>, <see cref="StandardFilter"/>, <see cref="StopFilter"/> and
        /// <see cref="GreekStemFilter"/>. The <see cref="StandardFilter"/> and <see cref="GreekStemFilter"/>
        /// stages are only added when the match version is <c>LuceneVersion.LUCENE_31</c> or later.
        /// </returns>
        public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
        {
            Tokenizer tokenizer = new StandardTokenizer(matchVersion, reader);

            // Lower-casing always runs first, directly on the tokenizer output.
            TokenStream chain = new GreekLowerCaseFilter(matchVersion, tokenizer);

            // Both optional stages are gated on the same version check, so evaluate it once.
            bool isLucene31OrLater = matchVersion.OnOrAfter(LuceneVersion.LUCENE_31);

            if (isLucene31OrLater)
            {
                chain = new StandardFilter(matchVersion, chain);
            }

            chain = new StopFilter(matchVersion, chain, stopwords);

            if (isLucene31OrLater)
            {
                chain = new GreekStemFilter(chain);
            }

            return new TokenStreamComponents(tokenizer, chain);
        }
Esempio n. 2
0
        /// <summary>
        /// Assembles the Greek analysis chain that tokenizes the text supplied by
        /// <paramref name="reader"/>.
        /// </summary>
        /// <param name="fieldName">Name of the field being analyzed; not consulted by this method.</param>
        /// <param name="reader">Source of the text handed to the <see cref="StandardTokenizer"/>.</param>
        /// <returns>
        /// A <see cref="TokenStreamComponents"/> built from a <see cref="StandardTokenizer"/> wrapped, in order,
        /// by <see cref="GreekLowerCaseFilter"/>, <see cref="StandardFilter"/>, <see cref="StopFilter"/> and
        /// <see cref="GreekStemFilter"/>. The <see cref="StandardFilter"/> and <see cref="GreekStemFilter"/>
        /// stages are only added when the match version is <c>LuceneVersion.LUCENE_31</c> or later.
        /// </returns>
        protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
        {
            Tokenizer tokenizer = new StandardTokenizer(m_matchVersion, reader);

            // Lower-casing always runs first, directly on the tokenizer output.
            TokenStream chain = new GreekLowerCaseFilter(m_matchVersion, tokenizer);

            // Suppress obsolete-member warnings (CS0612/CS0618) for the legacy version handling below.
#pragma warning disable 612, 618
            // Both optional stages are gated on the same version check, so evaluate it once.
            bool isLucene31OrLater = m_matchVersion.OnOrAfter(LuceneVersion.LUCENE_31);

            if (isLucene31OrLater)
            {
                chain = new StandardFilter(m_matchVersion, chain);
            }

            chain = new StopFilter(m_matchVersion, chain, m_stopwords);
#pragma warning restore 612, 618

            if (isLucene31OrLater)
            {
                chain = new GreekStemFilter(chain);
            }

            return new TokenStreamComponents(tokenizer, chain);
        }
Esempio n. 3
0
        /// <summary>
        /// Assembles the Greek analysis chain that tokenizes the text supplied by
        /// <paramref name="reader"/>.
        /// </summary>
        /// <param name="fieldName">Name of the field being analyzed; not consulted by this method.</param>
        /// <param name="reader">Source of the text handed to the <see cref="StandardTokenizer"/>.</param>
        /// <returns>
        /// A <see cref="TokenStreamComponents"/> built from a <see cref="StandardTokenizer"/> wrapped, in order,
        /// by <see cref="GreekLowerCaseFilter"/>, <see cref="StandardFilter"/>, <see cref="StopFilter"/> and
        /// <see cref="GreekStemFilter"/>. The <see cref="StandardFilter"/> and <see cref="GreekStemFilter"/>
        /// stages are only added when the match version is <c>LuceneVersion.LUCENE_31</c> or later.
        /// </returns>
        public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
        {
            Tokenizer tokenizer = new StandardTokenizer(matchVersion, reader);

            // Lower-casing always runs first, directly on the tokenizer output.
            TokenStream filtered = new GreekLowerCaseFilter(matchVersion, tokenizer);

            // Suppress obsolete-member warnings (CS0612/CS0618) for the legacy version handling below.
#pragma warning disable 612, 618
            // The standard filter and the stemmer share one version gate; compute it a single time.
            bool stemmingSupported = matchVersion.OnOrAfter(LuceneVersion.LUCENE_31);

            if (stemmingSupported)
            {
                filtered = new StandardFilter(matchVersion, filtered);
            }

            filtered = new StopFilter(matchVersion, filtered, stopwords);
#pragma warning restore 612, 618

            if (stemmingSupported)
            {
                filtered = new GreekStemFilter(filtered);
            }

            return new TokenStreamComponents(tokenizer, filtered);
        }