Ejemplo n.º 1
0
        /// <summary>
        /// Builds the analysis chain that tokenizes the text supplied by
        /// <paramref name="reader"/>.
        /// </summary>
        /// <param name="fieldName"> Name of the field being analyzed; this implementation does not consult it. </param>
        /// <param name="reader"> Text source handed to the <see cref="StandardTokenizer"/>. </param>
        /// <returns>
        /// A <see cref="TokenStreamComponents"/> whose source is a <see cref="StandardTokenizer"/>
        /// and whose output stream applies, in order:
        /// <see cref="StandardFilter"/>,
        /// a <see cref="StopFilter"/> over <c>HYPHENATIONS</c>,
        /// an <see cref="ElisionFilter"/> over <c>DEFAULT_ARTICLES</c>,
        /// <see cref="IrishLowerCaseFilter"/>,
        /// a <see cref="StopFilter"/> over the configured stop words,
        /// <see cref="SetKeywordMarkerFilter"/> (only when the stem exclusion set is non-empty),
        /// and finally a <see cref="SnowballFilter"/> driven by an <see cref="IrishStemmer"/>.
        /// </returns>
        protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
        {
            Tokenizer tokenizer = new StandardTokenizer(m_matchVersion, reader);

            // First stop filter: drops the entries listed in HYPHENATIONS.
            StopFilter hyphenationStop = new StopFilter(
                m_matchVersion,
                new StandardFilter(m_matchVersion, tokenizer),
                HYPHENATIONS);

            // The version check and the setter are both covered by the obsolete-member
            // warnings (612/618), hence the pragma scope around them.
#pragma warning disable 612, 618
            bool beforeLucene44 = !m_matchVersion.OnOrAfter(LuceneVersion.LUCENE_44);
            if (beforeLucene44)
            {
                // Match versions earlier than 4.4 run this filter with position increments turned off.
                hyphenationStop.SetEnablePositionIncrements(false);
            }
#pragma warning restore 612, 618

            TokenStream chain = hyphenationStop;
            chain = new ElisionFilter(chain, DEFAULT_ARTICLES);
            chain = new IrishLowerCaseFilter(chain);
            chain = new StopFilter(m_matchVersion, chain, m_stopwords);

            // Keyword marking is only inserted when there is at least one term to exclude from stemming.
            TokenStream stemmerInput = stemExclusionSet.Count > 0
                ? new SetKeywordMarkerFilter(chain, stemExclusionSet)
                : chain;

            TokenStream stemmed = new SnowballFilter(stemmerInput, new IrishStemmer());
            return new TokenStreamComponents(tokenizer, stemmed);
        }