Example No. 1
        public virtual void TestWithKeywordAttribute()
        {
            CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);

            set.Add("hole");
            CzechStemFilter filter = new CzechStemFilter(new SetKeywordMarkerFilter(new MockTokenizer(new StringReader("hole desek"), MockTokenizer.WHITESPACE, false), set));

            AssertTokenStreamContents(filter, new string[] { "hole", "desk" });
        }
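The test above exercises the keyword-marking path: SetKeywordMarkerFilter flags "hole" as a keyword so CzechStemFilter leaves it untouched, while "desek" is stemmed to "desk". Below is a minimal standalone sketch of the same chain outside the test harness; it substitutes WhitespaceTokenizer for the test-only MockTokenizer, and the namespaces and attribute names (ICharTermAttribute, IKeywordAttribute) are assumed from the Lucene.NET 4.8 API.

    // Standalone sketch (not part of the original test): run the same filter
    // chain and print each term together with its keyword flag.
    using System;
    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.Cz;
    using Lucene.Net.Analysis.Miscellaneous;
    using Lucene.Net.Analysis.TokenAttributes;
    using Lucene.Net.Analysis.Util;
    using Lucene.Net.Util;

    var keywords = new CharArraySet(LuceneVersion.LUCENE_48, new[] { "hole" }, true);
    TokenStream chain = new CzechStemFilter(
        new SetKeywordMarkerFilter(
            new WhitespaceTokenizer(LuceneVersion.LUCENE_48, new StringReader("hole desek")),
            keywords));

    var term = chain.AddAttribute<ICharTermAttribute>();
    var keyword = chain.AddAttribute<IKeywordAttribute>();
    chain.Reset();
    while (chain.IncrementToken())
    {
        // Expected per the test: "hole" passes through unchanged (keyword), "desek" becomes "desk".
        Console.WriteLine($"{term} keyword={keyword.IsKeyword}");
    }
    chain.End();
    chain.Dispose();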
Example No. 2
        /// <summary>
        /// Creates
        /// <seealso cref="TokenStreamComponents"/>
        /// used to tokenize all the text in the provided <seealso cref="TextReader"/>.
        /// </summary>
        /// <returns> <seealso cref="TokenStreamComponents"/>
        ///         built from a <seealso cref="StandardTokenizer"/> filtered with
        ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>,
        ///         and <seealso cref="CzechStemFilter"/> (only if version is >= LUCENE_31). If
        ///         a version is >= LUCENE_31 and a stem exclusion set is provided via
        ///         <seealso cref="CzechAnalyzer(LuceneVersion, CharArraySet, CharArraySet)"/> a
        ///         <seealso cref="SetKeywordMarkerFilter"/> is added before
        ///         <seealso cref="CzechStemFilter"/>. </returns>

        public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
        {
            Tokenizer   source = new StandardTokenizer(matchVersion, reader);
            TokenStream result = new StandardFilter(matchVersion, source);

            result = new LowerCaseFilter(matchVersion, result);
            result = new StopFilter(matchVersion, result, stopwords);
            if (matchVersion.OnOrAfter(LuceneVersion.LUCENE_31))
            {
                if (this.stemExclusionTable.Any())
                {
                    result = new SetKeywordMarkerFilter(result, stemExclusionTable);
                }
                result = new CzechStemFilter(result);
            }
            return new TokenStreamComponents(source, result);
        }
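As the remarks describe, the SetKeywordMarkerFilter step only appears when the analyzer was built with a stem exclusion set. A hedged construction sketch follows; CzechAnalyzer.DefaultStopSet and the three-argument constructor are assumptions based on the CzechAnalyzer(LuceneVersion, CharArraySet, CharArraySet) overload the doc comment refers to.

    // Sketch: build the analyzer with a stem exclusion set so that CreateComponents
    // above inserts a SetKeywordMarkerFilter before CzechStemFilter.
    // DefaultStopSet and the constructor overload are assumed from the docs.
    var matchVersion = LuceneVersion.LUCENE_48;
    var stemExclusions = new CharArraySet(matchVersion, new[] { "hole" }, true);
    var analyzer = new CzechAnalyzer(matchVersion, CzechAnalyzer.DefaultStopSet, stemExclusions);

With an empty exclusion set, the chain reduces to StandardTokenizer, StandardFilter, LowerCaseFilter, StopFilter, and CzechStemFilter, as built in the method above.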
Example No. 3
        /// <summary>
        /// Creates
        /// <see cref="TokenStreamComponents"/>
        /// used to tokenize all the text in the provided <see cref="TextReader"/>.
        /// </summary>
        /// <returns> <see cref="TokenStreamComponents"/>
        ///         built from a <see cref="StandardTokenizer"/> filtered with
        ///         <see cref="StandardFilter"/>, <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/>,
        ///         and <see cref="CzechStemFilter"/> (only if version is >= LUCENE_31). If
        ///         a version is >= LUCENE_31 and a stem exclusion set is provided via
        ///         <see cref="CzechAnalyzer(LuceneVersion, CharArraySet, CharArraySet)"/> a
        ///         <see cref="SetKeywordMarkerFilter"/> is added before
        ///         <see cref="CzechStemFilter"/>. </returns>

        protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
        {
            Tokenizer   source = new StandardTokenizer(m_matchVersion, reader);
            TokenStream result = new StandardFilter(m_matchVersion, source);

            result = new LowerCaseFilter(m_matchVersion, result);
            result = new StopFilter(m_matchVersion, result, m_stopwords);
#pragma warning disable 612, 618
            if (m_matchVersion.OnOrAfter(LuceneVersion.LUCENE_31))
#pragma warning restore 612, 618
            {
                if (this.stemExclusionTable.Count > 0)
                {
                    result = new SetKeywordMarkerFilter(result, stemExclusionTable);
                }
                result = new CzechStemFilter(result);
            }
            return new TokenStreamComponents(source, result);
        }
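For completeness, here is a short consumption sketch for the analyzer whose components are built above. The GetTokenStream overload taking a string and the ICharTermAttribute name are assumed from the Lucene.NET 4.8 API, and the printed tokens depend on the default Czech stop set and the stemmer.

    // Usage sketch (assumed Lucene.NET 4.8 API): tokenize a string and print
    // the terms that survive stop filtering and stemming.
    using System;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Cz;
    using Lucene.Net.Analysis.TokenAttributes;
    using Lucene.Net.Util;

    var analyzer = new CzechAnalyzer(LuceneVersion.LUCENE_48);
    using (TokenStream ts = analyzer.GetTokenStream("content", "nové desky a staré knihy"))
    {
        var term = ts.AddAttribute<ICharTermAttribute>();
        ts.Reset();
        while (ts.IncrementToken())
        {
            Console.WriteLine(term.ToString());
        }
        ts.End();
    }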
Example No. 4
 public virtual void TestWithKeywordAttribute()
 {
     CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
     set.Add("hole");
     CzechStemFilter filter = new CzechStemFilter(new SetKeywordMarkerFilter(new MockTokenizer(new StringReader("hole desek"), MockTokenizer.WHITESPACE, false), set));
     AssertTokenStreamContents(filter, new string[] { "hole", "desk" });
 }
Example No. 5
        /// <summary>
        /// Creates
        /// <seealso cref="TokenStreamComponents"/>
        /// used to tokenize all the text in the provided <seealso cref="TextReader"/>.
        /// </summary>
        /// <returns> <seealso cref="TokenStreamComponents"/>
        ///         built from a <seealso cref="StandardTokenizer"/> filtered with
        ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>,
        ///         and <seealso cref="CzechStemFilter"/> (only if version is >= LUCENE_31). If
        ///         a version is >= LUCENE_31 and a stem exclusion set is provided via
        ///         <seealso cref="CzechAnalyzer(LuceneVersion, CharArraySet, CharArraySet)"/> a
        ///         <seealso cref="SetKeywordMarkerFilter"/> is added before
        ///         <seealso cref="CzechStemFilter"/>. </returns>

        public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
        {
            Tokenizer source = new StandardTokenizer(matchVersion, reader);
            TokenStream result = new StandardFilter(matchVersion, source);
            result = new LowerCaseFilter(matchVersion, result);
            result = new StopFilter(matchVersion, result, stopwords);
#pragma warning disable 612, 618
            if (matchVersion.OnOrAfter(LuceneVersion.LUCENE_31))
#pragma warning restore 612, 618
            {
                if (this.stemExclusionTable.Any())
                {
                    result = new SetKeywordMarkerFilter(result, stemExclusionTable);
                }
                result = new CzechStemFilter(result);
            }
            return new TokenStreamComponents(source, result);
        }