상속: TokenFilter
예제 #1
0
 public void TestElision2()
 {
     String test = "Plop, juste pour voir l'embrouille avec O'brian. M'enfin.";
     Tokenizer tokenizer = new StandardTokenizer(Version.LUCENE_CURRENT, new StringReader(test));
     HashSet<String> articles = new HashSet<String>();
     articles.Add("l");
     articles.Add("M");
     TokenFilter filter = new ElisionFilter(tokenizer, articles);
     List<string> tas = Filtre(filter);
     Assert.AreEqual("embrouille", tas[4]);
     Assert.AreEqual("O'brian", tas[6]);
     Assert.AreEqual("enfin", tas[7]);
 }
예제 #2
0
        /// <summary>
        /// Creates
        /// <see cref="TokenStreamComponents"/>
        /// used to tokenize all the text in the provided <see cref="TextReader"/>.
        /// </summary>
        /// <returns> <see cref="TokenStreamComponents"/>
        ///         built from a <see cref="StandardTokenizer"/> filtered with
        ///         <see cref="StandardFilter"/>, <see cref="ElisionFilter"/>,
        ///         <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/>,
        ///         <see cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
        ///         provided, and <see cref="FrenchLightStemFilter"/> </returns>
        ///
        protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
        {
#pragma warning disable 612, 618
            if (m_matchVersion.OnOrAfter(LuceneVersion.LUCENE_31))
#pragma warning restore 612, 618
            {
                Tokenizer   source = new StandardTokenizer(m_matchVersion, reader);
                TokenStream result = new StandardFilter(m_matchVersion, source);
                result = new ElisionFilter(result, DEFAULT_ARTICLES);
                result = new LowerCaseFilter(m_matchVersion, result);
                result = new StopFilter(m_matchVersion, result, m_stopwords);
                if (excltable.Count > 0)
                {
                    result = new SetKeywordMarkerFilter(result, excltable);
                }
#pragma warning disable 612, 618
                if (m_matchVersion.OnOrAfter(LuceneVersion.LUCENE_36))
#pragma warning restore 612, 618
                {
                    result = new FrenchLightStemFilter(result);
                }
                else
                {
                    result = new SnowballFilter(result, new Tartarus.Snowball.Ext.FrenchStemmer());
                }
                return(new TokenStreamComponents(source, result));
            }
            else
            {
                Tokenizer   source = new StandardTokenizer(m_matchVersion, reader);
                TokenStream result = new StandardFilter(m_matchVersion, source);
                result = new StopFilter(m_matchVersion, result, m_stopwords);
                if (excltable.Count > 0)
                {
                    result = new SetKeywordMarkerFilter(result, excltable);
                }
#pragma warning disable 612, 618
                result = new FrenchStemFilter(result);
#pragma warning restore 612, 618
                // Convert to lowercase after stemming!
                return(new TokenStreamComponents(source, new LowerCaseFilter(m_matchVersion, result)));
            }
        }