public virtual void TestWithKeywordAttribute() { CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); set.add("строеве"); MockTokenizer tokenStream = new MockTokenizer(new StringReader("строевете строеве"), MockTokenizer.WHITESPACE, false); BulgarianStemFilter filter = new BulgarianStemFilter(new SetKeywordMarkerFilter(tokenStream, set)); AssertTokenStreamContents(filter, new string[] { "строй", "строеве" }); }
/// <summary> /// Creates a /// <see cref="Analyzer.TokenStreamComponents"/> /// which tokenizes all the text in the provided <see cref="TextReader"/>. /// </summary> /// <returns> A /// <see cref="Analyzer.TokenStreamComponents"/> /// built from an <see cref="StandardTokenizer"/> filtered with /// <see cref="StandardFilter"/>, <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/>, /// <see cref="SetKeywordMarkerFilter"/> if a stem exclusion set is /// provided and <see cref="BulgarianStemFilter"/>. </returns> protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader) { Tokenizer source = new StandardTokenizer(m_matchVersion, reader); TokenStream result = new StandardFilter(m_matchVersion, source); result = new LowerCaseFilter(m_matchVersion, result); result = new StopFilter(m_matchVersion, result, m_stopwords); if (stemExclusionSet.Count > 0) { result = new SetKeywordMarkerFilter(result, stemExclusionSet); } result = new BulgarianStemFilter(result); return(new TokenStreamComponents(source, result)); }
public virtual void TestWithKeywordAttribute() { CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); set.add("строеве"); MockTokenizer tokenStream = new MockTokenizer(new StringReader("строевете строеве"), MockTokenizer.WHITESPACE, false); BulgarianStemFilter filter = new BulgarianStemFilter(new SetKeywordMarkerFilter(tokenStream, set)); AssertTokenStreamContents(filter, new string[] { "строй", "строеве" }); }