/// <summary>
/// Feeds <paramref name="input"/> through a whitespace <see cref="MockTokenizer"/>
/// chained into a <see cref="HindiStemFilter"/>, then asserts that the resulting
/// stream yields exactly the single token <paramref name="output"/>.
/// </summary>
/// <param name="input">Raw text to tokenize and stem.</param>
/// <param name="output">The one stemmed token the stream is expected to produce.</param>
private void Check(string input, string output)
{
    // Whitespace tokenization, no lowercasing (third argument false).
    Tokenizer whitespaceTokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
    TokenFilter stemmer = new HindiStemFilter(whitespaceTokenizer);
    AssertTokenStreamContents(stemmer, new string[] { output });
}
/// <summary>
/// Creates <see cref="Analyzer.TokenStreamComponents"/>
/// used to tokenize all the text in the provided <see cref="TextReader"/>.
/// </summary>
/// <returns> <see cref="Analyzer.TokenStreamComponents"/>
/// built from a <see cref="StandardTokenizer"/> filtered with
/// <see cref="LowerCaseFilter"/>, <see cref="IndicNormalizationFilter"/>,
/// <see cref="HindiNormalizationFilter"/>, <see cref="SetKeywordMarkerFilter"/>
/// if a stem exclusion set is provided, <see cref="HindiStemFilter"/>, and
/// Hindi Stop words </returns>
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    // Lucene 3.6+ uses the standard tokenizer; earlier versions used the
    // (now deprecated) IndicTokenizer, hence the obsolete-member suppression.
#pragma warning disable 612, 618
    Tokenizer tokenizer = m_matchVersion.OnOrAfter(LuceneVersion.LUCENE_36)
        ? (Tokenizer)new StandardTokenizer(m_matchVersion, reader)
        : new IndicTokenizer(m_matchVersion, reader);
#pragma warning restore 612, 618

    TokenStream stream = new LowerCaseFilter(m_matchVersion, tokenizer);
    // Protect explicitly excluded terms from stemming before any normalization runs.
    if (stemExclusionSet.Count > 0)
    {
        stream = new SetKeywordMarkerFilter(stream, stemExclusionSet);
    }
    stream = new IndicNormalizationFilter(stream);
    stream = new HindiNormalizationFilter(stream);
    stream = new StopFilter(m_matchVersion, stream, m_stopwords);
    stream = new HindiStemFilter(stream);
    return new TokenStreamComponents(tokenizer, stream);
}