/// <summary>
/// Creates <see cref="TokenStreamComponents"/> used to tokenize all the text
/// in the provided <see cref="TextReader"/>.
/// </summary>
/// <returns>
/// <see cref="TokenStreamComponents"/> built from a <see cref="StandardTokenizer"/>
/// (or, for match versions before 3.1, an <see cref="ArabicLetterTokenizer"/>)
/// filtered with <see cref="LowerCaseFilter"/>, <see cref="ArabicNormalizationFilter"/>,
/// <see cref="PersianNormalizationFilter"/> and Persian stop words.
/// </returns>
protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer tokenizer;
#pragma warning disable 612, 618
    bool useStandardTokenizer = m_matchVersion.OnOrAfter(LuceneVersion.LUCENE_31);
#pragma warning restore 612, 618
    if (useStandardTokenizer)
    {
        tokenizer = new StandardTokenizer(m_matchVersion, reader);
    }
    else
    {
#pragma warning disable 612, 618
        tokenizer = new ArabicLetterTokenizer(m_matchVersion, reader);
#pragma warning restore 612, 618
    }

    TokenStream stream = new LowerCaseFilter(m_matchVersion, tokenizer);
    stream = new ArabicNormalizationFilter(stream);
    // Additional Persian-specific normalization.
    stream = new PersianNormalizationFilter(stream);
    // The order here is important: the stop word list was normalized with the
    // filters above, so stop filtering must come last.
    return new TokenStreamComponents(tokenizer, new StopFilter(m_matchVersion, stream, m_stopwords));
}
/// <summary>
/// Tokenizes <paramref name="input"/> with an <see cref="ArabicLetterTokenizer"/>,
/// runs the stream through a <see cref="PersianNormalizationFilter"/>, and asserts
/// that it produces exactly the single token <paramref name="expected"/>.
/// </summary>
private void Check(string input, string expected)
{
    var tokenizer = new ArabicLetterTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
    var normalized = new PersianNormalizationFilter(tokenizer);
    AssertTokenStreamContents(normalized, new[] { expected });
}
/// <summary>
/// Tokenizes <paramref name="input"/> with an <see cref="ArabicLetterTokenizer"/>,
/// applies a <see cref="PersianNormalizationFilter"/>, and asserts the resulting
/// stream contains exactly the single token <paramref name="expected"/>.
/// </summary>
private void Check(string input, string expected)
{
    // The version-taking ArabicLetterTokenizer constructor is obsolete; the
    // suppression mirrors the production code under test.
#pragma warning disable 612, 618
    var tokenizer = new ArabicLetterTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
#pragma warning restore 612, 618
    var normalized = new PersianNormalizationFilter(tokenizer);
    AssertTokenStreamContents(normalized, new[] { expected });
}
/// <summary>
/// Tokenizes <paramref name="input"/> with an <see cref="ArabicLetterTokenizer"/>,
/// runs it through a <see cref="PersianNormalizationFilter"/>, and asserts that
/// the stream yields exactly the single token <paramref name="expected"/>.
/// </summary>
private void Check(string input, string expected)
{
    // Use the C# keyword 'string' rather than the 'String' BCL alias
    // (IDE0049 / framework design style); behavior is identical.
    ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(new StringReader(input));
    PersianNormalizationFilter filter = new PersianNormalizationFilter(tokenStream);
    AssertTokenStreamContents(filter, new string[] { expected });
}
/// <summary>
/// Builds the Persian analysis chain for the given field: standard
/// tokenization, lower-casing, Persian normalization, stop-word removal,
/// and finally Persian stemming.
/// </summary>
public override TokenStream TokenStream(string fieldname, TextReader reader)
{
    TokenStream chain = new StandardTokenizer(_version, reader);
    chain = new LowerCaseFilter(chain);
    chain = new PersianNormalizationFilter(chain);
    chain = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(_version), chain, _stoptable);
    return new PersianStemFilter(chain);
}
/// <summary>
/// Assembles the Persian token filter chain for the given field:
/// a <see cref="StandardTokenizer"/> followed by lower-casing, Persian
/// normalization, stop-word removal, and Persian stemming.
/// </summary>
public override TokenStream TokenStream(string fieldname, TextReader reader)
{
    TokenStream stream = new StandardTokenizer(_version, reader);
    stream = new LowerCaseFilter(stream);
    stream = new PersianNormalizationFilter(stream);
    bool enablePositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(_version);
    stream = new StopFilter(enablePositionIncrements, stream, _stoptable);
    stream = new PersianStemFilter(stream);
    return stream;
}
/// <summary>
/// Creates a <see cref="TokenStream"/> which tokenizes all the text in the
/// provided <see cref="TextReader"/>.
/// </summary>
/// <returns>
/// A <see cref="TokenStream"/> built from an <see cref="ArabicLetterTokenizer"/>
/// filtered with <see cref="LowerCaseFilter"/>, <see cref="ArabicNormalizationFilter"/>,
/// <see cref="PersianNormalizationFilter"/> and Persian stop words.
/// </returns>
public override TokenStream TokenStream(String fieldName, TextReader reader)
{
    TokenStream chain = new ArabicLetterTokenizer(reader);
    chain = new LowerCaseFilter(chain);
    chain = new ArabicNormalizationFilter(chain);
    // Additional Persian-specific normalization.
    chain = new PersianNormalizationFilter(chain);
    // The order here is important: the stop word list was normalized with the
    // filters above, so stop filtering must come last.
    chain = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion), chain, stoptable);
    return chain;
}
/// <summary>
/// Creates <see cref="TokenStreamComponents"/> used to tokenize all the text
/// in the provided <c>Reader</c>.
/// </summary>
/// <returns>
/// <see cref="TokenStreamComponents"/> built from a <see cref="StandardTokenizer"/>
/// (or an <see cref="ArabicLetterTokenizer"/> for match versions before 3.1)
/// filtered with <see cref="LowerCaseFilter"/>, <see cref="ArabicNormalizationFilter"/>,
/// <see cref="PersianNormalizationFilter"/> and Persian stop words.
/// </returns>
public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    // StandardTokenizer replaced ArabicLetterTokenizer as of Lucene 3.1.
    Tokenizer source = matchVersion.OnOrAfter(LuceneVersion.LUCENE_31)
        ? (Tokenizer)new StandardTokenizer(matchVersion, reader)
        : new ArabicLetterTokenizer(matchVersion, reader);

    TokenStream stream = new LowerCaseFilter(matchVersion, source);
    stream = new ArabicNormalizationFilter(stream);
    // Additional Persian-specific normalization.
    stream = new PersianNormalizationFilter(stream);
    // The order here is important: the stop word list was normalized with the
    // filters above, so stop filtering must come last.
    return new TokenStreamComponents(source, new StopFilter(matchVersion, stream, stopwords));
}
/// <summary>
/// Creates a <see cref="TokenStream"/> which tokenizes all the text in the
/// provided <see cref="TextReader"/>.
/// </summary>
/// <returns>
/// A <see cref="TokenStream"/> built from an <see cref="ArabicLetterTokenizer"/>
/// filtered with <see cref="LowerCaseFilter"/>, <see cref="ArabicNormalizationFilter"/>,
/// <see cref="PersianNormalizationFilter"/> and Persian stop words.
/// </returns>
public override TokenStream TokenStream(String fieldName, TextReader reader)
{
    TokenStream tokenized = new ArabicLetterTokenizer(reader);
    TokenStream lowered = new LowerCaseFilter(tokenized);
    TokenStream arabicNormalized = new ArabicNormalizationFilter(lowered);
    // Additional Persian-specific normalization.
    TokenStream persianNormalized = new PersianNormalizationFilter(arabicNormalized);
    // The order here is important: the stop word list was normalized with the
    // filters above, so stop filtering must come last.
    bool enablePositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion);
    return new StopFilter(enablePositionIncrements, persianNormalized, stoptable);
}