Inheritance: Lucene.Net.Analysis.LetterTokenizer
Example #1
        /// <summary>
        /// Tokenizes <paramref name="input"/> with an <see cref="ArabicLetterTokenizer"/>,
        /// runs the stream through an <see cref="ArabicStemFilter"/>, and asserts that
        /// the filtered stream yields exactly the single token <paramref name="expected"/>.
        /// </summary>
        private void Check(string input, string expected)
        {
            var stemmed = new ArabicStemFilter(
                new ArabicLetterTokenizer(new StringReader(input)));

            AssertTokenStreamContents(stemmed, new String[] { expected });
        }
 /// <summary>
 /// Verifies Persian normalization: <paramref name="input"/> is tokenized by an
 /// <see cref="ArabicLetterTokenizer"/>, passed through a
 /// <see cref="PersianNormalizationFilter"/>, and the resulting stream must
 /// contain exactly the single token <paramref name="expected"/>.
 /// </summary>
 private void Check(String input, String expected)
 {
     var normalized = new PersianNormalizationFilter(
         new ArabicLetterTokenizer(new StringReader(input)));
     AssertTokenStreamContents(normalized, new String[] { expected });
 }
Example #3
        //DIGY
        ///**
        // * Builds an analyzer with the given stop words.  Lines can be commented out using {@link #STOPWORDS_COMMENT}
        // *
        // * @deprecated Use {@link #ArabicAnalyzer(Version, File)} instead
        // */
        //public ArabicAnalyzer(File stopwords)
        //{
        //    this(Version.LUCENE_24, stopwords);
        //}

        ///**
        // * Builds an analyzer with the given stop words.  Lines can be commented out using {@link #STOPWORDS_COMMENT}
        // */
        //public ArabicAnalyzer(Version matchVersion, File stopwords)
        //{
        //    stoptable = WordlistLoader.getWordSet(stopwords, STOPWORDS_COMMENT);
        //    this.matchVersion = matchVersion;
        //}


        /**
         * Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}.
         *
         * @return  A {@link TokenStream} built from an {@link ArabicLetterTokenizer} filtered with
         *          {@link LowerCaseFilter}, {@link StopFilter}, {@link ArabicNormalizationFilter}
         *          and {@link ArabicStemFilter}.
         */
        public override TokenStream TokenStream(string fieldName, TextReader reader)
        {
            // Build the filter chain inside-out: tokenize, lower-case, remove stop
            // words, then normalize and stem. Stop-word removal runs before the
            // normalization filters, so the stop list holds unnormalized forms.
            return new ArabicStemFilter(
                new ArabicNormalizationFilter(
                    new StopFilter(
                        StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                        new LowerCaseFilter(new ArabicLetterTokenizer(reader)),
                        stoptable)));
        }
Example #4
 /**
  * Creates a {@link TokenStream} which tokenizes all the text in the provided
  * {@link Reader}.
  *
  * @return A {@link TokenStream} built from a {@link ArabicLetterTokenizer}
  *         filtered with {@link LowerCaseFilter},
  *         {@link ArabicNormalizationFilter},
  *         {@link PersianNormalizationFilter} and Persian Stop words
  */
 public override TokenStream TokenStream(String fieldName, TextReader reader)
 {
     var tokenizer = new ArabicLetterTokenizer(reader);
     /*
      * Order matters: the stop-word list is normalized with the filters
      * below, so StopFilter must run last — after lower-casing, Arabic
      * normalization, and the additional Persian-specific normalization.
      */
     return new StopFilter(
         StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
         new PersianNormalizationFilter(
             new ArabicNormalizationFilter(
                 new LowerCaseFilter(tokenizer))),
         stoptable);
 }
 /// <summary>
 /// Stems <paramref name="input"/> via an <see cref="ArabicStemFilter"/> over an
 /// <see cref="ArabicLetterTokenizer"/> and asserts that the stream contains
 /// exactly the single token <paramref name="expected"/>.
 /// </summary>
 private void Check(string input, string expected)
 {
     var source = new StringReader(input);
     var stemmer = new ArabicStemFilter(new ArabicLetterTokenizer(source));
     AssertTokenStreamContents(stemmer, new String[] { expected });
 }
Example #6
        /*
         * Creates a <see cref="TokenStream"/> which tokenizes all the text in the provided <see cref="TextReader"/>.
         *
         * <returns>A <see cref="TokenStream"/> built from an <see cref="ArabicLetterTokenizer"/> filtered with
         *          <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/>, <see cref="ArabicNormalizationFilter"/>
         *          and <see cref="ArabicStemFilter"/>.</returns>
         */
        public override TokenStream TokenStream(string fieldName, TextReader reader)
        {
            TokenStream chain = new ArabicLetterTokenizer(reader);
            chain = new LowerCaseFilter(chain);
            // Order matters: the stop-word list is NOT normalized, so stop
            // words must be removed before normalization and stemming.
            chain = new StopFilter(
                StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                chain, stoptable);
            chain = new ArabicNormalizationFilter(chain);
            return new ArabicStemFilter(chain);
        }