Inheritance: Lucene.Net.Analysis.TokenFilter
Example #1
0
        /// <summary>
        /// Stems the given input through an ArabicLetterTokenizer + ArabicStemFilter
        /// chain and asserts that the stream yields exactly <paramref name="expected"/>.
        /// </summary>
        private void Check(string input, string expected)
        {
            var source = new ArabicLetterTokenizer(new StringReader(input));
            var stemmer = new ArabicStemFilter(source);

            AssertTokenStreamContents(stemmer, new String[] { expected });
        }
Example #2
0
        //DIGY
        ///**
        // * Builds an analyzer with the given stop words.  Lines can be commented out using {@link #STOPWORDS_COMMENT}
        // *
        // * @deprecated Use {@link #ArabicAnalyzer(Version, File)} instead
        // */
        //public ArabicAnalyzer(File stopwords)
        //{
        //    this(Version.LUCENE_24, stopwords);
        //}

        ///**
        // * Builds an analyzer with the given stop words.  Lines can be commented out using {@link #STOPWORDS_COMMENT}
        // */
        //public ArabicAnalyzer(Version matchVersion, File stopwords)
        //{
        //    stoptable = WordlistLoader.getWordSet(stopwords, STOPWORDS_COMMENT);
        //    this.matchVersion = matchVersion;
        //}


        /**
         * Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}.
         *
         * @return  A {@link TokenStream} built from an {@link ArabicLetterTokenizer} filtered with
         *          {@link LowerCaseFilter}, {@link StopFilter}, {@link ArabicNormalizationFilter}
         *          and {@link ArabicStemFilter}.
         */
        public override TokenStream TokenStream(string fieldName, TextReader reader)
        {
            TokenStream result = new ArabicLetterTokenizer(reader);

            result = new LowerCaseFilter(result);
            // The filter order is important: stop words are removed before
            // normalization because the stopword list itself is not normalized.
            result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion), result, stoptable);
            result = new ArabicNormalizationFilter(result);
            result = new ArabicStemFilter(result);

            return result;
        }
 /// <summary>
 /// Helper: runs the Arabic stemming chain over a single-token input and
 /// verifies the resulting term equals <paramref name="expected"/>.
 /// </summary>
 private void Check(string input, string expected)
 {
     TokenStream tokens = new ArabicLetterTokenizer(new StringReader(input));
     AssertTokenStreamContents(new ArabicStemFilter(tokens), new String[] { expected });
 }
        /*
         * Creates a <see cref="TokenStream"/> which tokenizes all the text in the provided <see cref="TextReader"/>.
         *
         * <returns>A <see cref="TokenStream"/> built from an <see cref="ArabicLetterTokenizer"/> filtered with
         *             <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/>, <see cref="ArabicNormalizationFilter"/>
         *            and <see cref="ArabicStemFilter"/>.</returns>
         */
        public override TokenStream TokenStream(string fieldName, TextReader reader)
        {
            TokenStream stream = new ArabicLetterTokenizer(reader);
            stream = new LowerCaseFilter(stream);
            // the order here is important: the stopword list is not normalized!
            stream = new StopFilter(
                StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                stream,
                stoptable);
            stream = new ArabicNormalizationFilter(stream);

            return new ArabicStemFilter(stream);
        }