Example #1
        public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
        {
            //create the tokenizer (CURRENT_VERSION is presumably a Lucene.Net.Util.Version field defined elsewhere on the analyzer class)
            TokenStream result = new StandardTokenizer(CURRENT_VERSION, reader);

            //add in filters
            // stem tokens with the English Snowball stemmer
            result = new Lucene.Net.Analysis.Snowball.SnowballFilter(result, new EnglishStemmer());

            // optionally normalize the StandardTokenizer output first (disabled here)
            //result = new StandardFilter(result);

            // make sure everything is lower case
            result = new LowerCaseFilter(result);

            // fold accented characters to their ASCII equivalents
            result = new ASCIIFoldingFilter(result);

            // to use the default list of stop words provided by the StopAnalyzer class:
            //result = new StopFilter(true, result, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
            // here no stop words are removed (empty set)
            result = new StopFilter(true, result, new HashSet<string>());

            //return the built token stream
            return result;
        }
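
The method above is an override of Analyzer.TokenStream, so it needs a containing Analyzer subclass before it can be used for indexing. Below is a minimal sketch, assuming Lucene.Net 3.0.3 with its contrib Snowball package (where the stemmers live under SF.Snowball.Ext). The class names CustomEnglishAnalyzer and IndexingDemo are hypothetical, and CURRENT_VERSION is assumed to be a Lucene.Net.Util.Version field, since the original snippet does not show the containing class.

    using System.Collections.Generic;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Snowball;
    using Lucene.Net.Analysis.Standard;
    using Lucene.Net.Documents;
    using Lucene.Net.Index;
    using Lucene.Net.Store;
    using SF.Snowball.Ext;

    public class CustomEnglishAnalyzer : Analyzer   // hypothetical name for the containing class
    {
        // assumed definition; the original snippet references CURRENT_VERSION without showing it
        private static readonly Lucene.Net.Util.Version CURRENT_VERSION = Lucene.Net.Util.Version.LUCENE_30;

        public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
        {
            // same chain as Example #1
            TokenStream result = new StandardTokenizer(CURRENT_VERSION, reader);
            result = new SnowballFilter(result, new EnglishStemmer());
            result = new LowerCaseFilter(result);
            result = new ASCIIFoldingFilter(result);
            result = new StopFilter(true, result, new HashSet<string>());
            return result;
        }
    }

    public static class IndexingDemo
    {
        public static void Main()
        {
            // index one document with the custom analyzer
            var directory = new RAMDirectory();
            var analyzer = new CustomEnglishAnalyzer();
            using (var writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                var doc = new Document();
                doc.Add(new Field("body", "Crème brûlée recipes", Field.Store.YES, Field.Index.ANALYZED));
                writer.AddDocument(doc);
            }
        }
    }
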
Example #2
        public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
        {
            //create the tokenizer
            TokenStream result = new StandardTokenizer(Lucene.Net.Util.Version.LUCENE_30, reader);

            //add in filters
            // stem tokens with the Portuguese Snowball stemmer
            result = new Lucene.Net.Analysis.Snowball.SnowballFilter(result, new PortugueseStemmer());
            // make sure everything is lower case
            result = new LowerCaseFilter(result);
            // fold accented characters to their ASCII equivalents
            result = new ASCIIFoldingFilter(result);
            // remove stop words (note: an English stop-word list is used alongside the Portuguese stemmer)
            result = new StopFilter(true, result, EnglishStopWords.GetEnglishStopWords());

            //return the built token stream
            return result;
        }
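
To see what the Portuguese chain actually emits, the sketch below feeds a literal string through the same tokenizer and filters and prints each term. It assumes Lucene.Net 3.0.3 and its contrib Snowball package; the class name TokenStreamDemo and the sample sentence are illustrative only, and the stop-word filter is omitted because EnglishStopWords is a helper not shown in the original example.

    using System;
    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Snowball;
    using Lucene.Net.Analysis.Standard;
    using Lucene.Net.Analysis.Tokenattributes;
    using SF.Snowball.Ext;

    public static class TokenStreamDemo
    {
        public static void Main()
        {
            // build the same chain as Example #2 (minus the stop-word filter)
            // directly over a literal Portuguese string
            TokenStream ts = new StandardTokenizer(Lucene.Net.Util.Version.LUCENE_30,
                                                   new StringReader("Correndo rapidamente pelas ruas"));
            ts = new SnowballFilter(ts, new PortugueseStemmer());
            ts = new LowerCaseFilter(ts);
            ts = new ASCIIFoldingFilter(ts);

            // read the term text of each token the chain emits
            ITermAttribute term = ts.AddAttribute<ITermAttribute>();
            while (ts.IncrementToken())
            {
                Console.WriteLine(term.Term);
            }
        }
    }
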