/// <summary>
/// Builds the analysis chain for English text: StandardTokenizer, then
/// lower-casing, ASCII folding, stop-word removal, and Snowball (English) stemming.
/// </summary>
/// <param name="fieldName">Name of the field being analyzed (unused by this chain).</param>
/// <param name="reader">Reader supplying the raw text to tokenize.</param>
/// <returns>The fully composed <see cref="TokenStream"/>.</returns>
public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
{
    // Create the base tokenizer.
    TokenStream result = new StandardTokenizer(CURRENT_VERSION, reader);

    // Normalize case and diacritics BEFORE stemming: Snowball stemmers are
    // case-sensitive and expect lower-cased input, so stemming first would
    // leave capitalized words incorrectly stemmed.
    result = new LowerCaseFilter(result);
    result = new ASCIIFoldingFilter(result);

    // Stop-word removal runs before stemming so stop words are matched in
    // their surface form. The set is deliberately empty (stop filtering
    // effectively disabled); the default list was:
    //-result = new StopFilter(true, result, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    result = new StopFilter(true, result, new HashSet<string>());

    // Stem last, on normalized tokens.
    result = new Lucene.Net.Analysis.Snowball.SnowballFilter(result, new EnglishStemmer());

    return result;
}
/// <summary>
/// Builds the analysis chain for Portuguese text: StandardTokenizer, then
/// lower-casing, ASCII folding, stop-word removal, and Snowball (Portuguese) stemming.
/// </summary>
/// <param name="fieldName">Name of the field being analyzed (unused by this chain).</param>
/// <param name="reader">Reader supplying the raw text to tokenize.</param>
/// <returns>The fully composed <see cref="TokenStream"/>.</returns>
public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
{
    // Create the base tokenizer.
    TokenStream result = new StandardTokenizer(Lucene.Net.Util.Version.LUCENE_30, reader);

    // Normalize case and diacritics BEFORE stemming: Snowball stemmers are
    // case-sensitive and expect lower-cased input, so stemming first would
    // leave capitalized words incorrectly stemmed.
    result = new LowerCaseFilter(result);
    result = new ASCIIFoldingFilter(result);

    // NOTE(review): this Portuguese analyzer filters with the ENGLISH stop
    // word list — likely a copy/paste oversight; confirm whether a Portuguese
    // stop-word set exists in the project and should be used instead.
    result = new StopFilter(true, result, EnglishStopWords.GetEnglishStopWords());

    // Stem last, on normalized tokens.
    result = new Lucene.Net.Analysis.Snowball.SnowballFilter(result, new PortugueseStemmer());

    return result;
}