/// <summary>
/// Reduces <paramref name="word"/> to its stem using the Snowball stemmer registered
/// for <paramref name="language"/>. The analyzer is built with the shared stop-word list.
/// </summary>
/// <param name="word">The text to stem. May be null, in which case null is returned.</param>
/// <param name="language">Language key passed to <c>SnowballDict.GetStemmer</c> to look up the stemmer name.</param>
/// <returns>
/// <paramref name="word"/> unchanged when no stemmer is registered for the language;
/// otherwise the term of the last token the analyzer emits, or null when it emits no token
/// (presumably when the input is empty or is removed as a stop word).
/// </returns>
public static string SnowballWord(string word, string language)
{
    string stemmer = SnowballDict.GetStemmer(language);

    // Without a stemmer the word is passed through untouched. A null word is handled the
    // same way so that it yields null on both paths instead of making StringReader throw.
    if (stemmer == null || word == null)
    {
        return word;
    }

    string result = null;
    using (StringReader reader = new StringReader(word))
    using (SnowballAnalyzer snowball = new SnowballAnalyzer(Lucene.Net.Util.Version.LUCENE_30, stemmer, StopWord.StopWordList))
    // Only the token text is needed, so no real field name is supplied.
    using (TokenStream ts = snowball.ReusableTokenStream("", reader))
    {
        // The attribute instance is shared by every token of the stream; fetch it once.
        ITermAttribute termAttribute = ts.GetAttribute<ITermAttribute>();
        while (ts.IncrementToken())
        {
            // Each iteration overwrites the previous value, so the last token wins.
            result = termAttribute.Term;
        }
    }

    return result;
}
/// <summary>
/// Builds the token stream for a field: the wrapped analyzer's stream, followed by stop-word
/// filtering, then Snowball stemming when a stemmer exists for the configured language, and
/// finally synonym expansion when index synonyms are enabled.
/// </summary>
/// <param name="fieldName">Field name forwarded to the wrapped analyzer.</param>
/// <param name="reader">Source text; replaced by <c>InitReader(reader)</c> for every language except "JA".</param>
/// <returns>The fully decorated token stream.</returns>
public override TokenStream TokenStream(string fieldName, TextReader reader)
{
    bool isJapanese = this._Language.Equals("JA", StringComparison.CurrentCultureIgnoreCase);
    if (!isJapanese)
    {
        // Anything that is not Japanese needs its reader rebuilt. (wangyunpeng)
        reader = this.InitReader(reader);
    }

    TokenStream stream = this._Analyzer.TokenStream(fieldName, reader);
    stream = new StopFilter(this._EnableStopPositionIncrements, stream, this._StopCharArraySet, true);

    // Stemming. wangyunpeng, 2015-08-17: switched to a thread-safe way of obtaining the stemmer.
    SnowballProgram stemProgram = SnowballDict.GetSnowball(this._Language);
    if (stemProgram != null)
    {
        stream = new SnowballFilter(stream, stemProgram);
    }

    if (_UseIndexSynonyms)
    {
        // While building the index, also store synonyms, near-synonyms and related words.
        stream = new SynonymsFilter(this._Language, stream);
    }

    return stream;
}