/// <summary>
/// Creates an analyzer bus for the given language.
/// </summary>
/// <param name="language">
/// Language identifier. It is upper-cased with the invariant culture before being
/// passed to <c>AnalyzerDict.GetAnalyzer</c>, so the lookup key does not depend on
/// the current thread culture.
/// </param>
/// <param name="useIndexSynonyms">
/// true to store synonyms, near-synonyms and related words in the index when the
/// index is created; false to not use them.
/// </param>
/// <exception cref="global::System.ArgumentNullException">
/// <paramref name="language"/> is null.
/// </exception>
public AnalyzerBus(string language, bool useIndexSynonyms = false)
{
    // Fail fast with a descriptive exception instead of a NullReferenceException
    // raised halfway through initialization.
    if (language == null)
    {
        throw new global::System.ArgumentNullException("language");
    }

    // Use the position-increment default that StopFilter reports for LUCENE_30.
    this._EnableStopPositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(global::Lucene.Net.Util.Version.LUCENE_30);
    this._Language = language;
    this._UseIndexSynonyms = useIndexSynonyms;
    this._SymbolAnalyzer = new SymbolAnalyzer();

    // ToUpperInvariant rather than ToUpper: the culture-sensitive overload can map
    // 'i' to a non-ASCII letter under some cultures (e.g. tr-TR) and change the key.
    this._Analyzer = AnalyzerDict.GetAnalyzer(language.ToUpperInvariant());
    this._StopCharArraySet = StopWord.StopWordList;
}
/// <summary>
/// Splits the given text into an array of terms using a <c>SymbolAnalyzer</c>.
/// </summary>
/// <param name="word">Text to split; may be null or empty.</param>
/// <returns>
/// The terms in the order the token stream produced them, or an empty array when
/// <paramref name="word"/> is null or empty.
/// </returns>
public static string[] SplitWord(string word)
{
    List<string> terms = new List<string>();

    if (!string.IsNullOrEmpty(word))
    {
        // Only the token text is needed here, so no real field name is supplied.
        using (SymbolAnalyzer analyzer = new SymbolAnalyzer())
        using (TokenStream stream = analyzer.ReusableTokenStream("", new StringReader(word)))
        {
            while (stream.IncrementToken())
            {
                ITermAttribute termAttribute = stream.GetAttribute<ITermAttribute>();
                terms.Add(termAttribute.Term);
            }
        }
    }

    return terms.ToArray();
}