Пример #1
0
 /// <summary>
 /// Creates an analyzer bus for the given language.
 /// </summary>
 /// <param name="language">
 /// Language identifier. It is upper-cased with the invariant culture and passed to
 /// <c>AnalyzerDict.GetAnalyzer</c> to select the analyzer.
 /// </param>
 /// <param name="useIndexSynonyms">
 /// true to store synonyms, near-synonyms and related words in the index when indexing;
 /// false to not use them.
 /// </param>
 /// <exception cref="System.ArgumentNullException"><paramref name="language"/> is null.</exception>
 public AnalyzerBus(string language, bool useIndexSynonyms = false)
 {
     // Fail fast with a descriptive exception instead of a NullReferenceException
     // raised halfway through construction.
     if (language == null)
     {
         throw new global::System.ArgumentNullException("language");
     }

     this._EnableStopPositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(global::Lucene.Net.Util.Version.LUCENE_30);
     this._Language         = language;
     this._UseIndexSynonyms = useIndexSynonyms;
     this._SymbolAnalyzer   = new SymbolAnalyzer();
     // Invariant upper-casing keeps the lookup key independent of the current thread
     // culture (e.g. under tr-TR, ToUpper() maps 'i' to 'İ' and would change the key).
     this._Analyzer         = AnalyzerDict.GetAnalyzer(language.ToUpperInvariant());
     this._StopCharArraySet = StopWord.StopWordList;
 }
Пример #2
0
        /// <summary>
        /// Splits a string into an array of tokens using a <c>SymbolAnalyzer</c>.
        /// </summary>
        /// <param name="word">The text to split; may be null or empty.</param>
        /// <returns>
        /// The term text of each token, in the order the token stream produced them.
        /// An empty array when <paramref name="word"/> is null or empty.
        /// </returns>
        public static string[] SplitWord(string word)
        {
            // Nothing to tokenize: return an empty array without building a list.
            if (string.IsNullOrEmpty(word))
            {
                return new string[0];
            }

            List<string> terms = new List<string>();

            // Only the token text is wanted, so the field name is left empty.
            using (SymbolAnalyzer analyzer = new SymbolAnalyzer())
            using (TokenStream stream = analyzer.ReusableTokenStream("", new StringReader(word)))
            {
                // Resolve the term attribute once; the same instance is updated in place
                // on every IncrementToken() call, so there is no need to look it up per token.
                ITermAttribute termAttribute = stream.AddAttribute<ITermAttribute>();

                while (stream.IncrementToken())
                {
                    terms.Add(termAttribute.Term);
                }
            }

            return terms.ToArray();
        }