/// <summary>
/// Creates a segmenter with its default settings: name matching and
/// stop-word filtering are switched off, the match direction is
/// left-to-right, and the word extractor and POS helper are freshly created.
/// </summary>
protected WordSeg()
{
    // Option defaults.
    _MatchName = false;
    _FilterStopWords = false;
    _MatchDirection = T_Direction.LeftToRight;

    // Word extractor, with CompareByPos installed as its position-comparison callback.
    CExtractWords extractor = new CExtractWords();
    extractor.CompareByPosEvent = CompareByPos;
    _ExtractWords = extractor;

    _POS = new CPOS();

    // Remember the construction time (local clock) as the last save time.
    _LastSaveTime = DateTime.Now;

    InitRules();
}
/// <summary>
/// Merges English special words.
/// When the dictionary contains English special terms such as U.S.A, C++ or C#,
/// the English fragments and letters produced by the preliminary segmentation
/// have to be joined back together.
/// </summary>
/// <param name="extractWords">Extractor used to look the merged text up.</param>
/// <param name="words">Preliminary segmentation result; every element is cast to <c>string</c>.</param>
/// <param name="start">Index in <paramref name="words"/> at which scanning begins.</param>
/// <param name="end">
/// On success, receives the index at which scanning stopped (the first element
/// that was not merged, or <c>words.Count</c>). Left untouched when null is returned.
/// </param>
/// <returns>
/// The concatenated text when the extractor reports exactly one word whose length
/// equals the length of the concatenated text; otherwise <c>null</c>.
/// </returns>
protected virtual string MergeEnglishSpecialWord(CExtractWords extractWords, ArrayList words, int start, ref int end)
{
    StringBuilder merged = new StringBuilder();
    int index = start;

    while (index < words.Count)
    {
        string token = (string)words[index];

        // An empty token, or a separator such as space / carriage return / line feed, ends the scan.
        if (token.Trim().Length == 0)
        {
            break;
        }

        // A token starting with a Chinese character (U+4E00..U+9FA5) ends the scan.
        char first = token[0];
        if (first >= 0x4e00 && first <= 0x9fa5)
        {
            break;
        }

        merged.Append(token);
        index++;
    }

    string candidate = merged.ToString();
    List<T_WordInfo> matches = extractWords.ExtractFullText(candidate);

    // Only a single extracted word can cover the whole candidate.
    if (matches.Count != 1)
    {
        return null;
    }

    T_WordInfo only = matches[0];
    if (only.Word.Length != candidate.Length)
    {
        return null;
    }

    end = index;
    return candidate;
}