예제 #1
0
 /// <summary>
 /// Initializes the segmenter with its default settings: name matching and
 /// stop-word filtering switched off, left-to-right matching, a fresh word
 /// extractor and POS helper, and the last-save timestamp set to the
 /// current local time. Rule initialization runs last.
 /// </summary>
 protected WordSeg()
 {
     // Default matching options.
     _MatchName = false;
     _FilterStopWords = false;
     _MatchDirection = T_Direction.LeftToRight;

     // Wire the position comparer into the extractor before publishing it
     // to the field.
     CExtractWords extractor = new CExtractWords();
     extractor.CompareByPosEvent = CompareByPos;
     _ExtractWords = extractor;

     _POS = new CPOS();
     _LastSaveTime = DateTime.Now;

     // Runs after all fields above have been assigned.
     InitRules();
 }
예제 #2
0
        /// <summary>
        /// Merges English special words.
        /// When the dictionary contains English special terms such as U.S.A, C++ or C#,
        /// the English fragments and letters produced by the preliminary segmentation
        /// have to be joined back together.
        /// </summary>
        /// <param name="extractWords">Extractor whose <c>ExtractFullText</c> is used to look up the merged text.</param>
        /// <param name="words">Preliminary segmentation result; every element is cast to <see cref="string"/>.</param>
        /// <param name="start">Index in <paramref name="words"/> at which scanning begins.</param>
        /// <param name="end">
        /// On success, receives the index at which scanning stopped (the first element
        /// that was not merged, or <c>words.Count</c>). Left unchanged when the method
        /// returns <c>null</c>.
        /// </param>
        /// <returns>
        /// The concatenated text when the extractor reports exactly one word whose length
        /// equals the length of the concatenated text; otherwise <c>null</c>.
        /// </returns>
        protected virtual string MergeEnglishSpecialWord(CExtractWords extractWords, ArrayList words, int start, ref int end)
        {
            StringBuilder str = new StringBuilder();

            int i;

            for (i = start; i < words.Count; i++)
            {
                string word = (string)words[i];

                // A null, empty or whitespace-only entry (space, CR, LF and other
                // separators) ends the scan. The explicit null check prevents a
                // NullReferenceException from Trim() on a null element.
                if (word == null || word.Trim().Length == 0)
                {
                    break;
                }

                // A Chinese character (U+4E00..U+9FA5) ends the scan.
                if (word[0] >= 0x4e00 && word[0] <= 0x9fa5)
                {
                    break;
                }

                str.Append(word);
            }

            // Nothing was collected, so there is nothing to look up or merge.
            if (str.Length == 0)
            {
                return null;
            }

            string mergeString = str.ToString();
            List<T_WordInfo> exWords = extractWords.ExtractFullText(mergeString);

            // Only accept the merge when the extractor sees the whole text as one word.
            if (exWords == null || exWords.Count != 1)
            {
                return null;
            }

            if (exWords[0].Word.Length != mergeString.Length)
            {
                return null;
            }

            end = i;
            return mergeString;
        }