Exemple #1
0
        /// <summary>
        /// Creates the rule and builds a lookup table that holds the
        /// <c>HashCode</c> of every entry found in <c>_PosBins</c>.
        /// </summary>
        /// <param name="pos">Part-of-speech builder kept in <c>_POS</c> for later use.</param>
        public PosBinRule(WordPosBuilder pos)
        {
            _POS = pos;

            // Fill a local table first, then publish it to the field in one step.
            Hashtable knownBins = new Hashtable();
            foreach (DualityWordBin entry in _PosBins)
            {
                // Indexer assignment: a repeated HashCode simply overwrites the earlier entry.
                knownBins[entry.HashCode] = true;
            }

            _PosBinTbl = knownBins;
        }
Exemple #2
0
        /// <summary>
        /// Creates the matcher: constructs a <c>PosBinRule</c> from the same builder,
        /// stores the builder, and fills a lookup table with every entry of
        /// <c>FAMILY_NAMES</c>.
        /// </summary>
        /// <param name="pos">Part-of-speech builder shared with the inner <c>PosBinRule</c>.</param>
        public MatchName(WordPosBuilder pos)
        {
            _PosBinRule = new PosBinRule(pos);
            _Pos = pos;

            // Fill a local table first, then publish it to the field in one step.
            Hashtable knownFamilyNames = new Hashtable();
            foreach (String name in FAMILY_NAMES)
            {
                // Indexer assignment tolerates duplicate names in FAMILY_NAMES.
                knownFamilyNames[name] = true;
            }

            _FamilyNameTbl = knownFamilyNames;
        }
Exemple #3
0
        /// <summary>
        /// Starting at <paramref name="index"/>, concatenates the run of consecutive
        /// words whose part-of-speech value has the <c>PosEnum.POS_A_M</c> bits set
        /// and appends the concatenated word to <paramref name="retWords"/>.
        /// </summary>
        /// <param name="preWords">Input word list to scan.</param>
        /// <param name="index">Position in <paramref name="preWords"/> of the first word to test.</param>
        /// <param name="retWords">Output list; receives the merged word when a merge happens.</param>
        /// <returns>
        /// The index of the first word after the merged run (equal to
        /// <c>preWords.Count</c> when the run reaches the end of the list), or
        /// <c>-1</c> when the word at <paramref name="index"/> lacks the flag or the
        /// concatenation is still equal to that starting word.
        /// </returns>
        public int ProcessRule(List <string> preWords, int index, List <string> retWords)
        {
            int  flag = (int)PosEnum.POS_A_M;
            bool isReg;

            string first    = preWords[index];
            int    firstPos = WordPosBuilder.GetPosFromInnerPosList(_POS.GetPos(first, out isReg));

            // The rule only applies when the starting word itself carries the flag.
            if ((firstPos & flag) != flag)
            {
                return -1;
            }

            string merged = first;
            int    cursor = index + 1;

            // Absorb following words for as long as they carry the same flag.
            while (cursor < preWords.Count)
            {
                string candidate    = preWords[cursor];
                int    candidatePos = WordPosBuilder.GetPosFromInnerPosList(_POS.GetPos(candidate, out isReg));

                if ((candidatePos & flag) != flag)
                {
                    break;
                }

                merged += candidate;
                cursor++;
            }

            // An unchanged string means nothing was appended, so there is nothing to report.
            if (merged == first)
            {
                return -1;
            }

            retWords.Add(merged);
            return cursor;
        }
Exemple #4
0
        /// <summary>
        /// Initializes the working environment of the word segmentation component.
        /// File-reading exceptions should be handled by the caller.
        /// </summary>
        /// <param name="segwords">Xml file of the main dictionary.</param>
        /// <param name="segchsstopwords">Xml file of the Chinese stop-word dictionary.</param>
        /// <param name="segchssymbol">Xml file of the Chinese punctuation symbols.</param>
        /// <param name="segengstopwords">Xml file of the English stop-word dictionary.</param>
        /// <param name="segengsymbol">Xml file of the English punctuation symbols.</param>
        public static void Initialize(
            string segwords,
            string segchsstopwords,
            string segchssymbol,
            string segengstopwords,
            string segengsymbol)
        {
            SegWords          = LoadSegWords(segwords);

            // Each LoadStopwords call receives the current dictionary and its result is
            // stored back, so the stop-word file and the symbol file feed the same
            // dictionary one after the other.
            // NOTE(review): presumably LoadStopwords adds to the dictionary passed in
            // rather than replacing it - confirm against its implementation.
            SegChsStopwordDic = LoadStopwords(segchsstopwords, SegChsStopwordDic);
            SegChsStopwordDic = LoadStopwords(segchssymbol, SegChsStopwordDic);
            SegEngStopwordDic = LoadStopwords(segengstopwords, SegEngStopwordDic);
            SegEngStopwordDic = LoadStopwords(segengsymbol, SegEngStopwordDic);

            ExtractInfo     = new ExtractInfo();
            _WordPosBuilder = new WordPosBuilder();
            // The comparison callback is installed before any word is inserted below.
            ExtractInfo.CompareByPosEvent = CompareByPos;
            // Register every main-dictionary entry with both the extractor and the
            // part-of-speech builder.
            foreach (WordPos item in SegWords.WordPosList)
            {
                ExtractInfo.InsertWordToDfa(item.Word);
                _WordPosBuilder.AddWordPos(item.Word, item.Pos);
            }

            // Runs last, after the dictionaries and builders above have been populated.
            SetOwnerRules();
        }
Exemple #5
0
 /// <summary>
 /// Creates the rule and keeps the supplied part-of-speech builder in <c>_POS</c>.
 /// </summary>
 /// <param name="pos">Part-of-speech builder stored for later use by this rule.</param>
 public MergeNumRule(WordPosBuilder pos)
 {
     _POS = pos;
 }