/// <summary>
/// Creates the rule around the supplied part-of-speech builder and records the
/// <c>HashCode</c> of every entry in <c>_PosBins</c> as a key of <c>_PosBinTbl</c>.
/// </summary>
/// <param name="pos">Part-of-speech builder stored in <c>_POS</c>.</param>
public PosBinRule(WordPosBuilder pos)
{
    _POS = pos;

    // Fill the table through a local alias; the field already points at it.
    Hashtable binTable = new Hashtable();
    _PosBinTbl = binTable;

    foreach (DualityWordBin entry in _PosBins)
    {
        // Only key presence matters; the stored value is a constant marker.
        binTable[entry.HashCode] = true;
    }
}
/// <summary>
/// Creates the matcher: builds a <c>PosBinRule</c> over the same builder, keeps the
/// builder itself, and records every entry of <c>FAMILY_NAMES</c> as a key of
/// <c>_FamilyNameTbl</c>.
/// </summary>
/// <param name="pos">Part-of-speech builder stored in <c>_Pos</c> and passed to the nested rule.</param>
public MatchName(WordPosBuilder pos)
{
    _PosBinRule = new PosBinRule(pos);
    _Pos = pos;

    // Fill the table through a local alias; the field already points at it.
    Hashtable familyTable = new Hashtable();
    _FamilyNameTbl = familyTable;

    foreach (String surname in FAMILY_NAMES)
    {
        // Only key presence matters; the stored value is a constant marker.
        familyTable[surname] = true;
    }
}
/// <summary>
/// Concatenates the word at <paramref name="index"/> with the run of words that
/// immediately follow it, as long as each of them carries the <c>POS_A_M</c> flag.
/// </summary>
/// <param name="preWords">Words to scan.</param>
/// <param name="index">Position in <paramref name="preWords"/> of the first word to examine.</param>
/// <param name="retWords">Receives the concatenated word when a merge takes place.</param>
/// <returns>
/// The index of the first word that was not merged (or <c>preWords.Count</c> when the
/// run reaches the end of the list); <c>-1</c> when the word at
/// <paramref name="index"/> lacks the flag or the concatenation left it unchanged.
/// </returns>
public int ProcessRule(List <string> preWords, int index, List <string> retWords)
{
    bool isReg; // required by GetPos; its value is not used here

    string first = preWords[index];
    int firstPos = WordPosBuilder.GetPosFromInnerPosList(_POS.GetPos(first, out isReg));
    int mergeFlag = (int)PosEnum.POS_A_M;

    // Guard: the starting word itself must carry the flag.
    if ((firstPos & mergeFlag) != mergeFlag)
    {
        return -1;
    }

    string merged = first;
    int cursor = index + 1;

    while (cursor < preWords.Count)
    {
        string candidate = preWords[cursor];
        int candidatePos = WordPosBuilder.GetPosFromInnerPosList(_POS.GetPos(candidate, out isReg));

        // Stop at the first word without the flag; cursor stays on that word.
        if ((candidatePos & mergeFlag) != mergeFlag)
        {
            break;
        }

        merged += candidate;
        cursor++;
    }

    // An unchanged string means nothing was merged.
    if (merged == first)
    {
        return -1;
    }

    retWords.Add(merged);
    return cursor;
}
/// <summary>
/// Initializes the working environment of the word segmentation component.
/// File read exceptions should be handled by the caller.
/// </summary>
/// <param name="segwords">XML file of the main dictionary.</param>
/// <param name="segchsstopwords">XML file of the Chinese stop-word dictionary.</param>
/// <param name="segchssymbol">XML file of the Chinese punctuation symbols.</param>
/// <param name="segengstopwords">XML file of the English stop-word dictionary.</param>
/// <param name="segengsymbol">XML file of the English punctuation symbols.</param>
public static void Initialize(
    string segwords,
    string segchsstopwords,
    string segchssymbol,
    string segengstopwords,
    string segengsymbol)
{
    // Main dictionary.
    SegWords = LoadSegWords(segwords);

    // Chinese stop words and Chinese symbols are loaded into the same dictionary.
    SegChsStopwordDic = LoadStopwords(segchsstopwords, SegChsStopwordDic);
    SegChsStopwordDic = LoadStopwords(segchssymbol, SegChsStopwordDic);

    // English stop words and English symbols are loaded into the same dictionary.
    SegEngStopwordDic = LoadStopwords(segengstopwords, SegEngStopwordDic);
    SegEngStopwordDic = LoadStopwords(segengsymbol, SegEngStopwordDic);

    ExtractInfo = new ExtractInfo();
    _WordPosBuilder = new WordPosBuilder();
    ExtractInfo.CompareByPosEvent = CompareByPos;

    // Register every dictionary word with both the extractor and the
    // part-of-speech builder.
    foreach (WordPos entry in SegWords.WordPosList)
    {
        ExtractInfo.InsertWordToDfa(entry.Word);
        _WordPosBuilder.AddWordPos(entry.Word, entry.Pos);
    }

    SetOwnerRules();
}
/// <summary>
/// Creates the rule and stores the supplied part-of-speech builder in <c>_POS</c>.
/// </summary>
/// <param name="pos">Part-of-speech builder kept for later lookups by this rule.</param>
public MergeNumRule(WordPosBuilder pos)
{
    _POS = pos;
}