/// <summary>
/// Walks the cell chain forward from <paramref name="cell"/>, offering each lexeme to
/// <paramref name="path"/>. Lexemes the path accepts stay in the path; cells whose
/// lexeme is rejected (it conflicts with the path) are collected on a stack.
/// </summary>
/// <param name="cell">Cell at which the traversal starts; successors are reached through <c>Next</c>.</param>
/// <param name="path">Lexeme path that receives every lexeme accepted by <c>ExpandNonOverlapLexeme</c>.</param>
/// <returns>Stack of the cells whose lexemes were rejected, most recently visited on top.</returns>
private Stack<QuickSortSet<Lexeme>.Cell> ForwardPath(QuickSortSet<Lexeme>.Cell cell, LexemePath path)
{
    // Cells whose lexeme could not be added to the path.
    var conflicts = new Stack<QuickSortSet<Lexeme>.Cell>();

    // Traversal ends at the end of the chain or at the first cell that carries no lexeme.
    for (var node = cell; node != null && node.V != null; node = node.Next)
    {
        var accepted = path.ExpandNonOverlapLexeme(node.V);
        if (!accepted)
        {
            // The lexeme crosses the current path and was not added; remember its cell.
            conflicts.Push(node);
        }
    }

    return conflicts;
}
/// <summary>
/// Ambiguity resolution: builds candidate lexeme paths starting from
/// <paramref name="cell"/> and returns the one that sorts first among them.
/// </summary>
/// <param name="cell">First cell of the lexeme chain to resolve.</param>
/// <param name="fullTextLen">Not read by this method; kept as part of the signature.</param>
/// <returns>The first element of the sorted candidate set (the earlier a path sorts, the better the segmentation).</returns>
public LexemePath Judge(QuickSortSet<Lexeme>.Cell cell, int fullTextLen)
{
    // Candidate conflict-free lexeme paths, kept in sorted order.
    var candidates = new SortedSet<LexemePath>();

    // Working path that is grown, rolled back and re-grown while exploring alternatives.
    var working = new LexemePath();

    // First pass: collect everything that fits, and remember the cells that conflicted.
    var conflicts = ForwardPath(cell, working);
    candidates.Add(working.Copy());

    while (conflicts.Count != 0)
    {
        var conflictCell = conflicts.Pop();

        // Roll the working path back with respect to the conflicting lexeme
        // (BackPath is defined outside this block).
        BackPath(conflictCell.V, working);

        // Re-run the forward pass from the ambiguous position; the stack it
        // returns is intentionally not used here.
        ForwardPath(conflictCell, working);

        candidates.Add(working.Copy());
    }

    // The set holds at least the first-pass snapshot, so a first element always exists.
    return candidates.First();
}