/// <summary>
/// Ends the current forward-matching attempt: appends the longest word found
/// (or a single-character fallback) to the sentence's word sequence, steps the
/// scan back via <c>previous</c>, and resets the search state.
/// </summary>
private void endFlex()
{
    if (_max_node == null)
    {
        // No word was recognised: fall back to the character at
        // _currentPos - _serachTime + 1 (presumably where this attempt began).
        var head = _context[_currentPos - _serachTime + 1];
        if (DicProvider.SingleDic.ContainsKey(head))
        {
            // The single-character dictionary knows this character.
            _susentence.Words.Add(DicProvider.GetWordInfoFromSingleDic(head).Copy());
        }
        else
        {
            // Unknown character: emit it as a word of type Unknow.
            _susentence.Words.Add(new _WordInnfo(head.ToString(), WordType.Unknow));
        }
    }
    else
    {
        // A word was recognised: emit a copy of its dictionary entry.
        _susentence.Words.Add(_max_node.Content.Copy());
    }

    // Hand back the steps taken beyond the emitted word (or beyond the single
    // fallback character when nothing matched).
    var rewind = _maxTime == 0 ? _serachTime - 1 : _serachTime - _maxTime;
    previous(rewind);

    // Reset the search state for the next attempt.
    _serachTime = 0;
    _maxTime = 0;
    _max_node = null;
    _isSearchFromTree = true;
}
/// <summary>
/// Advances the forward dictionary search by one step using <c>_token</c>:
/// moves to the matching node, remembers it when it holds a word, and ends the
/// attempt through <see cref="endFlex"/> when no node matches.
/// </summary>
private void searchInTheTree()
{
    _serachTime += 1;

    // Either start from the dictionary root or descend from the current node.
    if (!_isSearchFromTree)
    {
        _current_node = _current_node.Get_Child(_token);
    }
    else
    {
        _current_node = DicProvider.PositiveDic.Get_Node(_token.ToString());
    }

    // The next step continues from the current node. This must be cleared
    // before endFlex() runs, because endFlex() sets the flag back to true.
    _isSearchFromTree = false;

    if (_current_node == null)
    {
        // Nothing deeper to follow: finish this attempt.
        endFlex();
        return;
    }

    if (!_current_node.Is_Empty)
    {
        // Remember the longest word found so far and the step it was found at.
        _max_node = _current_node;
        _maxTime = _serachTime;
    }
}
/// <summary>
/// Reverse segmentation: handles whatever is still pending once the scan has finished.
/// </summary>
/// <param name="ls">Word list holding the reverse-segmentation result; words are appended to it.</param>
/// <param name="current_node">Current node at the end of the scan; may be null.</param>
/// <param name="max_node">Longest word node recorded so far; may be null.</param>
/// <param name="temp">The scanned string; only its last character is used, and only when <paramref name="current_node"/> is null.</param>
private void afterReflex(List<_WordInnfo> ls, Search_Tree_Node<_WordInnfo> current_node, Search_Tree_Node<_WordInnfo> max_node, string temp)
{
    if (current_node == null)
    {
        // No node is pending: emit only the last character of the input, if any.
        if (temp.Length > 0)
        {
            ls.Add(lookupSingleChar(temp[temp.Length - 1]));
        }
        return;
    }

    // A current node that holds a word supersedes the previously recorded one.
    if (!current_node.Is_Empty)
    {
        max_node = current_node;
    }

    if (max_node == null)
    {
        // No word on the pending path: emit each of its characters separately.
        foreach (var ch in current_node.Full_Name)
        {
            ls.Add(lookupSingleChar(ch));
        }
        return;
    }

    ls.Add(max_node.Content.Copy());

    if (max_node.Content.Name.Length < current_node.Full_Name.Length)
    {
        // The path is longer than the recognised word: emit the leftover
        // characters one by one. Fix: an unknown leftover character is now
        // named by the character itself; it used to be named by Full_Name with
        // that character removed, unlike every other branch.
        // NOTE(review): Replace removes every occurrence of the word's name
        // from Full_Name, not just one — confirm this is the intended leftover.
        var leftover = current_node.Full_Name.Replace(max_node.Content.Name, "");
        foreach (var ch in leftover)
        {
            ls.Add(lookupSingleChar(ch));
        }
    }
}

/// <summary>
/// Returns a copy of the single-character dictionary entry for <paramref name="ch"/>,
/// or a new word named by the character with MaxType Unknow when the dictionary has no entry.
/// </summary>
private _WordInnfo lookupSingleChar(char ch)
{
    if (_provider.SingleDic.ContainsKey(ch))
    {
        return _provider.SingleDic[ch].Copy();
    }

    return new _WordInnfo(ch.ToString()) { MaxType = WordType.Unknow };
}
/// <summary>
/// Segments <paramref name="temp"/> into words by walking it character by
/// character through <c>_provider.NegtiveDic</c>.
/// </summary>
/// <param name="temp">The string to segment.</param>
/// <returns>The collected word list, after <c>reverseList</c> has been applied to it.</returns>
public List<_WordInnfo> Reflex(string temp)
{
    var words = new List<_WordInnfo>();

    Search_Tree_Node<_WordInnfo> node = null;
    Search_Tree_Node<_WordInnfo> longest = null;
    int steps = 0;
    int longestSteps = 0;
    bool atRoot = true;

    for (int i = 0; i < temp.Length; i++)
    {
        // Digits, letters and unknown characters are only diverted at the
        // start of a matching attempt.
        if (steps == 0)
        {
            string glyph = temp[i].ToString();

            // Letters / digits.
            if (Regex_Helper.Is_Math_Expression(glyph))
            {
                reflexNumberAlpha(words, ref temp, ref i);
                continue;
            }

            // Unknown characters.
            if (!(Regex_Helper.Is_ACN(glyph) || Regex_Helper.Is_Mark(glyph)))
            {
                reflexUnknowChars(words, ref temp, ref i);
                continue;
            }
        }

        // One more search step.
        steps++;

        bool startedAtRoot = atRoot;
        if (startedAtRoot)
        {
            node = _provider.NegtiveDic.Get_Node(temp[i].ToString());
            atRoot = false;
        }
        else
        {
            node = node.Get_Child(temp[i]);
        }

        if (node == null)
        {
            // No match: emit a word and reset the search state.
            endReflex(ref temp, words, ref i, ref steps, ref longestSteps, ref atRoot, node, ref longest);
        }
        else if (!node.Is_Empty)
        {
            // This node holds a word: remember it as the longest so far.
            // NOTE(review): a match at the root records the step count itself,
            // a deeper match records one less. With endReflex's rewind formula
            // the root case appears to skip the character after a one-character
            // word — confirm whether this asymmetry is intended.
            longest = node;
            longestSteps = startedAtRoot ? steps : steps - 1;
        }
    }

    // Handle whatever is still pending after the scan.
    afterReflex(words, node, longest, temp);

    // Reverse the list.
    reverseList(words);
    return words;
}
/// <summary>
/// Reverse segmentation: emits the word for the current attempt and resets the
/// search parameters.
/// </summary>
/// <param name="temp">The string being segmented.</param>
/// <param name="ls">Word list holding the reverse-segmentation result; one word is appended.</param>
/// <param name="i">Current scan position; moved back by the steps taken beyond the recorded word, plus one.</param>
/// <param name="searchTime">Number of search steps in this attempt; reset to 0.</param>
/// <param name="max_time">Step count recorded for the longest word; reset to 0.</param>
/// <param name="isSearchFromTheTree">Whether the next step starts from the root; set to true.</param>
/// <param name="current_node">Current node. Passed by value and not read here.</param>
/// <param name="max_node">Longest word node, or null when none was found; reset to null.</param>
private void endReflex(ref string temp, List<_WordInnfo> ls, ref int i, ref int searchTime, ref int max_time, ref bool isSearchFromTheTree, Search_Tree_Node<_WordInnfo> current_node, ref Search_Tree_Node<_WordInnfo> max_node)
{
    // Steps taken beyond the recorded longest word.
    int overshoot = searchTime - max_time;

    if (max_node == null)
    {
        // No word was found: fall back to a single character.
        char head = temp[i - (overshoot - 1)];
        if (_provider.SingleDic.ContainsKey(head))
        {
            ls.Add(_provider.GetWordInfoFromSingleDic(head).Copy());
        }
        else
        {
            // Not in the single-character dictionary: build an Unknow word
            // whose type table holds a single Noun entry.
            ls.Add(new _WordInnfo
            {
                Name = head.ToString(),
                TypeInfo = new Dictionary<WordType, int> { { WordType.Noun, 1 } },
                MaxType = WordType.Unknow
            });
        }
    }
    else
    {
        ls.Add(max_node.Content.Copy());
    }

    // Move the scan position back.
    i = i - overshoot + 1;

    // Reset the caller's search state for the next attempt.
    max_node = null;
    max_time = 0;
    searchTime = 0;
    isSearchFromTheTree = true;
}