Esempio n. 1
0
        /// <summary>
        /// Ends the current matching pass: appends the longest word found (or a
        /// single-character fallback) to the sentence's word list, calls
        /// <c>previous</c> to step the position back, and resets the search state.
        /// </summary>
        private void endFlex()
        {
            if (_max_node == null)
            {
                // No word was recognised: fall back to the single character located
                // (_serachTime - 1) positions before the current position.
                var ch = _context[_currentPos - _serachTime + 1];

                if (!DicProvider.SingleDic.ContainsKey(ch))
                {
                    // The single-character dictionary has no entry either.
                    _susentence.Words.Add(new _WordInnfo(ch.ToString(), WordType.Unknow));
                }
                else
                {
                    // The single-character dictionary knows this character.
                    _susentence.Words.Add(DicProvider.GetWordInfoFromSingleDic(ch).Copy());
                }
            }
            else
            {
                // A word was recognised: append a copy of its content.
                _susentence.Words.Add(_max_node.Content.Copy());
            }

            // Step the position back: by (_serachTime - 1) when no word length was
            // recorded, otherwise by the steps taken beyond the recorded word.
            var stepsBack = _maxTime == 0
                ? _serachTime - 1
                : _serachTime - _maxTime;
            previous(stepsBack);

            // Reset the search state for the next pass.
            _serachTime       = 0;
            _maxTime          = 0;
            _max_node         = null;
            _isSearchFromTree = true;
        }
Esempio n. 2
0
        /// <summary>
        /// Performs one search step with the current token: looks the token up from
        /// the root of <c>DicProvider.PositiveDic</c> or among the children of the
        /// current node, records the node as the longest match when it is not
        /// empty, and calls <c>endFlex</c> when the lookup yields no node.
        /// </summary>
        private void searchInTheTree()
        {
            _serachTime++;

            // Continue below the current node unless this step starts at the root.
            if (!_isSearchFromTree)
            {
                _current_node = _current_node.Get_Child(_token);
            }
            else
            {
                _current_node = DicProvider.PositiveDic.Get_Node(_token.ToString());
            }

            // Any following step continues from the node just reached
            // (endFlex sets the flag back to true when it runs).
            _isSearchFromTree = false;

            // Nothing further down the tree: finish this pass.
            if (_current_node == null)
            {
                endFlex();
                return;
            }

            // Remember the longest word found so far.
            if (!_current_node.Is_Empty)
            {
                _max_node = _current_node;
                _maxTime  = _serachTime;
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Reverse-segmentation post-processing, run once the scan is finished:
        /// appends whatever is still pending (the recorded longest word and/or
        /// leftover single characters) to the result list.
        /// </summary>
        /// <param name="ls">Word list that receives the reverse-segmentation results.</param>
        /// <param name="current_node">Node the scan ended on, or null when there is none.</param>
        /// <param name="max_node">Longest word node recorded so far, or null.</param>
        /// <param name="temp">The scanned string; only its last character is read, and only when <paramref name="current_node"/> is null.</param>
        private void afterReflex(List <_WordInnfo> ls, Search_Tree_Node <_WordInnfo> current_node, Search_Tree_Node <_WordInnfo> max_node, string temp)
        {
            if (current_node == null)
            {
                // No node is pending: emit the last character of the input.
                // NOTE(review): if the caller has already emitted that character,
                // this adds it a second time — confirm against the caller.
                if (temp.Length > 0)
                {
                    ls.Add(createSingleCharWord(temp[temp.Length - 1]));
                }

                return;
            }

            // The node the scan stopped on is itself a word: it is the longest match.
            if (!current_node.Is_Empty)
            {
                max_node = current_node;
            }

            if (max_node == null)
            {
                // No word on the path at all: emit every character of the path.
                foreach (var item in current_node.Full_Name)
                {
                    ls.Add(createSingleCharWord(item));
                }

                return;
            }

            ls.Add(max_node.Content.Copy());

            if (max_node.Content.Name.Length < current_node.Full_Name.Length)
            {
                // The path is longer than the recorded word: emit the remaining
                // characters one by one.
                // NOTE(review): Replace removes every occurrence of the word's
                // name, not only the first — confirm that is intended.
                var rest = current_node.Full_Name.Replace(max_node.Content.Name, "");

                foreach (var item in rest)
                {
                    // Fix: an unknown leftover character is now recorded as that
                    // character, matching the other branches, instead of as
                    // Full_Name with the character removed.
                    ls.Add(createSingleCharWord(item));
                }
            }
        }

        /// <summary>
        /// Builds the word entry for a single character: a copy of its
        /// <c>_provider.SingleDic</c> entry when one exists, otherwise a new word
        /// named after the character with <c>MaxType</c> set to <c>WordType.Unknow</c>.
        /// </summary>
        /// <param name="ch">The character to turn into a word entry.</param>
        /// <returns>The word entry for <paramref name="ch"/>.</returns>
        private _WordInnfo createSingleCharWord(char ch)
        {
            if (_provider.SingleDic.ContainsKey(ch))
            {
                return _provider.SingleDic[ch].Copy();
            }

            return new _WordInnfo(ch.ToString())
            {
                MaxType = WordType.Unknow
            };
        }
Esempio n. 4
0
        /// <summary>
        /// Segments <paramref name="temp"/> into words by walking the
        /// <c>_provider.NegtiveDic</c> search tree character by character and
        /// keeping the longest word found on each pass.
        /// </summary>
        /// <param name="temp">The string to segment. A null value yields an empty list.</param>
        /// <returns>The list of words, after <c>reverseList</c> has been applied to it.</returns>
        public List <_WordInnfo> Reflex(string temp)
        {
            var ls = new List <_WordInnfo>();

            // Nothing to scan; avoids a NullReferenceException on temp.Length.
            if (temp == null)
            {
                return(ls);
            }

            // Scan state.
            Search_Tree_Node <_WordInnfo> current_node = null;   // node reached by the current pass
            Search_Tree_Node <_WordInnfo> max_node     = null;   // longest word node seen in the current pass
            int  searchTime          = 0;                        // characters consumed by the current pass
            int  max_time            = 0;                        // (length of the recorded word) - 1
            bool isSearchFromTheTree = true;                     // true when the next lookup starts at the root

            for (int i = 0; i < temp.Length; i++)
            {
                // At the start of a pass, hand digits/letters and unknown characters
                // to their dedicated handlers.
                if (searchTime == 0)
                {
                    var ch = temp[i].ToString();

                    // Letters and digits.
                    if (Regex_Helper.Is_Math_Expression(ch))
                    {
                        reflexNumberAlpha(ls, ref temp, ref i);
                        continue;
                    }

                    // Unknown characters.
                    if (!Regex_Helper.Is_ACN(ch) && !Regex_Helper.Is_Mark(ch))
                    {
                        reflexUnknowChars(ls, ref temp, ref i);
                        continue;
                    }
                }

                // Count this character as consumed by the current pass.
                searchTime++;

                if (isSearchFromTheTree)
                {
                    current_node        = _provider.NegtiveDic.Get_Node(temp[i].ToString());
                    isSearchFromTheTree = false;
                }
                else
                {
                    current_node = current_node.Get_Child(temp[i]);
                }

                if (current_node == null)
                {
                    // The tree has nothing further: emit the result and reset.
                    endReflex(ref temp, ls, ref i, ref searchTime, ref max_time, ref isSearchFromTheTree, current_node, ref max_node);
                }
                else if (!current_node.Is_Empty)
                {
                    // Record the longest word so far. endReflex resumes at
                    // i - (searchTime - max_time) + 1 and the loop then increments i,
                    // so max_time must be (word length - 1) for the scan to continue
                    // on the first character after the word. This applies to a match
                    // on the first character of a pass as well; storing searchTime
                    // there made the scan skip the character after a one-character word.
                    max_node = current_node;
                    max_time = searchTime - 1;
                }
            }

            // Post-scan handling of whatever is still pending.
            afterReflex(ls, current_node, max_node, temp);

            // Reverse the list.
            reverseList(ls);
            return(ls);
        }
Esempio n. 5
0
        /// <summary>
        /// Reverse-segmentation helper: emits the word for the pass that just ended
        /// and resets the scan state for the next pass.
        /// </summary>
        /// <param name="temp">The string being segmented.</param>
        /// <param name="ls">Word list that receives the reverse-segmentation results.</param>
        /// <param name="i">Current scan position; moved back by (searchTime - max_time - 1).</param>
        /// <param name="searchTime">Number of search steps in the current pass; reset to 0.</param>
        /// <param name="max_time">Step counter stored for the longest word; reset to 0.</param>
        /// <param name="isSearchFromTheTree">Whether the next lookup starts at the root; set to true.</param>
        /// <param name="current_node">Current node. Passed by value and not used here; the caller's variable is unaffected.</param>
        /// <param name="max_node">Longest word node of the current pass; reset to null.</param>
        private void endReflex(ref string temp, List <_WordInnfo> ls, ref int i, ref int searchTime, ref int max_time, ref bool isSearchFromTheTree, Search_Tree_Node <_WordInnfo> current_node, ref Search_Tree_Node <_WordInnfo> max_node)
        {
            if (max_node == null)
            {
                // No word was recorded: fall back to a single character of the input.
                var ch = temp[i - (searchTime - max_time - 1)];

                if (!_provider.SingleDic.ContainsKey(ch))
                {
                    // Not in the single-character dictionary either.
                    ls.Add(new _WordInnfo()
                    {
                        Name     = ch.ToString(),
                        TypeInfo = new Dictionary <WordType, int> { { WordType.Noun, 1 } },
                        MaxType  = WordType.Unknow
                    });
                }
                else
                {
                    ls.Add(_provider.GetWordInfoFromSingleDic(ch).Copy());
                }
            }
            else
            {
                // A word was recorded during this pass: emit a copy of it.
                ls.Add(max_node.Content.Copy());
            }

            // Move the scan position back, then clear the per-pass state.
            i -= searchTime - max_time - 1;

            max_node            = null;
            max_time            = 0;
            searchTime          = 0;
            isSearchFromTheTree = true;
        }