Exemplo n.º 1
0
        /// <summary>
        /// Recursively grows the tree below <paramref name="parent"/> using the entries of
        /// the position/length array, starting with the entry at <paramref name="curIndex"/>.
        /// The next entry is first tried as an alternative child of the same parent when it
        /// starts at the same position. A node for the current entry is then created and
        /// extended with the first later entry that starts at or after the current entry's
        /// end. When no such entry exists the node is appended to the leaf node list.
        /// </summary>
        /// <param name="parent">node the new node is attached to</param>
        /// <param name="curIndex">index of the current entry in the position/length array</param>
        private void BuildTree(Node parent, int curIndex)
        {
            // Deep nesting is rare in practice. When it does happen, stop expanding so the
            // tree does not become too large to traverse and drag performance down.
            if (_LeafNodeList.Count > 8192)
            {
                return;
            }

            // An immediately following entry with the same start position is an alternative
            // to the current one, so it is expanded under the same parent first.
            bool nextSharesPosition =
                curIndex < _PositionLengthArrCount - 1 &&
                _PositionLengthArr[curIndex + 1].Position == _PositionLengthArr[curIndex].Position;

            if (nextSharesPosition)
            {
                BuildTree(parent, curIndex + 1);
            }

            int wordStart  = _PositionLengthArr[curIndex].Position;
            int wordLength = _PositionLengthArr[curIndex].Length;
            int parentEnd  = parent.PositionLength.Position + parent.PositionLength.Length;

            // Gap between the end of the parent entry and the start of this one,
            // accumulated on top of the parent's running total.
            int spaceCount = parent.SpaceCount + wordStart - parentEnd;

            int singleWordCount = parent.SingleWordCount;
            if (wordLength == 1)
            {
                singleWordCount++;
            }

            // The frequency sum is only tracked when the FrequencyFirst option is switched on.
            double freqSum = 0;
            if (_Options != null && _Options.FrequencyFirst)
            {
                freqSum = parent.FreqSum + _PositionLengthArr[curIndex].WordAttr.Frequency;
            }

            Node curNode = new Node(_PositionLengthArr[curIndex], parent, parent.AboveCount + 1, spaceCount, singleWordCount, freqSum);

            // Continue with the first later entry that does not overlap the current one.
            int wordEnd = wordStart + wordLength;

            for (int next = curIndex + 1; next < _PositionLengthArrCount; next++)
            {
                if (_PositionLengthArr[next].Position >= wordEnd)
                {
                    BuildTree(curNode, next);
                    return;
                }
            }

            // No following entry: this node is a leaf. Add the distance from the end of the
            // entry to the input string length to its space count, then record it.
            curNode.SpaceCount += _InputStringLength - curNode.PositionLength.Position - curNode.PositionLength.Length;
            _LeafNodeList.Add(curNode);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Scans <paramref name="text"/> and collects every match found at each character
        /// position: optional Chinese name candidates, single-character entries,
        /// two-character entries, and longer words looked up through the three-character
        /// prefix index. Matches are added in order of increasing start position.
        /// </summary>
        /// <param name="text">
        /// Text to scan. When null or empty an empty list is returned. When the first
        /// character is below 128 the whole text is lower-cased before dictionary lookups.
        /// </param>
        /// <param name="chineseNameIdentify">
        /// When true, the results of <c>ChineseName.Match</c> at each position are added as
        /// matches, and dictionary words equal to one of those names are not added again.
        /// </param>
        /// <returns>The list of matches; never null.</returns>
        public Framework.AppendList <PositionLength> GetAllMatchs(string text, bool chineseNameIdentify)
        {
            Framework.AppendList <PositionLength> result = new Framework.AppendList <PositionLength>();

            // Nothing to match. This guard also protects the text[0] access below.
            if (string.IsNullOrEmpty(text))
            {
                return(result);
            }

            string keyText = text;

            if (text[0] < 128)
            {
                keyText = keyText.ToLower();
            }

            for (int i = 0; i < text.Length; i++)
            {
                byte[] lenList;
                char   fst = keyText[i];

                List <string> chsNames = null;

                if (chineseNameIdentify)
                {
                    // Name matching runs on the original text, not the lower-cased copy.
                    chsNames = ChineseName.Match(text, i);

                    if (chsNames != null)
                    {
                        foreach (string name in chsNames)
                        {
                            WordAttribute wa = new WordAttribute(name, POS.POS_A_NR, 0);

                            result.Add(new PositionLength(i, name.Length, wa));
                        }
                    }
                }

                // Single-character entry.
                WordAttribute fwa;
                if (_FirstCharDict.TryGetValue(fst, out fwa))
                {
                    result.Add(new PositionLength(i, 1, fwa));
                }

                // Two-character entry: both chars packed into one 32-bit key.
                if (i < keyText.Length - 1)
                {
                    uint doubleChar = ((uint)keyText[i] * 65536) + keyText[i + 1];

                    if (_DoubleCharDict.TryGetValue(doubleChar, out fwa))
                    {
                        result.Add(new PositionLength(i, 2, fwa));
                    }
                }

                // Fewer than three characters left: no longer word can start here.
                if (i >= keyText.Length - 2)
                {
                    continue;
                }

                // Three chars packed into a 64-bit key. The middle char is widened to uint
                // before the multiplication so the product cannot overflow a signed int
                // (which would throw when compiled in a checked context).
                long tripleChar = ((long)keyText[i]) * 0x100000000 + (uint)keyText[i + 1] * 65536 + keyText[i + 2];

                if (!_TripleCharDict.TryGetValue(tripleChar, out lenList))
                {
                    continue;
                }

                foreach (byte len in lenList)
                {
                    // A zero length ends the list of candidate lengths.
                    if (len == 0)
                    {
                        break;
                    }

                    // Candidate would run past the end of the text.
                    if (i + len > keyText.Length)
                    {
                        continue;
                    }

                    string key = keyText.Substring(i, len);

                    WordAttribute wa;

                    if (!_WordDict.TryGetValue(key, out wa))
                    {
                        continue;
                    }

                    // Skip words that were already added above as a Chinese name.
                    bool find = false;

                    if (chsNames != null)
                    {
                        foreach (string name in chsNames)
                        {
                            if (wa.Word == name)
                            {
                                find = true;
                                break;
                            }
                        }
                    }

                    if (!find)
                    {
                        result.Add(new PositionLength(i, len, wa));
                    }
                }
            }

            return(result);
        }