Пример #1
0
 /// <summary>
 /// Looks up the node stored for <paramref name="c"/>.
 /// </summary>
 /// <param name="c">Character to look up.</param>
 /// <param name="node">Receives the stored node, or null when the character lies outside the recorded range.</param>
 /// <returns>True when a node for <paramref name="c"/> was found; otherwise false.</returns>
 public bool TryGetValue(char c, out TrieNode2 node)
 {
     // Cheap range pre-check: characters outside [minflag, maxflag] skip the m_values lookup entirely.
     if (minflag > (uint)c || maxflag < (uint)c)
     {
         node = null;
         return false;
     }

     return m_values.TryGetValue(c, out node);
 }
Пример #2
0
 /// <summary>
 /// Stores <paramref name="node3"/> under <paramref name="c"/> and widens the
 /// recorded [minflag, maxflag] character range so that it covers <paramref name="c"/>.
 /// </summary>
 /// <param name="c">Character key.</param>
 /// <param name="node3">Node to store for the character.</param>
 public void Add(char c, TrieNode2 node3)
 {
     // The two bounds are checked independently: the first insertion may need to move both.
     if (c < minflag)
     {
         minflag = c;
     }

     if (c > maxflag)
     {
         maxflag = c;
     }

     m_values.Add(c, node3);
 }
Пример #3
0
        /// <summary>
        /// Replaces every keyword occurrence in the text with the replacement character.
        /// </summary>
        /// <param name="text">Text to scan.</param>
        /// <param name="replaceChar">Character written over each position of a matched keyword.</param>
        /// <returns>A copy of the text in which matched spans are overwritten.</returns>
        public string Replace(string text, char replaceChar = '*')
        {
            var buffer = new StringBuilder(text);
            TrieNode2 current = null;

            for (int pos = 0; pos < text.Length; pos++)
            {
                char ch = text[pos];

                // Follow a transition from the current node when there is one;
                // otherwise restart from the first-character table.
                TrieNode2 next;
                if (current == null || !current.TryGetValue(ch, out next))
                {
                    next = _first[ch];
                }

                if (next != null && next.End)
                {
                    // The span length is taken from the first result of the node
                    // (presumably the longest keyword ending here — verify against SetResults ordering).
                    int length = _keywords[next.Results[0]].Length;
                    for (int k = pos + 1 - length; k <= pos; k++)
                    {
                        buffer[k] = replaceChar;
                    }
                }

                current = next;
            }

            return buffer.ToString();
        }
Пример #4
0
        /// <summary>
        /// Builds the matching automaton from <c>_keywords</c>: inserts every keyword into a
        /// trie of <c>TrieNode</c>, computes a failure link for each node, converts the trie into
        /// <c>TrieNode2</c> nodes whose transitions and results also include those reachable through
        /// the failure chain, and finally publishes the root's transitions as the <c>_first</c>
        /// lookup table indexed by character.
        /// </summary>
        protected void SetKeywords()
        {
            var root = new TrieNode();
            // Nodes grouped by depth (layer); layer = 1-based position of the node's character in a keyword.
            Dictionary <int, List <TrieNode> > allNodeLayers = new Dictionary <int, List <TrieNode> >();

            // Phase 1: insert every keyword into the trie, character by character.
            for (int i = 0; i < _keywords.Length; i++)
            {
                var p  = _keywords[i];
                var nd = root;
                for (int j = 0; j < p.Length; j++)
                {
                    nd = nd.Add((char)p[j]);
                    // Layer == 0 is treated as "not yet registered": assign the depth and
                    // record the node in its layer list exactly once.
                    if (nd.Layer == 0)
                    {
                        nd.Layer = j + 1;
                        List <TrieNode> trieNodes;
                        if (allNodeLayers.TryGetValue(nd.Layer, out trieNodes) == false)
                        {
                            trieNodes = new List <TrieNode>();
                            allNodeLayers[nd.Layer] = trieNodes;
                        }
                        trieNodes.Add(nd);
                    }
                }
                // The node reached by the last character records the keyword's index.
                nd.SetResults(i);
            }

            // Phase 2: flatten the trie into one list, root first, then the nodes layer by layer.
            List <TrieNode> allNode = new List <TrieNode>();

            allNode.Add(root);
            // NOTE(review): the failure-link loop below reads nd.Parent.Failure, so parents need to
            // come before their children in allNode. That relies on this dictionary enumerating
            // layers in ascending order, which Dictionary does not formally guarantee — confirm.
            foreach (var trieNodes in allNodeLayers)
            {
                foreach (var nd in trieNodes.Value)
                {
                    allNode.Add(nd);
                }
            }
            allNodeLayers = null;


            // Phase 3: assign each non-root node its list index and its failure link.
            for (int i = 1; i < allNode.Count; i++)
            {
                var nd = allNode[i];
                nd.Index = i;
                TrieNode r = nd.Parent.Failure;
                char     c = nd.Char;
                // Walk up the parent's failure chain until a node with a child for c is found.
                // root.Failure is only assigned after this loop (presumably null until then),
                // which is what ends the walk.
                while (r != null && !r.m_values.ContainsKey(c))
                {
                    r = r.Failure;
                }
                if (r == null)
                {
                    nd.Failure = root;
                }
                else
                {
                    nd.Failure = r.m_values[c];
                    // Results of the failure target are copied onto this node as well.
                    foreach (var result in nd.Failure.Results)
                    {
                        nd.SetResults(result);
                    }
                }
            }
            root.Failure = root;


            // Phase 4: build one TrieNode2 per TrieNode, at the same list index.
            var allNode2 = new List <TrieNode2>();

            for (int i = 0; i < allNode.Count; i++)
            {
                allNode2.Add(new TrieNode2());
            }
            for (int i = 0; i < allNode2.Count; i++)
            {
                var oldNode = allNode[i];
                var newNode = allNode2[i];

                // Copy the node's own transitions, translating child nodes through their Index.
                foreach (var item in oldNode.m_values)
                {
                    var key   = item.Key;
                    var index = item.Value.Index;
                    newNode.Add(key, allNode2[index]);
                }
                foreach (var item in oldNode.Results)
                {
                    newNode.SetResults(item);
                }
                // Then follow the failure chain up to (not including) the root, adding every
                // transition whose key the new node does not have yet, plus the results found there.
                oldNode = oldNode.Failure;
                while (oldNode != root)
                {
                    foreach (var item in oldNode.m_values)
                    {
                        var key   = item.Key;
                        var index = item.Value.Index;
                        if (newNode.HasKey(key) == false)
                        {
                            newNode.Add(key, allNode2[index]);
                        }
                    }
                    foreach (var item in oldNode.Results)
                    {
                        newNode.SetResults(item);
                    }
                    oldNode = oldNode.Failure;
                }
            }
            // Drop the references to the intermediate TrieNode structures.
            allNode.Clear();
            allNode = null;
            root    = null;

            // Publish the root's transitions as an array with one slot per possible char value;
            // characters without a transition from the root stay null.
            TrieNode2[] first = new TrieNode2[char.MaxValue + 1];
            foreach (var item in allNode2[0].m_values)
            {
                first[item.Key] = item.Value;
            }
            _first = first;
        }
Пример #5
0
            /// <summary>
            /// Scans <paramref name="text"/> for the keyword groups in order and reports whether
            /// <paramref name="keysCount"/> of them were matched.
            /// A keyword result is only considered when its entry in <c>_indexs</c> equals the number
            /// of groups matched so far, and when it starts after the end of the previous match.
            /// Each candidate is then verified position by position against
            /// <paramref name="hz"/> and <paramref name="pinyins"/>.
            /// </summary>
            /// <param name="text">Text that drives the automaton.</param>
            /// <param name="hz">String compared character-by-character, at the same positions as
            /// <paramref name="text"/>, when a keyword part is a single character in 0x3400..0x9fd5.</param>
            /// <param name="pinyins">Per-position strings; for every other keyword part, the entry
            /// at the position must start with that part.</param>
            /// <param name="keysCount">Number of consecutive groups that must match.</param>
            /// <returns>True once <paramref name="keysCount"/> groups have matched; otherwise false.</returns>
            public bool Find2(string text, string hz, string[] pinyins, int keysCount)
            {
                int       findCount      = 0;
                int       lastWordsIndex = -1;
                TrieNode2 ptr            = null;

                for (int i = 0; i < text.Length; i++)
                {
                    // Follow a transition from the current node when there is one;
                    // otherwise restart from the first-character table.
                    TrieNode2 tn;
                    if (ptr == null || ptr.TryGetValue(text[i], out tn) == false)
                    {
                        tn = _first[text[i]];
                    }

                    if (tn != null && tn.End)
                    {
                        foreach (var result in tn.Results)
                        {
                            // Only keywords belonging to the group expected next are relevant.
                            if (_indexs[result] != findCount)
                            {
                                continue;
                            }

                            var keyword = _keywords[result];
                            var start   = i + 1 - keyword.Length;
                            // Matches must not overlap the previously accepted match.
                            if (lastWordsIndex >= start)
                            {
                                continue;
                            }

                            bool isok           = true;
                            var  keywordPinyins = _keywordPinyins[result];

                            for (int j = 0; j < keyword.Length; j++)
                            {
                                var idx = start + j;
                                var py  = keywordPinyins[j];
                                // A single character in 0x3400..0x9fd5 (presumably a CJK ideograph — confirm)
                                // must equal the character in hz exactly.
                                if (py.Length == 1 && py[0] >= 0x3400 && py[0] <= 0x9fd5)
                                {
                                    if (hz[idx] != py[0])
                                    {
                                        isok = false;
                                        break;
                                    }
                                }
                                else
                                {
                                    // Ordinal on purpose: the culture-sensitive default may treat digraphs
                                    // such as "ch" as one collation unit under some cultures, which would
                                    // make a one-letter prefix fail to match.
                                    if (pinyins[idx].StartsWith(py, System.StringComparison.Ordinal) == false)
                                    {
                                        isok = false;
                                        break;
                                    }
                                }
                            }

                            if (isok)
                            {
                                findCount++;
                                lastWordsIndex = i;
                                if (findCount == keysCount)
                                {
                                    return(true);
                                }
                                // One accepted match per position: stop checking further results here.
                                break;
                            }
                        }
                    }

                    ptr = tn;
                }
                return(false);
            }