Exemple #1
0
        /// <summary>
        /// Finds every keyword occurrence in the given text.
        /// </summary>
        /// <param name="text">The text to scan.</param>
        /// <returns>
        /// One <see cref="WordsSearchResult"/> per hit, in the order the hits are met while
        /// scanning the text from left to right.
        /// </returns>
        public List <WordsSearchResult> FindAll(string text)
        {
            TrieNode2 previous = null;
            var matches = new List <WordsSearchResult>();

            for (int pos = 0; pos < text.Length; pos++)
            {
                TrieNode2 current;

                // Follow a transition out of the previous node when one exists;
                // otherwise restart from the first-character table.
                if (previous == null || !previous.TryGetValue(text[pos], out current))
                {
                    current = _first[text[pos]];
                }

                if (current != null && current.End)
                {
                    foreach (var index in current.Results)
                    {
                        var word = _keywords[index];
                        // The hit ends at pos, so it starts word.Length - 1 characters earlier.
                        matches.Add(new WordsSearchResult(word, pos + 1 - word.Length, pos, index));
                    }
                }

                previous = current;
            }

            return matches;
        }
Exemple #2
0
        /// <summary>
        /// Determines whether the text contains at least one keyword.
        /// </summary>
        /// <param name="text">The text to scan.</param>
        /// <returns>
        /// <c>true</c> as soon as a node flagged as a keyword end is reached; <c>false</c> when the
        /// whole text has been scanned without reaching one.
        /// </returns>
        public bool ContainsAny(string text)
        {
            TrieNode2 previous = null;

            for (int pos = 0; pos < text.Length; pos++)
            {
                char ch = text[pos];
                TrieNode2 current;

                // Follow a transition out of the previous node when one exists;
                // otherwise restart from the first-character table.
                if (previous == null || !previous.TryGetValue(ch, out current))
                {
                    current = _first[ch];
                }

                if (current != null && current.End)
                {
                    return true;
                }

                previous = current;
            }

            return false;
        }
Exemple #3
0
        /// <summary>
        /// Finds the first keyword occurrence in the given text.
        /// </summary>
        /// <param name="text">The text to scan.</param>
        /// <returns>
        /// A <see cref="WordsSearchResult"/> built from the first entry of the first end node
        /// reached, or <c>null</c> when no end node is reached.
        /// </returns>
        public WordsSearchResult FindFirst(string text)
        {
            TrieNode2 previous = null;

            for (int pos = 0; pos < text.Length; pos++)
            {
                TrieNode2 current;

                // Follow a transition out of the previous node when one exists;
                // otherwise restart from the first-character table.
                if (previous == null || !previous.TryGetValue(text[pos], out current))
                {
                    current = _first[text[pos]];
                }

                if (current != null && current.End)
                {
                    var index = current.Results[0];
                    var word  = _keywords[index];
                    // The hit ends at pos, so it starts word.Length - 1 characters earlier.
                    return new WordsSearchResult(word, pos + 1 - word.Length, pos, index);
                }

                previous = current;
            }

            return null;
        }
Exemple #4
0
        /// <summary>
        /// Finds every keyword occurrence in the given text.
        /// </summary>
        /// <param name="text">The text to scan.</param>
        /// <returns>
        /// The matched keywords, in the order the hits are met while scanning the text from left
        /// to right. A keyword appears once per hit.
        /// </returns>
        public List <string> FindAll(string text)
        {
            TrieNode2 previous = null;
            var found = new List <string>();

            for (int pos = 0; pos < text.Length; pos++)
            {
                char ch = text[pos];
                TrieNode2 current;

                // Follow a transition out of the previous node when one exists;
                // otherwise restart from the first-character table.
                if (previous == null || !previous.TryGetValue(ch, out current))
                {
                    current = _first[ch];
                }

                if (current != null && current.End)
                {
                    foreach (var index in current.Results)
                    {
                        found.Add(_keywords[index]);
                    }
                }

                previous = current;
            }

            return found;
        }
Exemple #5
0
        /// <summary>
        /// Finds the first keyword occurrence in the given text.
        /// </summary>
        /// <param name="text">The text to scan.</param>
        /// <returns>
        /// The keyword referenced by the first entry of the first end node reached, or
        /// <c>null</c> when no end node is reached.
        /// </returns>
        public string FindFirst(string text)
        {
            TrieNode2 previous = null;

            for (int pos = 0; pos < text.Length; pos++)
            {
                char ch = text[pos];
                TrieNode2 current;

                // Follow a transition out of the previous node when one exists;
                // otherwise restart from the first-character table.
                if (previous == null || !previous.TryGetValue(ch, out current))
                {
                    current = _first[ch];
                }

                if (current != null && current.End)
                {
                    var index = current.Results[0];
                    return _keywords[index];
                }

                previous = current;
            }

            return null;
        }
        /// <summary>
        /// Feeds one typed character into the autocomplete state.
        /// </summary>
        /// <param name="c">
        /// The typed character. <c>'#'</c> ends the current sentence; a space maps to child slot 26
        /// and any other character maps to slot <c>c - 'a'</c>.
        /// </param>
        /// <returns>
        /// An empty list after <c>'#'</c> or when the typed prefix has left the trie; otherwise the
        /// <c>mostFrequentSents</c> list stored on the node reached by the prefix.
        /// </returns>
        public IList <string> Input(char c)
        {
            // '#' terminates the sentence being typed: record it and reset the typing state.
            if (c == '#')
            {
                AddSentence(curSb.ToString());
                curNode = root;
                curSb   = new StringBuilder();
                return new List <string>();
            }

            curSb.Append(c);
            int slot = (c == ' ') ? 26 : c - 'a';

            // Once the prefix has fallen off the trie, curNode stays null until the next '#'.
            var next = (curNode == null) ? null : curNode.children[slot];
            if (next == null)
            {
                curNode = null;
                return new List <string>();
            }

            curNode = next;
            return curNode.mostFrequentSents;
        }
        /// <summary>
        /// Builds the autocomplete trie from historical sentences and their usage counts.
        /// </summary>
        /// <param name="sentences">Previously typed sentences.</param>
        /// <param name="times">Usage count for the sentence at the same index in <paramref name="sentences"/>.</param>
        public AutocompleteSystem2(string[] sentences, int[] times)
        {
            root    = new TrieNode2();
            curNode = root;
            curSb   = new StringBuilder();
            map     = new Dictionary <string, int>();

            for (int idx = 0; idx < sentences.Length; idx++)
            {
                string sentence = sentences[idx];
                int    count    = times[idx];
                var    walker   = root;

                // The count must be in the map before ranking, because the ranking reads it.
                map.Add(sentence, count);

                foreach (char letter in sentence)
                {
                    // Slot 26 is used for the space character; other characters are offset from 'a'.
                    int slot = (letter == ' ') ? 26 : letter - 'a';

                    var child = walker.children[slot];
                    if (child == null)
                    {
                        child = new TrieNode2();
                        walker.children[slot] = child;
                    }

                    walker = child;
                    // Every node along the path keeps its own ranked suggestion list.
                    OrderFrequentSentences(walker, sentence);
                }
            }
        }
        /// <summary>
        /// Makes sure <paramref name="sent"/> is listed on <paramref name="node"/>, re-ranks the
        /// node's suggestions, and trims the list back to at most three entries.
        /// </summary>
        /// <param name="node">The trie node whose <c>mostFrequentSents</c> list is updated.</param>
        /// <param name="sent">The sentence to register; it must already have a count in <c>map</c>.</param>
        private void OrderFrequentSentences(TrieNode2 node, string sent)
        {
            var ranked = node.mostFrequentSents;

            if (!ranked.Contains(sent))
            {
                ranked.Add(sent);
            }

            // Higher count first; ties are broken by ordinal string order.
            // CompareTo is used instead of subtracting the counts, because a subtraction
            // can overflow int and silently invert the ordering.
            ranked.Sort((a, b) =>
            {
                int byCount = map[b].CompareTo(map[a]);
                return byCount != 0
                    ? byCount
                    : string.Compare(a, b, StringComparison.Ordinal);
            });

            // At most one entry is added per call, so dropping index 3 restores the cap of three.
            if (ranked.Count > 3)
            {
                ranked.RemoveAt(3);
            }
        }
Exemple #9
0
        /// <summary>
        /// Sets the keywords to search for and rebuilds the lookup structure.
        /// </summary>
        /// <param name="keywords">
        /// Keywords mapped to an integer value that is passed to <c>SetResults</c> on the keyword's
        /// final node. Null or empty keys are skipped.
        /// </param>
        public void SetKeywords(IDictionary <string, int> keywords)
        {
            var firstTable = new TrieNode2[char.MaxValue + 1];
            var trieRoot   = new TrieNode2();

            foreach (var entry in keywords)
            {
                string word = entry.Key;
                if (string.IsNullOrEmpty(word))
                {
                    continue;
                }

                // The first character is resolved through the lookup table; a new
                // root child is created only the first time that character is seen.
                char head = word[0];
                var  node = firstTable[head];
                if (node == null)
                {
                    node             = trieRoot.Add(head);
                    firstTable[head] = node;
                }

                for (int pos = 1; pos < word.Length; pos++)
                {
                    node = node.Add(word[pos]);
                }

                node.SetResults(word, entry.Value);
            }

            // TryLinks reads _first, so the table must be published before linking starts.
            this._first = firstTable;

            var pendingLinks = new Dictionary <TrieNode2, TrieNode2>();

            foreach (var child in trieRoot.m_values)
            {
                TryLinks(child.Value, null, pendingLinks);
            }

            foreach (var link in pendingLinks)
            {
                link.Key.Merge(link.Value);
            }
        }
            /// <summary>
            /// Inserts a word below this node, creating any missing child nodes along the way.
            /// </summary>
            /// <param name="s">
            /// The word to insert. It is stored in <c>Word</c> on the node reached by its last
            /// character; an empty string changes nothing.
            /// </param>
            public void InsertWord(string s)
            {
                var       node = this;
                TrieNode2 last = null;

                foreach (char letter in s)
                {
                    TrieNode2 child = node[letter];
                    if (child == null)
                    {
                        child        = new TrieNode2();
                        node[letter] = child;
                    }

                    last = child;
                    node = child;
                }

                // Only the node for the final character carries the word.
                if (last != null)
                {
                    last.Word = s;
                }
            }
Exemple #11
0
 /// <summary>
 /// Recursively pairs each child of <paramref name="node"/> with the node reached by the same
 /// character from <paramref name="node2"/>, or from the first-character table when
 /// <paramref name="node2"/> is null, and records the pair in <paramref name="links"/>.
 /// </summary>
 /// <param name="node">Node whose children are visited.</param>
 /// <param name="node2">Node to look the same characters up in; null means use <c>_first</c>.</param>
 /// <param name="links">Receives child-to-counterpart pairs; existing entries are overwritten.</param>
 private void TryLinks(TrieNode2 node, TrieNode2 node2, Dictionary <TrieNode2, TrieNode2> links)
 {
     foreach (var entry in node.m_values)
     {
         var child = entry.Value;
         TrieNode2 counterpart;
         bool hasCounterpart;

         if (node2 == null)
         {
             counterpart    = _first[entry.Key];
             hasCounterpart = counterpart != null;
         }
         else
         {
             hasCounterpart = node2.TryGetValue(entry.Key, out counterpart);
         }

         if (hasCounterpart)
         {
             links[child] = counterpart;
         }

         // Recurse with whatever the lookup produced, even when no link was recorded.
         TryLinks(child, counterpart, links);
     }
 }
Exemple #12
0
        /// <summary>
        /// Replaces every keyword occurrence in the text with a mask character.
        /// </summary>
        /// <param name="text">The text to scan.</param>
        /// <param name="replaceChar">The character written over each matched position.</param>
        /// <returns>A copy of <paramref name="text"/> with the matched ranges overwritten.</returns>
        public string Replace(string text, char replaceChar = '*')
        {
            var masked = new StringBuilder(text);
            TrieNode2 previous = null;

            for (int pos = 0; pos < text.Length; pos++)
            {
                TrieNode2 current;

                // Follow a transition out of the previous node when one exists;
                // otherwise restart from the first-character table.
                if (previous == null || !previous.TryGetValue(text[pos], out current))
                {
                    current = _first[text[pos]];
                }

                if (current != null && current.End)
                {
                    // Only the first result's keyword length decides how far back to mask.
                    int length = _keywords[current.Results[0]].Length;

                    for (int target = pos + 1 - length; target <= pos; target++)
                    {
                        masked[target] = replaceChar;
                    }
                }

                previous = current;
            }

            return masked.ToString();
        }
Exemple #13
0
        /// <summary>
        /// Rebuilds the search structure from <c>_keywords</c>: builds a trie of
        /// <c>TrieNode</c>, computes failure links breadth-first, copies the result into
        /// <c>TrieNode2</c> nodes, and publishes the root's children in <c>_first</c>.
        /// </summary>
        private void SetKeywords()
        {
            var root = new TrieNode();

            // allNode holds the root at index 0, followed by every other node grouped by layer.
            List <TrieNode> allNode = new List <TrieNode>();

            allNode.Add(root);
            // Nodes grouped by layer, where layer = position of the character in the keyword + 1.
            Dictionary <int, List <TrieNode> > allNodeLayers = new Dictionary <int, List <TrieNode> >();

            for (int i = 0; i < _keywords.Length; i++)
            {
                var p  = _keywords[i];
                var nd = root;
                for (int j = 0; j < p.Length; j++)
                {
                    // presumably Add returns the existing child for this character or creates one — confirm against TrieNode
                    nd = nd.Add((char)p[j]);
                    // Layer == 0 is treated as "not registered yet", so each node is listed once.
                    if (nd.Layer == 0)
                    {
                        nd.Layer = j + 1;
                        List <TrieNode> trieNodes;
                        if (allNodeLayers.TryGetValue(nd.Layer, out trieNodes) == false)
                        {
                            trieNodes = new List <TrieNode>();
                            allNodeLayers[nd.Layer] = trieNodes;
                        }
                        trieNodes.Add(nd);
                    }
                }
                // Tag the node reached by the last character with the keyword's index.
                nd.SetResults(i);
            }

            // Flatten the per-layer lists behind the root.
            foreach (var trieNodes in allNodeLayers)
            {
                foreach (var nd in trieNodes.Value)
                {
                    allNode.Add(nd);
                }
            }
            allNodeLayers = null;

            List <TrieNode> nodes = new List <TrieNode>();

            // Find failure functions
            // level 1 nodes - fail to root node
            foreach (TrieNode nd in root.m_values.Values)
            {
                nd.Failure = root;
                foreach (TrieNode trans in nd.m_values.Values)
                {
                    nodes.Add(trans);
                }
            }
            // other nodes - using BFS
            while (nodes.Count != 0)
            {
                List <TrieNode> newNodes = new List <TrieNode>();
                foreach (TrieNode nd in nodes)
                {
                    TrieNode r = nd.Parent.Failure;
                    char     c = nd.Char;

                    // Walk up the failure chain until a node with a transition on c is found.
                    // NOTE(review): root.Failure is only assigned after this loop, so the walk
                    // presumably ends on null once it passes the root — confirm.
                    while (r != null && !r.m_values.ContainsKey(c))
                    {
                        r = r.Failure;
                    }
                    if (r == null)
                    {
                        nd.Failure = root;
                    }
                    else
                    {
                        nd.Failure = r.m_values[c];
                        // Inherit the results of the failure target.
                        foreach (var result in nd.Failure.Results)
                        {
                            nd.SetResults(result);
                        }
                    }
                    // add child nodes to BFS list
                    foreach (TrieNode child in nd.m_values.Values)
                    {
                        newNodes.Add(child);
                    }
                }
                nodes = newNodes;
            }
            root.Failure = root;

            // Give every node its position in allNode so transitions can be remapped by index.
            for (int i = 0; i < allNode.Count; i++)
            {
                allNode[i].Index = i;
            }

            // Parallel list of TrieNode2: allNode2[i] is the counterpart of allNode[i].
            var allNode2 = new List <TrieNode2>();

            for (int i = 0; i < allNode.Count; i++)
            {
                allNode2.Add(new TrieNode2());
            }
            for (int i = 0; i < allNode2.Count; i++)
            {
                var oldNode = allNode[i];
                var newNode = allNode2[i];

                // Copy the node's own transitions, remapped onto the TrieNode2 counterparts.
                foreach (var item in oldNode.m_values)
                {
                    var key   = item.Key;
                    var index = item.Value.Index;
                    newNode.Add(key, allNode2[index]);
                }
                foreach (var item in oldNode.Results)
                {
                    newNode.SetResults(item);
                }
                // Fold in the direct failure node: transitions the node lacks, plus its results.
                // Only one failure level is followed here; the chain is not walked further.
                if (oldNode.Failure != root)
                {
                    foreach (var item in oldNode.Failure.m_values)
                    {
                        var key   = item.Key;
                        var index = item.Value.Index;
                        if (newNode.HasKey(key) == false)
                        {
                            newNode.Add(key, allNode2[index]);
                        }
                    }
                    foreach (var item in oldNode.Failure.Results)
                    {
                        newNode.SetResults(item);
                    }
                }
            }
            allNode.Clear();
            allNode = null;
            root    = null;

            // Publish the root's children in an array indexed directly by character.
            TrieNode2[] first = new TrieNode2[char.MaxValue + 1];
            foreach (var item in allNode2[0].m_values)
            {
                first[item.Key] = item.Value;
            }
            _first = first;
        }
            /// <summary>
            /// Scans <paramref name="text"/> for keywords and, for each candidate hit, verifies it
            /// character by character against the original characters and their pinyin.
            /// </summary>
            /// <param name="text">The text walked through the keyword trie.</param>
            /// <param name="hz">
            /// Characters compared against keyword parts that are a single character in the range
            /// U+3400..U+9FD5; indexed by the same positions as <paramref name="text"/>.
            /// </param>
            /// <param name="pinyins">
            /// Per-position strings that must start with the keyword's part for every other kind
            /// of part; indexed by the same positions as <paramref name="text"/>.
            /// </param>
            /// <returns><c>true</c> when at least one candidate hit passes verification; otherwise <c>false</c>.</returns>
            public bool Find(string text, string hz, string[] pinyins)
            {
                TrieNode2 ptr = null;

                for (int i = 0; i < text.Length; i++)
                {
                    TrieNode2 tn;

                    // Follow a transition out of the previous node when one exists;
                    // otherwise restart from the first-character table.
                    if (ptr == null || ptr.TryGetValue(text[i], out tn) == false)
                    {
                        tn = _first[text[i]];
                    }

                    if (tn != null && tn.End)
                    {
                        foreach (var result in tn.Results)
                        {
                            var  keyword        = _keywords[result];
                            var  keywordPinyins = _keywordPinyins[result];
                            var  start          = i + 1 - keyword.Length;
                            bool isok           = true;

                            for (int j = 0; j < keyword.Length; j++)
                            {
                                var idx = start + j;
                                var py  = keywordPinyins[j];

                                if (py.Length == 1 && py[0] >= 0x3400 && py[0] <= 0x9fd5)
                                {
                                    // The part is a literal character in U+3400..U+9FD5: it must match exactly.
                                    if (hz[idx] != py[0])
                                    {
                                        isok = false;
                                        break;
                                    }
                                }
                                else if (pinyins[idx].StartsWith(py, System.StringComparison.Ordinal) == false)
                                {
                                    // Ordinal comparison: this is a machine-level prefix match, so it
                                    // must not depend on the current culture's collation rules.
                                    isok = false;
                                    break;
                                }
                            }

                            if (isok)
                            {
                                return true;
                            }
                        }
                    }

                    ptr = tn;
                }

                return false;
            }
            /// <summary>
            /// Rebuilds the search structure from <c>_keywords</c>: builds a trie of
            /// <c>TrieNode</c>, computes failure links in one pass over the layered node list,
            /// copies the result into <c>TrieNode2</c> nodes with the whole failure chain folded
            /// in, and publishes the root's children in <c>_first</c>.
            /// </summary>
            private void SetKeywords()
            {
                var root = new TrieNode();
                // Nodes grouped by layer, where layer = position of the character in the keyword + 1.
                Dictionary <int, List <TrieNode> > allNodeLayers = new Dictionary <int, List <TrieNode> >();

                for (int i = 0; i < _keywords.Length; i++)
                {
                    var p  = _keywords[i];
                    var nd = root;
                    for (int j = 0; j < p.Length; j++)
                    {
                        // presumably Add returns the existing child for this character or creates one — confirm against TrieNode
                        nd = nd.Add((char)p[j]);
                        // Layer == 0 is treated as "not registered yet", so each node is listed once.
                        if (nd.Layer == 0)
                        {
                            nd.Layer = j + 1;
                            List <TrieNode> trieNodes;
                            if (allNodeLayers.TryGetValue(nd.Layer, out trieNodes) == false)
                            {
                                trieNodes = new List <TrieNode>();
                                allNodeLayers[nd.Layer] = trieNodes;
                            }
                            trieNodes.Add(nd);
                        }
                    }
                    // Tag the node reached by the last character with the keyword's index.
                    nd.SetResults(i);
                }

                // allNode holds the root at index 0, followed by every other node grouped by layer.
                List <TrieNode> allNode = new List <TrieNode>();

                allNode.Add(root);
                foreach (var trieNodes in allNodeLayers)
                {
                    foreach (var nd in trieNodes.Value)
                    {
                        allNode.Add(nd);
                    }
                }
                allNodeLayers = null;


                // Compute failure links, skipping the root at index 0.
                // NOTE(review): this reads nd.Parent.Failure, so it relies on allNodeLayers having
                // enumerated shallower layers first — confirm that ordering is guaranteed.
                for (int i = 1; i < allNode.Count; i++)
                {
                    var nd = allNode[i];
                    nd.Index = i;
                    TrieNode r = nd.Parent.Failure;
                    char     c = nd.Char;
                    // Walk up the failure chain until a node with a transition on c is found.
                    // root.Failure is only assigned after this loop, so the walk presumably ends
                    // on null once it passes the root — confirm.
                    while (r != null && !r.m_values.ContainsKey(c))
                    {
                        r = r.Failure;
                    }
                    if (r == null)
                    {
                        nd.Failure = root;
                    }
                    else
                    {
                        nd.Failure = r.m_values[c];
                        // Inherit the results of the failure target.
                        foreach (var result in nd.Failure.Results)
                        {
                            nd.SetResults(result);
                        }
                    }
                }
                root.Failure = root;


                // Parallel list of TrieNode2: allNode2[i] is the counterpart of allNode[i].
                var allNode2 = new List <TrieNode2>();

                for (int i = 0; i < allNode.Count; i++)
                {
                    allNode2.Add(new TrieNode2());
                }
                for (int i = 0; i < allNode2.Count; i++)
                {
                    var oldNode = allNode[i];
                    var newNode = allNode2[i];

                    // Copy the node's own transitions, remapped onto the TrieNode2 counterparts.
                    foreach (var item in oldNode.m_values)
                    {
                        var key   = item.Key;
                        var index = item.Value.Index;
                        newNode.Add(key, allNode2[index]);
                    }
                    foreach (var item in oldNode.Results)
                    {
                        newNode.SetResults(item);
                    }
                    // Walk the whole failure chain up to (but excluding) the root, folding in
                    // transitions the node does not already have and every result on the way.
                    oldNode = oldNode.Failure;
                    while (oldNode != root)
                    {
                        foreach (var item in oldNode.m_values)
                        {
                            var key   = item.Key;
                            var index = item.Value.Index;
                            if (newNode.HasKey(key) == false)
                            {
                                newNode.Add(key, allNode2[index]);
                            }
                        }
                        foreach (var item in oldNode.Results)
                        {
                            newNode.SetResults(item);
                        }
                        oldNode = oldNode.Failure;
                    }
                }
                allNode.Clear();
                allNode = null;
                root    = null;

                // Publish the root's children in an array indexed directly by character.
                TrieNode2[] first = new TrieNode2[char.MaxValue + 1];
                foreach (var item in allNode2[0].m_values)
                {
                    first[item.Key] = item.Value;
                }
                _first = first;
            }
        /// <summary>
        /// Builds a trie from <paramref name="wordDict"/>, records for each index of
        /// <paramref name="s"/> the dictionary words that start there and lead to the end of the
        /// string, then delegates to <c>GetSentences</c> to enumerate the sentences.
        /// </summary>
        /// <param name="s">The string to segment.</param>
        /// <param name="wordDict">The dictionary of allowed words.</param>
        /// <returns>
        /// The sentences collected by <c>GetSentences</c>, or an empty list when the early-exit
        /// check determines the end of the string cannot be reached.
        /// </returns>
        public IList <string> WordBreak5(string s, ISet <string> wordDict)
        {
            var  root         = new TrieNode2();
            int  maxLenWord   = 0;
            bool endReachable = false;

            // 1. construct trie with dictionary words
            foreach (string word in wordDict)
            {
                if (word.Length > maxLenWord)
                {
                    maxLenWord = word.Length;
                }

                root.InsertWord(word);
            }

            // Words that start at a given index AND from whose end the rest of the
            // string is known to be segmentable (or which end exactly at s.Length).
            var indexWordMap = new Dictionary <int, List <string> >();

            // 2. go through the string, considering suffixes of increasing length
            for (int j = s.Length - 1; j >= 0; j--)
            {
                int       k = j;
                TrieNode2 n = root;
                while (n != null && n.Count != 0 && k < s.Length)
                {
                    // 3. traverse the trie along s[j..] and keep every word whose end
                    // either is the end of the string or has words starting from it
                    n = n[s[k]];
                    if (n != null && n.Word != null)
                    {
                        int next = j + n.Word.Length;

                        // ContainsKey, not the indexer: Dictionary's indexer throws
                        // KeyNotFoundException for a missing key instead of returning null.
                        if (next == s.Length || indexWordMap.ContainsKey(next))
                        {
                            List <string> wordsAtJ;
                            if (!indexWordMap.TryGetValue(j, out wordsAtJ))
                            {
                                wordsAtJ        = new List <string>();
                                indexWordMap[j] = wordsAtJ;
                            }

                            wordsAtJ.Add(n.Word);
                        }
                    }

                    if (n != null)
                    {
                        k++;
                        if (k >= s.Length)
                        {
                            endReachable = true;
                        }
                    }
                }

                // No scan has run to the end of s yet and j is already more than
                // maxLenWord away from it: give up early with an empty result.
                if (!endReachable && j < s.Length - maxLenWord)
                {
                    return(new List <string>());
                }
            }

            // indexWordMap now describes a graph from index 0 to s.Length;
            // GetSentences walks it to collect every sentence.
            List <string> ll        = new List <string>();
            List <string> sentences = new List <string>();

            GetSentences(s.Length, indexWordMap, 0, ll, sentences);
            return(new List <string>(sentences));
        }