/// <summary>
/// Finds every keyword occurrence in the given text.
/// </summary>
/// <param name="text">The text to scan.</param>
/// <returns>
/// One <see cref="WordsSearchResult"/> per hit, in the order the hits are reached
/// while scanning; an empty list when nothing matches.
/// </returns>
public List<WordsSearchResult> FindAll(string text)
{
    var matches = new List<WordsSearchResult>();
    TrieNode2 current = null;
    int position = 0;

    foreach (char ch in text)
    {
        TrieNode2 next;
        // With no active node, or no transition for this character, restart
        // from the first-character table.
        if (current == null || !current.TryGetValue(ch, out next))
        {
            next = _first[ch];
        }

        // A node flagged End carries the indices of the keywords that finish here.
        if (next != null && next.End)
        {
            foreach (var keywordIndex in next.Results)
            {
                var word = _keywords[keywordIndex];
                // The match ends at the current position; its start is derived from the keyword length.
                matches.Add(new WordsSearchResult(word, position + 1 - word.Length, position, keywordIndex));
            }
        }

        current = next;
        position++;
    }

    return matches;
}
/// <summary>
/// Determines whether the text contains at least one keyword.
/// </summary>
/// <param name="text">The text to scan.</param>
/// <returns><c>true</c> as soon as a node flagged End is reached; otherwise <c>false</c>.</returns>
public bool ContainsAny(string text)
{
    TrieNode2 current = null;

    for (int i = 0; i < text.Length; i++)
    {
        char ch = text[i];
        TrieNode2 next;
        // Fall back to the first-character table when there is no active node
        // or the active node has no transition for this character.
        if (current == null || !current.TryGetValue(ch, out next))
        {
            next = _first[ch];
        }

        if (next != null && next.End)
        {
            return true;
        }

        current = next;
    }

    return false;
}
/// <summary>
/// Finds the first keyword occurrence in the given text.
/// </summary>
/// <param name="text">The text to scan.</param>
/// <returns>
/// A <see cref="WordsSearchResult"/> built from the first entry of the first
/// End node reached, or <c>null</c> when no keyword is found.
/// </returns>
public WordsSearchResult FindFirst(string text)
{
    TrieNode2 current = null;
    int position = -1;

    foreach (char ch in text)
    {
        position++;

        TrieNode2 next;
        // Restart from the first-character table when the active node cannot consume this character.
        if (current == null || !current.TryGetValue(ch, out next))
        {
            next = _first[ch];
        }

        if (next != null && next.End)
        {
            // Only the first recorded result of the node is reported.
            var keywordIndex = next.Results[0];
            var word = _keywords[keywordIndex];
            return new WordsSearchResult(word, position + 1 - word.Length, position, keywordIndex);
        }

        current = next;
    }

    return null;
}
/// <summary>
/// Finds every keyword occurrence in the given text.
/// </summary>
/// <param name="text">The text to scan.</param>
/// <returns>
/// The matched keywords in the order they are reached while scanning (a keyword
/// appears once per hit); an empty list when nothing matches.
/// </returns>
public List<string> FindAll(string text)
{
    var found = new List<string>();
    TrieNode2 current = null;

    for (int i = 0; i < text.Length; i++)
    {
        char ch = text[i];
        TrieNode2 next;
        // Restart from the first-character table when the active node cannot consume this character.
        if (current == null || !current.TryGetValue(ch, out next))
        {
            next = _first[ch];
        }

        if (next != null && next.End)
        {
            // Results holds indices into _keywords for every keyword ending at this node.
            foreach (var keywordIndex in next.Results)
            {
                found.Add(_keywords[keywordIndex]);
            }
        }

        current = next;
    }

    return found;
}
/// <summary>
/// Finds the first keyword occurrence in the given text.
/// </summary>
/// <param name="text">The text to scan.</param>
/// <returns>
/// The keyword referenced by the first result of the first End node reached,
/// or <c>null</c> when no keyword is found.
/// </returns>
public string FindFirst(string text)
{
    TrieNode2 current = null;

    for (int i = 0; i < text.Length; i++)
    {
        char ch = text[i];
        TrieNode2 next;
        // Restart from the first-character table when the active node cannot consume this character.
        if (current == null || !current.TryGetValue(ch, out next))
        {
            next = _first[ch];
        }

        if (next != null && next.End)
        {
            return _keywords[next.Results[0]];
        }

        current = next;
    }

    return null;
}
/// <summary>
/// Feeds one typed character into the autocomplete state.
/// </summary>
/// <param name="c">
/// The typed character. '#' finishes the current sentence; any other character
/// is appended to it (the slot mapping assumes 'a'..'z' or a space — TODO confirm with callers).
/// </param>
/// <returns>
/// An empty list after '#' or when the typed prefix has no node in the trie;
/// otherwise the <c>mostFrequentSents</c> list stored on the node reached.
/// </returns>
public IList<string> Input(char c)
{
    if (c == '#')
    {
        // Sentence finished: record it, then reset the typing state to the root.
        AddSentence(curSb.ToString());
        curNode = root;
        curSb = new StringBuilder();
        return new List<string>();
    }

    curSb.Append(c);

    // A space uses slot 26; every other character uses its offset from 'a'.
    int slot = c == ' ' ? 26 : c - 'a';

    // Once the prefix leaves the trie, curNode stays null until the next '#'.
    var child = curNode == null ? null : curNode.children[slot];
    curNode = child;

    if (child == null)
    {
        return new List<string>();
    }

    return child.mostFrequentSents;
}
/// <summary>
/// Builds the autocomplete trie from historical sentences and their counts.
/// </summary>
/// <param name="sentences">The sentences to index.</param>
/// <param name="times">The count for each sentence; <c>times[i]</c> belongs to <c>sentences[i]</c>.</param>
public AutocompleteSystem2(string[] sentences, int[] times)
{
    map = new Dictionary<string, int>();
    curSb = new StringBuilder();
    root = new TrieNode2();
    curNode = root;

    for (int idx = 0; idx < sentences.Length; idx++)
    {
        string sentence = sentences[idx];
        // The count must be registered before ranking, because the ranking reads it from the map.
        map.Add(sentence, times[idx]);

        var cursor = root;
        foreach (char ch in sentence)
        {
            // A space uses slot 26; every other character uses its offset from 'a'.
            int slot = ch == ' ' ? 26 : ch - 'a';

            var child = cursor.children[slot];
            if (child == null)
            {
                child = new TrieNode2();
                cursor.children[slot] = child;
            }

            cursor = child;
            // Every node along the path gets the sentence offered to its ranked list.
            OrderFrequentSentences(cursor, sentence);
        }
    }
}
/// <summary>
/// Offers a sentence to a node's ranked list and keeps only the top three entries.
/// </summary>
/// <param name="node">The trie node whose <c>mostFrequentSents</c> list is updated.</param>
/// <param name="sent">The sentence to include; it must already have a count in <c>map</c>.</param>
private void OrderFrequentSentences(TrieNode2 node, string sent)
{
    var ranked = node.mostFrequentSents;

    if (!ranked.Contains(sent))
    {
        ranked.Add(sent);
    }

    // Higher count first; equal counts fall back to ordinal string order.
    ranked.Sort((left, right) =>
    {
        var leftCount = map[left];
        var rightCount = map[right];
        if (leftCount != rightCount)
        {
            return rightCount - leftCount;
        }

        return string.Compare(left, right, StringComparison.Ordinal);
    });

    // Drop the entry at index 3 when the list has grown past three items.
    if (ranked.Count > 3)
    {
        ranked.RemoveAt(3);
    }
}
/// <summary>
/// Sets the keywords and rebuilds the lookup structure used by the search methods.
/// </summary>
/// <param name="keywords">
/// The keywords to index; each key is a keyword and its value is passed to
/// <c>SetResults</c> together with the keyword. Null or empty keys are skipped.
/// </param>
public void SetKeywords(IDictionary<string, int> keywords)
{
    var root = new TrieNode2();
    var firstTable = new TrieNode2[char.MaxValue + 1];

    foreach (var entry in keywords)
    {
        var word = entry.Key;
        if (string.IsNullOrEmpty(word))
        {
            continue;
        }

        // The node for the first character is cached in a table indexed by that character.
        var head = word[0];
        var node = firstTable[head];
        if (node == null)
        {
            node = root.Add(head);
            firstTable[head] = node;
        }

        for (int i = 1; i < word.Length; i++)
        {
            node = node.Add(word[i]);
        }

        node.SetResults(word, entry.Value);
    }

    // TryLinks reads _first, so the table has to be published before linking starts.
    _first = firstTable;

    var links = new Dictionary<TrieNode2, TrieNode2>();
    foreach (var child in root.m_values)
    {
        TryLinks(child.Value, null, links);
    }

    // Fold every linked node into the node that points at it.
    foreach (var link in links)
    {
        link.Key.Merge(link.Value);
    }
}
/// <summary>
/// Inserts a word below this node, creating missing child nodes along the way.
/// </summary>
/// <param name="s">The word to insert. An empty string changes nothing.</param>
public void InsertWord(string s)
{
    var node = this;

    foreach (char ch in s)
    {
        TrieNode2 child = node[ch];
        if (child == null)
        {
            child = new TrieNode2();
            node[ch] = child;
        }

        node = child;
    }

    // The node reached by the final character remembers the complete word.
    if (s.Length > 0)
    {
        node.Word = s;
    }
}
/// <summary>
/// Recursively pairs each descendant of <paramref name="node"/> with a node reachable
/// by the same key from <paramref name="node2"/>, or from <c>_first</c> when
/// <paramref name="node2"/> is null.
/// </summary>
/// <param name="node">The node whose children are visited.</param>
/// <param name="node2">The node searched for matching keys, or <c>null</c> to search <c>_first</c>.</param>
/// <param name="links">Receives one entry per child for which a counterpart was found (child to counterpart).</param>
private void TryLinks(TrieNode2 node, TrieNode2 node2, Dictionary<TrieNode2, TrieNode2> links)
{
    foreach (var child in node.m_values)
    {
        TrieNode2 counterpart;
        bool found;

        if (node2 != null)
        {
            found = node2.TryGetValue(child.Key, out counterpart);
        }
        else
        {
            counterpart = _first[child.Key];
            found = counterpart != null;
        }

        if (found)
        {
            links[child.Value] = counterpart;
        }

        // Descend with whatever the lookup produced (possibly nothing), so deeper
        // levels fall back to _first again after a miss.
        TryLinks(child.Value, counterpart, links);
    }
}
/// <summary>
/// Replaces every keyword occurrence in the text with a masking character.
/// </summary>
/// <param name="text">The text to scan.</param>
/// <param name="replaceChar">The character written over each matched position.</param>
/// <returns>A copy of <paramref name="text"/> with matched ranges overwritten.</returns>
public string Replace(string text, char replaceChar = '*')
{
    var masked = new StringBuilder(text);
    TrieNode2 current = null;
    int position = 0;

    foreach (char ch in text)
    {
        TrieNode2 next;
        // Restart from the first-character table when the active node cannot consume this character.
        if (current == null || !current.TryGetValue(ch, out next))
        {
            next = _first[ch];
        }

        if (next != null && next.End)
        {
            // The length of the node's first result decides how far back to mask.
            var length = _keywords[next.Results[0]].Length;
            for (int k = position + 1 - length; k <= position; k++)
            {
                masked[k] = replaceChar;
            }
        }

        current = next;
        position++;
    }

    return masked.ToString();
}
/// <summary>
/// Rebuilds the lookup table <c>_first</c> from <c>_keywords</c>.
/// </summary>
/// <remarks>
/// Builds a temporary <c>TrieNode</c> trie, computes failure links level by level,
/// then copies the trie into <c>TrieNode2</c> nodes, adding to each node the
/// transitions and results of its direct failure node.
/// </remarks>
private void SetKeywords()
{
    var root = new TrieNode();
    var orderedNodes = new List<TrieNode>();
    orderedNodes.Add(root);

    // 1. Insert every keyword. A node whose Layer is still 0 has not been
    //    registered yet; it is stamped with its depth and bucketed by that depth.
    var nodesByLayer = new Dictionary<int, List<TrieNode>>();
    for (int k = 0; k < _keywords.Length; k++)
    {
        var word = _keywords[k];
        var cursor = root;
        for (int depth = 0; depth < word.Length; depth++)
        {
            cursor = cursor.Add((char)word[depth]);
            if (cursor.Layer != 0)
            {
                continue;
            }

            cursor.Layer = depth + 1;
            List<TrieNode> bucket;
            if (!nodesByLayer.TryGetValue(cursor.Layer, out bucket))
            {
                bucket = new List<TrieNode>();
                nodesByLayer[cursor.Layer] = bucket;
            }
            bucket.Add(cursor);
        }
        cursor.SetResults(k);
    }

    // 2. Flatten the buckets behind the root so each node can get a list position.
    foreach (var layer in nodesByLayer)
    {
        foreach (var layered in layer.Value)
        {
            orderedNodes.Add(layered);
        }
    }

    // 3. Failure links. Children of the root fail to the root.
    var frontier = new List<TrieNode>();
    foreach (TrieNode top in root.m_values.Values)
    {
        top.Failure = root;
        foreach (TrieNode below in top.m_values.Values)
        {
            frontier.Add(below);
        }
    }

    // Deeper nodes are processed breadth-first, one level per pass.
    while (frontier.Count > 0)
    {
        var nextFrontier = new List<TrieNode>();
        foreach (TrieNode current in frontier)
        {
            TrieNode fallback = current.Parent.Failure;
            char c = current.Char;
            // Follow the parent's failure chain until a node with a transition on c is found.
            while (fallback != null && !fallback.m_values.ContainsKey(c))
            {
                fallback = fallback.Failure;
            }

            if (fallback != null)
            {
                current.Failure = fallback.m_values[c];
                // Results of the failure target also apply to this node.
                foreach (var inherited in current.Failure.Results)
                {
                    current.SetResults(inherited);
                }
            }
            else
            {
                current.Failure = root;
            }

            foreach (TrieNode child in current.m_values.Values)
            {
                nextFrontier.Add(child);
            }
        }
        frontier = nextFrontier;
    }
    // The root's own link is assigned only now, so the chain walks above end in null at the root.
    root.Failure = root;

    // 4. Number the nodes and create one TrieNode2 per TrieNode at the same position.
    var compiled = new List<TrieNode2>();
    for (int i = 0; i < orderedNodes.Count; i++)
    {
        orderedNodes[i].Index = i;
        compiled.Add(new TrieNode2());
    }

    // 5. Copy transitions and results, then merge in the direct failure node's
    //    transitions (only for keys not already present) and its results.
    for (int i = 0; i < compiled.Count; i++)
    {
        var source = orderedNodes[i];
        var target = compiled[i];

        foreach (var edge in source.m_values)
        {
            target.Add(edge.Key, compiled[edge.Value.Index]);
        }
        foreach (var hit in source.Results)
        {
            target.SetResults(hit);
        }

        if (source.Failure == root)
        {
            continue;
        }

        foreach (var edge in source.Failure.m_values)
        {
            if (!target.HasKey(edge.Key))
            {
                target.Add(edge.Key, compiled[edge.Value.Index]);
            }
        }
        foreach (var hit in source.Failure.Results)
        {
            target.SetResults(hit);
        }
    }

    orderedNodes.Clear();

    // 6. Publish the root's transitions as a table indexed by character.
    var firstTable = new TrieNode2[char.MaxValue + 1];
    foreach (var entry in compiled[0].m_values)
    {
        firstTable[entry.Key] = entry.Value;
    }
    _first = firstTable;
}
/// <summary>
/// Checks whether any keyword occurs in <paramref name="text"/> such that every
/// position of the occurrence also agrees with the keyword's per-character data.
/// </summary>
/// <param name="text">The text walked through the keyword automaton.</param>
/// <param name="hz">
/// Characters compared against single-character keyword entries in the range
/// U+3400..U+9FD5; indexed with the same positions as <paramref name="text"/>.
/// </param>
/// <param name="pinyins">
/// Strings that must start with the keyword's entry for that position; indexed
/// with the same positions as <paramref name="text"/>.
/// </param>
/// <returns><c>true</c> when at least one candidate passes every position check; otherwise <c>false</c>.</returns>
public bool Find(string text, string hz, string[] pinyins)
{
    TrieNode2 ptr = null;
    for (int i = 0; i < text.Length; i++)
    {
        TrieNode2 tn;
        // Restart from the first-character table when the active node cannot consume this character.
        if (ptr == null || !ptr.TryGetValue(text[i], out tn))
        {
            tn = _first[text[i]];
        }

        if (tn != null && tn.End)
        {
            foreach (var result in tn.Results)
            {
                var keyword = _keywords[result];
                var keywordPinyins = _keywordPinyins[result];
                // The candidate occupies text[start..i].
                var start = i + 1 - keyword.Length;

                bool isok = true;
                for (int j = 0; j < keyword.Length; j++)
                {
                    var idx = start + j;
                    var py = keywordPinyins[j];

                    if (py.Length == 1 && py[0] >= 0x3400 && py[0] <= 0x9fd5)
                    {
                        // Single character in U+3400..U+9FD5: require the exact character.
                        if (hz[idx] != py[0])
                        {
                            isok = false;
                            break;
                        }
                    }
                    else if (!pinyins[idx].StartsWith(py, System.StringComparison.Ordinal))
                    {
                        // Ordinal prefix test: the outcome must not depend on the current culture.
                        isok = false;
                        break;
                    }
                }

                if (isok)
                {
                    return true;
                }
            }
        }

        ptr = tn;
    }

    return false;
}
/// <summary>
/// Rebuilds the lookup table <c>_first</c> from <c>_keywords</c>.
/// </summary>
/// <remarks>
/// Builds a temporary <c>TrieNode</c> trie, computes failure links in layer order,
/// then copies the trie into <c>TrieNode2</c> nodes, adding to each node the
/// transitions and results found along its whole failure chain.
/// </remarks>
private void SetKeywords()
{
    var root = new TrieNode();

    // 1. Insert every keyword. A node whose Layer is still 0 has not been
    //    registered yet; it is stamped with its depth and bucketed by that depth.
    var nodesByLayer = new Dictionary<int, List<TrieNode>>();
    for (int k = 0; k < _keywords.Length; k++)
    {
        var word = _keywords[k];
        var cursor = root;
        for (int depth = 0; depth < word.Length; depth++)
        {
            cursor = cursor.Add((char)word[depth]);
            if (cursor.Layer != 0)
            {
                continue;
            }

            cursor.Layer = depth + 1;
            List<TrieNode> bucket;
            if (!nodesByLayer.TryGetValue(cursor.Layer, out bucket))
            {
                bucket = new List<TrieNode>();
                nodesByLayer[cursor.Layer] = bucket;
            }
            bucket.Add(cursor);
        }
        cursor.SetResults(k);
    }

    // 2. Flatten: the root first, then the buckets in dictionary enumeration order.
    var orderedNodes = new List<TrieNode>();
    orderedNodes.Add(root);
    foreach (var layer in nodesByLayer)
    {
        foreach (var layered in layer.Value)
        {
            orderedNodes.Add(layered);
        }
    }

    // 3. Number the nodes and compute failure links in list order (the root is skipped).
    for (int i = 1; i < orderedNodes.Count; i++)
    {
        var current = orderedNodes[i];
        current.Index = i;

        TrieNode fallback = current.Parent.Failure;
        char c = current.Char;
        // Follow the parent's failure chain until a node with a transition on c is found.
        while (fallback != null && !fallback.m_values.ContainsKey(c))
        {
            fallback = fallback.Failure;
        }

        if (fallback != null)
        {
            current.Failure = fallback.m_values[c];
            // Results of the failure target also apply to this node.
            foreach (var inherited in current.Failure.Results)
            {
                current.SetResults(inherited);
            }
        }
        else
        {
            current.Failure = root;
        }
    }
    // The root's own link is assigned only now, so the chain walks above end in null at the root.
    root.Failure = root;

    // 4. Create one TrieNode2 per TrieNode at the same list position.
    var compiled = new List<TrieNode2>();
    for (int i = 0; i < orderedNodes.Count; i++)
    {
        compiled.Add(new TrieNode2());
    }

    // 5. Copy own transitions and results, then walk the failure chain up to the
    //    root, adding transitions for keys not yet present plus every result met.
    for (int i = 0; i < compiled.Count; i++)
    {
        var source = orderedNodes[i];
        var target = compiled[i];

        foreach (var edge in source.m_values)
        {
            target.Add(edge.Key, compiled[edge.Value.Index]);
        }
        foreach (var hit in source.Results)
        {
            target.SetResults(hit);
        }

        for (var fail = source.Failure; fail != root; fail = fail.Failure)
        {
            foreach (var edge in fail.m_values)
            {
                if (!target.HasKey(edge.Key))
                {
                    target.Add(edge.Key, compiled[edge.Value.Index]);
                }
            }
            foreach (var hit in fail.Results)
            {
                target.SetResults(hit);
            }
        }
    }

    orderedNodes.Clear();

    // 6. Publish the root's transitions as a table indexed by character.
    var firstTable = new TrieNode2[char.MaxValue + 1];
    foreach (var entry in compiled[0].m_values)
    {
        firstTable[entry.Key] = entry.Value;
    }
    _first = firstTable;
}
/// <summary>
/// Breaks <paramref name="s"/> into sentences made of dictionary words.
/// </summary>
/// <remarks>
/// Builds a trie of the dictionary, then scans the suffixes of <paramref name="s"/>
/// from shortest to longest and records, per start index, the words that begin there
/// and end either at the end of the string or at an index that already has words
/// recorded. The resulting map is handed to <c>GetSentences</c>, which is expected to
/// fill the sentence list (its implementation is outside this method — verify there).
/// </remarks>
/// <param name="s">The string to segment.</param>
/// <param name="wordDict">The allowed words.</param>
/// <returns>The sentences produced by <c>GetSentences</c>, or an empty list when the scan gives up early.</returns>
public IList<string> WordBreak5(string s, ISet<string> wordDict)
{
    var root = new TrieNode2();
    int maxLenWord = 0;
    bool endReachable = false;

    // 1. Construct the trie from the dictionary words and track the longest word.
    foreach (string word in wordDict)
    {
        if (word.Length > maxLenWord)
        {
            maxLenWord = word.Length;
        }
        root.InsertWord(word);
    }

    // Maps a start index to the words that start there; several words may share one index.
    var indexWordMap = new Dictionary<int, List<string>>();

    // 2. Go through the string, considering suffixes of increasing length.
    for (int j = s.Length - 1; j >= 0; j--)
    {
        int k = j;
        TrieNode2 n = root;
        while (n != null && n.Count != 0 && k < s.Length)
        {
            // 3. Walk the trie along s[j..k] and keep words whose end lines up with
            //    the end of the string or with an index that already has words.
            n = n[s[k]];
            if (n != null && n.Word != null)
            {
                int next = j + n.Word.Length;
                // ContainsKey instead of the indexer: Dictionary's indexer throws
                // KeyNotFoundException for a missing key rather than returning null.
                if (next == s.Length || indexWordMap.ContainsKey(next))
                {
                    List<string> wordsAtJ;
                    if (!indexWordMap.TryGetValue(j, out wordsAtJ))
                    {
                        wordsAtJ = new List<string>();
                        indexWordMap[j] = wordsAtJ;
                    }
                    wordsAtJ.Add(n.Word);
                }
            }

            if (n != null)
            {
                k++;
                if (k >= s.Length)
                {
                    endReachable = true;
                }
            }
        }

        // Give up once the scan has moved more than maxLenWord characters from the
        // end without any trie walk consuming the final character. An empty list is
        // returned rather than null.
        if (!endReachable && j < s.Length - maxLenWord)
        {
            return new List<string>();
        }
    }

    // The map now describes a graph from index 0 to the end of the string;
    // GetSentences traverses it to collect the sentences.
    List<string> ll = new List<string>();
    List<string> sentences = new List<string>();
    GetSentences(s.Length, indexWordMap, 0, ll, sentences);
    return new List<string>(sentences);
}