/// <summary>
/// Verifies POS-aware insertion and lookup on the singleton trie:
/// a lookup with the wrong POS misses, with the right POS hits a leaf.
/// </summary>
public void TestCreateBasicTrieTreeWithPOSType()
{
    TrieTree trie = TrieTree.GetInstance();
    trie.AddWord("测试", POSType.D_N);
    trie.AddWord("你好", POSType.UNKNOWN);
    trie.AddWord("我们", POSType.D_R);
    trie.AddWord("你们", POSType.D_R);

    // "测", "你" (shared by 你好/你们) and "我" => 3 root children.
    Assert.AreEqual(3, trie.Root.Children.Count);

    // Wrong POS must miss...
    Assert.IsNull(trie.GetNode("你们", (int)POSType.D_N));

    // ...right POS must hit a childless leaf.
    TrieTreeNode hit = trie.GetNode("你们", (int)POSType.D_R);
    Assert.IsNotNull(hit);
    Assert.AreEqual("你们", hit.Word);
    Assert.AreEqual(0, hit.Children.Count);

    Assert.IsNull(trie.GetNode("它们"));

    TrieTreeNode prefix = trie.GetNode('你');
    Assert.AreEqual("你", prefix.Word);
    Assert.AreEqual(2, prefix.Children.Count);

    prefix = trie.GetNode('你', (int)POSType.UNKNOWN);
    Assert.AreEqual("你", prefix.Word);

    Assert.AreEqual(1, trie.GetNode("我").Children.Count);
}
/// <summary>
/// Builds a trie node at depth <paramref name="k"/> holding an optional
/// leaf predicate and the two branch subtrees.
/// </summary>
internal TrieTree(int k, T leaf, TrieTree t0, TrieTree t1)
{
    this.t0 = t0;
    this.t1 = t1;
    this.k = k;
    this.leaf = leaf;
}
/// <summary>
/// Finds every word from <paramref name="words"/> that can be traced on the
/// board through adjacent cells (DFS + backtracking over a trie of the words).
/// Time: O(m*n*4^(k-1)); space: O(m*n).
/// </summary>
/// <param name="board">character grid to search; null or empty yields an empty list</param>
/// <param name="words">dictionary words to look for</param>
/// <returns>the words found on the board</returns>
public IList<string> FindWords(char[][] board, string[] words)
{
    // BUG FIX: the original guard used '&&', which dereferences a null board
    // (NullReferenceException) and never triggers for an empty one.
    // '||' short-circuits and covers both corner cases.
    if (board == null || board.Length == 0)
    {
        return(new List<string>());
    }

    bool[,] visited = new bool[board.Length, board[0].Length];

    // Build the trie from the dictionary words.
    TrieTree trie = new TrieTree(new TrieNode());
    foreach (string wd in words)
    {
        trie.InsertNode(wd);
    }

    IList<string> res = new List<string>();
    for (int i = 0; i < board.Length; i++)
    {
        for (int j = 0; j < board[i].Length; j++)
        {
            // Start a backtracking search from every cell.
            SearchWord(board, visited, res, trie.root, i, j);
        }
    }

    return(res);
}
/// <summary>
/// For all p: p is equivalent to Search(p).
/// For all p and q: if p is equivalent to q then Search(p) equals Search(q).
/// </summary>
/// <param name="p">given predicate</param>
public T Search(T p)
{
    // First predicate ever seen: it becomes the root leaf and its own id.
    if (tree == null)
    {
        tree = new TrieTree(0, p, null, null);
        idCache[p] = p;
        count = 1;
        return(p);
    }

    T id;
    if (idCache.TryGetValue(p, out id))
    {
        return(id);
    }

    id = Insert(tree, p);
    if (!idCache.ContainsKey(id))
    {
        //then p == id and p is in a new leaf
        count += 1;
    }
    idCache[p] = id;
    return(id);
}
/// <summary>
/// Adding the same word twice bumps its frequency; a longer word sharing the
/// prefix starts back at frequency one.
/// </summary>
public void TwoAnd()
{
    var trie = new TrieTree();

    Assert.AreEqual(1, trie.AddNodes("and").Frequence);
    Assert.AreEqual(2, trie.AddNodes("and").Frequence);
    Assert.AreEqual(1, trie.AddNodes("ande").Frequence);
}
/// <summary>
/// Creates the initial two-leaf trie: a depth-0 root whose branches hold the
/// algebra's False and True predicates.
/// </summary>
internal static TrieTree MkInitialTree(IBooleanAlgebra <T> algebra)
{
    // any element implies True and does not imply False
    var falseBranch = new TrieTree(1, algebra.False, null, null);
    var trueBranch = new TrieTree(1, algebra.True, null, null);
    return(new TrieTree(0, default(T), falseBranch, trueBranch));
}
/// <summary>
/// Four distinct words (three different first letters) each start at
/// frequency one.
/// </summary>
public void When_Root_Has_MoreThanOneNode()
{
    var trie = new TrieTree();

    foreach (string word in new[] { "and", "bne", "bnd", "cnee" })
    {
        Assert.AreEqual(1, trie.AddNodes(word).Frequence);
    }
}
/// <summary>
/// Serializes the lexicon to <paramref name="path"/>: meta information plus
/// the sections selected by <paramref name="contents"/> (forward/backward
/// word tries as double arrays, word and character frequency tables).
/// </summary>
/// <param name="path">output file path</param>
/// <param name="contents">which sections to include</param>
public void Build(string path, Contents contents)
{
    Hashtable lex = new Hashtable();

    // Dictionary meta information.
    Hashtable metainfo = new Hashtable();
    metainfo.Add("version", m_meta_info.Version);
    metainfo.Add("title", m_meta_info.Title);
    metainfo.Add("author", m_meta_info.Author);
    metainfo.Add("creator", m_meta_info.Creator);
    metainfo.Add("contents", m_meta_info.Contents);
    metainfo.Add("count", m_meta_info.Count);
    metainfo.Add("size", m_meta_info.Size);
    metainfo.Add("terminal", m_meta_info.Terminal);
    lex["metainfo"] = metainfo;

    List<string> words = m_word_list.Words;

    // Backward trie: every word is reversed before insertion.
    if ((contents & Contents.WordsBackward) != 0)
    {
        TrieTree tree_b = new TrieTree();
        for (int i = 0; i < words.Count; i++)
        {
            tree_b.AddString(this.ReverseString(words[i]));
        }

        ArrayList al_b = tree_b.GetDoubleArrays();
        lex["chars_b"] = al_b[0];
        lex["idxes_b"] = al_b[1];
    }

    // Forward trie: words inserted as-is.
    if ((contents & Contents.WordsForward) != 0)
    {
        TrieTree tree_f = new TrieTree();
        for (int i = 0; i < words.Count; i++)
        {
            tree_f.AddString(words[i]);
        }

        ArrayList al_f = tree_f.GetDoubleArrays();
        lex["chars_f"] = al_f[0];
        lex["idxes_f"] = al_f[1];
    }

    if ((contents & Contents.FreqsWords) != 0)
    {
        lex["freqs_words"] = m_freq_list.Words;
    }

    if ((contents & Contents.FreqsChars) != 0)
    {
        lex["freqs_chars"] = m_freq_list.Chars;
    }

    // SECURITY NOTE: BinaryFormatter is obsolete and unsafe to deserialize
    // with untrusted input; it is kept here only because the on-disk format
    // must stay readable by the existing loader. Consider migrating both
    // sides to a safe serializer.
    System.Runtime.Serialization.Formatters.Binary.BinaryFormatter formatter =
        new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter();

    // BUG FIX: the stream was previously not disposed when Serialize threw,
    // leaking the file handle; 'using' guarantees closure on every path.
    using (FileStream fs = new FileStream(path, FileMode.Create, FileAccess.Write))
    {
        formatter.Serialize(fs, lex);
    }
}
/// <summary>
/// Handles the RMMFetch command: parses "text[SOH]pos" from the command data,
/// probes the trie with progressively shorter tails of the text until a node
/// is found, and sends back either "0[SOH]" (miss) or
/// "1[SOH]word[SOH]frequency[SOH]posValue[SOH]" (hit).
/// </summary>
public override void ExecuteCommand(TrieTreeSession session, StringCommandInfo commandInfo)
{
    logger.Info("receive command RMMFetch, argument: " + commandInfo.Data);

    Stopwatch sw = Stopwatch.StartNew();

    var tt = TrieTree.GetInstance();
    string[] splits = commandInfo.Data.Split(new char[] { SOH });
    string text = splits[0];
    int pos = splits.Length > 1 ? Int32.Parse(splits[1]) : 0;

    int maxlength = text.Length;
    TrieTreeNode node = null;
    for (int j = 0; j < maxlength; j++)
    {
        // Drop one leading character per iteration; stop at the first hit.
        node = tt.GetNode(text.Substring(j, maxlength - j), pos);
        if (node != null)
        {
            break;
        }
    }

    sw.Stop();
    logger.InfoFormat("timer: {0} ms", sw.ElapsedMilliseconds);

    string result;
    if (node == null)
    {
        result = string.Format("0{0}", SOH);
        logger.InfoFormat("return result: not found");
    }
    else
    {
        result = string.Format("1{0}{1}{0}{2}{0}{3}{0}", SOH, node.Word, node.Frequency, node.POSValue);
        logger.InfoFormat("return result: {0}, {1}", node.Word, (POSType)node.POSValue);
    }

    session.SendResponse(result);
}
/// <summary>
/// Statistics stay null — even after adding a word — until Optimize() runs.
/// </summary>
public void NoStatsUntilOptimize()
{
    var trie = new TrieTree();
    Assert.IsNull(trie.Statistics);

    trie.AddWord("aa");
    Assert.IsNull(trie.Statistics);

    trie.Optimize();
    Assert.IsNotNull(trie.Statistics);
}
/// <summary>
/// Loads a trie from a Pangu dictionary file via a mocked provider setting
/// and checks a known entry and its POS value.
/// </summary>
public void TestLoadTrieTreeFromPanguDict()
{
    var settingMock = new Mock <IDataProviderSetting>();
    settingMock.Setup(s => s.Uri).Returns(DictLoadTest.UnitTestProjectFolder + @"\Data\panguDict.dct");
    settingMock.Setup(s => s.ProviderType).Returns("PanguDictProvider");

    var provider = new PanguDictProvider(settingMock.Object);
    TrieTree trie = TrieFactory.LoadFromDataProvider(provider);

    TrieTreeNode node = trie.GetNode("测试");
    Assert.IsNotNull(node);
    Assert.AreEqual(POSType.D_N, (POSType)node.POSValue);
}
/// <summary>
/// An optimized empty tree reports all-zero statistics except for the single
/// root node.
/// </summary>
public void StatsWhenEmptyTree()
{
    var trie = new TrieTree();
    trie.Optimize();

    var stats = trie.Statistics;

    Assert.AreEqual(0, stats.AverageChildCount, "AverageChildCount");
    Assert.AreEqual(0, stats.AverageWordLength, "AverageWordLength");
    Assert.AreEqual(0, stats.CummulativeWordLength, "CummulativeWordLength");
    Assert.AreEqual(0, stats.MaxWordLength, "MaxWordLength");
    Assert.AreEqual(1, stats.NodeCount, "NodeCount");
    Assert.AreEqual(0, stats.WordCount, "WordCount");
}
/// <summary>
/// Loads a trie from a local MongoDB collection via a mocked provider setting
/// and sanity-checks the root. Requires a live mongod on port 28001.
/// </summary>
public void TestLoadTrieTreeFromMongoDB()
{
    var settingMock = new Mock <IDataProviderSetting>();
    settingMock.Setup(s => s.Uri).Returns("mongodb://localhost:28001");
    settingMock.Setup(s => s.ProviderType).Returns("MongoDBDataProvider");
    settingMock.Setup(s => s.DBName).Returns("nameResearch");
    settingMock.Setup(s => s.CollectionName).Returns("placeNames");

    TrieTree trie = TrieFactory.LoadFromDataProvider(new MongoDBDataProvider(settingMock.Object));

    Assert.IsNotNull(trie);
    Assert.IsNotNull(trie.Root);
    Assert.IsTrue(trie.Root.Children.Count > 100);
}
/// <summary>
/// Loads a pattern-substitution setting file. The first line is the target;
/// the rest are pairs of lines of two-hex-digit byte tokens — a pattern line
/// followed immediately by its substitution line. Empty lines and lines
/// starting with '#' are skipped.
/// </summary>
/// <param name="file">path of the setting file</param>
public Setting(string file)
{
    using (StreamReader sr = System.IO.File.OpenText(file))
    {
        target = sr.ReadLine();
        string s;
        pattern = new TrieTree <byte>();
        // NOTE(review): [0-9a-zA-Z] matches all letters, not just hex digits;
        // presumably oct() tolerates/maps them — confirm before tightening.
        Regex rex = new Regex(@"[0-9a-zA-Z][0-9a-zA-Z]");
        while ((s = sr.ReadLine()) != null)
        {
            if (s == "" || s[0] == '#')
            {
                continue;
            }

            byte b;

            // Pattern line: each token becomes one byte on the trie path.
            Match m = rex.Match(s);
            pattern.setRoot();
            while (m.Success)
            {
                b = oct(m.Value[0]);
                b <<= 4;
                b += oct(m.Value[1]);
                pattern.add(ref b);
                m = m.NextMatch();
            }

            // Substitution line follows the pattern line.
            s = sr.ReadLine();
            if (s == null)
            {
                // BUG FIX: a truncated file (pattern line with no following
                // substitution line) used to crash inside Regex.Match(null);
                // treat it as end of input instead.
                break;
            }

            m = rex.Match(s);
            while (m.Success)
            {
                b = oct(m.Value[0]);
                b <<= 4;
                b += oct(m.Value[1]);
                pattern.addSubstitue(b);
                m = m.NextMatch();
            }
        }
    }
}
/// <summary>
/// Bulk-inserts an embedded word list (letters-only lines) and spot-checks a
/// known word afterwards.
/// </summary>
public void Performance()
{
    var trie = new TrieTree();
    string words = GetEmbededResource(this.GetType().Assembly, "words.txt");

    foreach (string line in words.Split('\n'))
    {
        if (string.IsNullOrEmpty(line))
        {
            continue;
        }

        // Only pure-letter entries are inserted; each is new => frequency 1.
        if (line.Trim().All(char.IsLetter))
        {
            Assert.AreEqual(1, trie.AddNodes(line.Trim()).Frequence);
        }
    }

    Assert.AreEqual(1, trie.Search("good").Frequence);
}
/// <summary>
/// Rebuilds the case-insensitive person trie: gathers a PersonLink per
/// display name, account display name and distinct account username, then
/// inserts them all as keywords.
/// </summary>
void UpdateTrie(bool refresh_query)
{
    trie = new TrieTree(false /* !case_sensitive */);
    var people = new List <PersonLink> ();

    Logger.Log("Loading up the person trie, Part 1...");
    foreach (Person person in Galago.Global.GetPeople(Galago.Origin.Remote, refresh_query))
    {
        if (person.DisplayName != null)
        {
            people.Add(new PersonLink(LinkType.PersonDisplayName, person));
        }

        foreach (Account account in person.GetAccounts(true))
        {
            if (account.DisplayName != null)
            {
                people.Add(new PersonLink(LinkType.AccountDisplayName, account));
            }
            // Skip usernames that merely duplicate the display name.
            if (account.Username != null && account.Username != account.DisplayName)
            {
                people.Add(new PersonLink(LinkType.AccountUserName, account));
            }
        }
    }

    Logger.Log("Loading up the person trie, Part 2...");
    foreach (PersonLink plink in people)
    {
        trie.AddKeyword(plink.LinkText, plink);
    }

    Logger.Log("Done.");
}
/// <summary>
/// Verifies the aggregate statistics after inserting five words and
/// optimizing the tree.
/// </summary>
public void StatsWhenSomeWords()
{
    var tree = new TrieTree();
    tree.AddWord("aaa");
    tree.AddWord("abba");
    tree.AddWord("aba");
    tree.AddWord("abb");
    tree.AddWord("abc");
    tree.Optimize();
    var stats = tree.Statistics;
    // NOTE(review): 8 / 5 and 16 / 5 are integer divisions, so the expected
    // values are 1 and 3. If these averages are fractional properties,
    // 8.0 / 5 and 16.0 / 5 were probably intended — confirm the property
    // types before changing (AreEqual would also need the double overload).
    Assert.AreEqual(8 / 5, stats.AverageChildCount, "AverageChildCount");
    Assert.AreEqual(16 / 5, stats.AverageWordLength, "AverageWordLength");
    Assert.AreEqual(16, stats.CummulativeWordLength, "CummulativeWordLength");
    Assert.AreEqual(4, stats.MaxWordLength, "MaxWordLength");
    Assert.AreEqual(9, stats.NodeCount, "NodeCount");
    Assert.AreEqual(5, stats.WordCount, "WordCount");
}
/// <summary>
/// Returns the longest word obtainable from the list by feeding every word
/// into a trie and delegating the answer to the trie itself.
/// Approach: 1) "feed" the trie with the data source; 2) ask the trie for
/// the result. Time: one pass to insert plus a full backtracking traversal
/// of all trie nodes; space: the trie. The bigger the data source, the more
/// this approach pays off.
/// </summary>
public string LongestWord(string[] words)
{
    var trie = new TrieTree();
    foreach (var word in words)
    {
        trie.AddWord(word);
    }
    return(trie.GetLongestWord());
}
/// <summary>
/// Verifies basic insertion and lookup on the singleton trie (no POS data).
/// </summary>
public void TestCreateBasicTrieTree()
{
    TrieTree trie = TrieTree.GetInstance();
    trie.AddWord("测试");
    trie.AddWord("你好");
    trie.AddWord("我们");
    trie.AddWord("你们");

    // "测", "你" (shared by 你好/你们) and "我" => 3 root children.
    Assert.AreEqual(3, trie.Root.Children.Count);

    TrieTreeNode node = trie.GetNode("你们");
    Assert.IsNotNull(node);
    Assert.AreEqual("你们", node.Word);
    Assert.AreEqual(0, node.Children.Count);

    Assert.IsNull(trie.GetNode("它们"));

    TrieTreeNode prefix = trie.GetNode('你');
    Assert.AreEqual("你", prefix.Word);
    Assert.AreEqual(2, prefix.Children.Count);

    Assert.AreEqual(1, trie.GetNode("我").Children.Count);
}
/// <summary>
/// Save the gram data into stream.
/// Layout: header (language, gram count, amplifier, state counts, section
/// offsets), then the grapheme trie dictionary (2-byte aligned), then the
/// low-level gram states (GraphId/Prob/Backoff/ReferenceIndex), then the
/// final-level gram states (GraphId/Prob only).
/// </summary>
/// <param name="stream">Binary stream.</param>
public void SaveToBinary(Stream stream)
{
    if (stream == null)
    {
        throw new ArgumentNullException("stream");
    }
    if (_grammarCount == 0)
    {
        throw new InvalidDataException("There is no nGram data");
    }
    using (TrieTree graphemeTrieTree = new TrieTree(_graphemeDictionary))
    {
        byte[] graphemeDictData = graphemeTrieTree.GetTrieData();
        int graphemeDictLength = graphemeDictData.Length;
        // Keep data alignment as 2 bytes
        if (graphemeDictLength % 2 != 0)
        {
            graphemeDictLength++;
        }
        // Save NGramData.
        // States: [0] is a sentinel pointing at index 1; one extra sentinel
        // is inserted at the start of the final gram level, hence "+ 2".
        GrammarState[] grammarStates = new GrammarState[_grammarCount + 2];
        grammarStates[0] = new GrammarState();
        grammarStates[0].ReferenceIndex = 1;
        Dictionary<string, int> grammarIndex = new Dictionary<string, int>();
        int stateIndex = 1;
        int finalGramStateIndex = 0;
        for (int gram = 1; gram <= _maxNgram; gram++)
        {
            string lastReferredGrammar = string.Empty;
            if (gram == _maxNgram)
            {
                // Remember where the final gram level starts; its states are
                // later indexed relative to this position.
                finalGramStateIndex = stateIndex;
                grammarStates[stateIndex++] = new GrammarState();
            }
            foreach (string grammar in _nGramData[gram].Keys)
            {
                string[] graphemes = grammar.Split(GrammarSeparator, StringSplitOptions.RemoveEmptyEntries);
                // last grapheme
                string lastGrapheme = graphemes[graphemes.Length - 1];
                int len = 0;
                int graphemeId = graphemeTrieTree.FindLongest(lastGrapheme, out len);
                Debug.Assert(graphemeId != -1);
                Debug.Assert(!grammarIndex.ContainsKey(grammar));
                // Save the state index for easily query
                grammarIndex.Add(grammar, stateIndex);
                grammarStates[stateIndex] = new GrammarState();
                grammarStates[stateIndex].GraphId = (GrapId)graphemeId;
                // Convert the probability into ProbabilityInt type with amplifier
                if (_nGramData[gram][grammar].Probability * _probabilityAmplifier < ProbabilityInt.MinValue)
                {
                    grammarStates[stateIndex].Prob = ProbabilityInt.MinValue;
                }
                else
                {
                    grammarStates[stateIndex].Prob = (ProbabilityInt)(_nGramData[gram][grammar].Probability * _probabilityAmplifier);
                }
                // NOTE(review): this clamp compares against short.MinValue
                // while the probability clamp above uses
                // ProbabilityInt.MinValue — confirm the two thresholds are
                // intentionally different.
                if (_nGramData[gram][grammar].Backoff * _probabilityAmplifier < short.MinValue)
                {
                    grammarStates[stateIndex].Backoff = ProbabilityInt.MinValue;
                }
                else
                {
                    grammarStates[stateIndex].Backoff = (ProbabilityInt)(_nGramData[gram][grammar].Backoff * _probabilityAmplifier);
                }
                // set the reference index for lower level gram data
                if (gram != 1)
                {
                    // The (gram-1)-length prefix names the lower-level state.
                    string referredGrammar = graphemes[0];
                    for (int i = 1; i < graphemes.Length - 1; i++)
                    {
                        referredGrammar = referredGrammar + " " + graphemes[i];
                    }
                    if (!referredGrammar.Equals(lastReferredGrammar, StringComparison.Ordinal))
                    {
                        // Update the reference index for the lower level gram
                        lastReferredGrammar = referredGrammar;
                        Debug.Assert(grammarIndex.ContainsKey(lastReferredGrammar));
                        int referredIndex = grammarIndex[lastReferredGrammar];
                        Debug.Assert(grammarStates[referredIndex] != null);
                        if (gram != _maxNgram)
                        {
                            grammarStates[referredIndex].ReferenceIndex = (ReferenceIndex)stateIndex;
                        }
                        else
                        {
                            // Final-level states are addressed relative to the
                            // start of the final block.
                            grammarStates[referredIndex].ReferenceIndex = (ReferenceIndex)(stateIndex - finalGramStateIndex);
                        }
                    }
                }
                stateIndex++;
            }
        }
        // Save the model into binary stream.
        // NOTE(review): the writer is deliberately not disposed — presumably
        // to leave the caller's stream open; confirm before wrapping in using.
        BinaryWriter bw = new BinaryWriter(stream);
        {
            // Write the language ID
            bw.Write((ushort)_language);
            // Write the Gram Count
            bw.Write((ushort)this._maxNgram);
            // Write the Probability Amplifier
            bw.Write((int)_probabilityAmplifier);
            // Write the grammar state number
            bw.Write((uint)finalGramStateIndex);
            // Write the Final grammar state number
            bw.Write((uint)(_grammarCount + 2 - finalGramStateIndex));
            int headerSize = sizeof(ushort) + sizeof(ushort) + sizeof(int) + sizeof(uint) + sizeof(uint) + sizeof(uint) + sizeof(uint) + sizeof(uint);
            // Write the offset of Dictionary
            bw.Write((uint)headerSize);
            // Write the offset of Grammar State
            bw.Write((uint)(headerSize + graphemeDictLength));
            // Write the offset of Final Grammar State
            bw.Write((uint)(headerSize + graphemeDictLength + (finalGramStateIndex * (sizeof(GrapId) + sizeof(ProbabilityInt) + sizeof(ProbabilityInt) + sizeof(ReferenceIndex)))));
            // Write the grapheme Trie Dictionary
            bw.Write(graphemeDictData, 0, graphemeDictData.Length);
            // Add the data alignment for grapheme Trie Dictionary
            for (int i = graphemeDictData.Length; i < graphemeDictLength; i++)
            {
                bw.Write((byte)0);
            }
            // Write the grammar states for low level gram
            for (int i = 0; i < finalGramStateIndex; i++)
            {
                bw.Write(grammarStates[i].GraphId);
                bw.Write(grammarStates[i].Prob);
                bw.Write(grammarStates[i].Backoff);
                bw.Write(grammarStates[i].ReferenceIndex);
            }
            // Write the grammar state for final level gram
            for (int i = finalGramStateIndex; i < _grammarCount + 2; i++)
            {
                bw.Write(grammarStates[i].GraphId);
                bw.Write(grammarStates[i].Prob);
            }
        }
    }
}
/// <summary>
/// Sorts the candidate itemsets into lexicographic order by round-tripping
/// them through a trie.
/// </summary>
/// <param name="candidate">the itemsets to sort</param>
/// <returns>a new Candidates holding the same itemsets in lexicographic order</returns>
static Candidates<int> SortInTrie(Candidates<int> candidate)
{
    // Build the trie: each itemset's data is a path, its value the payload.
    TrieTree<int> trieTree = new TrieTree<int>();
    foreach (var itemset in candidate.ItemSets)
    {
        trieTree.AddNode(itemset.data, itemset.value);
    }
    // Drain the trie back out; children of the root come out in sorted order.
    Candidates<int> result = new Candidates<int>();
    trieTree.ClearTag(trieTree.Root);
    foreach (var node in trieTree.Root.children)
    {
        // GetNodeData appears to enumerate one leaf path per call (stateful,
        // driven by the tags cleared above) and return "" once the subtree is
        // exhausted — TODO confirm against TrieTree<T>.GetNodeData.
        string str = trieTree.GetNodeData(node);
        while (str != "")
        {
            // The space-separated string is the item sequence followed by its
            // support value as the last token.
            string[] strData = str.Split(new char[] { ' ' });
            int[] data = new int[strData.Length - 1];
            for (int i = 0; i < strData.Length - 1; i++)
                data[i] = Convert.ToInt32(strData[i]);
            result.AddCandidate(data, Convert.ToInt32(strData[strData.Length - 1]));
            str = trieTree.GetNodeData(node);
        }
    }
    return result;
}
/// <summary>
/// Inserts predicate <paramref name="pred"/> into the trie rooted at
/// <paramref name="tr"/> and returns its canonical representative: an
/// existing equivalent leaf when one is found, otherwise
/// <paramref name="pred"/> itself stored in a new leaf. Atoms act as
/// decision bits: at depth k the branch taken is whether atoms[k] evaluates
/// true on the predicate.
/// </summary>
T Insert(TrieTree tr, T pred)
{
    if (tr.IsLeaf)
    {
        var leaf = tr.leaf;
        if (tr.k < atoms.Count)
        {
            #region extend the trie using atoms[tr.k]
            // This leaf predates atom k: push the existing leaf one level
            // down along its own branch, then route pred accordingly.
            var vk = atoms[tr.k];
            tr.leaf = default(T);
            if (algebra.EvaluateAtom(vk, leaf))
            {
                tr.t1 = new TrieTree(tr.k + 1, leaf, null, null);
                if (algebra.EvaluateAtom(vk, pred))
                {
                    return(Insert(tr.t1, pred));
                }
                else
                {
                    //k is smallest such that vk distinguishes leaf and pred
                    tr.t0 = new TrieTree(tr.k + 1, pred, null, null);
                    return(pred); //pred is new
                }
            }
            else
            {
                tr.t0 = new TrieTree(tr.k + 1, leaf, null, null);
                if (algebra.EvaluateAtom(vk, pred))
                {
                    //k is smallest such that vk distinguishes leaf and pred
                    tr.t1 = new TrieTree(tr.k + 1, pred, null, null);
                    return(pred); //pred is new
                }
                else
                {
                    return(Insert(tr.t0, pred));
                }
            }
            #endregion
        }
        else
        {
            #region the existing atoms did not distinguish pred from leaf
            // Look for a witness of inequivalence in the symmetric difference.
            var symdiff = algebra.MkSymmetricDifference(leaf, pred);
            var atom = algebra.GetAtom(symdiff);
            if (atom.Equals(algebra.False))
            {
                return(leaf); //pred is equivalent to leaf
            }
            else
            {
                //split the leaf based on the new atom
                atoms.Add(atom);
                if (algebra.EvaluateAtom(atom, leaf))
                {
                    tr.t0 = new TrieTree(tr.k + 1, pred, null, null);
                    tr.t1 = new TrieTree(tr.k + 1, leaf, null, null);
                }
                else
                {
                    tr.t0 = new TrieTree(tr.k + 1, leaf, null, null);
                    tr.t1 = new TrieTree(tr.k + 1, pred, null, null);
                }
                tr.leaf = default(T);
                return(pred); //pred is new
            }
            #endregion
        }
    }
    else
    {
        #region in a nonleaf the invariant holds: tr.k < atoms.Count
        // Route pred down the branch selected by atom k, creating a fresh
        // leaf when that branch does not exist yet.
        if (algebra.EvaluateAtom(atoms[tr.k], pred))
        {
            if (tr.t1 == null)
            {
                tr.t1 = new TrieTree(tr.k + 1, pred, null, null);
                return(pred);
            }
            else
            {
                return(Insert(tr.t1, pred));
            }
        }
        else
        {
            if (tr.t0 == null)
            {
                tr.t0 = new TrieTree(tr.k + 1, pred, null, null);
                return(pred);
            }
            else
            {
                return(Insert(tr.t0, pred));
            }
        }
        #endregion
    }
}
/// <summary>
/// Builds a dictionary file: a 16-byte header (signature, version, reserved),
/// a 21-byte offset table, then the sections selected by
/// <paramref name="contents"/> (backward/forward word tries serialized as
/// double arrays, and a character frequency table). Offset slots in the
/// header are patched in place as each section is written.
/// </summary>
/// <param name="path">output file path</param>
/// <param name="contents">which sections the dictionary should contain</param>
public void Build(string path, Contents contents)
{
    // NOTE(review): neither stream nor writer is in a using block, so a
    // failure mid-write leaks the file handle — consider wrapping both.
    FileStream stream = new FileStream(path, FileMode.Create, FileAccess.Write, FileShare.Write);
    BinaryWriter writer = new BinaryWriter(stream, Encoding.UTF8);
    // File header, 8 bytes + 8 bytes = 16 bytes
    writer.Write(LEX_SIGNATURE); // dictionary signature, 4 bytes
    writer.Write(LEX_THIS_VERSION); // dictionary version, 4 bytes
    writer.Write(new byte[8]); // reserved, 8 bytes
    // Offset table: 4 * (1-byte marker + 4-byte offset) + 1 = 21 bytes.
    // The zero offsets are placeholders patched below via Seek.
    writer.Write(OFS_WORDS_BACKWARD); // 1 byte
    writer.Write(0); // backward-words section start offset, 4 bytes (slot at byte 17)
    writer.Write(OFS_WORDS_FORWARD); // 1 byte
    writer.Write(0); // forward-words section start offset, 4 bytes (slot at byte 22)
    writer.Write(OFS_FREQS_WORDS); // 1 byte
    writer.Write(0); // word-frequency section start offset, 4 bytes (slot at byte 27)
    writer.Write(OFS_FREQS_CHARS); // 1 byte
    writer.Write(0); // char-frequency section start offset, 4 bytes (slot at byte 32)
    writer.Write(OFS_END);
    int ofs_cur = (int)writer.BaseStream.Position;
    List <string> words = m_word_list.Words;
    // Backward words: each word is reversed before insertion into the trie.
    if ((contents & Contents.WordsBackward) != 0)
    {
        TrieTree tree_b = new TrieTree();
        for (int i = 0; i < words.Count; i++)
        {
            tree_b.AddString(this.ReverseString(words[i]));
        }
        ArrayList al_b = tree_b.GetDoubleArrays();
        string[] chars_b = (string[])al_b[0];
        int[][] idxes_b = (int[][])al_b[1];
        int[] ofses_b = new int[idxes_b.Length];
        // Patch the backward-section offset slot in the header.
        writer.BaseStream.Seek(17, SeekOrigin.Begin);
        writer.Write(ofs_cur);
        writer.BaseStream.Seek(0, SeekOrigin.End);
        // First pass: compute the absolute file offset of every node
        // (UTF-8 chars + 4 bytes per index slot).
        for (int i = 0; i < idxes_b.Length; i++)
        {
            ofses_b[i] = ofs_cur;
            ofs_cur += Encoding.UTF8.GetByteCount(chars_b[i]) + idxes_b[i].Length * 4;
        }
        // Second pass: write each node's length-1 prefix, its characters,
        // then the absolute offsets of its children. Index slot 0 is not
        // written as an offset — its 4 bytes are taken by the length prefix,
        // keeping the pass-1 size arithmetic consistent.
        for (int i = 0; i < chars_b.Length; i++)
        {
            byte[] buf = Encoding.UTF8.GetBytes(chars_b[i]);
            writer.Write(buf.Length - 1);
            writer.Write(buf);
            for (int j = 1; j < idxes_b[i].Length; j++)
            {
                writer.Write(ofses_b[idxes_b[i][j]]);
            }
        }
    }
    // Forward words: inserted as-is; same two-pass layout as above.
    if ((contents & Contents.WordsForward) != 0)
    {
        TrieTree tree_f = new TrieTree();
        for (int i = 0; i < words.Count; i++)
        {
            tree_f.AddString(words[i]);
        }
        ArrayList al_f = tree_f.GetDoubleArrays();
        string[] chars_f = (string[])al_f[0];
        int[][] idxes_f = (int[][])al_f[1];
        int[] ofses_f = new int[idxes_f.Length];
        // Patch the forward-section offset slot in the header.
        writer.BaseStream.Seek(22, SeekOrigin.Begin);
        writer.Write(ofs_cur);
        writer.BaseStream.Seek(0, SeekOrigin.End);
        for (int i = 0; i < idxes_f.Length; i++)
        {
            ofses_f[i] = ofs_cur;
            ofs_cur += Encoding.UTF8.GetByteCount(chars_f[i]) + idxes_f[i].Length * 4;
        }
        for (int i = 0; i < chars_f.Length; i++)
        {
            byte[] buf = Encoding.UTF8.GetBytes(chars_f[i]);
            writer.Write(buf.Length - 1);
            writer.Write(buf);
            for (int j = 1; j < idxes_f[i].Length; j++)
            {
                writer.Write(ofses_f[idxes_f[i][j]]);
            }
        }
    }
    // Word frequencies: not implemented yet.
    if ((contents & Contents.FreqsWords) != 0)
    {
        // lex["freqs_words"] = m_freq_list.Words;
    }
    // Character frequencies: (length, UTF-8 key, float) records terminated
    // by a zero length.
    if ((contents & Contents.FreqsChars) != 0)
    {
        writer.BaseStream.Seek(32, SeekOrigin.Begin);
        writer.Write(ofs_cur);
        writer.BaseStream.Seek(0, SeekOrigin.End);
        Dictionary <string, float> chars = m_freq_list.Chars;
        foreach (string key in chars.Keys)
        {
            byte[] buf = Encoding.UTF8.GetBytes(key);
            writer.Write(buf.Length);
            writer.Write(buf);
            writer.Write(chars[key]);
        }
        writer.Write(0);
        ofs_cur = (int)writer.BaseStream.Position;
    }
    writer.Flush();
    writer.Close();
}
/// <summary>
/// Creates a fresh trie before each test.
/// </summary>
public TrieTreeTests()
{
    this.tree = new TrieTree();
}
/// <summary>
/// Writes all frequent itemsets to result.txt in lexicographic order: one
/// line per itemset — the items separated by spaces, followed by the support
/// ratio formatted to three decimals.
/// </summary>
static void OutputInTrieTree()
{
    // Collect every itemset, then let SortInTrie order them lexicographically.
    Candidates<int> result = new Candidates<int>();
    foreach (var candi in frequentItemsets)
    {
        foreach (var itemset in candi.ItemSets)
        {
            result.AddCandidate(itemset);
        }
    }
    result = SortInTrie(result);

    // BUG FIX: removed an unused TrieTree<int> local the original allocated
    // and never touched; 'using' now guarantees the writer is closed even if
    // a write throws.
    using (StreamWriter sw = new StreamWriter(addrRoot + @"result.txt"))
    {
        foreach (var itemset in result.ItemSets)
        {
            foreach (var item in itemset.data)
                sw.Write(item.ToString() + " ");
            sw.WriteLine(((double)itemset.value / (double)transactions.Count).ToString("0.000"));
        }
    }
}