public void TestCreateBasicTrieTreeWithPOSType()
        {
            // Verifies that words added with a part-of-speech tag can be looked up
            // both with and without that tag.
            TrieTree trie = TrieTree.GetInstance();

            trie.AddWord("测试", POSType.D_N);
            trie.AddWord("你好", POSType.UNKNOWN);
            trie.AddWord("我们", POSType.D_R);
            trie.AddWord("你们", POSType.D_R);

            // The four words start with three distinct characters.
            Assert.AreEqual(3, trie.Root.Children.Count);

            // Looking a word up under a POS it was not added with yields nothing.
            TrieTreeNode wrongPosNode = trie.GetNode("你们", (int)POSType.D_N);
            Assert.IsNull(wrongPosNode);

            // Looking it up under the POS it was added with finds a childless node.
            TrieTreeNode matchingPosNode = trie.GetNode("你们", (int)POSType.D_R);
            Assert.IsNotNull(matchingPosNode);
            Assert.AreEqual("你们", matchingPosNode.Word);
            Assert.IsTrue(matchingPosNode.Children.Count == 0);

            // A word that was never added is not found.
            Assert.IsNull(trie.GetNode("它们"));

            // The shared first character of "你好" and "你们" has two children.
            TrieTreeNode prefixNode = trie.GetNode('你');
            Assert.AreEqual("你", prefixNode.Word);
            Assert.AreEqual(2, prefixNode.Children.Count);

            prefixNode = trie.GetNode('你', (int)POSType.UNKNOWN);
            Assert.AreEqual("你", prefixNode.Word);

            Assert.AreEqual(1, trie.GetNode("我").Children.Count);
        }
Example #2
0
 /// <summary>
 /// Creates a trie node from its four components.
 /// </summary>
 /// <param name="k">value stored in the node's <c>k</c> field</param>
 /// <param name="leaf">value stored in the node's <c>leaf</c> field</param>
 /// <param name="t0">subtree stored in the node's <c>t0</c> field (may be null)</param>
 /// <param name="t1">subtree stored in the node's <c>t1</c> field (may be null)</param>
 internal TrieTree(int k, T leaf, TrieTree t0, TrieTree t1)
 {
     // Children first, then the payload; the four assignments are independent.
     this.t0 = t0;
     this.t1 = t1;
     this.leaf = leaf;
     this.k = k;
 }
    /// <summary>
    /// Finds the words from <paramref name="words"/> that can be traced on
    /// <paramref name="board"/>, by inserting every word into a trie and starting
    /// a search from every cell.
    /// </summary>
    /// <param name="board">Jagged character grid; may be null or empty.</param>
    /// <param name="words">Candidate words; may be null.</param>
    /// <returns>The list filled in by <c>SearchWord</c>; empty when there is no board or no words.</returns>
    public IList <string> FindWords(char[][] board, string[] words)
    {
        // dfs + backtracking
        // tc:O(m*n*4^(k-1)); sc:O(m*n)

        // Corner case. The original tested "board == null && board.Length == 0",
        // which dereferences a null board and lets an empty board through to
        // board[0] below; the checks must be OR-ed.
        if (board == null || board.Length == 0 || words == null)
        {
            return(new List <string>());
        }
        bool[,] visited = new bool[board.Length, board[0].Length];

        TrieTree trie = new TrieTree(new TrieNode()); // build the trie tree

        foreach (string wd in words)                  // insert words into trie tree
        {
            trie.InsertNode(wd);
        }

        IList <string> res = new List <string>();

        // Try every cell as the starting point of a word.
        for (int i = 0; i < board.Length; i++)
        {
            for (int j = 0; j < board[i].Length; j++)
            {
                SearchWord(board, visited, res, trie.root, i, j);
            }
        }
        return(res);
    }
Example #4
0
 /// <summary>
 /// Returns the representative of the equivalence class of <paramref name="p"/>.
 /// For all p: p is equivalent to Search(p).
 /// For all p and q: if p is equivalent to q then Search(p) equals Search(q).
 /// </summary>
 /// <param name="p">given predicate</param>
 /// <returns>the cached or newly determined representative of p</returns>
 public T Search(T p)
 {
     // Very first predicate: it becomes the sole leaf and represents itself.
     if (tree == null)
     {
         tree       = new TrieTree(0, p, null, null);
         idCache[p] = p;
         count      = 1;
         return(p);
     }

     // Already answered before: reuse the cached representative.
     T representative;
     if (idCache.TryGetValue(p, out representative))
     {
         return(representative);
     }

     representative = Insert(tree, p);
     bool isNewLeaf = !idCache.ContainsKey(representative);
     if (isNewLeaf)
     {
         //then p == representative and p is in a new leaf
         count += 1;
     }
     idCache[p] = representative;
     return(representative);
 }
Example #5
0
        public void TwoAnd()
        {
            // Adding the same word twice is expected to bump its frequency,
            // while a longer word sharing the prefix starts at 1.
            var trie = new TrieTree();

            var firstAnd = trie.AddNodes("and");
            Assert.AreEqual(1, firstAnd.Frequence);

            var secondAnd = trie.AddNodes("and");
            Assert.AreEqual(2, secondAnd.Frequence);

            var ande = trie.AddNodes("ande");
            Assert.AreEqual(1, ande.Frequence);
        }
Example #6
0
            /// <summary>
            /// Builds a two-leaf tree whose <c>t0</c> leaf holds the algebra's False
            /// and whose <c>t1</c> leaf holds the algebra's True.
            /// </summary>
            /// <param name="algebra">Boolean algebra supplying the False and True elements.</param>
            /// <returns>The root of the new tree.</returns>
            internal static TrieTree MkInitialTree(IBooleanAlgebra <T> algebra)
            {
                TrieTree falseLeaf = new TrieTree(1, algebra.False, null, null);
                TrieTree trueLeaf  = new TrieTree(1, algebra.True, null, null);

                // any element implies True and does not imply False
                return(new TrieTree(0, default(T), falseLeaf, trueLeaf));
            }
Example #7
0
        public void When_Root_Has_MoreThanOneNode()
        {
            // Four distinct words, some sharing prefixes; each is expected to be
            // reported with a frequency of 1 when first added.
            var trie = new TrieTree();

            var and = trie.AddNodes("and");
            Assert.AreEqual(1, and.Frequence);

            var bne = trie.AddNodes("bne");
            Assert.AreEqual(1, bne.Frequence);

            var bnd = trie.AddNodes("bnd");
            Assert.AreEqual(1, bnd.Frequence);

            var cnee = trie.AddNodes("cnee");
            Assert.AreEqual(1, cnee.Frequence);
        }
Example #8
0
        /// <summary>
        /// Builds a lexicon as a <see cref="Hashtable"/> and serializes it to
        /// <paramref name="path"/> with <c>BinaryFormatter</c>.
        /// </summary>
        /// <param name="path">Destination file; created or overwritten.</param>
        /// <param name="contents">Flags selecting which sections are included in the lexicon.</param>
        public void Build(string path, Contents contents)
        {
            Hashtable lex = new Hashtable();

            // Meta information is always written, regardless of the requested contents.
            Hashtable metainfo = new Hashtable();

            metainfo.Add("version", m_meta_info.Version);
            metainfo.Add("title", m_meta_info.Title);
            metainfo.Add("author", m_meta_info.Author);
            metainfo.Add("creator", m_meta_info.Creator);
            metainfo.Add("contents", m_meta_info.Contents);
            metainfo.Add("count", m_meta_info.Count);
            metainfo.Add("size", m_meta_info.Size);
            metainfo.Add("terminal", m_meta_info.Terminal);

            lex["metainfo"] = metainfo;

            List <string> words = m_word_list.Words;

            // Backward section: a trie built from the reversed words.
            if ((contents & Contents.WordsBackward) != 0)
            {
                TrieTree tree_b = new TrieTree();
                for (int i = 0; i < words.Count; i++)
                {
                    tree_b.AddString(this.ReverseString(words[i]));
                }
                ArrayList al_b = tree_b.GetDoubleArrays();
                lex["chars_b"] = al_b[0];
                lex["idxes_b"] = al_b[1];
            }

            // Forward section: a trie built from the words as they are.
            if ((contents & Contents.WordsForward) != 0)
            {
                TrieTree tree_f = new TrieTree();
                for (int i = 0; i < words.Count; i++)
                {
                    tree_f.AddString(words[i]);
                }
                ArrayList al_f = tree_f.GetDoubleArrays();
                lex["chars_f"] = al_f[0];
                lex["idxes_f"] = al_f[1];
            }
            if ((contents & Contents.FreqsWords) != 0)
            {
                lex["freqs_words"] = m_freq_list.Words;
            }
            if ((contents & Contents.FreqsChars) != 0)
            {
                lex["freqs_chars"] = m_freq_list.Chars;
            }

            // NOTE(review): BinaryFormatter is obsolete and unsafe for untrusted data.
            // It is kept here because replacing it would change the on-disk format
            // that existing readers expect.
            System.Runtime.Serialization.Formatters.Binary.BinaryFormatter formatter = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter();

            // "using" guarantees the file handle is released even if Serialize throws.
            using (FileStream fs = new FileStream(path, FileMode.Create, FileAccess.Write))
            {
                formatter.Serialize(fs, lex);
            }
        }
Example #9
0
        /// <summary>
        /// Handles the RMMFetch command: looks for the longest suffix of the given
        /// text that exists in the shared trie and sends the match (or a
        /// not-found marker) back to the session.
        /// </summary>
        /// <param name="session">Session that receives the response.</param>
        /// <param name="commandInfo">Command whose Data is "text[SOH pos]".</param>
        public override void ExecuteCommand(TrieTreeSession session, StringCommandInfo commandInfo)
        {
            logger.Info("receive command RMMFetch, argument: " + commandInfo.Data);
            Stopwatch timer = new Stopwatch();

            timer.Start();
            var trie = TrieTree.GetInstance();

            // Data layout: the text, optionally followed by SOH and a POS value.
            string[] fields = commandInfo.Data.Split(new char[] { SOH });
            string   text   = fields[0];
            int      pos    = fields.Length > 1 ? Int32.Parse(fields[1]) : 0;

            // Drop leading characters one at a time until a suffix is found in the trie.
            TrieTreeNode match = null;

            for (int start = 0; start < text.Length && match == null; start++)
            {
                match = trie.GetNode(text.Substring(start), pos);
            }
            timer.Stop();
            logger.InfoFormat("timer: {0} ms", timer.ElapsedMilliseconds);

            string response;

            if (match == null)
            {
                response = string.Format("0{0}", SOH);
                logger.InfoFormat("return result: not found");
            }
            else
            {
                response = string.Format("1{0}{1}{0}{2}{0}{3}{0}", SOH, match.Word, match.Frequency, match.POSValue);
                logger.InfoFormat("return result: {0}, {1}", match.Word, (POSType)match.POSValue);
            }
            session.SendResponse(response);
        }
        public void NoStatsUntilOptimize()
        {
            // Statistics are expected to stay null until Optimize() is called.
            TrieTree trie = new TrieTree();
            Assert.IsNull(trie.Statistics);

            // Adding a word alone does not produce statistics.
            trie.AddWord("aa");
            Assert.IsNull(trie.Statistics);

            // Optimizing does.
            trie.Optimize();
            Assert.IsNotNull(trie.Statistics);
        }
Example #11
0
        public void TestLoadTrieTreeFromPanguDict()
        {
            // Point a mocked provider setting at the Pangu dictionary shipped with the test data.
            string dictPath = DictLoadTest.UnitTestProjectFolder + @"\Data\panguDict.dct";
            var    setting  = new Mock <IDataProviderSetting>();

            setting.Setup(s => s.Uri).Returns(dictPath);
            setting.Setup(s => s.ProviderType).Returns("PanguDictProvider");

            // Load the trie through the provider and look up a word expected to be in the dictionary.
            var      provider = new PanguDictProvider(setting.Object);
            TrieTree trie     = TrieFactory.LoadFromDataProvider(provider);

            TrieTreeNode node = trie.GetNode("测试");

            Assert.IsNotNull(node);
            Assert.AreEqual(POSType.D_N, (POSType)node.POSValue);
        }
        public void StatsWhenEmptyTree()
        {
            // An optimized tree with no words is expected to report a single node
            // and zero for every word-related figure.
            TrieTree trie = new TrieTree();
            trie.Optimize();

            var statistics = trie.Statistics;

            Assert.AreEqual(0, statistics.AverageChildCount, "AverageChildCount");
            Assert.AreEqual(0, statistics.AverageWordLength, "AverageWordLength");
            Assert.AreEqual(0, statistics.CummulativeWordLength, "CummulativeWordLength");
            Assert.AreEqual(0, statistics.MaxWordLength, "MaxWordLength");
            Assert.AreEqual(1, statistics.NodeCount, "NodeCount");
            Assert.AreEqual(0, statistics.WordCount, "WordCount");
        }
Example #13
0
        public void TestLoadTrieTreeFromMongoDB()
        {
            // Mocked settings pointing at a MongoDB instance on localhost:28001.
            // NOTE(review): the test presumably needs that server to be running - confirm.
            var setting = new Mock <IDataProviderSetting>();

            setting.Setup(s => s.Uri).Returns("mongodb://localhost:28001");
            setting.Setup(s => s.ProviderType).Returns("MongoDBDataProvider");
            setting.Setup(s => s.DBName).Returns("nameResearch");
            setting.Setup(s => s.CollectionName).Returns("placeNames");

            var      provider = new MongoDBDataProvider(setting.Object);
            TrieTree trie     = TrieFactory.LoadFromDataProvider(provider);

            // The loaded trie must exist and have more than 100 root children.
            Assert.IsNotNull(trie);
            Assert.IsNotNull(trie.Root);
            Assert.IsTrue(trie.Root.Children.Count > 100);
        }
Example #14
0
        /// <summary>
        /// Loads a setting file. The first line is stored in <c>target</c>; the
        /// remaining lines are read in pairs: a line of two-character tokens that
        /// are added to <c>pattern</c>, followed by a line of two-character tokens
        /// that are registered through <c>addSubstitue</c>. Empty lines and lines
        /// starting with '#' are skipped where a pattern line is expected.
        /// </summary>
        /// <param name="file">Path of the setting file to read.</param>
        public Setting(string file)
        {
            using (StreamReader sr = System.IO.File.OpenText(file))
            {
                target = sr.ReadLine();
                string s;
                pattern = new TrieTree <byte>();
                Regex rex = new Regex(@"[0-9a-zA-Z][0-9a-zA-Z]");
                while ((s = sr.ReadLine()) != null)
                {
                    if (s == "" || s[0] == '#')
                    {
                        continue;
                    }
                    byte  b;
                    Match m = rex.Match(s);

                    // Each pattern starts again from the root of the trie.
                    pattern.setRoot();
                    while (m.Success)
                    {
                        // Combine the two characters into one byte: first char is
                        // the high nibble, second char the low nibble.
                        // NOTE(review): oct() presumably maps a digit character to
                        // its numeric value - confirm against its definition.
                        b   = oct(m.Value[0]);
                        b <<= 4;
                        b  += oct(m.Value[1]);
                        pattern.add(ref b);

                        m = m.NextMatch();
                    }

                    s = sr.ReadLine();
                    if (s == null)
                    {
                        // The file ended right after a pattern line. Regex.Match(null)
                        // would throw ArgumentNullException, so stop reading instead.
                        break;
                    }
                    m = rex.Match(s);
                    while (m.Success)
                    {
                        b   = oct(m.Value[0]);
                        b <<= 4;
                        b  += oct(m.Value[1]);
                        pattern.addSubstitue(b);

                        m = m.NextMatch();
                    }
                }
            }
        }
Example #15
0
        public void Performance()
        {
            // Loads every purely alphabetic line of the embedded word list into the
            // trie, expecting each to be new, then looks up one known word.
            var    trie     = new TrieTree();
            string wordList = GetEmbededResource(this.GetType().Assembly, "words.txt");

            foreach (var line in wordList.Split('\n'))
            {
                if (string.IsNullOrEmpty(line))
                {
                    continue;
                }

                string candidate = line.Trim();
                if (!candidate.All(c => char.IsLetter(c)))
                {
                    continue;
                }

                Assert.AreEqual(1, trie.AddNodes(candidate).Frequence);
            }

            Assert.AreEqual(1, trie.Search("good").Frequence);
        }
Example #16
0
        /// <summary>
        /// Rebuilds the person trie: collects a link for every remote person's
        /// display name and for each of their accounts' display names and user
        /// names, then adds every link to a fresh case-insensitive trie.
        /// </summary>
        /// <param name="refresh_query">Passed through to <c>Galago.Global.GetPeople</c>.</param>
        void UpdateTrie(bool refresh_query)
        {
            trie = new TrieTree(false /* !case_sensitive */);
            List <PersonLink> links = new List <PersonLink> ();

            Logger.Log("Loading up the person trie, Part 1...");

            var remotePeople = Galago.Global.GetPeople(Galago.Origin.Remote, refresh_query);

            foreach (Person person in remotePeople)
            {
                if (person.DisplayName != null)
                {
                    links.Add(new PersonLink(LinkType.PersonDisplayName, person));
                }

                foreach (Account account in person.GetAccounts(true))
                {
                    if (account.DisplayName != null)
                    {
                        links.Add(new PersonLink(LinkType.AccountDisplayName, account));
                    }

                    // Skip the user name when it is missing or merely repeats the display name.
                    if (account.Username == null ||
                        account.Username == account.DisplayName)
                    {
                        continue;
                    }

                    links.Add(new PersonLink(LinkType.AccountUserName, account));
                }
            }

            Logger.Log("Loading up the person trie, Part 2...");

            foreach (PersonLink link in links)
            {
                trie.AddKeyword(link.LinkText, link);
            }

            Logger.Log("Done.");
        }
        public void StatsWhenSomeWords()
        {
            // Five words with a combined length of 16 characters (3 + 4 + 3 + 3 + 3).
            TrieTree trie = new TrieTree();

            foreach (string word in new[] { "aaa", "abba", "aba", "abb", "abc" })
            {
                trie.AddWord(word);
            }
            trie.Optimize();

            var statistics = trie.Statistics;

            // NOTE(review): "8 / 5" and "16 / 5" are integer divisions and evaluate
            // to 1 and 3. Confirm that truncated averages are what the statistics
            // are meant to report.
            Assert.AreEqual(8 / 5, statistics.AverageChildCount, "AverageChildCount");
            Assert.AreEqual(16 / 5, statistics.AverageWordLength, "AverageWordLength");
            Assert.AreEqual(16, statistics.CummulativeWordLength, "CummulativeWordLength");
            Assert.AreEqual(4, statistics.MaxWordLength, "MaxWordLength");
            Assert.AreEqual(9, statistics.NodeCount, "NodeCount");
            Assert.AreEqual(5, statistics.WordCount, "WordCount");
        }
Example #18
0
        public string LongestWord(string[] words)
        {
            /*
             * Approach:
             * 1. Feed the input words into a TrieTree.
             * 2. Ask the TrieTree for the result that satisfies the problem.
             *
             * Time complexity: every input word is visited once to build the trie;
             * producing the answer then requires backtracking, i.e. another pass
             * over all nodes of the tree.
             * Space complexity: dominated by the storage used by the TrieTree.
             *
             * This approach pays off more the larger the input is.
             */

            TrieTree trie = new TrieTree();

            for (int index = 0; index < words.Length; index++)
            {
                trie.AddWord(words[index]);
            }

            string longest = trie.GetLongestWord();
            return(longest);
        }
Example #19
0
        public void TestCreateBasicTrieTree()
        {
            // Verifies lookups of whole words and of a shared first character
            // after four words have been added.
            TrieTree trie = TrieTree.GetInstance();

            trie.AddWord("测试");
            trie.AddWord("你好");
            trie.AddWord("我们");
            trie.AddWord("你们");

            // The four words start with three distinct characters.
            Assert.AreEqual(3, trie.Root.Children.Count);

            // A complete word is found and has no children.
            TrieTreeNode wordNode = trie.GetNode("你们");
            Assert.IsNotNull(wordNode);
            Assert.AreEqual("你们", wordNode.Word);
            Assert.IsTrue(wordNode.Children.Count == 0);

            // A word that was never added is not found.
            Assert.IsNull(trie.GetNode("它们"));

            // The shared first character of "你好" and "你们" has two children.
            TrieTreeNode prefixNode = trie.GetNode('你');
            Assert.AreEqual("你", prefixNode.Word);
            Assert.AreEqual(2, prefixNode.Children.Count);

            Assert.AreEqual(1, trie.GetNode("我").Children.Count);
        }
Example #20
0
        /// <summary>
        /// Save the gram data into stream.
        /// Layout written: a 24-byte header, the grapheme trie dictionary padded to
        /// an even length, the grammar states of the lower-order grams (four fields
        /// each), then the grammar states of the highest-order gram (two fields each).
        /// The stream is not closed by this method.
        /// </summary>
        /// <param name="stream">Binary stream.</param>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
        /// <exception cref="InvalidDataException">No n-gram data has been loaded.</exception>
        public void SaveToBinary(Stream stream)
        {
            if (stream == null)
            {
                throw new ArgumentNullException("stream");
            }

            if (_grammarCount == 0)
            {
                throw new InvalidDataException("There is no nGram data");
            }

            using (TrieTree graphemeTrieTree = new TrieTree(_graphemeDictionary))
            {
                byte[] graphemeDictData = graphemeTrieTree.GetTrieData();
                int graphemeDictLength = graphemeDictData.Length;

                // Keep data alignment as 2 bytes
                if (graphemeDictLength % 2 != 0)
                {
                    graphemeDictLength++;
                }

                // Save NGramData
                // Two extra slots: index 0 is a root state, and one placeholder state
                // is inserted in front of the highest-order grams.
                GrammarState[] grammarStates = new GrammarState[_grammarCount + 2];
                grammarStates[0] = new GrammarState();
                grammarStates[0].ReferenceIndex = 1;

                Dictionary<string, int> grammarIndex = new Dictionary<string, int>();
                int stateIndex = 1;
                int finalGramStateIndex = 0;
                for (int gram = 1; gram <= _maxNgram; gram++)
                {
                    string lastReferredGrammar = string.Empty;
                    if (gram == _maxNgram)
                    {
                        // Remember where the highest-order section starts and reserve
                        // its placeholder state.
                        finalGramStateIndex = stateIndex;
                        grammarStates[stateIndex++] = new GrammarState();
                    }

                    foreach (string grammar in _nGramData[gram].Keys)
                    {
                        string[] graphemes = grammar.Split(GrammarSeparator, StringSplitOptions.RemoveEmptyEntries);

                        // last grapheme
                        string lastGrapheme = graphemes[graphemes.Length - 1];
                        int len = 0;
                        int graphemeId = graphemeTrieTree.FindLongest(lastGrapheme, out len);
                        Debug.Assert(graphemeId != -1);
                        Debug.Assert(!grammarIndex.ContainsKey(grammar));

                        // Save the state index for easily query
                        grammarIndex.Add(grammar, stateIndex);
                        grammarStates[stateIndex] = new GrammarState();
                        grammarStates[stateIndex].GraphId = (GrapId)graphemeId;

                        // Convert the probability into ProbabilityInt type with amplifier
                        if (_nGramData[gram][grammar].Probability * _probabilityAmplifier < ProbabilityInt.MinValue)
                        {
                            grammarStates[stateIndex].Prob = ProbabilityInt.MinValue;
                        }
                        else
                        {
                            grammarStates[stateIndex].Prob = (ProbabilityInt)(_nGramData[gram][grammar].Probability * _probabilityAmplifier);
                        }

                        // Same clamping for the backoff weight. The lower bound is
                        // ProbabilityInt.MinValue (previously short.MinValue) so that
                        // the comparison and the clamped value use the same type.
                        if (_nGramData[gram][grammar].Backoff * _probabilityAmplifier < ProbabilityInt.MinValue)
                        {
                            grammarStates[stateIndex].Backoff = ProbabilityInt.MinValue;
                        }
                        else
                        {
                            grammarStates[stateIndex].Backoff = (ProbabilityInt)(_nGramData[gram][grammar].Backoff * _probabilityAmplifier);
                        }

                        // set the reference index for lower level gram data
                        if (gram != 1)
                        {
                            // The referred grammar is this grammar without its last grapheme.
                            string referredGrammar = graphemes[0];
                            for (int i = 1; i < graphemes.Length - 1; i++)
                            {
                                referredGrammar = referredGrammar + " " + graphemes[i];
                            }

                            if (!referredGrammar.Equals(lastReferredGrammar, StringComparison.Ordinal))
                            {
                                // Update the reference index for the lower level gram
                                lastReferredGrammar = referredGrammar;
                                Debug.Assert(grammarIndex.ContainsKey(lastReferredGrammar));
                                int referredIndex = grammarIndex[lastReferredGrammar];
                                Debug.Assert(grammarStates[referredIndex] != null);
                                if (gram != _maxNgram)
                                {
                                    grammarStates[referredIndex].ReferenceIndex = (ReferenceIndex)stateIndex;
                                }
                                else
                                {
                                    // Highest-order states are indexed relative to the
                                    // start of their own section.
                                    grammarStates[referredIndex].ReferenceIndex = (ReferenceIndex)(stateIndex - finalGramStateIndex);
                                }
                            }
                        }

                        stateIndex++;
                    }
                }

                // Save the model into binary stream
                // The writer is deliberately not disposed, so the caller's stream stays open.
                BinaryWriter bw = new BinaryWriter(stream);
                {
                    // Write the language ID
                    bw.Write((ushort)_language);

                    // Write the Gram Count
                    bw.Write((ushort)this._maxNgram);

                    // Write the Probability Amplifier
                    bw.Write((int)_probabilityAmplifier);

                    // Write the grammar state number
                    bw.Write((uint)finalGramStateIndex);

                    // Write the Final grammar state number
                    bw.Write((uint)(_grammarCount + 2 - finalGramStateIndex));

                    // Size of the eight header fields: the five above plus the three offsets below.
                    int headerSize = sizeof(ushort) + sizeof(ushort) + sizeof(int) +
                        sizeof(uint) + sizeof(uint) +
                        sizeof(uint) + sizeof(uint) + sizeof(uint);

                    // Write the offset of Dictionary
                    bw.Write((uint)headerSize);

                    // Write the offset of Grammar State
                    bw.Write((uint)(headerSize + graphemeDictLength));

                    // Write the offset of Final Grammar State
                    bw.Write((uint)(headerSize + graphemeDictLength + (finalGramStateIndex *
                        (sizeof(GrapId) + sizeof(ProbabilityInt) + sizeof(ProbabilityInt) + sizeof(ReferenceIndex)))));

                    // Write the grapheme Trie Dictionary
                    bw.Write(graphemeDictData, 0, graphemeDictData.Length);

                    // Add the data alignment for grapheme Trie Dictionary
                    for (int i = graphemeDictData.Length; i < graphemeDictLength; i++)
                    {
                        bw.Write((byte)0);
                    }

                    // Write the grammar states for low level gram
                    for (int i = 0; i < finalGramStateIndex; i++)
                    {
                        bw.Write(grammarStates[i].GraphId);
                        bw.Write(grammarStates[i].Prob);
                        bw.Write(grammarStates[i].Backoff);
                        bw.Write(grammarStates[i].ReferenceIndex);
                    }

                    // Write the grammar state for final level gram
                    // (only the grapheme id and probability are stored for these)
                    for (int i = finalGramStateIndex; i < _grammarCount + 2; i++)
                    {
                        bw.Write(grammarStates[i].GraphId);
                        bw.Write(grammarStates[i].Prob);
                    }
                }
            }
        }
 /// <summary>
 /// Sorts the candidates into lexicographic order.
 /// </summary>
 /// <param name="candidate">candidates to sort</param>
 /// <returns>a new candidate set in lexicographic order</returns>
 static Candidates<int> SortInTrie(Candidates<int> candidate)
 {
     // Build the trie from every item set.
     TrieTree<int> trie = new TrieTree<int>();
     foreach (var itemset in candidate.ItemSets)
     {
         trie.AddNode(itemset.data, itemset.value);
     }

     // Read the item sets back out of the trie.
     Candidates<int> sorted = new Candidates<int>();
     trie.ClearTag(trie.Root);
     foreach (var child in trie.Root.children)
     {
         // GetNodeData is called repeatedly for the same child until it returns
         // an empty string.
         for (string line = trie.GetNodeData(child); line != ""; line = trie.GetNodeData(child))
         {
             // Space-separated integers: all but the last are the items,
             // the last one is the value.
             string[] tokens = line.Split(new char[] { ' ' });
             int lastIndex = tokens.Length - 1;
             int[] items = new int[lastIndex];
             for (int i = 0; i < lastIndex; i++)
             {
                 items[i] = Convert.ToInt32(tokens[i]);
             }
             sorted.AddCandidate(items, Convert.ToInt32(tokens[lastIndex]));
         }
     }
     return sorted;
 }
Example #22
0
 /// <summary>
 /// Inserts <paramref name="pred"/> into the trie rooted at <paramref name="tr"/>.
 /// Returns an existing leaf when no atom can be found that separates it from
 /// pred, otherwise returns pred itself after placing it in a new leaf.
 /// Branching convention: at depth k, t1 holds predicates for which
 /// EvaluateAtom(atoms[k], ·) is true and t0 those for which it is false.
 /// </summary>
 /// <param name="tr">subtree to insert into; modified in place</param>
 /// <param name="pred">predicate to insert</param>
 /// <returns>the leaf that represents pred</returns>
 T Insert(TrieTree tr, T pred)
 {
     if (tr.IsLeaf)
     {
         var leaf = tr.leaf;
         if (tr.k < atoms.Count)
         {
             #region extend the trie using atoms[tr.k]
             // An atom already exists for this depth: push the current leaf one
             // level down on the side chosen by that atom, then place pred.
             var vk = atoms[tr.k];
             tr.leaf = default(T);
             if (algebra.EvaluateAtom(vk, leaf))
             {
                 tr.t1 = new TrieTree(tr.k + 1, leaf, null, null);
                 if (algebra.EvaluateAtom(vk, pred))
                 {
                     // pred falls on the same side as the old leaf: keep descending
                     return(Insert(tr.t1, pred));
                 }
                 else
                 {
                     //k is smallest such that vk distinguishes leaf and pred
                     tr.t0 = new TrieTree(tr.k + 1, pred, null, null);
                     return(pred); //pred is new
                 }
             }
             else
             {
                 tr.t0 = new TrieTree(tr.k + 1, leaf, null, null);
                 if (algebra.EvaluateAtom(vk, pred))
                 {
                     //k is smallest such that vk distinguishes leaf and pred
                     tr.t1 = new TrieTree(tr.k + 1, pred, null, null);
                     return(pred); //pred is new
                 }
                 else
                 {
                     // pred falls on the same side as the old leaf: keep descending
                     return(Insert(tr.t0, pred));
                 }
             }
             #endregion
         }
         else
         {
             #region the existing atoms did not distinguish pred from leaf
             // Ask the algebra for an atom of the symmetric difference; if that
             // atom equals False, the leaf and pred are treated as equivalent.
             var symdiff = algebra.MkSymmetricDifference(leaf, pred);
             var atom    = algebra.GetAtom(symdiff);
             if (atom.Equals(algebra.False))
             {
                 return(leaf);  //pred is equivalent to leaf
             }
             else
             {
                 //split the leaf based on the new atom
                 atoms.Add(atom);
                 if (algebra.EvaluateAtom(atom, leaf))
                 {
                     tr.t0 = new TrieTree(tr.k + 1, pred, null, null);
                     tr.t1 = new TrieTree(tr.k + 1, leaf, null, null);
                 }
                 else
                 {
                     tr.t0 = new TrieTree(tr.k + 1, leaf, null, null);
                     tr.t1 = new TrieTree(tr.k + 1, pred, null, null);
                 }
                 tr.leaf = default(T);
                 return(pred); //pred is new
             }
             #endregion
         }
     }
     else
     {
         #region in a nonleaf the invariant holds: tr.k < atoms.Count
         // Follow the branch chosen by atoms[tr.k]; a missing child means pred
         // becomes a new leaf there.
         if (algebra.EvaluateAtom(atoms[tr.k], pred))
         {
             if (tr.t1 == null)
             {
                 tr.t1 = new TrieTree(tr.k + 1, pred, null, null);
                 return(pred);
             }
             else
             {
                 return(Insert(tr.t1, pred));
             }
         }
         else
         {
             if (tr.t0 == null)
             {
                 tr.t0 = new TrieTree(tr.k + 1, pred, null, null);
                 return(pred);
             }
             else
             {
                 return(Insert(tr.t0, pred));
             }
         }
         #endregion
     }
 }
Example #23
0
        /// <summary>
        /// Builds a lexicon file in the binary format: a fixed header, an offset
        /// table with one slot per section, then the requested sections.
        /// </summary>
        /// <param name="path">Destination file; created or overwritten.</param>
        /// <param name="contents">Flags selecting which sections the lexicon contains.</param>
        public void Build(string path, Contents contents)
        {
            // Positions of the 4-byte offset slots inside the offset table. The table
            // starts after the 16-byte header and each slot follows a 1-byte tag.
            const int BackwardOffsetSlot  = 17;
            const int ForwardOffsetSlot   = 22;
            const int CharFreqsOffsetSlot = 32;

            // "using" releases the file handle even when building a section throws.
            using (FileStream stream = new FileStream(path, FileMode.Create, FileAccess.Write, FileShare.Write))
            using (BinaryWriter writer = new BinaryWriter(stream, Encoding.UTF8))
            {
                // File identification, 8 bytes + 8 bytes = 16 bytes
                writer.Write(LEX_SIGNATURE);    // lexicon signature, 4 bytes
                writer.Write(LEX_THIS_VERSION); // lexicon version, 4 bytes
                writer.Write(new byte[8]);      // reserved, 8 bytes

                // Offset table, 5 * 4 + 1 = 21 bytes
                writer.Write(OFS_WORDS_BACKWARD); // 1 byte
                writer.Write(0);                  // start of backward words, 4 bytes
                writer.Write(OFS_WORDS_FORWARD);  // 1 byte
                writer.Write(0);                  // start of forward words, 4 bytes
                writer.Write(OFS_FREQS_WORDS);    // 1 byte
                writer.Write(0);                  // start of word frequencies, 4 bytes
                writer.Write(OFS_FREQS_CHARS);    // 1 byte
                writer.Write(0);                  // start of character frequencies, 4 bytes
                writer.Write(OFS_END);

                int ofs_cur = (int)writer.BaseStream.Position;

                List <string> words = m_word_list.Words;

                // Backward words
                if ((contents & Contents.WordsBackward) != 0)
                {
                    TrieTree tree_b = new TrieTree();
                    for (int i = 0; i < words.Count; i++)
                    {
                        tree_b.AddString(this.ReverseString(words[i]));
                    }

                    ArrayList al_b    = tree_b.GetDoubleArrays();
                    string[]  chars_b = (string[])al_b[0];
                    int[][]   idxes_b = (int[][])al_b[1];
                    int[]     ofses_b = new int[idxes_b.Length];

                    // Patch the section start into the offset table, then return to the end.
                    writer.BaseStream.Seek(BackwardOffsetSlot, SeekOrigin.Begin);
                    writer.Write(ofs_cur);
                    writer.BaseStream.Seek(0, SeekOrigin.End);

                    // First pass: compute the file offset of every record. A record is
                    // a 4-byte length, the UTF-8 bytes, and (Length - 1) 4-byte links,
                    // which adds up to byteCount + Length * 4.
                    for (int i = 0; i < idxes_b.Length; i++)
                    {
                        ofses_b[i] = ofs_cur;
                        ofs_cur   += Encoding.UTF8.GetByteCount(chars_b[i]) + idxes_b[i].Length * 4;
                    }

                    // Second pass: write the records, translating node indexes to file offsets.
                    for (int i = 0; i < chars_b.Length; i++)
                    {
                        byte[] buf = Encoding.UTF8.GetBytes(chars_b[i]);
                        writer.Write(buf.Length - 1);
                        writer.Write(buf);
                        for (int j = 1; j < idxes_b[i].Length; j++)
                        {
                            writer.Write(ofses_b[idxes_b[i][j]]);
                        }
                    }
                }

                // Forward words (same record layout as the backward section)
                if ((contents & Contents.WordsForward) != 0)
                {
                    TrieTree tree_f = new TrieTree();
                    for (int i = 0; i < words.Count; i++)
                    {
                        tree_f.AddString(words[i]);
                    }

                    ArrayList al_f    = tree_f.GetDoubleArrays();
                    string[]  chars_f = (string[])al_f[0];
                    int[][]   idxes_f = (int[][])al_f[1];
                    int[]     ofses_f = new int[idxes_f.Length];

                    writer.BaseStream.Seek(ForwardOffsetSlot, SeekOrigin.Begin);
                    writer.Write(ofs_cur);
                    writer.BaseStream.Seek(0, SeekOrigin.End);

                    for (int i = 0; i < idxes_f.Length; i++)
                    {
                        ofses_f[i] = ofs_cur;
                        ofs_cur   += Encoding.UTF8.GetByteCount(chars_f[i]) + idxes_f[i].Length * 4;
                    }

                    for (int i = 0; i < chars_f.Length; i++)
                    {
                        byte[] buf = Encoding.UTF8.GetBytes(chars_f[i]);
                        writer.Write(buf.Length - 1);
                        writer.Write(buf);
                        for (int j = 1; j < idxes_f[i].Length; j++)
                        {
                            writer.Write(ofses_f[idxes_f[i][j]]);
                        }
                    }
                }

                // Word frequencies
                if ((contents & Contents.FreqsWords) != 0)
                {
                    // Not written by this format: the section is intentionally left
                    // empty and its offset slot stays 0.
                }

                // Character frequencies
                if ((contents & Contents.FreqsChars) != 0)
                {
                    writer.BaseStream.Seek(CharFreqsOffsetSlot, SeekOrigin.Begin);
                    writer.Write(ofs_cur);
                    writer.BaseStream.Seek(0, SeekOrigin.End);

                    Dictionary <string, float> chars = m_freq_list.Chars;

                    // One record per character: byte length, UTF-8 bytes, frequency.
                    foreach (string key in chars.Keys)
                    {
                        byte[] buf = Encoding.UTF8.GetBytes(key);
                        writer.Write(buf.Length);
                        writer.Write(buf);
                        writer.Write(chars[key]);
                    }

                    // A zero length terminates the list.
                    writer.Write(0);

                    ofs_cur = (int)writer.BaseStream.Position;
                }

                writer.Flush();
            }
        }
 /// <summary>
 /// Creates the test fixture with a new, empty <c>TrieTree</c> assigned to <c>tree</c>.
 /// </summary>
 public TrieTreeTests()
 {
     tree = new TrieTree();
 }
 /// <summary>
 /// Writes all frequent item sets to "result.txt" under <c>addrRoot</c> in
 /// lexicographic order. Each line holds the items separated by spaces,
 /// followed by the item set's value divided by the number of transactions,
 /// formatted with three decimals.
 /// </summary>
 static void OutputInTrieTree()
 {
     // Gather every item set of every candidate group, then sort them
     // lexicographically.
     Candidates<int> result = new Candidates<int>();
     foreach(var candi in frequentItemsets)
     {
         foreach(var itemset in candi.ItemSets)
         {
             result.AddCandidate(itemset);
         }
     }
     result = SortInTrie(result);

     // Write the output. "using" closes the file even if a write throws.
     using (StreamWriter sw = new StreamWriter(addrRoot + @"result.txt"))
     {
         foreach(var itemset in result.ItemSets)
         {
             foreach (var item in itemset.data)
             {
                 sw.Write(item.ToString() + " ");
             }
             sw.WriteLine(((double)itemset.value / (double)transactions.Count).ToString("0.000"));
         }
     }
 }