예제 #1
0
 /// <summary>
 /// Creates a tree element holding <paramref name="i_node"/> with the two given
 /// subtrees attached, and flags it as belonging to the current level.
 /// </summary>
 /// <param name="i_node">Node stored in this tree element.</param>
 /// <param name="i_left">Tree assigned to <c>Left</c>.</param>
 /// <param name="i_right">Tree assigned to <c>Right</c>.</param>
 public BinaryTree(Node i_node, ref BinaryTree i_left, ref BinaryTree i_right)
 {
     // A freshly built tree always starts out flagged as part of the current level.
     this.IsInTheCurrentLevel = true;

     this.Node  = i_node;
     this.Left  = i_left;
     this.Right = i_right;
 }
예제 #2
0
        /// <summary>
        /// Creates the Huffman tree based on the counted frequency and fills
        /// <c>m_HuffmanDictionary</c> with one code per byte that occurs.
        /// </summary>
        /// <remarks>
        /// The first <c>positiveCount</c> slots of <c>m_HuffmanTree</c> hold the leaves; each
        /// following slot holds a tree that merges the trees found at the two preceding
        /// slots after re-sorting. When no byte has a positive frequency the tree array
        /// and the dictionary are simply left empty.
        /// </remarks>
        private void CreateHuffmanTree()
        {
            uint positiveCount = GetNumberOfPositiveValuesInFrequencyTable();
            if (positiveCount == 0)
            {
                // 2 * 0 - 1 wraps around in unsigned arithmetic and would request an
                // array of ~4 billion elements, so handle the empty table explicitly.
                m_HuffmanTree       = new BinaryTree[0];
                m_HuffmanDictionary = new Dictionary<byte, uint>();
                return;
            }

            uint treeCount = 2 * positiveCount - 1;
            m_HuffmanTree = new BinaryTree[treeCount];

            // One leaf per byte value that actually occurs.
            for (uint i = 0; i < positiveCount; i++)
            {
                byte existingByte = GetExistingByteAtIndex(i);
                m_HuffmanTree[i] = new BinaryTree(new Node(existingByte, GetFrequencyOfByte(existingByte)));
            }

            // Each remaining slot merges the two trees sitting right before it.
            // NOTE(review): this presumably relies on SortHuffmanTree placing the two
            // lowest-frequency trees still in the current level at i - 1 and i - 2 - confirm.
            for (uint i = positiveCount; i < treeCount; i++)
            {
                SortHuffmanTree();
                m_HuffmanTree[i - 1].IsInTheCurrentLevel = false;
                m_HuffmanTree[i - 2].IsInTheCurrentLevel = false;
                m_HuffmanTree[i] = new BinaryTree(new Node(m_HuffmanTree[i - 1].Node.Frequency + m_HuffmanTree[i - 2].Node.Frequency), ref m_HuffmanTree[i - 1], ref m_HuffmanTree[i - 2]);
            }
            SortHuffmanTree();

            // The tree in the last slot is used as the root of the final Huffman tree.
            BinaryTree finalHuffmanTree = m_HuffmanTree[treeCount - 1];
            Utility.PrintDebug(finalHuffmanTree.ToString());
            Utility.PrintDebug(finalHuffmanTree.GetHeight().ToString());
            Utility.PrintDebug();

            // Codes start at 1 so that leading zero bits of a code stay distinguishable.
            m_HuffmanDictionary = new Dictionary<byte, uint>();
            CreateHuffmanMap(ref finalHuffmanTree, 1);

            foreach (KeyValuePair<byte, uint> entry in m_HuffmanDictionary)
            {
                Utility.PrintDebug("HuffmanDict[ " + entry.Key + " ] = " + entry.Value);
            }
            Utility.PrintDebug();
        }
예제 #3
0
 /// <summary>
 /// Walks the tree recursively and records the code of every leaf in
 /// <c>m_HuffmanDictionary</c>, keyed by the leaf's byte.
 /// </summary>
 /// <param name="i_tree">Subtree to walk; it is only read, never reassigned.</param>
 /// <param name="i_code">
 /// Code accumulated so far. Going left appends a 0 bit (<c>2 * code</c>), going
 /// right appends a 1 bit (<c>2 * code + 1</c>).
 /// </param>
 /// <exception cref="System.ArgumentException">The code is already present in the map.</exception>
 /// <exception cref="System.OverflowException">
 /// The tree is too deep for the code to fit in 32 bits.
 /// </exception>
 private void CreateHuffmanMap(ref BinaryTree i_tree, uint i_code)
 {
     // Properties cannot be passed by ref, so the children are copied to locals.
     BinaryTree leftChild  = i_tree.Left;
     BinaryTree rightChild = i_tree.Right;

     if (leftChild == null && rightChild == null)
     {
         // Leaf: the accumulated code belongs to this byte.
         if (m_HuffmanDictionary.ContainsValue(i_code))
         {
             throw new System.ArgumentException("Code alrady in the Huffman map");
         }
         m_HuffmanDictionary[i_tree.Node.Byte] = i_code;
         return;
     }

     // checked: a silently wrapped code would map a byte to a wrong bit pattern,
     // so fail loudly once the code no longer fits in a uint.
     if (leftChild != null)
     {
         CreateHuffmanMap(ref leftChild, checked(2 * i_code + 0));
     }
     if (rightChild != null)
     {
         CreateHuffmanMap(ref rightChild, checked(2 * i_code + 1));
     }
 }
예제 #4
0
        static void Main(string[] args)
        {
            /*
             * Huffman-compresses data.uncompressedData.
             * On success data.compressedData, data.frequency and data.sizeOfUncompressedData are filled
             * and true is returned. When there is no input buffer at all, nothing is touched and false is returned.
             */
            bool Compress(ref HuffmanData data)
            {
                if (data.uncompressedData == null)
                {
                    // Nothing to iterate over; report failure instead of crashing in the loops below.
                    return(false);
                }

                /*
                 * Count how often each symbol (byte) occurs in the input.
                 */
                FrequencyTable frequencyTable = new FrequencyTable();

                foreach (byte b in data.uncompressedData)
                {
                    frequencyTable.Add(b);
                }

                /*
                 * Creation of the forest: one BinaryTree holding a single Node per distinct symbol.
                 * Each Node carries its symbol (byte) and its frequency.
                 */
                Forest forest = new Forest();

                foreach (byte b in frequencyTable.Keys)
                {
                    forest.Add(new BinaryTree(new Node(b, frequencyTable[b])));
                }

                /*
                 * Reduce the forest to a single tree, then walk it to create the Huffman codes.
                 */
                BinaryTree binaryTree = forest.GetUniqueTree();

                forest.Preorder(binaryTree.Root, new Code());
                Dictionary <Byte, Code> codeTable = forest.getCodeTable();
                // NOTE(review): the decode table is not needed for compression; the call is kept
                // only in case the getter has side effects - confirm and remove.
                _ = forest.getDecodeTable();

                /*
                 * Number of bytes needed for the compressed stream: the sum over all symbols of
                 * (occurrences * code length) bits, rounded up to whole bytes.
                 */
                int GetRequiredBytesNumber()
                {
                    int size = 0;

                    foreach (Byte b in frequencyTable.Keys)
                    {
                        size += frequencyTable[b] * codeTable[b].Count;
                    }
                    return(size % 8 == 0 ? size / 8 : size / 8 + 1);
                }

                int requiredBytesNumber = GetRequiredBytesNumber();

                /*
                 * Concatenate the Huffman code of every input symbol into one list of bits.
                 */
                List <bool> compressedDataBoolean = new List <bool>();

                foreach (Byte b in data.uncompressedData)
                {
                    compressedDataBoolean.AddRange(codeTable[b]);
                }

                /*
                 * Packs the bits eight by eight into a byte array.
                 * The list is read from its END: the last bit goes to bit 0 of byte 0, the one before it
                 * to bit 1, and so on. Unused high bits of the final byte stay zero.
                 */
                Byte[] BooleanListToByteArray(List <bool> boolList)
                {
                    Byte[] packed      = new Byte[requiredBytesNumber];
                    int    bitPosition = 0;

                    for (int k = boolList.Count - 1; k >= 0; k--, bitPosition++)
                    {
                        if (boolList[k])
                        {
                            // bitPosition / 8 selects the byte, bitPosition % 8 the bit inside it.
                            packed[bitPosition >> 3] |= (Byte)(1 << (bitPosition & 7));
                        }
                    }
                    return(packed);
                }

                Byte[] compressedData = BooleanListToByteArray(compressedDataBoolean);

                /*
                 * The frequency table is handed over as a List<KeyValuePair<Byte, int>>,
                 * as required by the struct used by the tool.
                 */
                List <KeyValuePair <Byte, int> > frequency = frequencyTable.ToList();

                data.compressedData         = compressedData;
                data.frequency              = frequency;
                data.sizeOfUncompressedData = data.uncompressedData.Length;
                return(true);
            }

            /*
             * Rebuilds the Huffman tree from data.frequency and decodes data.compressedData
             * into data.uncompressedData.
             * Returns true on success, false when the frequency table or the compressed buffer is missing.
             */
            bool Decompress(ref HuffmanData data)
            {
                if (data.frequency == null || data.compressedData == null)
                {
                    // Without both inputs there is nothing to decode; report failure instead of crashing.
                    return(false);
                }

                /*
                 * Re-create the forest from the received frequency table. The sum of all frequencies
                 * is the number of symbols, i.e. the number of bytes to produce.
                 */
                Forest forest2        = new Forest();
                int    necessarybytes = 0;

                foreach (KeyValuePair <Byte, int> kvp in data.frequency)
                {
                    forest2.Add(new BinaryTree(new Node(kvp.Key, kvp.Value)));
                    necessarybytes += kvp.Value;
                }

                /*
                 * Re-generate the Huffman codes for each symbol.
                 */
                BinaryTree binaryTree2 = forest2.GetUniqueTree();

                forest2.Preorder(binaryTree2.Root, new Code());

                /*
                 * Parse the compressed data through a BitArray so we don't have to jump from byte to byte.
                 */
                BitArray bitArray = new BitArray(data.compressedData);

                /*
                 * Number of significant bits in the stream: occurrences * code length, summed over all symbols.
                 * When it is not a multiple of eight, the remaining bits of the stream are padding and
                 * 'start' is the number of such padding bits handed to the parser.
                 */
                Dictionary <Byte, Code> codeTable = forest2.getCodeTable();
                int sizeDataUncompressed = 0;

                foreach (KeyValuePair <Byte, int> kvp in data.frequency)
                {
                    sizeDataUncompressed += kvp.Value * codeTable[kvp.Key].Count;
                }

                int remainder = sizeDataUncompressed % 8;
                int start     = remainder == 0 ? 0 : 8 - remainder;

                /*
                 * Parse the full stream of bits and, using the Huffman tree, recover each symbol.
                 */
                forest2.ProduceByteArrayNew(bitArray, binaryTree2.Root, necessarybytes, start);
                data.uncompressedData = forest2.GetByteArray();
                return(true);
            }

            /*
             * Actual Main instructions used to test the proper functioning of the program.
             */
            // Locate testString.txt two directories above the running executable.
            // Path.Combine keeps the path valid on platforms that do not use '\' as separator.
            string exeDirectory;

            using (Process currentProcess = Process.GetCurrentProcess())
            {
                exeDirectory = Path.GetDirectoryName(currentProcess.MainModule.FileName);
            }
            string testFilePath = Path.Combine(exeDirectory, "..", "..", "testString.txt");

            byte[]      testString = System.IO.File.ReadAllBytes(testFilePath);
            HuffmanData mainData   = new HuffmanData();

            mainData.uncompressedData = testString;

            // Only try to decompress what was actually compressed.
            if (Compress(ref mainData))
            {
                Decompress(ref mainData);
            }
        }
예제 #5
0
        /// <summary>
        /// Huffman-encodes a text file and decodes it again.
        /// Reads <c>args[0]</c>, writes the encoding table to <c>"encoding" + args[1]</c>,
        /// the packed data to <c>args[1]</c> and the decoded text to
        /// <c>"decompressed" + args[0]</c>.
        /// </summary>
        /// <param name="args">Exactly two entries: input file and output file.</param>
        static void Main(string[] args)
        {
            //check for proper usage
            #region Proper Usage
            // The argument count must be validated before args[1] is touched.
            if (args.Length != 2)
            {
                Console.WriteLine("Proper usage is: program.exe inFile outFile");
                Console.ReadKey();
                return;
            }

            string inputPath        = args[0];
            string compressedPath   = args[1];
            string encodingPath     = String.Format("encoding{0}", args[1]);
            string decompressedPath = String.Format("decompressed{0}", args[0]);
            char   delimiter        = '»';

            StreamReader txtIn;
            try
            {
                txtIn = new StreamReader(inputPath);
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
                Console.ReadLine();
                return;
            }
            #endregion

            //read the file and count the character frequencies
            #region List of Character Frequencies
            LinkedList<CharacterFrequency> list = new LinkedList<CharacterFrequency>();
            using (txtIn)
            {
                // NOTE(review): for an empty file Read() returns -1, so (char)0xFFFF is
                // counted once below - confirm whether that is intended.
                char c = (char)txtIn.Read();

                while (!txtIn.EndOfStream)
                {
                    findFrequency(c, list);
                    c = (char)txtIn.Read();
                }
                findFrequency(c, list);
            }
            #endregion

            //create a sorted linked list of binary tree nodes
            #region Sorted List of binary Tree Nodes of Character Frequencies
            SortedLinkedList<BinaryTreeNode<CharacterFrequency>> sll = new SortedLinkedList<BinaryTreeNode<CharacterFrequency>>();
            foreach (CharacterFrequency charf in list)
            {
                sll.Add(new BinaryTreeNode<CharacterFrequency>(charf, charf.Frequency));
            }
            #endregion

            //while more than one node is left, merge the two first nodes of the sorted list
            #region Building the Tree
            while (sll.Count > 1)
            {
                BinaryTreeNode<CharacterFrequency> left = sll.First.Value;
                sll.RemoveFirst();
                BinaryTreeNode<CharacterFrequency> right = sll.First.Value;
                sll.RemoveFirst();

                // Placeholder node without a character; its value is the sum of both children.
                BinaryTreeNode<CharacterFrequency> phNode = new BinaryTreeNode<CharacterFrequency>(null, (left.Value + right.Value));
                phNode.Left  = left;
                phNode.Right = right;
                sll.Add(phNode);
            }
            BinaryTree tree = new BinaryTree(sll.First.Value);
            tree.inOrder(tree.Root, "");

            //write the encoding table to a new file
            using (StreamWriter txtOut = new StreamWriter(encodingPath))
            {
                foreach (EncodingData d in tree.EncodingTable)
                {
                    txtOut.WriteLine(String.Format("{0}", d.ToString()));
                }
            }
            #endregion

            //compress the file
            #region Compression
            // Number of characters encoded; decoding stops after this many symbols so that
            // the padding bits of the last byte are not turned into extra characters.
            long symbolCount = 0;

            using (StreamReader source = new StreamReader(inputPath))
            using (StreamWriter compressedFile = new StreamWriter(compressedPath))
            {
                byte b   = 0;
                int  pow = 7; // bit position inside b that the next code bit goes to (MSB first)

                while (!source.EndOfStream)
                {
                    char   ch       = (char)source.Read();
                    string encoding = findEncoding(ch, tree.EncodingTable);
                    symbolCount++;

                    foreach (char character in encoding)
                    {
                        if (character == '1')
                        {
                            b |= (byte)(1 << pow);
                        }
                        pow--;
                        if (pow < 0)
                        {
                            // Byte is full: emit it and start a new one.
                            pow = 7;
                            compressedFile.Write((char)b);
                            b = 0;
                        }
                    }
                }
                if (pow != 7)
                {
                    // Flush the partially filled last byte; its unused low bits are zero.
                    compressedFile.Write((char)b);
                }
            }
            #endregion

            //decompress the file
            #region Decompression
            // Rebuild a decoding tree from the encoding table file: every line holds a byte value
            // and a string of '0'/'1' characters, separated by the delimiter.
            BinaryTree charTree = new BinaryTree(new BinaryTreeNode<CharacterFrequency>(new CharacterFrequency((char)0, 0), 0));
            BinaryTreeNode<CharacterFrequency> node = charTree.Root;

            using (StreamReader tableReader = new StreamReader(encodingPath))
            {
                while (!tableReader.EndOfStream)
                {
                    string   line   = tableReader.ReadLine();
                    string[] holder = line.Split(delimiter);

                    if (holder.Length > 1)
                    {
                        // Walk (and create where missing) the path described by the code.
                        foreach (char letter2 in holder[1])
                        {
                            if (letter2 == '0')
                            {
                                if (node.Left == null)
                                {
                                    node.Left = new BinaryTreeNode<CharacterFrequency>(new CharacterFrequency((char)0, 0), 0);
                                }
                                node = node.Left;
                            }
                            else
                            {
                                if (node.Right == null)
                                {
                                    node.Right = new BinaryTreeNode<CharacterFrequency>(new CharacterFrequency((char)0, 0), 0);
                                }
                                node = node.Right;
                            }
                        }

                        // A value that does not parse leaves holdingByte at 0.
                        byte holdingByte = 0;
                        Byte.TryParse(holder[0], out holdingByte);
                        node.Data.Ch = (char)holdingByte;
                        node         = charTree.Root;
                    }
                }
            }

            using (StreamReader packed = new StreamReader(compressedPath))
            using (StreamWriter decompressedFile = new StreamWriter(decompressedPath))
            {
                long remaining = symbolCount;
                node = charTree.Root;

                while (remaining > 0 && !packed.EndOfStream)
                {
                    byte decomp = (byte)packed.Read();

                    // Consume the byte MSB first, mirroring the order used while compressing.
                    for (int bit = 7; bit >= 0 && remaining > 0; bit--)
                    {
                        //a set bit goes to the right, a cleared bit to the left
                        node = (decomp & (1 << bit)) != 0 ? node.Right : node.Left;

                        if (node.isLeaf())
                        {
                            // NOTE(review): this writes node.Data (via its ToString()), not node.Data.Ch -
                            // confirm CharacterFrequency.ToString() yields just the character.
                            decompressedFile.Write(node.Data);
                            node = charTree.Root;
                            remaining--;
                        }
                    }
                }
            }
            #endregion

            Console.ReadKey();
        }