/// <summary>
/// Creates a tree element holding <paramref name="i_node"/> with the given
/// left and right sub-trees. The new element is flagged as being in the
/// current level.
/// </summary>
/// <param name="i_node">Node stored at this tree element.</param>
/// <param name="i_left">Sub-tree stored in <c>Left</c>.</param>
/// <param name="i_right">Sub-tree stored in <c>Right</c>.</param>
public BinaryTree(Node i_node, ref BinaryTree i_left, ref BinaryTree i_right)
{
    this.IsInTheCurrentLevel = true;
    this.Node = i_node;
    this.Left = i_left;
    this.Right = i_right;
}
// Create the Huffman tree based on the counted frequency, then derive the
// byte -> code dictionary from it.
//
// The array m_HuffmanTree holds one leaf per byte with a positive frequency,
// followed by the (positiveCount - 1) inner trees created while merging.
// NOTE(review): the merge loop relies on SortHuffmanTree() placing the two
// trees to merge at indices i - 1 and i - 2 -- confirm against its implementation.
private void CreateHuffmanTree()
{
    uint positiveCount = GetNumberOfPositiveValuesInFrequencyTable();

    // Guard: with no symbols, "2 * positiveCount - 1" wraps around in uint
    // arithmetic (to 4294967295) and the allocation below would fail.
    if (positiveCount == 0)
    {
        m_HuffmanTree = new BinaryTree[0];
        m_HuffmanDictionary = new Dictionary<byte, uint>();
        return;
    }

    uint treeCount = 2 * positiveCount - 1;
    m_HuffmanTree = new BinaryTree[treeCount];

    // One leaf per byte that actually occurs.
    for (uint i = 0; i < positiveCount; i++)
    {
        byte existingByte = GetExistingByteAtIndex(i);
        m_HuffmanTree[i] = new BinaryTree(new Node(existingByte, GetFrequencyOfByte(existingByte)));
    }

    // Each iteration merges two trees into a new parent stored at index i.
    for (uint i = positiveCount; i < treeCount; i++)
    {
        SortHuffmanTree();

        // The merged children leave the current level; the new parent enters it.
        m_HuffmanTree[i - 1].IsInTheCurrentLevel = false;
        m_HuffmanTree[i - 2].IsInTheCurrentLevel = false;

        m_HuffmanTree[i] = new BinaryTree(
            new Node(m_HuffmanTree[i - 1].Node.Frequency + m_HuffmanTree[i - 2].Node.Frequency),
            ref m_HuffmanTree[i - 1],
            ref m_HuffmanTree[i - 2]);
    }

    SortHuffmanTree();

    // The last slot is taken as the root of the complete tree.
    BinaryTree finalHuffmanTree = m_HuffmanTree[treeCount - 1];

    Utility.PrintDebug(finalHuffmanTree.ToString());
    Utility.PrintDebug(finalHuffmanTree.GetHeight().ToString());
    Utility.PrintDebug();

    // The root starts with code 1; every level below appends one bit to it.
    m_HuffmanDictionary = new Dictionary<byte, uint>();
    CreateHuffmanMap(ref finalHuffmanTree, 1);

    foreach (KeyValuePair<byte, uint> entry in m_HuffmanDictionary)
    {
        Utility.PrintDebug("HuffmanDict[ " + entry.Key + " ] = " + entry.Value);
    }

    Utility.PrintDebug();
}
/// <summary>
/// Walks <paramref name="i_tree"/> recursively and records, for every leaf,
/// the code accumulated on the way down in <c>m_HuffmanDictionary</c>.
/// Going left appends a 0 bit (<c>2 * code</c>), going right appends a 1 bit
/// (<c>2 * code + 1</c>).
/// </summary>
/// <param name="i_tree">Sub-tree to walk.</param>
/// <param name="i_code">Code accumulated so far for this sub-tree.</param>
/// <exception cref="System.OverflowException">
/// The tree is too deep for the code to fit in a <c>uint</c>.
/// </exception>
/// <exception cref="System.ArgumentException">
/// The same code was already assigned to another byte.
/// </exception>
private void CreateHuffmanMap(ref BinaryTree i_tree, uint i_code)
{
    if (i_tree.Left != null)
    {
        BinaryTree leftChild = i_tree.Left;

        // checked: without it the multiplication wraps silently once the
        // code needs more than 32 bits, producing wrong/ambiguous codes.
        CreateHuffmanMap(ref leftChild, checked(2 * i_code + 0));
    }

    if (i_tree.Right != null)
    {
        BinaryTree rightChild = i_tree.Right;
        CreateHuffmanMap(ref rightChild, checked(2 * i_code + 1));
    }

    bool isLeaf = i_tree.Left == null && i_tree.Right == null;
    if (!isLeaf)
    {
        return;
    }

    // Every leaf must end up with a distinct code.
    if (m_HuffmanDictionary.ContainsValue(i_code))
    {
        throw new System.ArgumentException("Code alrady in the Huffman map");
    }

    m_HuffmanDictionary[i_tree.Node.Byte] = i_code;
}
/// <summary>
/// Test driver: reads <c>testString.txt</c> (two directories above the
/// executable), compresses it with <c>Compress</c> and expands it again with
/// <c>Decompress</c>. Both helpers are local functions working on a
/// <c>HuffmanData</c> instance passed by reference.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Fills data.compressedData, data.frequency and data.sizeOfUncompressedData
    // from data.uncompressedData. Always returns true.
    bool Compress(ref HuffmanData data)
    {
        // Count how often each byte occurs in the input.
        FrequencyTable frequencyTable = new FrequencyTable();
        foreach (byte b in data.uncompressedData)
        {
            frequencyTable.Add(b);
        }

        /*
         * Creation of the forest, which is represented by a list of binary trees.
         * One BinaryTree holding a single Node is added for every distinct symbol (byte);
         * each Node carries its symbol and its frequency.
         */
        Forest forest = new Forest();
        foreach (byte b in frequencyTable.Keys)
        {
            forest.Add(new BinaryTree(new Node(b, frequencyTable[b])));
        }

        /*
         * Reduce the forest to a single tree. Per the original author's notes, the two
         * trees with the lowest frequencies are repeatedly removed and attached as the
         * left and right children of a new tree that is added back to the forest.
         */
        BinaryTree binaryTree = forest.GetUniqueTree();

        /*
         * Create the Huffman codes by traversing the tree in preorder. The forest
         * exposes the resulting symbol -> code table; a Code is enumerable as booleans.
         */
        forest.Preorder(binaryTree.Root, new Code());
        Dictionary<Byte, Code> codeTable = forest.getCodeTable();

        /*
         * Number of bytes needed to store the compressed bit stream: the sum over all
         * symbols of (occurrences * code length), rounded up to whole bytes.
         */
        int GetRequiredBytesNumber()
        {
            int size = 0;
            foreach (Byte b in frequencyTable.Keys)
            {
                size += frequencyTable[b] * codeTable[b].Count;
            }

            return size % 8 == 0 ? size / 8 : size / 8 + 1;
        }

        int requiredBytesNumber = GetRequiredBytesNumber();

        /*
         * Concatenate the Huffman code of every input symbol into one list of bits.
         */
        List<bool> compressedDataBoolean = new List<bool>();
        foreach (Byte b in data.uncompressedData)
        {
            compressedDataBoolean.AddRange(codeTable[b]);
        }

        /*
         * Pack the bits into bytes. The list is consumed from its END: the last boolean
         * becomes bit 0 of byte 0, the one before it bit 1, and so on. When the bit
         * count is not a multiple of eight, the unused high bits of the final byte stay
         * zero. The input list is left untouched.
         */
        Byte[] BooleanListToByteArray(List<bool> boolList)
        {
            Byte[] packed = new Byte[requiredBytesNumber];
            int lastIndex = boolList.Count - 1;

            for (int bit = 0; bit <= lastIndex; bit++)
            {
                if (boolList[lastIndex - bit])
                {
                    packed[bit / 8] |= (Byte)(1 << (bit % 8));
                }
            }

            return packed;
        }

        /*
         * The frequency table is handed over as a List<KeyValuePair<Byte, int>>,
         * which is what the HuffmanData struct stores.
         */
        List<KeyValuePair<Byte, int>> frequency = frequencyTable.ToList();

        data.compressedData = BooleanListToByteArray(compressedDataBoolean);
        data.frequency = frequency;
        data.sizeOfUncompressedData = data.uncompressedData.Length;
        return true;
    }

    // Rebuilds data.uncompressedData from data.compressedData and data.frequency.
    // Always returns true.
    bool Decompress(ref HuffmanData data)
    {
        /*
         * Re-create the forest from the stored frequency table and regenerate the
         * Huffman codes. The sum of all frequencies is the number of bytes to produce.
         */
        Forest forest2 = new Forest();
        int necessarybytes = 0;
        foreach (KeyValuePair<Byte, int> kvp in data.frequency)
        {
            forest2.Add(new BinaryTree(new Node(kvp.Key, kvp.Value)));
            necessarybytes += kvp.Value;
        }

        BinaryTree binaryTree2 = forest2.GetUniqueTree();
        forest2.Preorder(binaryTree2.Root, new Code());

        /*
         * The compressed bytes are parsed through a BitArray so that the parser
         * does not have to jump from byte to byte.
         */
        BitArray bitArray = new BitArray(data.compressedData);

        /*
         * When the number of significant bits is not a multiple of eight, the stream
         * contains filler bits that must be skipped. Compute the number of significant
         * bits and derive the start offset from it.
         */
        Dictionary<Byte, Code> codeTable = forest2.getCodeTable();
        int sizeDataUncompressed = 0;
        foreach (KeyValuePair<Byte, int> kvp in data.frequency)
        {
            sizeDataUncompressed += kvp.Value * codeTable[kvp.Key].Count;
        }

        int start = sizeDataUncompressed % 8 == 0 ? 0 : 8 - sizeDataUncompressed % 8;

        /*
         * Parse the full bit stream and recover each symbol using the Huffman tree.
         */
        forest2.ProduceByteArrayNew(bitArray, binaryTree2.Root, necessarybytes, start);
        data.uncompressedData = forest2.GetByteArray();
        return true;
    }

    /*
     * Actual Main instructions used to exercise the program.
     * Path.Combine keeps the relative path valid regardless of the platform's
     * directory separator.
     */
    string exeDirectory = Path.GetDirectoryName(Process.GetCurrentProcess().MainModule.FileName) ?? string.Empty;
    byte[] testString = System.IO.File.ReadAllBytes(Path.Combine(exeDirectory, "..", "..", "testString.txt"));

    HuffmanData mainData = new HuffmanData();
    mainData.uncompressedData = testString;

    Compress(ref mainData);
    Decompress(ref mainData);
}
/// <summary>
/// Compresses the text file <c>args[0]</c> into <c>args[1]</c> with a Huffman
/// code, writes the encoding table to <c>encoding{args[1]}</c>, and then
/// decompresses the result into <c>decompressed{args[0]}</c>.
/// </summary>
/// <param name="args">Exactly two entries: input file and output file.</param>
static void Main(string[] args)
{
    // Check for proper usage BEFORE touching args[0] / args[1].
    if (args.Length != 2)
    {
        Console.WriteLine("Proper usage is: program.exe inFile outFile");
        Console.ReadKey();
        return;
    }

    // Separator between the character value and its code in the encoding table.
    // NOTE(review): the literal was mis-encoded in the source (two characters,
    // matching U+00BB read with the wrong code page); confirm U+00BB is what
    // EncodingData.ToString() emits.
    const char delimiter = '\u00BB';
    string encodingPath = String.Format("encoding{0}", args[1]);

    StreamReader source;
    try
    {
        source = new StreamReader(args[0]);
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
        Console.ReadLine();
        return;
    }

    // Read the file and count the character frequencies.
    LinkedList<CharacterFrequency> list = new LinkedList<CharacterFrequency>();
    using (source)
    {
        // Read() returns -1 at end of stream; stopping there keeps an empty
        // file from being counted as a single (char)-1 symbol.
        int next;
        while ((next = source.Read()) != -1)
        {
            findFrequency((char)next, list);
        }
    }

    if (list.Count == 0)
    {
        Console.WriteLine("Input file is empty; nothing to compress.");
        Console.ReadKey();
        return;
    }

    // Create a sorted linked list of binary tree nodes, one per character.
    SortedLinkedList<BinaryTreeNode<CharacterFrequency>> sll = new SortedLinkedList<BinaryTreeNode<CharacterFrequency>>();
    foreach (CharacterFrequency charf in list)
    {
        sll.Add(new BinaryTreeNode<CharacterFrequency>(charf, charf.Frequency));
    }

    // While more than one node remains, merge the two first (smallest) nodes
    // under a placeholder parent whose value is the sum of both.
    while (sll.Count > 1)
    {
        BinaryTreeNode<CharacterFrequency> left = sll.First.Value;
        sll.RemoveFirst();
        BinaryTreeNode<CharacterFrequency> right = sll.First.Value;
        sll.RemoveFirst();

        BinaryTreeNode<CharacterFrequency> phNode = new BinaryTreeNode<CharacterFrequency>(null, (left.Value + right.Value));
        phNode.Left = left;
        phNode.Right = right;
        sll.Add(phNode);
    }

    BinaryTree tree = new BinaryTree(sll.First.Value);
    tree.inOrder(tree.Root, "");

    // Write the encoding table to a new file.
    using (StreamWriter txtOut = new StreamWriter(encodingPath))
    {
        foreach (EncodingData d in tree.EncodingTable)
        {
            txtOut.WriteLine(d.ToString());
        }
    }

    // Compress the file: pack the '0'/'1' characters of every code into
    // bytes, most significant bit first.
    // NOTE(review): bytes are written as chars through a text StreamWriter, so
    // values above 0x7F are re-encoded by the writer's text encoding; the
    // decompression below reads them back the same way.
    using (StreamReader txtIn = new StreamReader(args[0]))
    using (StreamWriter compressedFile = new StreamWriter(args[1]))
    {
        byte b = 0;
        int pow = 7;
        int next;

        while ((next = txtIn.Read()) != -1)
        {
            string encoding = findEncoding((char)next, tree.EncodingTable);
            foreach (char character in encoding)
            {
                if (character == '1')
                {
                    b |= (byte)(1 << pow);
                }

                pow--;
                if (pow < 0)
                {
                    pow = 7;
                    compressedFile.Write((char)b);
                    b = 0;
                }
            }
        }

        // Flush a partially filled last byte (remaining low bits are zero).
        if (pow != 7)
        {
            compressedFile.Write((char)b);
        }
    }

    // Decompression, step 1: rebuild the code tree from the encoding table.
    BinaryTree charTree = new BinaryTree(new BinaryTreeNode<CharacterFrequency>(new CharacterFrequency((char)0, 0), 0));
    BinaryTreeNode<CharacterFrequency> node = charTree.Root;

    using (StreamReader tableReader = new StreamReader(encodingPath))
    {
        string line;
        while ((line = tableReader.ReadLine()) != null)
        {
            string[] holder = line.Split(delimiter);
            if (holder.Length > 1)
            {
                // Follow (and create where missing) the path described by the code.
                foreach (char letter2 in holder[1])
                {
                    if (letter2 == '0')
                    {
                        if (node.Left == null)
                        {
                            node.Left = new BinaryTreeNode<CharacterFrequency>(new CharacterFrequency((char)0, 0), 0);
                        }

                        node = node.Left;
                    }
                    else
                    {
                        if (node.Right == null)
                        {
                            node.Right = new BinaryTreeNode<CharacterFrequency>(new CharacterFrequency((char)0, 0), 0);
                        }

                        node = node.Right;
                    }
                }

                // The first field is parsed as a numeric byte value; on parse
                // failure holdingByte stays 0.
                byte holdingByte = 0;
                Byte.TryParse(holder[0], out holdingByte);
                node.Data.Ch = (char)holdingByte;
                node = charTree.Root;
            }
        }
    }

    // Decompression, step 2: walk the tree bit by bit, most significant bit first.
    // NOTE(review): padding bits of the final byte are decoded like real data,
    // which can append spurious symbols -- the symbol count is not stored.
    using (StreamReader compressedReader = new StreamReader(args[1]))
    using (StreamWriter decompressedFile = new StreamWriter(String.Format("decompressed{0}", args[0])))
    {
        node = charTree.Root;
        int next;

        while ((next = compressedReader.Read()) != -1)
        {
            byte decomp = (byte)next;
            for (int bit = 7; bit >= 0; bit--)
            {
                // A set bit goes to the right child, a clear bit to the left.
                node = (decomp & (1 << bit)) != 0 ? node.Right : node.Left;

                if (node.isLeaf())
                {
                    // NOTE(review): this writes node.Data (the CharacterFrequency
                    // object), not node.Data.Ch -- confirm that is intended.
                    decompressedFile.Write(node.Data);
                    node = charTree.Root;
                }
            }
        }
    }

    Console.ReadKey();
}