// Serializes `tree` into `buffer` in pre-order, starting at bit index `bitPosition`
// (16 bits per buffer element, most significant bit first).
// A leaf is written as a 1 bit followed by whatever AppendCode emits for its character code;
// an internal node is written as a 0 bit followed by its left and then its right subtree.
// `bitPosition` is advanced past everything that was written.
private static void EncodeTree(HuffmanTree tree, ushort[] buffer, ref int bitPosition)
{
    if (!tree.IsLeaf())
    {
        // internal node: the 0 marker bit is produced by just skipping one position,
        // which leaves the bit at its current value (assumes the buffer starts zeroed -- TODO confirm)
        bitPosition++;
        EncodeTree(tree.Left, buffer, ref bitPosition);
        EncodeTree(tree.Right, buffer, ref bitPosition);
        return;
    }

    // leaf: set the marker bit to 1
    int wordIndex = bitPosition / 16;
    int bitOffset = bitPosition % 16;
    buffer[wordIndex] |= (ushort)(0x8000 >> bitOffset);
    bitPosition++;

    // followed by the character code of the leaf
    AppendCode(buffer, (ushort)tree.Character, ref bitPosition);
}
// Reads the file `filename`, decodes its Huffman-encoded content and returns the decoded text.
//
// File layout, as read by this method:
//   byte 0, upper 6 bits                : file format version
//   byte 0 lower 2 bits, bytes 1 and 2  : length of the serialized tree in bytes (18 bits)
//   byte 3, upper 4 bits                : number of used bits in the last byte of the tree (1-8)
//   byte 3, lower 4 bits                : number of used bits in the last byte of the text (1-8)
//   bytes 4 .. 3 + tree length          : serialized tree
//   remaining bytes (at least one)      : encoded text
//
// Returns "" for an empty file.
// Throws FormatException when the header, the tree or the text is inconsistent.
// Exceptions raised by File.ReadAllBytes propagate to the caller unchanged.
public static string Decode(string filename)
{
    byte[] buffer = File.ReadAllBytes(filename);

    if (buffer.Length == 0)
    {
        // empty file
        return "";
    }

    string corruptedMessage = "File '" + filename + "' is invalid or corrupted.";

    // 1. read metadata
    // first 6 bits
    // NOTE(review): the value is masked but not shifted right by 2, so it is compared in
    // its "in-place" form -- confirm this matches how the writer stores FileFormatVersion.
    int fileFormatVersion = buffer[0] & 0xFC;
    if (fileFormatVersion > FileFormatVersion)
    {
        throw new FormatException("File '" + filename + "' has a newer format (file format version " + fileFormatVersion.ToString("00") + "), or is invalid or corrupted.");
    }

    if (buffer.Length < 8)
    {
        // 4 bytes for metadata,
        // at least 3 bytes for tree (a 1 bit and 16 bits for char code),
        // and at least 1 byte for the text
        throw new FormatException(corruptedMessage);
    }

    int treeLengthInBytes = ((int)(buffer[0] & 0x03) << 16) | ((int)buffer[1] << 8) | ((int)buffer[2]);
    int bitsOfLastByteOfTree = buffer[3] >> 4;
    int bitsOfLastByteOfText = buffer[3] & 0x0F;

    if (bitsOfLastByteOfTree < 1 || bitsOfLastByteOfTree > 8 ||
        bitsOfLastByteOfText < 1 || bitsOfLastByteOfText > 8 ||
        treeLengthInBytes < 3 || treeLengthInBytes > 147456 ||
        buffer.Length < 5 + treeLengthInBytes)
    {
        throw new FormatException(corruptedMessage);
    }

    // 2. decode the tree
    HuffmanTree tree = new HuffmanTree();
    // the first 32 bits are for metadata
    int treeBitPosition = 32;
    try
    {
        // the last argument is the bit index just past the last used bit of the tree
        DecodeTree(tree, buffer, ref treeBitPosition, (3 + treeLengthInBytes) * 8 + bitsOfLastByteOfTree);
    }
    catch (FormatException ex)
    {
        // report with the file name, keeping the original failure as the inner exception
        throw new FormatException(corruptedMessage, ex);
    }

    // 3. decode the text
    int textStart = 4 + treeLengthInBytes;
    int lastIndex = buffer.Length - 1;
    // a builder avoids re-copying the whole result on every appended character
    System.Text.StringBuilder decodedText = new System.Text.StringBuilder();

    // if the text entirely consists of a single character
    if (tree.IsLeaf())
    {
        for (int i = textStart; i < buffer.Length; i++)
        {
            // the huffman code for the only character must be 0
            if (buffer[i] != 0)
            {
                throw new FormatException(corruptedMessage);
            }
        }

        // length of the huffman code is 1,
        // so the number of characters equals the number of bits of the encoded text
        int numOfChars = (lastIndex - textStart) * 8 + bitsOfLastByteOfText;
        for (int i = 0; i < numOfChars; i++)
        {
            decodedText.Append(tree.Character);
        }
        return decodedText.ToString();
    }

    HuffmanTree currentNode = tree;
    for (int i = textStart; i <= lastIndex; i++)
    {
        // every byte carries 8 data bits, except the last one,
        // where only the leading bitsOfLastByteOfText bits are real data
        int validBits = (i == lastIndex) ? bitsOfLastByteOfText : 8;

        for (int bit = 0; bit < validBits; bit++)
        {
            int mask = 0x80 >> bit;

            // bit 1 goes right, bit 0 goes left
            currentNode = ((buffer[i] & mask) != 0) ? currentNode.Right : currentNode.Left;

            if (currentNode == null)
            {
                throw new FormatException(corruptedMessage);
            }

            if (currentNode.IsLeaf())
            {
                // reached a leaf, read a character and go back to the root
                decodedText.Append(currentNode.Character);
                currentNode = tree;
            }
        }
    }

    // NOTE(review): a stream that ends in the middle of a code (currentNode != tree here)
    // is accepted silently, as before -- consider rejecting it as corrupted.
    return decodedText.ToString();
}