// Every block of compressed data starts with three header bits. They need not
// start on a byte boundary, because a block does not necessarily occupy a
// whole number of bytes:
//
//   1 bit   BFINAL - set if and only if this is the last block of the data set
//   2 bits  BTYPE  - how the block's data are compressed:
//                      00 - no compression
//                      01 - compressed with fixed Huffman codes
//                      10 - compressed with dynamic Huffman codes
//                      11 - reserved (error)
//
// The two compressed modes differ only in how the Huffman codes for the
// literal/length and distance alphabets are defined.
//
// Returns true for success (end of block, or the output window is full) and
// false when we are short of input. Progress is kept in _state, so a later
// call picks up at the step that could not be completed.
private bool Decode()
{
    if (Finished())
    {
        return true;
    }

    // The optional format reader handles the header and footer around the blocks.
    if (_hasFormatReader)
    {
        if (_state == InflaterState.ReadingHeader)
        {
            if (!_formatReader.ReadHeader(_input))
            {
                return false;
            }

            _state = InflaterState.ReadingBFinal;
        }
        else if (_state == InflaterState.ReadingFooter || _state == InflaterState.StartReadingFooter)
        {
            if (!_formatReader.ReadFooter(_input))
            {
                return false;
            }

            _state = InflaterState.VerifyingFooter;
            return true;
        }
    }

    if (_state == InflaterState.ReadingBFinal)
    {
        // BFINAL needs a single bit.
        if (!_input.EnsureBitsAvailable(1))
        {
            return false;
        }

        _bfinal = _input.GetBits(1);
        _state = InflaterState.ReadingBType;
    }

    if (_state == InflaterState.ReadingBType)
    {
        // BTYPE needs two bits. The state is left at ReadingBType on failure,
        // so the next call retries from here.
        if (!_input.EnsureBitsAvailable(2))
        {
            return false;
        }

        _blockType = (BlockType)_input.GetBits(2);
        if (_blockType == BlockType.Dynamic)
        {
            _state = InflaterState.ReadingNumLitCodes;
        }
        else if (_blockType == BlockType.Static)
        {
            _literalLengthTree = HuffmanTree.StaticLiteralLengthTree;
            _distanceTree = HuffmanTree.StaticDistanceTree;
            _state = InflaterState.DecodeTop;
        }
        else if (_blockType == BlockType.Uncompressed)
        {
            _state = InflaterState.UncompressedAligning;
        }
        else
        {
            throw new InvalidDataContractException("Unknown block type.");
        }
    }

    var endOfBlock = false;
    bool succeeded;
    if (_blockType == BlockType.Dynamic)
    {
        // States before DecodeTop mean the dynamic header is still being read.
        if (_state < InflaterState.DecodeTop)
        {
            succeeded = DecodeDynamicBlockHeader();
        }
        else
        {
            succeeded = DecodeBlock(out endOfBlock);
        }
    }
    else if (_blockType == BlockType.Static)
    {
        succeeded = DecodeBlock(out endOfBlock);
    }
    else if (_blockType == BlockType.Uncompressed)
    {
        succeeded = DecodeUncompressedBlock(out endOfBlock);
    }
    else
    {
        throw new InvalidDataContractException("Unknown block type.");
    }

    // Only when the block that just ended was flagged as final (bfinal = 1)
    // do we move on to the footer, or finish if there is no format reader.
    if (endOfBlock && _bfinal != 0)
    {
        if (_hasFormatReader)
        {
            _state = InflaterState.StartReadingFooter;
        }
        else
        {
            _state = InflaterState.Done;
        }
    }

    return succeeded;
}
// Layout of the dynamic block header:
//
//   5 bits  HLIT  - number of literal/length codes - 257 (257 - 286)
//   5 bits  HDIST - number of distance codes - 1          (1 - 32)
//   4 bits  HCLEN - number of code length codes - 4       (4 - 19)
//
//   (HCLEN + 4) x 3 bits: code lengths for the code length alphabet, in
//   the order 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15.
//   Each is a 3-bit integer (0 - 7); a code length of 0 means the
//   corresponding symbol is not used.
//
//   HLIT + 257 code lengths for the literal/length alphabet, followed by
//   HDIST + 1 code lengths for the distance alphabet, both encoded with the
//   code length Huffman code.
//
// Repeat codes may cross from the literal/length lengths into the distance
// lengths: all code lengths form one sequence of HLIT + HDIST + 258 values.
//
// Returns false when a read cannot be completed (a bit read comes back
// negative, or the required bits are not available); _state and
// _loopCounter keep the position so the next call resumes there. Returns
// true once both trees are built and _state has been set to DecodeTop.
private bool DecodeDynamicBlockHeader()
{
    // Each step advances _state on success, so the checks below run in
    // sequence and a resumed call skips the steps already completed.
    if (_state == InflaterState.ReadingNumLitCodes)
    {
        _literalLengthCodeCount = _input.GetBits(5);
        if (_literalLengthCodeCount < 0)
        {
            return false;
        }

        _literalLengthCodeCount += 257;
        _state = InflaterState.ReadingNumDistCodes;
    }

    if (_state == InflaterState.ReadingNumDistCodes)
    {
        _distanceCodeCount = _input.GetBits(5);
        if (_distanceCodeCount < 0)
        {
            return false;
        }

        _distanceCodeCount += 1;
        _state = InflaterState.ReadingNumCodeLengthCodes;
    }

    if (_state == InflaterState.ReadingNumCodeLengthCodes)
    {
        _codeLengthCodeCount = _input.GetBits(4);
        if (_codeLengthCodeCount < 0)
        {
            return false;
        }

        _codeLengthCodeCount += 4;
        _loopCounter = 0;
        _state = InflaterState.ReadingCodeLengthCodes;
    }

    if (_state == InflaterState.ReadingCodeLengthCodes)
    {
        while (_loopCounter < _codeLengthCodeCount)
        {
            var lengthBits = _input.GetBits(3);
            if (lengthBits < 0)
            {
                return false;
            }

            _codeLengthTreeCodeLength[_codeOrder[_loopCounter]] = (byte)lengthBits;
            ++_loopCounter;
        }

        // Symbols beyond the transmitted count get a length of zero.
        for (var position = _codeLengthCodeCount; position < _codeOrder.Length; position++)
        {
            _codeLengthTreeCodeLength[_codeOrder[position]] = 0;
        }

        // Build the Huffman tree used to decode the code lengths that follow.
        _codeLengthTree = new HuffmanTree(_codeLengthTreeCodeLength);
        _codeArraySize = _literalLengthCodeCount + _distanceCodeCount;
        _loopCounter = 0; // restart the counter for the code length sequence
        _state = InflaterState.ReadingTreeCodesBefore;
    }

    if (_state != InflaterState.ReadingTreeCodesBefore && _state != InflaterState.ReadingTreeCodesAfter)
    {
        throw new InvalidDataContractException("Unknown state.");
    }

    while (_loopCounter < _codeArraySize)
    {
        // In ReadingTreeCodesAfter the symbol was already read by an earlier
        // call and is still held in _lengthCode.
        if (_state == InflaterState.ReadingTreeCodesBefore)
        {
            _lengthCode = _codeLengthTree.GetNextSymbol(_input);
            if (_lengthCode < 0)
            {
                return false;
            }
        }

        // The code length alphabet:
        //   0 - 15: a literal code length of 0 - 15
        //   16:     copy the previous code length 3 - 6 times; the next
        //           2 bits give the repeat count (0 = 3, ... , 3 = 6).
        //           Example: codes 8, 16 (+2 bits 11), 16 (+2 bits 10)
        //           expand to 12 code lengths of 8 (1 + 6 + 5)
        //   17:     repeat a code length of 0 for 3 - 10 times (3 bits)
        //   18:     repeat a code length of 0 for 11 - 138 times (7 bits)
        if (_lengthCode <= 15)
        {
            _codeList[_loopCounter++] = (byte)_lengthCode;
        }
        else
        {
            // Asking for the largest extra-bit count up front; it doesn't
            // matter if we require more bits than this code needs.
            if (!_input.EnsureBitsAvailable(7))
            {
                _state = InflaterState.ReadingTreeCodesAfter;
                return false;
            }

            byte fillValue;
            int repeatCount;
            if (_lengthCode == 16)
            {
                // There is no previous code length to copy on the first entry.
                if (_loopCounter == 0)
                {
                    throw new InvalidDataContractException();
                }

                fillValue = _codeList[_loopCounter - 1];
                repeatCount = _input.GetBits(2) + 3;
            }
            else if (_lengthCode == 17)
            {
                fillValue = 0;
                repeatCount = _input.GetBits(3) + 3;
            }
            else
            {
                fillValue = 0;
                repeatCount = _input.GetBits(7) + 11;
            }

            // A run must not extend past the announced number of code lengths.
            if (_loopCounter + repeatCount > _codeArraySize)
            {
                throw new InvalidDataContractException();
            }

            for (var remaining = repeatCount; remaining > 0; remaining--)
            {
                _codeList[_loopCounter++] = fillValue;
            }
        }

        _state = InflaterState.ReadingTreeCodesBefore; // the next pass reads a new symbol
    }

    // Split the single code length sequence into the two alphabets.
    var literalLengths = new byte[HuffmanTree.MaxLiteralTreeElements];
    var distanceLengths = new byte[HuffmanTree.MaxDistTreeElements];
    Array.Copy(_codeList, literalLengths, _literalLengthCodeCount);
    Array.Copy(_codeList, _literalLengthCodeCount, distanceLengths, 0, _distanceCodeCount);

    // Without an end-of-block code the block could never terminate.
    if (literalLengths[HuffmanTree.EndOfBlockCode] == 0)
    {
        throw new InvalidDataContractException();
    }

    _literalLengthTree = new HuffmanTree(literalLengths);
    _distanceTree = new HuffmanTree(distanceLengths);
    _state = InflaterState.DecodeTop;
    return true;
}
static HuffmanTree()
{
    // Build the static literal/length tree first, then the static distance
    // tree, each from its table of code lengths.
    var literalCodeLengths = GetStaticLiteralTreeLength();
    _staticLiteralLengthTree = new HuffmanTree(literalCodeLengths);

    var distanceCodeLengths = GetStaticDistanceTreeLength();
    _staticDistanceTree = new HuffmanTree(distanceCodeLengths);
}