// Selects the state decoding starts from: with a format reader attached the
// header is read first, otherwise decoding begins at the BFINAL bit.
private void Reset()
{
    if (_hasFormatReader)
    {
        _state = Inflater64State.ReadingHeader;
    }
    else
    {
        _state = Inflater64State.ReadingBFinal;
    }
}
// Layout of a dynamic block header:
//
//   5 bits: HLIT,  # of Literal/Length codes - 257 (257 - 286)
//   5 bits: HDIST, # of Distance codes - 1         (1 - 32)
//   4 bits: HCLEN, # of Code Length codes - 4      (4 - 19)
//
//   (HCLEN + 4) x 3 bits: code lengths for the code length alphabet,
//      sent in the order 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12,
//      3, 13, 2, 14, 1, 15. Each one is a 3-bit integer (0-7); a code
//      length of 0 means the corresponding symbol is not used.
//
//   HLIT + 257 code lengths for the literal/length alphabet, then
//   HDIST + 1 code lengths for the distance alphabet, both encoded
//   using the code length Huffman code.
//
// The repeat codes can cross from the literal/length lengths into the
// distance lengths, so all code lengths are handled as a single sequence
// of HLIT + HDIST + 258 values.
//
// Returns false as soon as a read cannot be satisfied (GetBits/GetNextSymbol
// yield a negative value, or EnsureBitsAvailable fails); _state then records
// the step to pick up from. Returns true once both trees have been built and
// _state has been moved to DecodeTop.
private bool DecodeDynamicBlockHeader()
{
    switch (_state)
    {
        case Inflater64State.ReadingNumLitCodes:
            if ((_literalLengthCodeCount = _input.GetBits(5)) < 0)
            {
                return false;
            }

            _literalLengthCodeCount += 257;
            _state = Inflater64State.ReadingNumDistCodes;
            goto case Inflater64State.ReadingNumDistCodes;

        case Inflater64State.ReadingNumDistCodes:
            if ((_distanceCodeCount = _input.GetBits(5)) < 0)
            {
                return false;
            }

            _distanceCodeCount += 1;
            _state = Inflater64State.ReadingNumCodeLengthCodes;
            goto case Inflater64State.ReadingNumCodeLengthCodes;

        case Inflater64State.ReadingNumCodeLengthCodes:
            if ((_codeLengthCodeCount = _input.GetBits(4)) < 0)
            {
                return false;
            }

            _codeLengthCodeCount += 4;
            _loopCounter = 0;
            _state = Inflater64State.ReadingCodeLengthCodes;
            goto case Inflater64State.ReadingCodeLengthCodes;

        case Inflater64State.ReadingCodeLengthCodes:
            // Read the transmitted 3-bit lengths; _loopCounter only advances
            // after a successful read so an interrupted pass can continue.
            for (; _loopCounter < _codeLengthCodeCount; ++_loopCounter)
            {
                int lengthBits = _input.GetBits(3);
                if (lengthBits < 0)
                {
                    return false;
                }

                _codeLengthTreeCodeLength[s_codeOrder[_loopCounter]] = (byte)lengthBits;
            }

            // Symbols that were not transmitted get a length of zero.
            for (int unused = _codeLengthCodeCount; unused < s_codeOrder.Length; unused++)
            {
                _codeLengthTreeCodeLength[s_codeOrder[unused]] = 0;
            }

            // Build the Huffman tree for the code length alphabet.
            _codeLengthTree = new HuffmanTree(_codeLengthTreeCodeLength);
            _codeArraySize = _literalLengthCodeCount + _distanceCodeCount;
            _loopCounter = 0; // restart the counter for the next phase

            _state = Inflater64State.ReadingTreeCodesBefore;
            goto case Inflater64State.ReadingTreeCodesBefore;

        case Inflater64State.ReadingTreeCodesBefore:
        case Inflater64State.ReadingTreeCodesAfter:
            while (_loopCounter < _codeArraySize)
            {
                // In the "After" state the symbol has already been decoded and
                // stored in _lengthCode; only its extra bits are still missing.
                if (_state == Inflater64State.ReadingTreeCodesBefore
                    && (_lengthCode = _codeLengthTree.GetNextSymbol(_input)) < 0)
                {
                    return false;
                }

                // Code length alphabet:
                //   0 - 15: a literal code length of 0 - 15
                //       16: copy the previous code length 3 - 6 times
                //           (2 extra bits: 0 = 3, ... , 3 = 6).
                //           Example: 8, 16 (+2 bits 11), 16 (+2 bits 10)
                //           expands to 12 code lengths of 8 (1 + 6 + 5)
                //       17: repeat a code length of 0 for 3 - 10 times
                //           (3 extra bits)
                //       18: repeat a code length of 0 for 11 - 138 times
                //           (7 extra bits)
                if (_lengthCode <= 15)
                {
                    _codeList[_loopCounter++] = (byte)_lengthCode;
                }
                else
                {
                    int extraBitCount;
                    int repeatBase;
                    if (_lengthCode == 16)
                    {
                        extraBitCount = 2;
                        repeatBase = 3;
                    }
                    else if (_lengthCode == 17)
                    {
                        extraBitCount = 3;
                        repeatBase = 3;
                    }
                    else
                    {
                        // code == 18
                        extraBitCount = 7;
                        repeatBase = 11;
                    }

                    if (!_input.EnsureBitsAvailable(extraBitCount))
                    {
                        _state = Inflater64State.ReadingTreeCodesAfter;
                        return false;
                    }

                    byte fillValue = 0;
                    if (_lengthCode == 16)
                    {
                        if (_loopCounter == 0)
                        {
                            // there is no "previous code" before the first one
                            throw new InvalidDataException();
                        }

                        fillValue = _codeList[_loopCounter - 1];
                    }

                    int repeatCount = _input.GetBits(extraBitCount) + repeatBase;
                    if (_loopCounter + repeatCount > _codeArraySize)
                    {
                        throw new InvalidDataException();
                    }

                    int fillEnd = _loopCounter + repeatCount;
                    while (_loopCounter < fillEnd)
                    {
                        _codeList[_loopCounter++] = fillValue;
                    }
                }

                // The next iteration has to decode a fresh symbol.
                _state = Inflater64State.ReadingTreeCodesBefore;
            }

            break;

        default:
            Debug.Fail("check why we are here!");
            throw new InvalidDataException(SR.UnknownState);
    }

    // Split the combined length list into the literal/length and distance tables.
    byte[] literalLengths = new byte[HuffmanTree.MaxLiteralTreeElements];
    byte[] distanceLengths = new byte[HuffmanTree.MaxDistTreeElements];
    Array.Copy(_codeList, 0, literalLengths, 0, _literalLengthCodeCount);
    Array.Copy(_codeList, _literalLengthCodeCount, distanceLengths, 0, _distanceCodeCount);

    // Without an end-of-block code the block could never terminate.
    if (literalLengths[HuffmanTree.EndOfBlockCode] == 0)
    {
        throw new InvalidDataException();
    }

    _literalLengthTree = new HuffmanTree(literalLengths);
    _distanceTree = new HuffmanTree(distanceLengths);
    _state = Inflater64State.DecodeTop;
    return true;
}
// Non-compressed blocks (BTYPE=00):
//
// Any bits of input up to the next byte boundary are ignored. The rest of
// the block consists of:
//
//     0   1   2   3   4...
//   +---+---+---+---+================================+
//   |  LEN  | NLEN  |... LEN bytes of literal data...|
//   +---+---+---+---+================================+
//
// LEN is the number of data bytes in the block and NLEN is the one's
// complement of LEN.
//
// Returns true when the block has been copied completely (end_of_block is
// set) or when the output window reports no free bytes; returns false when
// a read or copy could not be completed for any other reason.
private bool DecodeUncompressedBlock(out bool end_of_block)
{
    end_of_block = false;

    for (;;)
    {
        switch (_state)
        {
            case Inflater64State.UncompressedAligning:
                // Entry state: discard bits up to the next byte boundary.
                _input.SkipToByteBoundary();
                _state = Inflater64State.UncompressedByte1;
                goto case Inflater64State.UncompressedByte1;

            case Inflater64State.UncompressedByte1:
            case Inflater64State.UncompressedByte2:
            case Inflater64State.UncompressedByte3:
            case Inflater64State.UncompressedByte4:
                // One header byte (LEN low/high, NLEN low/high) per state.
                int headerByte = _input.GetBits(8);
                if (headerByte < 0)
                {
                    return false;
                }

                _blockLengthBuffer[_state - Inflater64State.UncompressedByte1] = (byte)headerByte;

                if (_state == Inflater64State.UncompressedByte4)
                {
                    _blockLength = _blockLengthBuffer[0] + (_blockLengthBuffer[1] << 8);
                    int blockLengthComplement = _blockLengthBuffer[2] + (_blockLengthBuffer[3] << 8);

                    // LEN must equal the one's complement of NLEN (low 16 bits).
                    ushort declaredLength = (ushort)_blockLength;
                    ushort expectedLength = (ushort)(~blockLengthComplement);
                    if (declaredLength != expectedLength)
                    {
                        throw new InvalidDataException(SR.InvalidBlockLength);
                    }
                }

                // Move on to the next state in enum order (after Byte4 this is
                // presumably DecodingUncompressed -- confirm against the enum).
                ++_state;
                break;

            case Inflater64State.DecodingUncompressed:
                // Copy block data straight from the input into the output window.
                int copied = _output.CopyFrom(_input, _blockLength);
                _blockLength -= copied;

                if (_blockLength == 0)
                {
                    // Block finished; the next thing to read is a block header.
                    _state = Inflater64State.ReadingBFinal;
                    end_of_block = true;
                    return true;
                }

                // A partial copy has two possible causes: the input ran out, or
                // the output window has no free space left. Only the latter
                // counts as success.
                return _output.FreeBytes == 0;

            default:
                Debug.Fail("check why we are here!");
                throw new InvalidDataException(SR.UnknownState);
        }
    }
}
// Decodes literal and length/distance symbols of a Huffman-coded block into
// the output window.
//
// Returns false when the input cannot supply the next symbol or its extra
// bits (_state records the step to pick up from). Returns true when the
// end-of-block code was read (end_of_block_code_seen is set) or when the
// free space in the output window is no longer above the required headroom.
private bool DecodeBlock(out bool end_of_block_code_seen)
{
    // With Deflate64 a single match can be up to 64kb long, so at least that
    // much free space is required in the OutputWindow before decoding a
    // symbol; otherwise unflushed output could be overwritten.
    const int RequiredFreeBytes = 65536;

    end_of_block_code_seen = false;

    // Cached locally: slightly faster than reading the property repeatedly.
    int freeBytes = _output.FreeBytes;

    while (freeBytes > RequiredFreeBytes)
    {
        switch (_state)
        {
            case Inflater64State.DecodeTop:
                // Decode one element from the literal/length tree.
                // TODO: optimize this!!!
                int symbol = _literalLengthTree.GetNextSymbol(_input);
                if (symbol < 0)
                {
                    // out of input
                    return false;
                }

                if (symbol == 256)
                {
                    // end of block: the next thing to read is a block header
                    end_of_block_code_seen = true;
                    _state = Inflater64State.ReadingBFinal;
                    return true;
                }

                if (symbol < 256)
                {
                    // literal byte
                    _output.Write((byte)symbol);
                    --freeBytes;
                    break;
                }

                // Length/distance pair; length codes start at 257.
                int lengthIndex = symbol - 257;
                if (lengthIndex < 8)
                {
                    // match length = 3,4,5,6,7,8,9,10 with no extra bits
                    _extraBits = 0;
                    _length = lengthIndex + 3;
                }
                else
                {
                    if (lengthIndex < 0 || lengthIndex >= s_extraLengthBits.Length)
                    {
                        throw new InvalidDataException(SR.GenericInvalidData);
                    }

                    _extraBits = s_extraLengthBits[lengthIndex];
                    Debug.Assert(_extraBits != 0, "We handle other cases separately!");
                    _length = lengthIndex;
                }

                goto case Inflater64State.HaveInitialLength;

            case Inflater64State.HaveInitialLength:
                if (_extraBits > 0)
                {
                    // Record the state first so a failed read picks up here.
                    _state = Inflater64State.HaveInitialLength;
                    int lengthExtra = _input.GetBits(_extraBits);
                    if (lengthExtra < 0)
                    {
                        return false;
                    }

                    if (_length < 0 || _length >= s_lengthBase.Length)
                    {
                        throw new InvalidDataException(SR.GenericInvalidData);
                    }

                    _length = s_lengthBase[_length] + lengthExtra;
                }

                _state = Inflater64State.HaveFullLength;
                goto case Inflater64State.HaveFullLength;

            case Inflater64State.HaveFullLength:
                if (_blockType != BlockType.Dynamic)
                {
                    // Static block: the distance code is read directly as 5 bits
                    // and mapped through the static table.
                    _distanceCode = _input.GetBits(5);
                    if (_distanceCode >= 0)
                    {
                        _distanceCode = s_staticDistanceTreeTable[_distanceCode];
                    }
                }
                else
                {
                    _distanceCode = _distanceTree.GetNextSymbol(_input);
                }

                if (_distanceCode < 0)
                {
                    // out of input
                    return false;
                }

                _state = Inflater64State.HaveDistCode;
                goto case Inflater64State.HaveDistCode;

            case Inflater64State.HaveDistCode:
                // No table lookup is needed for the extra bit count: for
                // distanceCode > 3, extra_bits = (distanceCode - 2) >> 1.
                int offset;
                if (_distanceCode <= 3)
                {
                    offset = _distanceCode + 1;
                }
                else
                {
                    _extraBits = (_distanceCode - 2) >> 1;
                    int distanceExtra = _input.GetBits(_extraBits);
                    if (distanceExtra < 0)
                    {
                        return false;
                    }

                    offset = s_distanceBasePosition[_distanceCode] + distanceExtra;
                }

                _output.WriteLengthDistance(_length, offset);
                freeBytes -= _length;
                _state = Inflater64State.DecodeTop;
                break;

            default:
                Debug.Fail("check why we are here!");
                throw new InvalidDataException(SR.UnknownState);
        }
    }

    return true;
}
// Each block of compressed data begins with 3 header bits:
//
//   first bit    BFINAL
//   next 2 bits  BTYPE
//
// The header bits do not necessarily begin on a byte boundary, since a
// block does not necessarily occupy an integral number of bytes.
//
// BFINAL is set if and only if this is the last block of the data set.
//
// BTYPE specifies how the data are compressed:
//
//   00 - no compression
//   01 - compressed with fixed Huffman codes
//   10 - compressed with dynamic Huffman codes
//   11 - reserved (error)
//
// The only difference between the two compressed cases is how the Huffman
// codes for the literal/length and distance alphabets are defined.
//
// Returns true for success (end of block, or output window is full) and
// false if we are short of input.
private bool Decode()
{
    if (Finished())
    {
        return true;
    }

    if (_hasFormatReader)
    {
        if (_state == Inflater64State.ReadingHeader)
        {
            if (!_formatReader.ReadHeader(_input))
            {
                return false;
            }

            _state = Inflater64State.ReadingBFinal;
        }
        else if (_state == Inflater64State.StartReadingFooter || _state == Inflater64State.ReadingFooter)
        {
            if (!_formatReader.ReadFooter(_input))
            {
                return false;
            }

            _state = Inflater64State.VerifyingFooter;
            return true;
        }
    }

    if (_state == Inflater64State.ReadingBFinal)
    {
        // BFINAL needs 1 bit.
        if (!_input.EnsureBitsAvailable(1))
        {
            return false;
        }

        _bfinal = _input.GetBits(1);
        _state = Inflater64State.ReadingBType;
    }

    if (_state == Inflater64State.ReadingBType)
    {
        // BTYPE needs 2 bits; _state already is ReadingBType if we bail out.
        if (!_input.EnsureBitsAvailable(2))
        {
            return false;
        }

        _blockType = (BlockType)_input.GetBits(2);
        switch (_blockType)
        {
            case BlockType.Dynamic:
                _state = Inflater64State.ReadingNumLitCodes;
                break;

            case BlockType.Static:
                _literalLengthTree = HuffmanTree.StaticLiteralLengthTree;
                _distanceTree = HuffmanTree.StaticDistanceTree;
                _state = Inflater64State.DecodeTop;
                break;

            case BlockType.Uncompressed:
                _state = Inflater64State.UncompressedAligning;
                break;

            default:
                throw new InvalidDataException(SR.UnknownBlockType);
        }
    }

    bool blockEnded = false;
    bool succeeded;
    switch (_blockType)
    {
        case BlockType.Dynamic:
            // States before DecodeTop mean the dynamic header is still being
            // read. DecodeBlock can also return true when the output is full.
            succeeded = _state < Inflater64State.DecodeTop
                ? DecodeDynamicBlockHeader()
                : DecodeBlock(out blockEnded);
            break;

        case BlockType.Static:
            succeeded = DecodeBlock(out blockEnded);
            break;

        case BlockType.Uncompressed:
            succeeded = DecodeUncompressedBlock(out blockEnded);
            break;

        default:
            throw new InvalidDataException(SR.UnknownBlockType);
    }

    // The end of a block that had bfinal=1 (final block) was reached: go on
    // to the footer when a format reader is present, otherwise we are done.
    if (blockEnded && _bfinal != 0)
    {
        _state = _hasFormatReader
            ? Inflater64State.StartReadingFooter
            : Inflater64State.Done;
    }

    return succeeded;
}