/// <summary>
/// Reads the next data block from <c>_base</c> and makes its content available in
/// <c>_uncompressedBuffer</c>: <c>_bufferLen</c> is set to the number of bytes stored there and
/// <c>_bufferPtr</c> is reset to 0.
/// </summary>
/// <returns>
/// <c>true</c> when a block was loaded; <c>false</c> when a zero block size word was read,
/// in which case <c>_isComplete</c> is set.
/// </returns>
/// <exception cref="InvalidDataException">
/// The stream ended in the middle of a block, or the declared block size does not fit the buffers
/// sized for this frame.
/// </exception>
bool ReadBlock()
{
    // Reuse _uncompressedBuffer to read block size uint32
    if (!StreamUtils.ReadAll(_base, _uncompressedBuffer, 4))
    {
        throw new InvalidDataException("Unexpected end of LZ4 data block");
    }

    // Keep the size word unsigned while splitting it: the highest bit is a flag, the low 31 bits are the length.
    uint rawBlockSize = LittleEndianConverter.ReadUInt32(_uncompressedBuffer, 0);
    if (rawBlockSize == 0)
    {
        // Zero size word marks the end of the frame
        _isComplete = true;
        return false;
    }

    // Highest bit set means the block payload is stored as-is (not compressed)
    var isCompressed = (rawBlockSize & 0x80000000u) == 0;
    var blockSize = (int)(rawBlockSize & 0x7fffffffu);

    // The size comes from the wire, so validate it before allocating or copying anything.
    // A stored block must fit the uncompressed buffer it is copied into; a compressed block
    // cannot legitimately be larger than the worst-case encoding of a full uncompressed buffer.
    var maxAllowedSize = isCompressed
        ? LZ4Codec.MaximumOutputLength(_uncompressedBuffer.Length)
        : _uncompressedBuffer.Length;
    if (blockSize > maxAllowedSize)
    {
        throw new InvalidDataException($"LZ4 data block size {blockSize} exceeds maximum allowed size {maxAllowedSize}");
    }

    if (_compressedBuffer == null || _compressedBuffer.Length < blockSize)
    {
        _compressedBuffer = new byte[Math.Max(blockSize, LZ4Codec.MaximumOutputLength(_blockSize))];
    }

    if (!StreamUtils.ReadAll(_base, _compressedBuffer, blockSize))
    {
        throw new InvalidDataException("Unexpected end of LZ4 data block");
    }

    // Ignore block checksum because kafka does not set it

    if (!isCompressed)
    {
        // TODO: opportunity to optimize by saving the fact that buffer is not compressed and reading from
        // "compressed" buffer in Read method
        Buffer.BlockCopy(_compressedBuffer, 0, _uncompressedBuffer, 0, blockSize);
        _bufferLen = blockSize;
        _bufferPtr = 0;
        return true;
    }

    var decodedSize = LZ4Codec.Decode(_compressedBuffer, 0, blockSize, _uncompressedBuffer, 0, _uncompressedBuffer.Length);
    _bufferLen = decodedSize;
    _bufferPtr = 0;
    return true;
}
//
// Implementation
//

/// <summary>
/// Reads and validates the 7-byte frame header (magic, FLG, BD, descriptor checksum) from <c>_base</c>
/// and makes sure <c>_uncompressedBuffer</c> is large enough for the block size the frame declares.
/// </summary>
/// <returns>
/// <c>true</c> when a valid header was read; <c>false</c> when <c>StreamUtils.ReadAll</c> reports that
/// the 7 header bytes could not be read.
/// </returns>
/// <exception cref="InvalidDataException">
/// Wrong magic, unsupported version, invalid max block size index, or descriptor checksum mismatch.
/// </exception>
/// <exception cref="NotImplementedException">The frame declares block checksums.</exception>
bool ReadHeader()
{
    // Magic (4 bytes) + FLG + BD + descriptor checksum
    if (!StreamUtils.ReadAll(_base, _headerBuffer, 7))
    {
        return false;
    }

    if (LittleEndianConverter.ReadUInt32(_headerBuffer, 0) != MAGIC)
    {
        throw new InvalidDataException("Invalid lz4 magic");
    }

    // FLG byte: version lives in the top two bits, block checksum flag is bit 4
    var flags = _headerBuffer[4];
    var formatVersion = flags >> 6;
    if (formatVersion != 1)
    {
        throw new InvalidDataException($"Unsupported version of LZ4 format. Supported 1 but got {formatVersion}");
    }

    if ((flags & 0x10) != 0)
    {
        throw new NotImplementedException("Block checksum is not implemented");
    }

    // BD byte: bits 4..6 select an entry of the max block size table; a zero entry means "not valid"
    var blockDescriptor = _headerBuffer[5];
    var sizeIndex = (blockDescriptor >> 4) & 0x7;
    if (sizeIndex >= _maxBlockSizeTable.Length || _maxBlockSizeTable[sizeIndex] == 0)
    {
        throw new InvalidDataException($"Invalid LZ4 max data block size index: {sizeIndex}");
    }

    // Grow the uncompressed buffer only when the current one cannot hold a full block
    int declaredMaxBlockSize = _maxBlockSizeTable[sizeIndex];
    if (_uncompressedBuffer == null || _uncompressedBuffer.Length < declaredMaxBlockSize)
    {
        _uncompressedBuffer = new byte[Math.Max(declaredMaxBlockSize, _blockSize)];
    }

    // Kafka's framing checksum bug (KAFKA-3160): the magic should not be part of the checksum, but it is,
    // so the hash is fed from the very start of the header buffer.
    // The hasher would need to accept an offset to exclude the magic without extra reallocations.
    _hasher.Init();
    _hasher.Update(_headerBuffer, 6);
    var expectedChecksum = (_hasher.Digest() >> 8) & 0xff;
    if (expectedChecksum != _headerBuffer[6])
    {
        throw new InvalidDataException("Lz4 Frame Descriptor checksum mismatch");
    }

    return true;
}