Пример #1
0
        /// <summary>
        /// Reads the next LZ4 frame data block from <c>_base</c> and exposes its payload through
        /// <c>_uncompressedBuffer</c>: <c>_bufferPtr</c> is reset to 0 and <c>_bufferLen</c> is set to the
        /// number of payload bytes.
        /// </summary>
        /// <returns>
        /// <c>true</c> when a data block was loaded; <c>false</c> when the zero-valued end mark was read,
        /// in which case <c>_isComplete</c> is set.
        /// </returns>
        /// <exception cref="InvalidDataException">
        /// The stream ended before a complete block could be read, or a stored (uncompressed) block
        /// declares a size larger than the uncompressed buffer.
        /// </exception>
        bool ReadBlock()
        {
            // The highest bit of the 32-bit block size word marks the block as stored (not compressed).
            const uint UncompressedFlag = 0x80000000;

            // Reuse _uncompressedBuffer to read block size uint32
            if (!StreamUtils.ReadAll(_base, _uncompressedBuffer, 4))
            {
                throw new InvalidDataException("Unexpected end of LZ4 data block");
            }

            // Keep the raw word unsigned so that flag extraction does not depend on the
            // checked/unchecked compilation context.
            uint blockHeader = LittleEndianConverter.ReadUInt32(_uncompressedBuffer, 0);

            // A size word of zero is the end mark.
            if (blockHeader == 0)
            {
                _isComplete = true;
                return false;
            }

            var isCompressed = (blockHeader & UncompressedFlag) == 0;

            // The remaining 31 bits always fit into a non-negative int.
            var blockSize = (int)(blockHeader & ~UncompressedFlag);

            // Ignore block checksum because kafka does not set it

            if (!isCompressed)
            {
                // The size comes from the stream, so validate it before trusting it:
                // a stored block is consumed as-is and must fit into the uncompressed buffer.
                if (blockSize > _uncompressedBuffer.Length)
                {
                    throw new InvalidDataException(
                        $"LZ4 uncompressed block size {blockSize} exceeds uncompressed buffer size {_uncompressedBuffer.Length}");
                }

                // Read the stored payload straight into its final location; no intermediate copy is needed.
                if (!StreamUtils.ReadAll(_base, _uncompressedBuffer, blockSize))
                {
                    throw new InvalidDataException("Unexpected end of LZ4 data block");
                }

                _bufferLen = blockSize;
                _bufferPtr = 0;
                return true;
            }

            // (Re)allocate the staging buffer for compressed input only when the current one is too small.
            if (_compressedBuffer == null || _compressedBuffer.Length < blockSize)
            {
                _compressedBuffer = new byte[Math.Max(blockSize, LZ4Codec.MaximumOutputLength(_blockSize))];
            }

            if (!StreamUtils.ReadAll(_base, _compressedBuffer, blockSize))
            {
                throw new InvalidDataException("Unexpected end of LZ4 data block");
            }

            // NOTE(review): assumes LZ4Codec.Decode returns the number of bytes written to the output
            // buffer and reports malformed input by throwing - confirm against the codec implementation.
            var decodedSize = LZ4Codec.Decode(_compressedBuffer, 0, blockSize, _uncompressedBuffer, 0, _uncompressedBuffer.Length);

            _bufferLen = decodedSize;
            _bufferPtr = 0;

            return true;
        }
Пример #2
0
        //
        // Implementation
        //

        /// <summary>
        /// Reads and validates the fixed 7-byte LZ4 frame header (magic, FLG, BD, header checksum) from
        /// <c>_base</c> and makes sure <c>_uncompressedBuffer</c> is large enough for the declared
        /// maximum block size.
        /// </summary>
        /// <returns>
        /// <c>true</c> when a valid header was read; <c>false</c> when <c>StreamUtils.ReadAll</c> could not
        /// supply the 7 header bytes.
        /// </returns>
        /// <exception cref="InvalidDataException">
        /// The magic number, format version, maximum block size index or header checksum is invalid.
        /// </exception>
        /// <exception cref="NotImplementedException">
        /// The frame uses block checksums, a content size field or a dictionary ID, none of which this
        /// reader handles.
        /// </exception>
        bool ReadHeader()
        {
            // FLG bit layout (LZ4 frame format): 7-6 version, 4 block checksum, 3 content size, 0 dictionary ID.
            const int BlockChecksumFlag = 1 << 4;
            const int ContentSizeFlag = 1 << 3;
            const int DictionaryIdFlag = 1;

            // Read magic, FLG/BD and Descriptor Checksum
            if (!StreamUtils.ReadAll(_base, _headerBuffer, 7))
            {
                return false;
            }

            if (LittleEndianConverter.ReadUInt32(_headerBuffer, 0) != MAGIC)
            {
                throw new InvalidDataException("Invalid lz4 magic");
            }

            // parse FLG
            var flg     = _headerBuffer[4];
            var version = flg >> 6;

            if (version != 1)
            {
                throw new InvalidDataException($"Unsupported version of LZ4 format. Supported 1 but got {version}");
            }

            if ((flg & BlockChecksumFlag) != 0)
            {
                throw new NotImplementedException("Block checksum is not implemented");
            }

            // Content size adds 8 bytes and dictionary ID adds 4 bytes between BD and the header checksum.
            // Only a fixed 7-byte header is read here, so such frames would be misparsed: reject them explicitly
            // instead of failing later with a misleading checksum mismatch (or, worse, accepting garbage).
            if ((flg & ContentSizeFlag) != 0)
            {
                throw new NotImplementedException("Content size is not implemented");
            }

            if ((flg & DictionaryIdFlag) != 0)
            {
                throw new NotImplementedException("Dictionary ID is not implemented");
            }

            // parse BD and allocate uncompressed buffer
            var bd = _headerBuffer[5];
            var maxBlockSizeIndex = (bd >> 4) & 0x7;

            if (maxBlockSizeIndex >= _maxBlockSizeTable.Length)
            {
                throw new InvalidDataException($"Invalid LZ4 max data block size index: {maxBlockSizeIndex}");
            }

            int maxBlockSize = _maxBlockSizeTable[maxBlockSizeIndex];

            // A zero table entry is treated as an invalid index.
            if (maxBlockSize == 0)
            {
                throw new InvalidDataException($"Invalid LZ4 max data block size index: {maxBlockSizeIndex}");
            }

            // Keep the existing buffer when it is already large enough.
            if (_uncompressedBuffer == null || _uncompressedBuffer.Length < maxBlockSize)
            {
                _uncompressedBuffer = new byte[Math.Max(maxBlockSize, _blockSize)];
            }

            _hasher.Init();
            // Yep, this is the bug in kafka's framing checksum KAFKA-3160. Magic should not be checksummed but it is
            _hasher.Update(_headerBuffer, 6);   // Will need to patch it to accept offset in order to avoid unneeded reallocations,
                                                // when want to exclude magic

            // The header checksum is the second byte of the digest.
            var calculatedChecksum = (_hasher.Digest() >> 8) & 0xff;

            if (calculatedChecksum != _headerBuffer[6])
            {
                throw new InvalidDataException("Lz4 Frame Descriptor checksum mismatch");
            }

            return true;
        }