static MessageData Compress(CompressionType compressionType, IEnumerable<MessageData> messages)
{
    // Reuse the shared MemoryStream between calls instead of reallocating it
    if (compressed == null)
    {
        compressed = new MemoryStream();
    }
    else
    {
        compressed.Position = 0;
        compressed.SetLength(0);
    }

    switch (compressionType)
    {
        case CompressionType.Gzip:
        {
            // leaveOpen: true, so closing the codec stream flushes it without disposing "compressed"
            var gzip = new GZipStream(compressed, CompressionLevel.Optimal, true);
            Write(gzip, messages);
            gzip.Close();
            break;
        }
        case CompressionType.Snappy:
        {
            if (_snappyCompressedBuffer == null)
                KafkaSnappyStream.AllocateBuffers(out _snappyUncompressedBuffer, out _snappyCompressedBuffer);

            var snappy = new KafkaSnappyStream(compressed, CompressionStreamMode.Compress, _snappyUncompressedBuffer, _snappyCompressedBuffer);
            Write(snappy, messages);
            snappy.Close();
            break;
        }
        case CompressionType.Lz4:
        {
            var lz4 = new Lz4KafkaStream(compressed, CompressionStreamMode.Compress);
            Write(lz4, messages);
            lz4.Close();
            break;
        }
        default:
            throw new NotImplementedException($"Compression '{compressionType}' is not implemented");
    }

    // Wrap the compressed bytes without copying; the buffer is only valid until the next call
    var buff = new ArraySegment<byte>(compressed.GetBuffer(), 0, (int)compressed.Length);
    return new MessageData { Key = null, Value = buff };
}
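// ParseCompression (called by ReadMessageSet below) is not shown in this excerpt. This is a
// minimal sketch of what it must do, inferred from the "Unknown compression type" branch below,
// which masks with `attributes & 3`: the codec id lives in the low 2 bits of the Attributes
// byte (0 = none, 1 = gzip, 2 = snappy, 3 = lz4 per the Kafka protocol). The exact mapping to
// this library's CompressionType enum is an assumption, not the actual implementation.
static CompressionType ParseCompressionSketch(byte attributes)
{
    switch (attributes & 3)
    {
        case 0: return CompressionType.None;
        case 1: return CompressionType.Gzip;
        case 2: return CompressionType.Snappy;
        default: return CompressionType.Lz4; // codec id 3
    }
}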
// N.B., MessageSets are not preceded by an int32 like other array elements in the protocol.
//
// MessageSet => [Offset MessageSize Message]
//   Offset => int64
//   MessageSize => int32
//
// Message => Crc MagicByte Attributes Key Value
//   Crc => int32
//   MagicByte => int8
//   Attributes => int8
//   Key => bytes
//   Value => bytes
private static IEnumerable<Message> ReadMessageSet(Stream stream, int messageSetSize)
{
    // "As an optimization the server is allowed to return a partial message at the end of the message set.
    // Clients should handle this case"
    var remainingMessageSetBytes = messageSetSize;

    while (remainingMessageSetBytes > 0)
    {
        // We need to be able to read at least the offset (int64) and messageSize (int32)
        if (remainingMessageSetBytes < 8 + 4)
        {
            // Not enough bytes left. This is a partial message. Skip to the end of the message set.
            stream.Position += remainingMessageSetBytes;
            yield break;
        }

        var offset = BigEndianConverter.ReadInt64(stream);
        var messageSize = BigEndianConverter.ReadInt32(stream);

        // We consumed 12 bytes there; check again that we have a full message.
        remainingMessageSetBytes -= 8 + 4;
        if (remainingMessageSetBytes < messageSize)
        {
            // Not enough bytes left. This is a partial message. Skip to the end of the message set.
            stream.Position += remainingMessageSetBytes;
            yield break;
        }

        // Message
        var crc = (uint)BigEndianConverter.ReadInt32(stream);
        var magic = (byte)stream.ReadByte();
        if (magic != 0)
            throw new BrokerException("Invalid kafka message magic"); // TODO: use special exception for data corruption

        var attributes = (byte)stream.ReadByte();
        var compression = ParseCompression(attributes);
        var key = ReadByteArray(stream);
        var value = ReadByteArray(stream);

        if (compression == CompressionType.None)
        {
            var msg = new Message { Key = key, Value = value, Offset = offset };

            // The CRC covers everything after the Crc field: magic, attributes, key and value,
            // where a null array is encoded as length -1
            var computedCrc = Crc32.Update(magic);
            computedCrc = Crc32.Update(attributes, computedCrc);
            if (key == null)
            {
                computedCrc = Crc32.Update(_minusOne32, computedCrc);
            }
            else
            {
                computedCrc = Crc32.Update(key.Length, computedCrc);
                computedCrc = Crc32.Update(key, computedCrc);
            }
            if (value == null)
            {
                // Keep accumulating into computedCrc rather than restarting the checksum
                computedCrc = Crc32.Update(_minusOne32, computedCrc);
            }
            else
            {
                computedCrc = Crc32.Update(value.Length, computedCrc);
                computedCrc = Crc32.Update(value, computedCrc);
            }
            computedCrc = Crc32.GetHash(computedCrc);

            if (computedCrc != crc)
                throw new BrokerException(string.Format("Corrupt message: Crc does not match. Calculated {0} but got {1}", computedCrc, crc));

            yield return msg;
        }
        else if (compression == CompressionType.Gzip)
        {
            var decompressedStream = new MemoryStream();
            using (var gzip = new GZipStream(new MemoryStream(value), CompressionMode.Decompress))
            {
                gzip.CopyTo(decompressedStream);
            }
            decompressedStream.Seek(0, SeekOrigin.Begin);

            // Recursion: the decompressed value is itself a message set
            var innerMessages = ReadMessageSet(decompressedStream, (int)decompressedStream.Length);
            foreach (var innerMessage in innerMessages)
                yield return innerMessage;
        }
        else if (compression == CompressionType.Lz4)
        {
            using (var lz4Stream = new Lz4KafkaStream(new MemoryStream(value), CompressionStreamMode.Decompress))
            {
                var decompressed = new MemoryStream();
                lz4Stream.CopyTo(decompressed);
                decompressed.Seek(0, SeekOrigin.Begin);

                var decompressedMessages = ReadMessageSet(decompressed, (int)decompressed.Length);
                foreach (var msg in decompressedMessages)
                    yield return msg;
            }
        }
        else if (compression == CompressionType.Snappy)
        {
            if (_snappyCompressedBuffer == null)
                KafkaSnappyStream.AllocateBuffers(out _snappyUncompressedBuffer, out _snappyCompressedBuffer);

            using (var snappyStream = new KafkaSnappyStream(new MemoryStream(value), CompressionStreamMode.Decompress, _snappyUncompressedBuffer, _snappyCompressedBuffer))
            {
                var decompressed = new MemoryStream();
                snappyStream.CopyTo(decompressed);
                decompressed.Seek(0, SeekOrigin.Begin);

                var decompressedMessages = ReadMessageSet(decompressed, (int)decompressed.Length);
                foreach (var msg in decompressedMessages)
                    yield return msg;
            }
        }
        else
        {
            throw new BrokerException(string.Format("Unknown compression type: {0}", attributes & 3));
        }

        // Subtract messageSize of that message from remaining bytes
        remainingMessageSetBytes -= messageSize;
    }
}
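// ReadByteArray (called by ReadMessageSet above) is also not shown in this excerpt. This is a
// minimal sketch of the protocol's "bytes" encoding it has to implement: a big-endian int32
// length prefix, where -1 encodes a null array (which is why the CRC check above folds in
// _minusOne32 for null keys and values). The signature and the short-read loop are assumptions
// based on the call sites above, not the library's actual implementation.
static byte[] ReadByteArraySketch(Stream stream)
{
    var len = BigEndianConverter.ReadInt32(stream);
    if (len == -1)
        return null; // the protocol encodes a null Key/Value as length -1

    var buff = new byte[len];
    var read = 0;
    while (read < len)
    {
        // Stream.Read may return fewer bytes than requested, so loop until the array is full
        var count = stream.Read(buff, read, len - read);
        if (count <= 0)
            throw new EndOfStreamException("Unexpected end of stream while reading a byte array");
        read += count;
    }
    return buff;
}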