// Deserialize a message set to a sequence of messages.
// This handles the "partial message allowed at end of message set" behavior of Kafka
// brokers, as well as compressed message sets (the method recursively calls itself in
// this case and flattens the result). The returned enumeration must be enumerated for
// deserialization to effectively occur (lazy deserialization).
//
// A message set can contain a mix of v0 and v1 messages.
// In the case of compressed messages, offsets are returned differently by brokers.
// Messages inside a compressed message v0 have absolute offsets assigned.
// Messages inside a compressed message v1 have relative offsets assigned, starting
// from 0. The wrapping compressed message itself is assigned the absolute offset of the
// last message in the set. That means in this case we can only assign offsets after
// having decompressed all messages. Lazy deserialization won't be so lazy anymore...
private static IEnumerable<ResponseMessage> LazyDeserializeMessageSet(ReusableMemoryStream stream, int messageSetSize, Deserializers deserializers)
{
    var remainingMessageSetBytes = messageSetSize;

    while (remainingMessageSetBytes > 0)
    {
        const int offsetSize = 8;  // on-wire size of the offset field (int64)
        const int msgsizeSize = 4; // on-wire size of the message-size field (int32)
        if (remainingMessageSetBytes < offsetSize + msgsizeSize)
        {
            // Not enough bytes left for even the offset/size header:
            // this is a partial message => skip to the end of the message set.
            // TODO: unit test this
            stream.Position += remainingMessageSetBytes;
            yield break;
        }

        var offset = BigEndianConverter.ReadInt64(stream);
        var messageSize = BigEndianConverter.ReadInt32(stream);
        remainingMessageSetBytes -= offsetSize + msgsizeSize;
        if (remainingMessageSetBytes < messageSize)
        {
            // The advertised message body extends past the end of the set:
            // this is a partial message => skip to the end of the message set.
            stream.Position += remainingMessageSetBytes;
            yield break;
        }

        // Message body
        var crc = BigEndianConverter.ReadInt32(stream);
        var crcStartPos = stream.Position; // crc is computed from this position
        var magic = stream.ReadByte();
        // ReadByte returns -1 at end of stream; the unsigned cast rejects that too.
        if ((uint)magic > 1)
        {
            throw new UnsupportedMagicByteVersion((byte)magic, "0 or 1");
        }
        var attributes = stream.ReadByte();

        // Only v1 messages carry a timestamp; v0 messages keep the default 0.
        long timestamp = 0;
        if (magic == 1)
        {
            timestamp = BigEndianConverter.ReadInt64(stream);
        }

        // Check for compression. The lowest 2 bits of the attributes cover every codec
        // handled here (None/Gzip/Snappy/Lz4). NOTE(review): the protocol reserves
        // 3 bits for the codec in v1 — revisit this mask if new codecs are supported.
        var codec = (CompressionCodec)(attributes & 3);
        if (codec == CompressionCodec.None)
        {
            var msg = new ResponseMessage
            {
                Offset = offset,
                Message = new Message
                {
                    Key = Basics.DeserializeByteArray(stream, deserializers.Item1),
                    Value = Basics.DeserializeByteArray(stream, deserializers.Item2),
                    TimeStamp = timestamp
                }
            };
            // Deserializing key/value advanced the stream past the whole body, so the
            // crc can now be verified over [crcStartPos, current position).
            Crc32.CheckCrc(crc, stream, crcStartPos);
            yield return msg;
        }
        else
        {
            // Key is null for compressed wrapper messages, read/check/skip
            if (BigEndianConverter.ReadInt32(stream) != -1)
            {
                throw new InvalidDataException("Compressed messages key should be null");
            }

            // Uncompress: the wrapper message's value is the compressed inner set.
            var compressedLength = BigEndianConverter.ReadInt32(stream);
            var dataPos = stream.Position;
            stream.Position += compressedLength; // skip over the compressed blob
            Crc32.CheckCrc(crc, stream, crcStartPos);

            using (var uncompressedStream = stream.Pool.Reserve())
            {
                Basics.Uncompress(uncompressedStream, stream.GetBuffer(), (int)dataPos, compressedLength, codec);
                // Deserialize recursively
                if (magic == 0) // v0 message: inner offsets are already absolute
                {
                    foreach (var m in LazyDeserializeMessageSet(uncompressedStream, (int)uncompressedStream.Length, deserializers))
                    {
                        // Flatten
                        yield return m;
                    }
                }
                else // v1 message: we have to assign the absolute offsets ourselves
                {
                    // We need to deserialize all inner messages first, because the
                    // wrapper offset is the offset of the last message in the set, so
                    // we need to know how many messages there are before assigning
                    // offsets.
                    var innerMsgs = ResponseMessageListPool.Reserve();
                    try
                    {
                        innerMsgs.AddRange(LazyDeserializeMessageSet(uncompressedStream, (int)uncompressedStream.Length, deserializers));
                        // Inner offsets are relative (0..count-1); the last one must
                        // land on the wrapper's absolute offset.
                        var baseOffset = offset - innerMsgs.Count + 1;
                        foreach (var msg in innerMsgs)
                        {
                            yield return new ResponseMessage { Offset = msg.Offset + baseOffset, Message = msg.Message };
                        }
                    }
                    finally
                    {
                        // Fix: return the pooled list even when the consumer abandons
                        // the enumeration before all inner messages have been yielded
                        // (previously the list leaked from the pool in that case).
                        ResponseMessageListPool.Release(innerMsgs);
                    }
                }
            }
        }

        remainingMessageSetBytes -= messageSize;
    }
}