Exemple #1
0
                                                      + 4; // batchLength

        /// <remarks>
        /// From the official protocol documentation available at
        /// https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-FetchAPI
        /// "As an optimization the server is allowed to return a partial message at the end of the message set. Clients should handle this case."
        /// If the end of the RecordBatch exceeds the length of the whole response (= endOfAllBatches), we should discard this RecordBatch.
        /// </remarks>
        public static RecordBatch Deserialize(ReusableMemoryStream input, Deserializers deserializers, long endOfAllBatches)
        {
            var recordBatch = new RecordBatch();

            if (input.Position + BytesNecessaryToGetLength > endOfAllBatches)
            {
                throw new ProtocolException(
                          $"Trying to read a batch record at {input.Position} and the end of all batches is {endOfAllBatches}."
                          + $" There is not enough bytes remaining to even read the first fields...");
            }
            recordBatch.BaseOffset = BigEndianConverter.ReadInt64(input);
            var batchLength = BigEndianConverter.ReadInt32(input);
            var endOfBatch  = input.Position + batchLength;

            if (endOfAllBatches < endOfBatch)
            {
                // Partial message, CRCs won't match, return here so the CRC check doesn't throw
                return(null);
            }
            recordBatch.PartitionLeaderEpoch = BigEndianConverter.ReadInt32(input);
            var magic = input.ReadByte();

            // Current magic value is 2
            if ((uint)magic != 2)
            {
                throw new UnsupportedMagicByteVersion((byte)magic, "2");
            }

            var crc = (uint)BigEndianConverter.ReadInt32(input);
            var afterCrcPosition = input.Position; // The crc is calculated starting from this position

            Crc32.CheckCrcCastagnoli((int)crc, input, afterCrcPosition, endOfBatch - afterCrcPosition);

            var attributes = BigEndianConverter.ReadInt16(input);

            recordBatch.CompressionCodec = (CompressionCodec)(attributes & CompressionCodecMask);
            recordBatch.IsTransactional  = (attributes & TransactionalFlagMask) != 0;
            recordBatch.IsControl        = (attributes & ControlFlagMask) != 0;
            recordBatch.TimestampType    = (attributes & TimestampTypeMask) > 0
                ? TimestampType.LogAppendTime
                : TimestampType.CreateTime;

            var lastOffsetDelta = BigEndianConverter.ReadInt32(input);

            var firstTimestamp = BigEndianConverter.ReadInt64(input);
            var maxTimestamp   = BigEndianConverter.ReadInt64(input);

            recordBatch.ProducerId    = BigEndianConverter.ReadInt64(input);
            recordBatch.ProducerEpoch = BigEndianConverter.ReadInt16(input);
            recordBatch.BaseSequence  = BigEndianConverter.ReadInt32(input);

            var numberOfRecords = BigEndianConverter.ReadInt32(input);

            recordBatch.Records = DeserializeRecords(recordBatch, input, numberOfRecords, endOfBatch, firstTimestamp, deserializers);

            return(recordBatch);
        }
        internal static List <ResponseMessage> DeserializeRecordBatch(ReusableMemoryStream stream,
                                                                      Deserializers deserializers)
        {
            var list = ResponseMessageListPool.Reserve();

            // First of all, we need to check if it is really a recordBatch (i.e. magic byte = 2)
            // For that, we need to fast-forward to the magic byte. Depending on its value, we go back
            // and deserialize following the corresponding format. (Kafka is amazing).
            var startOfBatchOffset = stream.Position;

            stream.Position += HeaderSize;
            var magic = stream.ReadByte();

            stream.Position = startOfBatchOffset;
            if (magic < 2)
            {
                // We were mistaken, this is not a recordBatch but a message set!
                return(DeserializeMessageSet(stream, deserializers));
            }
            // Now we know what we received is a proper recordBatch
            var size            = BigEndianConverter.ReadInt32(stream);
            var endOfAllBatches = stream.Position + size;

            if (stream.Length < endOfAllBatches)
            {
                throw new ProtocolException($"Fetch response advertise record batches of total size {size},"
                                            + $" but the stream only contains {stream.Length - stream.Position} byte remaining");
            }
            while (stream.Position < endOfAllBatches)
            {
                var recordBatch = RecordBatch.Deserialize(stream, deserializers, endOfAllBatches);
                if (recordBatch == null)
                {
                    // We received a partial record batch, discard it
                    stream.Position = endOfAllBatches;
                    break;
                }
                list.AddRange(recordBatch.Records.Select(record => new ResponseMessage
                {
                    Message = new Message {
                        Key = record.Key, Value = record.Value, TimeStamp = record.Timestamp
                    },
                    Offset = record.Offset
                }));
            }
            return(list);
        }
Exemple #3
0
        public IEnumerable <Record> DeserializeRecordsUncompressed(ReusableMemoryStream input, int numberOfRecords, long endOfBatch,
                                                                   long firstTimeStamp, Deserializers deserializers)
        {
            for (var i = 0; i < numberOfRecords; i++)
            {
                var length = VarIntConverter.ReadInt32(input);
                if (input.Length - input.Position < length)
                {
                    throw new ProtocolException(
                              $"Record said it was of length {length}, but actually only {input.Length - input.Position} bytes remain");
                }

                var attributes     = input.ReadByte(); // ignored for now
                var timeStampDelta = VarIntConverter.ReadInt64(input);
                var offsetDelta    = VarIntConverter.ReadInt32(input);

                var keyLength   = VarIntConverter.ReadInt32(input);
                var key         = keyLength == -1 ? null : deserializers.Item1.Deserialize(input, keyLength);
                var valueLength = VarIntConverter.ReadInt32(input);
                var value       = valueLength == -1 ? null : deserializers.Item1.Deserialize(input, valueLength);

                var headersCount = VarIntConverter.ReadInt32(input);
                var headers      = new List <KafkaRecordHeader>(headersCount);
                for (var j = 0; j < headersCount; j++)
                {
                    headers.Add(Record.DeserializeHeader(input));
                }

                yield return(new Record
                {
                    Headers = headers,
                    Key = key,
                    Timestamp = firstTimeStamp + timeStampDelta,
                    Value = value,
                    Offset = BaseOffset + offsetDelta
                });
            }
        }
Exemple #4
0
        // Deserialize a message set to a sequence of messages.
        // This handles the "partial message allowed at end of message set" from Kafka brokers
        // and compressed message sets (the method recursively calls itself in this case and
        // flatten the result). The returned enumeration must be enumerated for deserialization
        // effectiveley occuring.
        private static IEnumerable <ResponseMessage> LazyDeserializeMessageSet(ReusableMemoryStream stream, int messageSetSize, Deserializers deserializers)
        {
            var remainingMessageSetBytes = messageSetSize;

            while (remainingMessageSetBytes > 0)
            {
                const int offsetSize  = 8;
                const int msgsizeSize = 4;
                if (remainingMessageSetBytes < offsetSize + msgsizeSize)
                {
                    // This is a partial message => skip to the end of the message set.
                    // TODO: unit test this
                    stream.Position += remainingMessageSetBytes;
                    yield break;
                }

                var offset      = BigEndianConverter.ReadInt64(stream);
                var messageSize = BigEndianConverter.ReadInt32(stream);

                remainingMessageSetBytes -= offsetSize + msgsizeSize;
                if (remainingMessageSetBytes < messageSize)
                {
                    // This is a partial message => skip to the end of the message set.
                    stream.Position += remainingMessageSetBytes;
                    yield break;
                }

                // Message body
                var crc         = BigEndianConverter.ReadInt32(stream);
                var crcStartPos = stream.Position; // crc is computed from this position
                var magic       = stream.ReadByte();
                if (magic != 0)
                {
                    throw new UnsupportedMagicByteVersion((byte)magic);
                }
                var attributes = stream.ReadByte();

                // Check for compression
                var codec = (CompressionCodec)(attributes & 3); // Lowest 2 bits
                if (codec == CompressionCodec.None)
                {
                    var msg = new ResponseMessage
                    {
                        Offset  = offset,
                        Message = new Message
                        {
                            Key   = Basics.DeserializeByteArray(stream, deserializers.Item1),
                            Value = Basics.DeserializeByteArray(stream, deserializers.Item2)
                        }
                    };
                    CheckCrc(crc, stream, crcStartPos);
                    yield return(msg);
                }
                else
                {
                    // Key is null, read/check/skip
                    if (BigEndianConverter.ReadInt32(stream) != -1)
                    {
                        throw new InvalidDataException("Compressed messages key should be null");
                    }

                    // Uncompress
                    var compressedLength = BigEndianConverter.ReadInt32(stream);
                    var dataPos          = stream.Position;
                    stream.Position += compressedLength;
                    CheckCrc(crc, stream, crcStartPos);
                    using (var uncompressedStream = stream.Pool.Reserve())
                    {
                        Uncompress(uncompressedStream, stream.GetBuffer(), (int)dataPos, compressedLength, codec);
                        // Deserialize recursively
                        foreach (var m in LazyDeserializeMessageSet(uncompressedStream, (int)uncompressedStream.Length, deserializers))
                        {
                            // Flatten
                            yield return(m);
                        }
                    }
                }

                remainingMessageSetBytes -= messageSize;
            }
        }
Exemple #5
0
        // Deserialize a message set to a sequence of messages.
        // This handles the "partial message allowed at end of message set" from Kafka brokers
        // and compressed message sets (the method recursively calls itself in this case and
        // flatten the result). The returned enumeration must be enumerated for deserialization
        // effectiveley occuring.
        //
        // A message set can contain a mix of v0 and v1 messages.
        // In the case of compressed messages, offsets are returned differently by brokers.
        // Messages inside compressed message v0 will have absolute offsets assigned.
        // Messages inside compressed message v1 will have relative offset assigned, starting
        // from 0. The wrapping compressed message itself is assigned the absolute offset of the last
        // message in the set. That means in this case we can only assign offsets after having decompressing
        // all messages. Lazy deserialization won't be so lazy anymore...
        private static IEnumerable <ResponseMessage> LazyDeserializeMessageSet(ReusableMemoryStream stream, int messageSetSize, Deserializers deserializers)
        {
            var remainingMessageSetBytes = messageSetSize;

            while (remainingMessageSetBytes > 0)
            {
                const int offsetSize  = 8;
                const int msgsizeSize = 4;
                if (remainingMessageSetBytes < offsetSize + msgsizeSize)
                {
                    // This is a partial message => skip to the end of the message set.
                    // TODO: unit test this
                    stream.Position += remainingMessageSetBytes;
                    yield break;
                }

                var offset      = BigEndianConverter.ReadInt64(stream);
                var messageSize = BigEndianConverter.ReadInt32(stream);

                remainingMessageSetBytes -= offsetSize + msgsizeSize;
                if (remainingMessageSetBytes < messageSize)
                {
                    // This is a partial message => skip to the end of the message set.
                    stream.Position += remainingMessageSetBytes;
                    yield break;
                }

                // Message body
                var crc         = BigEndianConverter.ReadInt32(stream);
                var crcStartPos = stream.Position; // crc is computed from this position
                var magic       = stream.ReadByte();
                if ((uint)magic > 1)
                {
                    throw new UnsupportedMagicByteVersion((byte)magic);
                }
                var  attributes = stream.ReadByte();
                long timestamp  = 0;
                if (magic == 1)
                {
                    timestamp = BigEndianConverter.ReadInt64(stream);
                }

                // Check for compression
                var codec = (CompressionCodec)(attributes & 3); // Lowest 2 bits
                if (codec == CompressionCodec.None)
                {
                    var msg = new ResponseMessage
                    {
                        Offset  = offset,
                        Message = new Message
                        {
                            Key       = Basics.DeserializeByteArray(stream, deserializers.Item1),
                            Value     = Basics.DeserializeByteArray(stream, deserializers.Item2),
                            TimeStamp = timestamp
                        }
                    };
                    CheckCrc(crc, stream, crcStartPos);
                    yield return(msg);
                }
                else
                {
                    // Key is null, read/check/skip
                    if (BigEndianConverter.ReadInt32(stream) != -1)
                    {
                        throw new InvalidDataException("Compressed messages key should be null");
                    }

                    // Uncompress
                    var compressedLength = BigEndianConverter.ReadInt32(stream);
                    var dataPos          = stream.Position;
                    stream.Position += compressedLength;
                    CheckCrc(crc, stream, crcStartPos);
                    using (var uncompressedStream = stream.Pool.Reserve())
                    {
                        Uncompress(uncompressedStream, stream.GetBuffer(), (int)dataPos, compressedLength, codec);
                        // Deserialize recursively
                        if (magic == 0) // v0 message
                        {
                            foreach (var m in
                                     LazyDeserializeMessageSet(uncompressedStream, (int)uncompressedStream.Length,
                                                               deserializers))
                            {
                                // Flatten
                                yield return(m);
                            }
                        }
                        else // v1 message, we have to assign the absolute offsets
                        {
                            var innerMsgs = ResponseMessageListPool.Reserve();
                            // We need to deserialize all messages first, because the wrapper offset is the
                            // offset of the last messe in the set, so wee need to know how many messages there are
                            // before assigning offsets.
                            innerMsgs.AddRange(LazyDeserializeMessageSet(uncompressedStream,
                                                                         (int)uncompressedStream.Length, deserializers));
                            var baseOffset = offset - innerMsgs.Count + 1;
                            foreach (var msg in innerMsgs)
                            {
                                yield return
                                    (new ResponseMessage
                                {
                                    Offset = msg.Offset + baseOffset,
                                    Message = msg.Message
                                });
                            }
                            ResponseMessageListPool.Release(innerMsgs);
                        }
                    }
                }

                remainingMessageSetBytes -= messageSize;
            }
        }