private void TestSerializeOneMessageCommon(Message message)
{
    // Serialize a message carrying both key and value and check the v0 wire layout:
    // crc (4 bytes) | magic byte | attributes | key length | key | value length | value.
    using (var stream = new ReusableMemoryStream(null))
    {
        message.Serialize(stream, CompressionCodec.None, new Tuple<ISerializer, ISerializer>(null, null));

        Assert.AreEqual(FullMessageSize, stream.Length);
        Assert.AreEqual(0, stream.GetBuffer()[4]); // magic byte is 0
        Assert.AreEqual(0, stream.GetBuffer()[5]); // attributes is 0

        stream.Position = 6;
        Assert.AreEqual(TheKey.Length, BigEndianConverter.ReadInt32(stream));
        CompareBuffers(Key, stream);
        Assert.AreEqual(TheValue.Length, BigEndianConverter.ReadInt32(stream));
        CompareBuffers(Value, stream);
    }

    // A message without a key must serialize the key length as -1.
    using (var stream = new ReusableMemoryStream(null))
    {
        var valueOnly = new Message { Value = Value };
        valueOnly.Serialize(stream, CompressionCodec.None, new Tuple<ISerializer, ISerializer>(null, null));

        Assert.AreEqual(NullKeyMessageSize, stream.Length);
        Assert.AreEqual(0, stream.GetBuffer()[4]); // magic byte is 0
        Assert.AreEqual(0, stream.GetBuffer()[5]); // attributes is 0

        stream.Position = 6;
        Assert.AreEqual(-1, BigEndianConverter.ReadInt32(stream));
        Assert.AreEqual(TheValue.Length, BigEndianConverter.ReadInt32(stream));
        CompareBuffers(Value, stream);
    }
}
// Compress one LZ4 block of body[offset, offset+count) into target at the
// current position. Output layout: 4-byte little-endian block header followed
// by the block data. The header holds the data size; its highest bit set
// marks the data as stored uncompressed (used when LZ4 would not shrink it).
private static void BlockCompress(ReusableMemoryStream target, byte[] body, int offset, int count)
{
    var position = (int)target.Position;
    // Reserve worst-case room up front: 4-byte header + maximum compressed size.
    target.SetLength(target.Length + MaxCompressedSize + 4);
    target.Position = position + 4;
    // Encode directly into the target's underlying buffer, right after the header slot.
    var size = LZ4Codec.Encode(body, offset, count, target.GetBuffer(), position + 4, MaxCompressedSize);
    if (size >= count)
    {
        // Do not compress block (compression did not shrink the data)
        // => set block header highest bit to 1 to mark no compression
        LittleEndianWriteUInt32((uint)(count | 1 << 31), target.GetBuffer(), position);
        // Write uncompressed data (overwrites the useless encoder output)
        target.Write(body, offset, count);
    }
    else
    {
        LittleEndianWriteUInt32((uint)size, target.GetBuffer(), position);
        // compressed data is already written, just set the position
        target.Position += size;
    }
    // Trim the worst-case reservation down to the bytes actually written.
    target.SetLength(target.Position);
}
// Uncompress one LZ4 block starting at body[dataIndex] into target.
// Block layout: 4-byte little-endian header, then data. A zero header marks
// the last frame. The header's highest bit set means the data was stored
// uncompressed; otherwise the low 31 bits hold the compressed size.
// Returns the number of input bytes consumed (0 for the last frame).
private static uint UncompressBlock(ReusableMemoryStream target, byte[] body, int dataIndex, bool hasChecksum, int blockSize)
{
    var blockHeader = LittleEndianReadUInt32(body, dataIndex);
    if (blockHeader == 0) // last frame
    {
        return(0);
    }
    // Low 31 bits: stored data size (compressed or raw, depending on the top bit).
    var size = blockHeader & 0x7FFFFFFF;
    if ((blockHeader & 0x80000000) == 0) // compressed data
    {
        // Reserve the full expected uncompressed size, then decode in place.
        target.SetLength(target.Length + blockSize);
        var dsize = LZ4Codec.Decode(body, dataIndex + 4, (int)size, target.GetBuffer(), (int)target.Position, blockSize);
        // The last block of a frame may decode to less than blockSize; trim the excess.
        if (dsize < blockSize)
        {
            target.SetLength(target.Length - blockSize + dsize);
        }
        target.Position = target.Length;
    }
    else // uncompressed data
    {
        target.Write(body, dataIndex + 4, (int)size);
    }
    // Consumed: header (4) + data (+ optional per-block checksum).
    return(size + 4 + (hasChecksum ? 4u : 0));
}
/// <summary>
/// Serialize this message in the Kafka v0 on-wire format:
/// crc (4 bytes) | magic byte | attributes | key | value.
/// The crc placeholder is written first and patched once the body is complete.
/// </summary>
public void Serialize(ReusableMemoryStream stream, CompressionCodec compressionCodec,
    Tuple<ISerializer, ISerializer> serializers)
{
    var crcPosition = stream.Position;
    stream.Write(Basics.MinusOne32, 0, 4); // crc placeholder, patched below

    var bodyStart = stream.Position;
    stream.WriteByte(0);                      // magic byte (v0)
    stream.WriteByte((byte)compressionCodec); // attributes

    // Reuse the pre-serialized key/value bytes when they are available.
    if (SerializedKeyValue == null)
    {
        DoSerializeKeyValue(stream, serializers);
    }
    else
    {
        stream.Write(SerializedKeyValue.GetBuffer(), 0, (int)SerializedKeyValue.Length);
    }

    // Compute the crc over the body and patch it into the placeholder.
    var crc = Crc32.Compute(stream, bodyStart, stream.Position - bodyStart);
    var endPosition = stream.Position;
    stream.Position = crcPosition;
    BigEndianConverter.Write(stream, (int)crc);
    stream.Position = endPosition;
}
// Uncompress body[offset, offset+length) into 'uncompressed' using the given codec.
// On success the stream position is rewound to 0, ready for reading.
// Any failure is wrapped in an UncompressException carrying the codec.
// NOTE(review): only Snappy and Gzip are handled here — presumably Lz4 data is
// dispatched elsewhere before reaching this method; confirm against callers.
private static void Uncompress(ReusableMemoryStream uncompressed, byte[] body, int offset, int length, CompressionCodec codec)
{
    try
    {
        if (codec == CompressionCodec.Snappy)
        {
#if NET_CORE
            // Snappy is not available on this target framework.
            throw new NotImplementedException();
#else
            // Snappy data carries its uncompressed size, so the destination
            // buffer can be sized up front and decoded in a single call.
            uncompressed.SetLength(SnappyCodec.GetUncompressedLength(body, offset, length));
            SnappyCodec.Uncompress(body, offset, length, uncompressed.GetBuffer(), 0);
#endif
        }
        else // compression == CompressionCodec.Gzip
        {
            using (var compressed = new MemoryStream(body, offset, length, false))
            {
                using (var gzip = new GZipStream(compressed, CompressionMode.Decompress))
                {
                    // Copy through a pooled scratch stream to avoid allocating
                    // a temporary buffer per call.
                    using (var tmp = uncompressed.Pool.Reserve())
                    {
                        gzip.ReusableCopyTo(uncompressed, tmp);
                    }
                }
            }
        }
        uncompressed.Position = 0;
    }
    catch (Exception ex)
    {
        throw new UncompressException("Invalid compressed data.", codec, ex);
    }
}
/// <summary>
/// Serialize this message in the Kafka on-wire format:
/// crc | magic byte | attributes | [timestamp (v1 only)] | key | value.
/// The crc placeholder is written first and patched once the body is complete.
/// </summary>
public void Serialize(ReusableMemoryStream stream, CompressionCodec compressionCodec, Tuple <ISerializer, ISerializer> serializers, MessageVersion msgVersion)
{
    var crcPos = stream.Position;
    stream.Write(Basics.MinusOne32, 0, 4); // crc placeholder
    var bodyPos = stream.Position;

    // V0 message format
    if (msgVersion == MessageVersion.V0)
    {
        stream.WriteByte(0); // magic byte
        stream.WriteByte((byte)compressionCodec); // attributes
    }
    else // V1 message format
    {
        stream.WriteByte(1); // magic byte
        stream.WriteByte((byte)compressionCodec); // attributes
        BigEndianConverter.Write(stream, TimeStamp); // v1 adds a timestamp field
    }

    if (SerializedKeyValue != null)
    {
        // A pre-serialized key/value blob shorter than the minimum valid size
        // is corrupt; route it through the dedicated error path instead of
        // emitting garbage on the wire.
        if (SerializedKeyValue.Length < MinimumValidSizeForSerializedKeyValue)
        {
            HandleInvalidSerializedKeyValue(stream);
        }
        else
        {
            stream.Write(SerializedKeyValue.GetBuffer(), 0, (int)SerializedKeyValue.Length);
        }
    }
    else
    {
        DoSerializeKeyValue(stream, serializers);
    }

    // update crc: computed over the body, then patched into the placeholder
    var crc = Crc32.Compute(stream, bodyPos, stream.Position - bodyPos);
    var curPos = stream.Position;
    stream.Position = crcPos;
    BigEndianConverter.Write(stream, (int)crc);
    stream.Position = curPos;
}
/// <summary>
/// Ensure an object can be serialized with a size known up front.
/// If the serializer cannot report the serialized size (not an
/// ISizableSerializer, or it returned Basics.UnknownSize), the object is
/// serialized eagerly and its exact bytes are returned instead; otherwise
/// the object is returned unchanged.
/// </summary>
private static object EnsureSizedSerializable(object o, ISerializer serializer)
{
    if (Basics.SizeOfSerializedObject(o, serializer as ISizableSerializer) == Basics.UnknownSize)
    {
        using (ReusableMemoryStream buffer = new ReusableMemoryStream(null))
        {
            serializer.Serialize(o, buffer);
            // BUGFIX: the previous code returned buffer.GetBuffer(), i.e. the
            // whole underlying array, whose capacity is usually larger than
            // the serialized payload — trailing garbage bytes would then be
            // treated as part of the serialized form (and the array may be
            // recycled once the pooled stream is disposed). ToArray() returns
            // a private copy trimmed to the exact serialized length.
            return(buffer.ToArray());
        }
    }
    return(o);
}
public void TestSerializeOneMessageCodec()
{
    // Just check attributes, we don't put correct data
    using (var stream = new ReusableMemoryStream(null))
    {
        new Message { Value = Value }.Serialize(stream, CompressionCodec.Snappy,
            new Tuple<ISerializer, ISerializer>(null, null));

        Assert.AreEqual(0, stream.GetBuffer()[4]); // magic byte is 0
        Assert.AreEqual(2, stream.GetBuffer()[5]); // attributes is 2
    }

    using (var stream = new ReusableMemoryStream(null))
    {
        new Message { Value = Value }.Serialize(stream, CompressionCodec.Gzip,
            new Tuple<ISerializer, ISerializer>(null, null));

        Assert.AreEqual(0, stream.GetBuffer()[4]); // magic byte is 0
        Assert.AreEqual(1, stream.GetBuffer()[5]); // attributes is 1
    }
}
/// <summary>
/// Compress a given stream using a given compression codec.
/// </summary>
/// <param name="uncompressedStream">The initial stream we want to compress.</param>
/// <param name="compressedStream">The stream to put the compressed data in (should be empty before calling the method).</param>
/// <param name="compression">The compression codec we want to use (must not be None).</param>
/// <exception cref="ArgumentException">Thrown when <paramref name="compression"/> is CompressionCodec.None.</exception>
internal static void CompressStream(ReusableMemoryStream uncompressedStream, ReusableMemoryStream compressedStream, CompressionCodec compression)
{
    if (compression == CompressionCodec.None)
    {
        throw new ArgumentException("Compress a stream only when you want compression.");
    }
    switch (compression)
    {
        case CompressionCodec.Gzip:
            // 'true' leaves compressedStream open after the GZipStream is
            // disposed; WriteTo copies the whole buffer regardless of the
            // stream's current position.
            using (var gzip = new GZipStream(compressedStream, CompressionMode.Compress, true))
            {
                uncompressedStream.WriteTo(gzip);
            }
            break;

        case CompressionCodec.Lz4:
            KafkaLz4.Compress(compressedStream, uncompressedStream.GetBuffer(), (int)uncompressedStream.Length);
            break;

        case CompressionCodec.Snappy:
#if NETSTANDARD1_3
            // Snappy is not available on this target framework.
            throw new NotImplementedException();
#else
            // Size the destination to the worst case, compress directly into
            // its underlying buffer, then trim to the actual compressed size.
            compressedStream.SetLength(SnappyCodec.GetMaxCompressedLength((int)uncompressedStream.Length));
            {
                int size = SnappyCodec.Compress(uncompressedStream.GetBuffer(), 0, (int)uncompressedStream.Length, compressedStream.GetBuffer(), 0);
                compressedStream.SetLength(size);
            }
#endif
            break;
    }
}
// Deserialize the records of a record batch. When the batch is compressed,
// the rest of the batch is uncompressed into a pooled stream first, and the
// records are read (and fully materialized) from that stream so it can be
// released on exit.
public IEnumerable <Record> DeserializeRecords(ReusableMemoryStream input, int numberOfRecords, long endOfBatch, long firstTimeStamp, Deserializers deserializers)
{
    if (CompressionCodec == CompressionCodec.None)
    {
        return(DeserializeRecordsUncompressed(input, numberOfRecords, endOfBatch, firstTimeStamp, deserializers));
    }

    using (var uncompressedStream = input.Pool.Reserve())
    {
        Basics.Uncompress(uncompressedStream, input.GetBuffer(), (int)input.Position, (int)(endOfBatch - input.Position), CompressionCodec);
        // Skip past the compressed payload in the source stream.
        input.Position = endOfBatch;
        // NOTE(review): 'endOfBatch' is an offset into 'input', not into
        // uncompressedStream — presumably DeserializeRecordsUncompressed is
        // driven by numberOfRecords rather than this bound here; confirm.
        return(new List <Record>(DeserializeRecordsUncompressed(uncompressedStream, numberOfRecords, endOfBatch, firstTimeStamp, deserializers)));
        // We use a list here to force iteration to take place, so that we can release uncompressedStream
    }
}
public void Test003_SerializeString()
{
    // Non null string: serialized as a big-endian 16-bit length followed by
    // the raw bytes.
    using (var stream = new ReusableMemoryStream(null))
    {
        Basics.SerializeString(stream, TheValue);

        Assert.AreEqual(2 + Value.Length, stream.Length);
        stream.Position = 0;
        Assert.AreEqual(TheValue.Length, BigEndianConverter.ReadInt16(stream));
        CompareArrays(Value, stream.GetBuffer(), 2);
    }

    // Null string: serialized as the single 16-bit length value -1.
    using (var stream = new ReusableMemoryStream(null))
    {
        Basics.SerializeString(stream, null);

        Assert.AreEqual(2, stream.Length);
        stream.Position = 0;
        Assert.AreEqual(-1, BigEndianConverter.ReadInt16(stream));
    }
}
// Serialize one record in the Kafka record-batch format:
// length (varint) | attributes (1 byte) | timestampDelta (varint) |
// offsetDelta (varint) | key | value | headers.
// Timestamp and offset are stored as deltas relative to the batch base values.
// Returns the target stream for call chaining.
public ReusableMemoryStream Serialize(ReusableMemoryStream target, long baseTimestamp, long offsetDelta)
{
    long timestampDelta = Timestamp - baseTimestamp;
    // Varint length prefix of the record body.
    VarIntConverter.Write(target, SizeOfBodyInBytes(offsetDelta, timestampDelta));

    // Record attributes are always null.
    target.WriteByte(0x00);

    VarIntConverter.Write(target, timestampDelta);
    VarIntConverter.Write(target, offsetDelta);

    if (SerializedKeyValue == null)
    {
        Basics.WriteObject(target, Key, KeySerializer);
        Basics.WriteObject(target, Value, ValueSerializer);
    }
    else
    {
        // Key and value were already serialized; copy the raw bytes.
        target.Write(SerializedKeyValue.GetBuffer(), 0, (int)SerializedKeyValue.Length);
    }

    if (Headers == null)
    {
        // No headers: emit a varint header count of 0.
        target.Write(Basics.ZeroVarInt, 0, Basics.ZeroVarInt.Length);
    }
    else
    {
        VarIntConverter.Write(target, Headers.Count);
        foreach (KafkaRecordHeader header in Headers)
        {
            SerializeHeader(target, header);
        }
    }

    return(target);
}
// Deserialize a message set to a sequence of messages.
// This handles the "partial message allowed at end of message set" from Kafka brokers
// and compressed message sets (the method recursively calls itself in this case and
// flattens the result). The returned enumeration must be enumerated for deserialization
// to effectively occur. This overload only supports v0 (magic byte 0) messages.
private static IEnumerable <ResponseMessage> LazyDeserializeMessageSet(ReusableMemoryStream stream, int messageSetSize, Deserializers deserializers)
{
    var remainingMessageSetBytes = messageSetSize;

    while (remainingMessageSetBytes > 0)
    {
        const int offsetSize = 8;
        const int msgsizeSize = 4;
        if (remainingMessageSetBytes < offsetSize + msgsizeSize)
        {
            // This is a partial message => skip to the end of the message set.
            // TODO: unit test this
            stream.Position += remainingMessageSetBytes;
            yield break;
        }

        var offset = BigEndianConverter.ReadInt64(stream);
        var messageSize = BigEndianConverter.ReadInt32(stream);

        remainingMessageSetBytes -= offsetSize + msgsizeSize;
        if (remainingMessageSetBytes < messageSize)
        {
            // This is a partial message => skip to the end of the message set.
            stream.Position += remainingMessageSetBytes;
            yield break;
        }

        // Message body
        var crc = BigEndianConverter.ReadInt32(stream);
        var crcStartPos = stream.Position; // crc is computed from this position
        var magic = stream.ReadByte();
        if (magic != 0) // this overload only handles v0 messages
        {
            throw new UnsupportedMagicByteVersion((byte)magic);
        }
        var attributes = stream.ReadByte();

        // Check for compression
        var codec = (CompressionCodec)(attributes & 3); // Lowest 2 bits
        if (codec == CompressionCodec.None)
        {
            var msg = new ResponseMessage
            {
                Offset = offset,
                Message = new Message
                {
                    Key = Basics.DeserializeByteArray(stream, deserializers.Item1),
                    Value = Basics.DeserializeByteArray(stream, deserializers.Item2)
                }
            };
            // Crc is checked only after the body has been fully consumed.
            CheckCrc(crc, stream, crcStartPos);
            yield return(msg);
        }
        else
        {
            // Key is null, read/check/skip
            if (BigEndianConverter.ReadInt32(stream) != -1)
            {
                throw new InvalidDataException("Compressed messages key should be null");
            }

            // Uncompress: the compressed payload is skipped in the source
            // stream so the crc can be checked before decompressing.
            var compressedLength = BigEndianConverter.ReadInt32(stream);
            var dataPos = stream.Position;
            stream.Position += compressedLength;
            CheckCrc(crc, stream, crcStartPos);

            using (var uncompressedStream = stream.Pool.Reserve())
            {
                Uncompress(uncompressedStream, stream.GetBuffer(), (int)dataPos, compressedLength, codec);
                // Deserialize recursively
                foreach (var m in LazyDeserializeMessageSet(uncompressedStream, (int)uncompressedStream.Length, deserializers))
                {
                    // Flatten
                    yield return(m);
                }
            }
        }

        remainingMessageSetBytes -= messageSize;
    }
}
// Deserialize a message set to a sequence of messages.
// This handles the "partial message allowed at end of message set" from Kafka brokers
// and compressed message sets (the method recursively calls itself in this case and
// flattens the result). The returned enumeration must be enumerated for deserialization
// to effectively occur.
//
// A message set can contain a mix of v0 and v1 messages.
// In the case of compressed messages, offsets are returned differently by brokers.
// Messages inside compressed message v0 will have absolute offsets assigned.
// Messages inside compressed message v1 will have relative offset assigned, starting
// from 0. The wrapping compressed message itself is assigned the absolute offset of the last
// message in the set. That means in this case we can only assign offsets after having decompressed
// all messages. Lazy deserialization won't be so lazy anymore...
private static IEnumerable <ResponseMessage> LazyDeserializeMessageSet(ReusableMemoryStream stream, int messageSetSize, Deserializers deserializers)
{
    var remainingMessageSetBytes = messageSetSize;

    while (remainingMessageSetBytes > 0)
    {
        const int offsetSize = 8;
        const int msgsizeSize = 4;
        if (remainingMessageSetBytes < offsetSize + msgsizeSize)
        {
            // This is a partial message => skip to the end of the message set.
            // TODO: unit test this
            stream.Position += remainingMessageSetBytes;
            yield break;
        }

        var offset = BigEndianConverter.ReadInt64(stream);
        var messageSize = BigEndianConverter.ReadInt32(stream);

        remainingMessageSetBytes -= offsetSize + msgsizeSize;
        if (remainingMessageSetBytes < messageSize)
        {
            // This is a partial message => skip to the end of the message set.
            stream.Position += remainingMessageSetBytes;
            yield break;
        }

        // Message body
        var crc = BigEndianConverter.ReadInt32(stream);
        var crcStartPos = stream.Position; // crc is computed from this position
        var magic = stream.ReadByte();
        if ((uint)magic > 1) // only v0 and v1 messages are supported
        {
            throw new UnsupportedMagicByteVersion((byte)magic);
        }
        var attributes = stream.ReadByte();
        long timestamp = 0;
        if (magic == 1) // v1 messages carry a timestamp
        {
            timestamp = BigEndianConverter.ReadInt64(stream);
        }

        // Check for compression
        var codec = (CompressionCodec)(attributes & 3); // Lowest 2 bits
        if (codec == CompressionCodec.None)
        {
            var msg = new ResponseMessage
            {
                Offset = offset,
                Message = new Message
                {
                    Key = Basics.DeserializeByteArray(stream, deserializers.Item1),
                    Value = Basics.DeserializeByteArray(stream, deserializers.Item2),
                    TimeStamp = timestamp
                }
            };
            // Crc is checked only after the body has been fully consumed.
            CheckCrc(crc, stream, crcStartPos);
            yield return(msg);
        }
        else
        {
            // Key is null, read/check/skip
            if (BigEndianConverter.ReadInt32(stream) != -1)
            {
                throw new InvalidDataException("Compressed messages key should be null");
            }

            // Uncompress: the compressed payload is skipped in the source
            // stream so the crc can be checked before decompressing.
            var compressedLength = BigEndianConverter.ReadInt32(stream);
            var dataPos = stream.Position;
            stream.Position += compressedLength;
            CheckCrc(crc, stream, crcStartPos);

            using (var uncompressedStream = stream.Pool.Reserve())
            {
                Uncompress(uncompressedStream, stream.GetBuffer(), (int)dataPos, compressedLength, codec);
                // Deserialize recursively
                if (magic == 0) // v0 message: inner offsets are already absolute
                {
                    foreach (var m in LazyDeserializeMessageSet(uncompressedStream, (int)uncompressedStream.Length, deserializers))
                    {
                        // Flatten
                        yield return(m);
                    }
                }
                else // v1 message, we have to assign the absolute offsets
                {
                    var innerMsgs = ResponseMessageListPool.Reserve();
                    // We need to deserialize all messages first, because the wrapper offset is the
                    // offset of the last message in the set, so we need to know how many messages there are
                    // before assigning offsets.
                    innerMsgs.AddRange(LazyDeserializeMessageSet(uncompressedStream, (int)uncompressedStream.Length, deserializers));
                    var baseOffset = offset - innerMsgs.Count + 1;
                    foreach (var msg in innerMsgs)
                    {
                        yield return (new ResponseMessage
                        {
                            Offset = msg.Offset + baseOffset,
                            Message = msg.Message
                        });
                    }
                    ResponseMessageListPool.Release(innerMsgs);
                }
            }
        }

        remainingMessageSetBytes -= messageSize;
    }
}
// Assert that the bytes at the stream's current position match 'expected',
// then advance the position past the compared bytes.
static void CompareBuffers(byte[] expected, ReusableMemoryStream compared)
{
    var start = (int)compared.Position;
    CompareArrays(expected, compared.GetBuffer(), start);
    compared.Position = start + expected.Length;
}