// TODO: Make this configurable
/// <summary>
/// Serializer options shared by every call to <c>SerializeAsync</c>. The instance is
/// reused because System.Text.Json caches its type metadata per options instance, so
/// allocating a fresh one per message repeats that work every time.
/// </summary>
private static readonly JsonSerializerOptions CamelCaseSerializerOptions = new JsonSerializerOptions
{
    PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
};

/// <summary>
/// Serializes <paramref name="data"/> to camel-cased JSON and, when the schema registry
/// holds at least one schema version for the derived subject, validates the JSON against
/// the last listed version before returning the UTF-8 bytes.
/// </summary>
/// <param name="data">The value to serialize.</param>
/// <param name="context">
/// Supplies the topic and message component used to build the subject name
/// (<c>{topic}-{component in lower case}</c>).
/// </param>
/// <returns>The UTF-8 encoded JSON document.</returns>
/// <exception cref="KafkaSerializationException">
/// Thrown when the registered schema is not of type JSON.
/// </exception>
/// <exception cref="KafkaJsonSchemaSerializationException">
/// Thrown when the JSON document does not satisfy the registered schema.
/// </exception>
public async Task<byte[]> SerializeAsync(T data, SerializationContext context)
{
    var json = JsonSerializer.Serialize(data, CamelCaseSerializerOptions);

    // The subject name is an identifier, so it must not vary with the current culture.
    var subjectName = $"{context.Topic}-{context.Component.ToString().ToLowerInvariant()}";

    var subjectVersions = await _schemaRegistryClient
        .ListSchemaVersionsAsync(subjectName, CancellationToken.None)
        .ConfigureAwait(false);

    if (subjectVersions != null && subjectVersions.Any())
    {
        // NOTE(review): assumes the registry lists versions oldest-first, so the last
        // entry is the newest one - confirm against the registry client.
        var version = subjectVersions.Last();
        var schemaDetails = await _schemaRegistryClient
            .GetSchemaAsync(subjectName, version, CancellationToken.None)
            .ConfigureAwait(false);

        // Static, ordinal comparison: a missing (null) schema type is reported as a
        // schema mismatch instead of surfacing as a NullReferenceException.
        if (!string.Equals(schemaDetails.SchemaType, "JSON", StringComparison.OrdinalIgnoreCase))
        {
            throw new KafkaSerializationException($"Unable to verify schema for subject {subjectName}, version {version}, " +
                                                  $"as the schema is {schemaDetails.SchemaType} but expected JSON");
        }

        var schema = await JsonSchema.FromJsonAsync(schemaDetails.Schema).ConfigureAwait(false);
        var validationErrors = schema.Validate(json);

        if (validationErrors.Any())
        {
            var validationErrorStrings = validationErrors.Select(err => err.ToString()).ToArray();
            throw new KafkaJsonSchemaSerializationException(validationErrorStrings);
        }
    }

    return Encoding.UTF8.GetBytes(json);
}
/// <summary>
/// Deserializes a framed Avro payload into a <see cref="GenericRecord"/>. The payload
/// starts with a magic byte followed by a 4-byte schema id in network byte order; the
/// remainder is decoded with a reader built from the writer schema for that id.
/// </summary>
/// <param name="topic">
/// Not used: only the schema id embedded in the payload is needed.
/// </param>
/// <param name="array">The framed payload.</param>
/// <returns>A task that completes with the decoded record.</returns>
/// <exception cref="InvalidDataException">
/// Thrown when the first byte is not <c>Constants.MagicByte</c>.
/// </exception>
public async Task<GenericRecord> Deserialize(string topic, byte[] array)
{
    try
    {
        // Note: topic is not necessary for deserialization (or knowing if it's a key
        // or value) only the schema id is needed.
        using (var stream = new MemoryStream(array))
        using (var reader = new BinaryReader(stream))
        {
            var magicByte = reader.ReadByte();
            if (magicByte != Constants.MagicByte)
            {
                // may change in the future.
                throw new InvalidDataException($"magic byte should be {Constants.MagicByte}, not {magicByte}");
            }

            var writerId = IPAddress.NetworkToHostOrder(reader.ReadInt32());

            DatumReader<GenericRecord> datumReader;
            await deserializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);
            try
            {
                datumReaderBySchemaId.TryGetValue(writerId, out datumReader);
                if (datumReader == null)
                {
                    // TODO: If any of this cache fills up, this is probably an
                    // indication of misuse of the deserializer. Ideally we would do
                    // something more sophisticated than the below + not allow
                    // the misuse to keep happening without warning.
                    if (datumReaderBySchemaId.Count > schemaRegistryClient.MaxCachedSchemas)
                    {
                        datumReaderBySchemaId.Clear();
                    }

                    var writerSchemaJson = await schemaRegistryClient.GetSchemaAsync(writerId).ConfigureAwait(continueOnCapturedContext: false);
                    var writerSchema = global::Avro.Schema.Parse(writerSchemaJson);

                    // The writer schema doubles as the reader schema for generic records.
                    datumReader = new GenericReader<GenericRecord>(writerSchema, writerSchema);
                    datumReaderBySchemaId[writerId] = datumReader;
                }
            }
            finally
            {
                deserializeMutex.Release();
            }

            // The stream is positioned just past the 5-byte header at this point.
            return datumReader.Read(default(GenericRecord), new BinaryDecoder(stream));
        }
    }
    catch (AggregateException e) when (e.InnerException != null)
    {
        // Unwrap the aggregate without resetting the inner exception's stack trace.
        System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(e.InnerException).Throw();
        throw; // Unreachable; satisfies the compiler's return-path analysis.
    }
}
/// <summary>
/// Deserializes a framed Avro payload into <typeparamref name="T"/>. The payload starts
/// with a magic byte followed by a 4-byte schema id in network byte order; the remainder
/// is decoded with a <see cref="SpecificReader{T}"/> that resolves the writer schema for
/// that id against <c>ReaderSchema</c>.
/// </summary>
/// <param name="topic">
/// Not used: only the schema id embedded in the payload is needed.
/// </param>
/// <param name="array">The framed payload.</param>
/// <returns>A task that completes with the decoded value.</returns>
/// <exception cref="InvalidDataException">
/// Thrown when the payload is shorter than 5 bytes or the first byte is not
/// <c>Constants.MagicByte</c>.
/// </exception>
public async Task<T> Deserialize(string topic, byte[] array)
{
    try
    {
        // Note: topic is not necessary for deserialization (or knowing if it's a key
        // or value) only the schema id is needed.
        if (array.Length < 5)
        {
            throw new InvalidDataException($"Expecting data framing of length 5 bytes or more but total data size is {array.Length} bytes");
        }

        using (var stream = new MemoryStream(array))
        using (var reader = new BinaryReader(stream))
        {
            var magicByte = reader.ReadByte();
            if (magicByte != Constants.MagicByte)
            {
                throw new InvalidDataException($"Expecting data with Confluent Schema Registry framing. Magic byte was {array[0]}, expecting {Constants.MagicByte}");
            }

            var writerId = IPAddress.NetworkToHostOrder(reader.ReadInt32());

            DatumReader<T> datumReader;
            await deserializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);
            try
            {
                datumReaderBySchemaId.TryGetValue(writerId, out datumReader);
                if (datumReader == null)
                {
                    // Crude eviction: drop every cached reader once the cache outgrows the limit.
                    if (datumReaderBySchemaId.Count > schemaRegistryClient.MaxCachedSchemas)
                    {
                        datumReaderBySchemaId.Clear();
                    }

                    var writerSchemaJson = await schemaRegistryClient.GetSchemaAsync(writerId).ConfigureAwait(continueOnCapturedContext: false);
                    var writerSchema = global::Avro.Schema.Parse(writerSchemaJson.SchemaString);

                    datumReader = new SpecificReader<T>(writerSchema, ReaderSchema);
                    datumReaderBySchemaId[writerId] = datumReader;
                }
            }
            finally
            {
                deserializeMutex.Release();
            }

            // The stream is positioned just past the 5-byte header at this point.
            return datumReader.Read(default(T), new BinaryDecoder(stream));
        }
    }
    catch (AggregateException e) when (e.InnerException != null)
    {
        // Unwrap the aggregate without resetting the inner exception's stack trace.
        System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(e.InnerException).Throw();
        throw; // Unreachable; satisfies the compiler's return-path analysis.
    }
}
public async Task <object> DeserializeAsync(ReadOnlyMemory <byte> data, bool isNull, SerializationContext context) { try { // Note: topic is not necessary for deserialization (or knowing if it's a key // or value) only the schema id is needed. using (var stream = new MemoryStream(data.ToArray())) using (var reader = new BinaryReader(stream)) { var magicByte = reader.ReadByte(); if (magicByte != ConfluentConstants.MagicByte) { // may change in the future. throw new InvalidDataException($"magic byte should be {ConfluentConstants.MagicByte}, not {magicByte}"); } var writerId = IPAddress.NetworkToHostOrder(reader.ReadInt32()); DatumReader <object> datumReader; await _deserializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false); try { _datumReaderBySchemaId.TryGetValue(writerId, out datumReader); if (datumReader == null) { if (_datumReaderBySchemaId.Count > _schemaRegistryClient.MaxCachedSchemas) { _datumReaderBySchemaId.Clear(); } var writerSchemaJson = await _schemaRegistryClient.GetSchemaAsync(writerId).ConfigureAwait(continueOnCapturedContext: false); var writerSchema = global::Avro.Schema.Parse(writerSchemaJson); // Get the ReaderSchema From The Local TopicSubjectSchemaCache var readerSchema = _cache.GetSchema(writerSchema); datumReader = new SpecificReader <object>(writerSchema, readerSchema); _datumReaderBySchemaId[writerId] = datumReader; } } finally { _deserializeMutex.Release(); } return(datumReader.Read(default, new BinaryDecoder(stream)));
/// <summary>
/// Deserializes a framed Avro payload into <typeparamref name="T"/>. The payload starts
/// with a magic byte followed by a 4-byte schema id in network byte order; the remainder
/// is decoded with a <see cref="SpecificReader{T}"/> that resolves the writer schema for
/// that id against <c>ReaderSchema</c>.
/// </summary>
/// <param name="topic">
/// Not used: only the schema id embedded in the payload is needed.
/// </param>
/// <param name="array">The framed payload.</param>
/// <returns>A task that completes with the decoded value.</returns>
/// <exception cref="DeserializationException">
/// Thrown when the first byte is not <c>Constants.MagicByte</c>.
/// </exception>
public async Task<T> Deserialize(string topic, byte[] array)
{
    try
    {
        // Note: topic is not necessary for deserialization (or knowing if it's a key
        // or value) only the schema id is needed.
        using (var stream = new MemoryStream(array))
        using (var reader = new BinaryReader(stream))
        {
            var magicByte = reader.ReadByte();
            if (magicByte != Constants.MagicByte)
            {
                // may change in the future.
                throw new DeserializationException($"magic byte should be {Constants.MagicByte}, not {magicByte}");
            }

            var writerId = IPAddress.NetworkToHostOrder(reader.ReadInt32());

            DatumReader<T> datumReader;
            await deserializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);
            try
            {
                datumReaderBySchemaId.TryGetValue(writerId, out datumReader);
                if (datumReader == null)
                {
                    // Crude eviction: drop every cached reader once the cache outgrows the limit.
                    if (datumReaderBySchemaId.Count > schemaRegistryClient.MaxCachedSchemas)
                    {
                        datumReaderBySchemaId.Clear();
                    }

                    var writerSchemaJson = await schemaRegistryClient.GetSchemaAsync(writerId).ConfigureAwait(continueOnCapturedContext: false);
                    var writerSchema = global::Avro.Schema.Parse(writerSchemaJson);

                    datumReader = new SpecificReader<T>(writerSchema, ReaderSchema);
                    datumReaderBySchemaId[writerId] = datumReader;
                }
            }
            finally
            {
                deserializeMutex.Release();
            }

            // The stream is positioned just past the 5-byte header at this point.
            return datumReader.Read(default(T), new BinaryDecoder(stream));
        }
    }
    catch (AggregateException e) when (e.InnerException != null)
    {
        // Unwrap the aggregate without resetting the inner exception's stack trace.
        System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(e.InnerException).Throw();
        throw; // Unreachable; satisfies the compiler's return-path analysis.
    }
}
/// <summary>
/// Initializes a deserializer that looks schemas up through a Schema Registry client.
/// </summary>
/// <param name="registryClient">
/// The Schema Registry client used to fetch schemas by ID. The deserializer does not
/// dispose it.
/// </param>
/// <param name="deserializerBuilder">
/// Builds deserialization functions for C# types. When omitted, the default deserializer
/// builder is used.
/// </param>
/// <param name="schemaReader">
/// Converts schemas received from the registry into abstract representations. When
/// omitted, the default JSON schema reader is used.
/// </param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="registryClient"/> is null.
/// </exception>
public AsyncSchemaRegistryDeserializer(
    ISchemaRegistryClient registryClient,
    IBinaryDeserializerBuilder deserializerBuilder = null,
    IJsonSchemaReader schemaReader = null
) : this(deserializerBuilder, schemaReader)
{
    var client = registryClient ?? throw new ArgumentNullException(nameof(registryClient));

    // Schema resolution is delegated to the registry client, keyed by schema ID.
    _resolve = schemaId => client.GetSchemaAsync(schemaId);
}
/// <summary>
/// Deserializes a framed byte payload into an <see cref="ISpecificRecord" />. The schema
/// id is read from the header of <paramref name="data"/>. When no deserializer is cached
/// for that id, the schema is downloaded through the <see cref="ISchemaRegistryClient"/>,
/// the record type is located from the schema's namespace and name, and an
/// <see cref="AvroDeserializer{T}"/> for that type is constructed and cached. The cached
/// deserializer then decodes the payload.
/// </summary>
/// <param name="data">
/// The raw byte data to deserialize.
/// </param>
/// <param name="isNull">
/// True if this is a null value. (Not consulted here; the inner deserializer is always
/// invoked with <c>false</c>.)
/// </param>
/// <param name="context">
/// Context relevant to the deserialize operation.
/// </param>
/// <returns>
/// A <see cref="System.Threading.Tasks.Task" /> that completes
/// with the deserialized value.
/// </returns>
/// <exception cref="System.IO.InvalidDataException">
/// Thrown when <paramref name="data"/> is shorter than 5 bytes, when its first byte is
/// not 0, when the schema has no name, or when the schema's namespace + name does not
/// match a type that can be loaded.
/// </exception>
public async Task<ISpecificRecord> DeserializeAsync(ReadOnlyMemory<byte> data, bool isNull, SerializationContext context)
{
    var schemaId = ReadFramedSchemaId(data);

    // Single lookup on the hot path instead of ContainsKey followed by the indexer.
    dynamic deserializer;
    if (!_deserializerCache.TryGetValue(schemaId, out deserializer))
    {
        var schema = await _schemaRegistryClient.GetSchemaAsync(schemaId).ConfigureAwait(false);
        var schemaJson = JObject.Parse(schema.SchemaString);

        // "namespace" may be absent from the schema JSON; a missing token must not
        // surface as a NullReferenceException.
        var schemaNamespace = schemaJson.SelectToken("namespace")?.Value<string>();
        var schemaName = schemaJson.SelectToken("name")?.Value<string>();
        if (string.IsNullOrEmpty(schemaName))
        {
            throw new InvalidDataException($"Deserialization failure, schema {schemaId} does not declare a name");
        }

        var typeString = string.IsNullOrEmpty(schemaNamespace)
            ? schemaName
            : $"{schemaNamespace}.{schemaName}";

        var recordType = Type.GetType(typeString);
        if (recordType == null)
        {
            throw new InvalidDataException($"Deserialization failure, type {typeString} cannot be found");
        }

        var deserializerType = typeof(AvroDeserializer<>).MakeGenericType(recordType);
        object created = Activator.CreateInstance(deserializerType, _schemaRegistryClient, _config);

        // Indexer assignment instead of Add: a second caller that resolved the same
        // schema id meanwhile overwrites the entry rather than throwing on a duplicate key.
        _deserializerCache[schemaId] = created;
        deserializer = created;
    }

    return (ISpecificRecord)await deserializer.DeserializeAsync(data, false, context);
}

/// <summary>
/// Validates the 5-byte framing header (magic byte 0 followed by a big-endian schema id)
/// and returns the schema id, without copying the payload.
/// </summary>
private static int ReadFramedSchemaId(ReadOnlyMemory<byte> data)
{
    if (data.Length < 5)
    {
        throw new InvalidDataException($"Expecting data framing of length 5 bytes or more but total data size is {data.Length} bytes");
    }

    var frame = data.Span;
    if (frame[0] != 0)
    {
        throw new InvalidDataException($"Expecting data with Confluent Schema Registry framing. Magic byte was {frame[0]}, expecting {0}");
    }

    return System.Buffers.Binary.BinaryPrimitives.ReadInt32BigEndian(frame.Slice(1, 4));
}
/// <summary>
/// Synchronously decodes a framed Avro payload into <typeparamref name="T"/>. The header
/// is a magic byte followed by a 4-byte schema id in network byte order; the body is read
/// with a <see cref="SpecificReader{T}"/> cached per schema id.
/// </summary>
/// <param name="topic">
/// Not used: only the schema id embedded in the payload is needed.
/// </param>
/// <param name="array">The framed payload.</param>
/// <returns>The decoded value.</returns>
/// <exception cref="InvalidDataException">
/// Thrown when the first byte is not <c>Constants.MagicByte</c>.
/// </exception>
public T Deserialize(string topic, byte[] array)
{
    // Note: topic is not necessary for deserialization (or knowing if it's a key
    // or value) only the schema id is needed.
    using (var payload = new MemoryStream(array))
    using (var header = new BinaryReader(payload))
    {
        var leadingByte = header.ReadByte();
        if (leadingByte != Constants.MagicByte)
        {
            // may change in the future.
            throw new InvalidDataException($"magic byte should be 0, not {leadingByte}");
        }

        var schemaId = IPAddress.NetworkToHostOrder(header.ReadInt32());

        DatumReader<T> cachedReader;
        lock (deserializeLockObj)
        {
            datumReaderBySchemaId.TryGetValue(schemaId, out cachedReader);
            if (cachedReader == null)
            {
                // Crude eviction: drop every cached reader once the cache outgrows the limit.
                var cacheIsOverLimit = datumReaderBySchemaId.Count > schemaRegistryClient.MaxCachedSchemas;
                if (cacheIsOverLimit)
                {
                    datumReaderBySchemaId.Clear();
                }

                // Blocks on the registry call while the lock is held.
                var pendingSchema = schemaRegistryClient.GetSchemaAsync(schemaId);
                var schemaJson = pendingSchema.ConfigureAwait(false).GetAwaiter().GetResult();
                var parsedWriterSchema = Avro.Schema.Parse(schemaJson);

                cachedReader = new SpecificReader<T>(parsedWriterSchema, ReaderSchema);
                datumReaderBySchemaId[schemaId] = cachedReader;
            }
        }

        // The stream is positioned just past the 5-byte header at this point.
        var decoder = new BinaryDecoder(payload);
        return cachedReader.Read(default(T), decoder);
    }
}
/// <inheritdoc/>
public async Task<GenericRecord> Deserialize(string topic, byte[] array)
{
    try
    {
        // Note: topic is not necessary for deserialization (or knowing if it's a key
        // or value) only the schema id is needed.
        if (array.Length < 5)
        {
            throw new InvalidDataException($"Expecting data framing of length 5 bytes or more but total data size is {array.Length} bytes");
        }

        using (var stream = new MemoryStream(array))
        using (var reader = new BinaryReader(stream))
        {
            var magicByte = reader.ReadByte();
            if (magicByte != Constants.MagicByte)
            {
                throw new InvalidDataException($"Expecting data with Confluent Schema Registry framing. Magic byte was {array[0]}, expecting {Constants.MagicByte}");
            }

            // The schema id follows the magic byte as 4 bytes in network byte order.
            var writerId = IPAddress.NetworkToHostOrder(reader.ReadInt32());

            DatumReader<GenericRecord> datumReader;
            await deserializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);
            try
            {
                datumReaderBySchemaId.TryGetValue(writerId, out datumReader);
                if (datumReader == null)
                {
                    // TODO: If any of this cache fills up, this is probably an
                    // indication of misuse of the deserializer. Ideally we would do
                    // something more sophisticated than the below + not allow
                    // the misuse to keep happening without warning.
                    if (datumReaderBySchemaId.Count > schemaRegistryClient.MaxCachedSchemas)
                    {
                        datumReaderBySchemaId.Clear();
                    }

                    var writerSchemaResult = await schemaRegistryClient.GetSchemaAsync(writerId).ConfigureAwait(continueOnCapturedContext: false);
                    if (writerSchemaResult.SchemaType != SchemaType.Avro)
                    {
                        // Interpolated so the message reports the actual schema type.
                        throw new InvalidOperationException($"Expecting writer schema to have type Avro, not {writerSchemaResult.SchemaType}");
                    }

                    Avro.Schema writerSchema = null;
                    if (writerSchemaResult.References.Any() && IsUnion(writerSchemaResult.SchemaString))
                    {
                        // A union schema with references is rebuilt as a JSON array of the
                        // referenced record schemas: "[schemaA, schemaB, ...]".
                        StringBuilder schemaBuilder = new StringBuilder();
                        schemaBuilder.Append("[");
                        foreach (var refSchema in writerSchemaResult.References)
                        {
                            var regSchema = await schemaRegistryClient.GetRegisteredSchemaAsync(refSchema.Subject, refSchema.Version)
                                .ConfigureAwait(continueOnCapturedContext: false);

                            Avro.Schema schema = Avro.Schema.Parse(regSchema.SchemaString);
                            if (schema.Tag != Avro.Schema.Type.Record)
                            {
                                throw new NotSupportedException("Only union schemas containing references to a record are supported for now");
                            }

                            // Separate entries by position rather than by comparing each
                            // reference with the last one.
                            if (schemaBuilder.Length > 1)
                            {
                                schemaBuilder.Append(", ");
                            }

                            schemaBuilder.Append(regSchema.SchemaString);
                        }

                        schemaBuilder.Append("]");
                        writerSchema = global::Avro.Schema.Parse(schemaBuilder.ToString());
                    }
                    else
                    {
                        writerSchema = global::Avro.Schema.Parse(writerSchemaResult.SchemaString);
                    }

                    // The writer schema doubles as the reader schema for generic records.
                    datumReader = new GenericReader<GenericRecord>(writerSchema, writerSchema);
                    datumReaderBySchemaId[writerId] = datumReader;
                }
            }
            finally
            {
                deserializeMutex.Release();
            }

            // The stream is positioned just past the 5-byte header at this point.
            return datumReader.Read(default(GenericRecord), new BinaryDecoder(stream));
        }
    }
    catch (AggregateException e) when (e.InnerException != null)
    {
        // Unwrap the aggregate without resetting the inner exception's stack trace.
        System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(e.InnerException).Throw();
        throw; // Unreachable; satisfies the compiler's return-path analysis.
    }
}
/// <summary>
/// Deserializes a framed Avro payload into <typeparamref name="T"/>. The payload starts
/// with a magic byte followed by a 4-byte schema id in network byte order; the remainder
/// is decoded with a <see cref="SpecificReader{T}"/> that resolves the writer schema for
/// that id against <c>ReaderSchema</c>. When <typeparamref name="T"/> implements
/// <see cref="ISpecificRecord"/>, a fresh instance of <typeparamref name="T"/> is handed
/// to the reader for reuse.
/// </summary>
/// <param name="topic">
/// Not used: only the schema id embedded in the payload is needed.
/// </param>
/// <param name="array">The framed payload.</param>
/// <returns>A task that completes with the decoded value.</returns>
/// <exception cref="InvalidDataException">
/// Thrown when the payload is shorter than 5 bytes or the first byte is not
/// <c>Constants.MagicByte</c>.
/// </exception>
public async Task<T> Deserialize(string topic, byte[] array)
{
    try
    {
        // Note: topic is not necessary for deserialization (or knowing if it's a key
        // or value) only the schema id is needed.
        if (array.Length < 5)
        {
            throw new InvalidDataException($"Expecting data framing of length 5 bytes or more but total data size is {array.Length} bytes");
        }

        using (var stream = new MemoryStream(array))
        using (var reader = new BinaryReader(stream))
        {
            var magicByte = reader.ReadByte();
            if (magicByte != Constants.MagicByte)
            {
                throw new InvalidDataException($"Expecting data with Confluent Schema Registry framing. Magic byte was {array[0]}, expecting {Constants.MagicByte}");
            }

            var writerId = IPAddress.NetworkToHostOrder(reader.ReadInt32());

            DatumReader<T> datumReader;
            await deserializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);
            try
            {
                datumReaderBySchemaId.TryGetValue(writerId, out datumReader);
                if (datumReader == null)
                {
                    // Crude eviction: drop every cached reader once the cache outgrows the limit.
                    if (datumReaderBySchemaId.Count > schemaRegistryClient.MaxCachedSchemas)
                    {
                        datumReaderBySchemaId.Clear();
                    }

                    var writerSchemaJson = await schemaRegistryClient.GetSchemaAsync(writerId).ConfigureAwait(continueOnCapturedContext: false);
                    var writerSchema = global::Avro.Schema.Parse(writerSchemaJson.SchemaString);

                    datumReader = new SpecificReader<T>(writerSchema, ReaderSchema);
                    datumReaderBySchemaId[writerId] = datumReader;
                }
            }
            finally
            {
                deserializeMutex.Release();
            }

            if (typeof(ISpecificRecord).IsAssignableFrom(typeof(T)))
            {
                // This is a generic deserializer and it knows the type that needs to be serialized into.
                // Passing default(T) will result in null value and that will force the datumRead to
                // use the schema namespace and name provided in the schema, which may not match (T).
                var reuse = Activator.CreateInstance<T>();
                return datumReader.Read(reuse, new BinaryDecoder(stream));
            }

            return datumReader.Read(default(T), new BinaryDecoder(stream));
        }
    }
    catch (AggregateException e) when (e.InnerException != null)
    {
        // Unwrap the aggregate without resetting the inner exception's stack trace.
        System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(e.InnerException).Throw();
        throw; // Unreachable; satisfies the compiler's return-path analysis.
    }
}
/// <summary>
/// Builds a deserializer for a specific schema.
/// </summary>
/// <typeparam name="T">
/// The type to be deserialized.
/// </typeparam>
/// <param name="id">
/// The ID of the schema that should be used to deserialize data.
/// </param>
/// <returns>
/// A task that completes with the deserializer built from the schema that the registry
/// client returned for <paramref name="id"/>.
/// </returns>
/// <exception cref="UnsupportedTypeException">
/// Thrown when the type is incompatible with the retrieved schema.
/// </exception>
public virtual async Task<IDeserializer<T>> Build<T>(int id)
{
    // Library code: do not resume on the caller's synchronization context, so callers
    // that block on the returned task cannot deadlock on it.
    var schema = await RegistryClient.GetSchemaAsync(id).ConfigureAwait(false);

    return Build<T>(id, schema);
}