Example #1
0
        /// <summary>
        /// Serializer options shared by every <see cref="SerializeAsync"/> call. Reusing one
        /// instance avoids rebuilding serialization metadata for each message.
        /// </summary>
        private static readonly JsonSerializerOptions CamelCaseJsonOptions = new JsonSerializerOptions // TODO: Make this configurable
        {
            PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        };

        /// <summary>
        /// Serializes <paramref name="data"/> to camel-cased JSON and returns it as UTF-8 bytes.
        /// When the schema registry lists at least one version for the subject
        /// <c>{topic}-{component}</c>, the JSON is validated against the last listed version
        /// before it is returned.
        /// </summary>
        /// <param name="data">The value to serialize.</param>
        /// <param name="context">Supplies the topic and component used to build the subject name.</param>
        /// <returns>The UTF-8 encoded JSON document.</returns>
        /// <exception cref="KafkaSerializationException">
        /// Thrown when the registered schema is not of type JSON.
        /// </exception>
        /// <exception cref="KafkaJsonSchemaSerializationException">
        /// Thrown when the JSON does not validate against the registered schema.
        /// </exception>
        public async Task <byte[]> SerializeAsync(T data, SerializationContext context)
        {
            var json = JsonSerializer.Serialize(data, CamelCaseJsonOptions);

            // The subject name is an identifier, so lower-case it culture-independently.
            var subjectName     = $"{context.Topic}-{context.Component.ToString().ToLowerInvariant()}";
            var subjectVersions = await _schemaRegistryClient.ListSchemaVersionsAsync(subjectName, CancellationToken.None)
                                  .ConfigureAwait(false);

            if (subjectVersions != null && subjectVersions.Any())
            {
                // NOTE(review): assumes the registry lists versions in ascending order, so the
                // last entry is the latest - confirm against the client implementation.
                var version       = subjectVersions.Last();
                var schemaDetails = await _schemaRegistryClient.GetSchemaAsync(subjectName, version, CancellationToken.None)
                                    .ConfigureAwait(false);

                // Static string.Equals tolerates a null SchemaType; ordinal comparison is the
                // right choice for a protocol identifier.
                if (!string.Equals(schemaDetails.SchemaType, "JSON", StringComparison.OrdinalIgnoreCase))
                {
                    throw new KafkaSerializationException($"Unable to verify schema for subject {subjectName}, version {version}, " +
                                                          $"as the schema is {schemaDetails.SchemaType} but expected JSON");
                }

                var schema = await JsonSchema.FromJsonAsync(schemaDetails.Schema).ConfigureAwait(false);

                var validationErrors = schema.Validate(json);
                if (validationErrors.Any())
                {
                    var validationErrorStrings = validationErrors.Select(err => err.ToString()).ToArray();
                    throw new KafkaJsonSchemaSerializationException(validationErrorStrings);
                }
            }

            return Encoding.UTF8.GetBytes(json);
        }
Example #2
0
        /// <summary>
        /// Deserializes an Avro payload framed as one magic byte, a 4-byte big-endian writer
        /// schema id and the Avro binary body, into a <see cref="GenericRecord"/>.
        /// </summary>
        /// <param name="topic">Not used; only the schema id embedded in the data is needed.</param>
        /// <param name="array">The framed message bytes.</param>
        /// <returns>The record read with the writer schema used as both writer and reader schema.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="array"/> is null.</exception>
        /// <exception cref="InvalidDataException">
        /// Thrown when the data is shorter than the 5-byte framing or the magic byte is wrong.
        /// </exception>
        public async Task <GenericRecord> Deserialize(string topic, byte[] array)
        {
            try
            {
                // Note: topic is not necessary for deserialization (or knowing if it's a key
                // or value) only the schema id is needed.

                if (array == null)
                {
                    throw new ArgumentNullException(nameof(array));
                }

                // Reject truncated framing up front instead of failing inside the reader.
                if (array.Length < 5)
                {
                    throw new InvalidDataException($"Expecting data framing of length 5 bytes or more but total data size is {array.Length} bytes");
                }

                using (var stream = new MemoryStream(array))
                using (var reader = new BinaryReader(stream))
                {
                    var magicByte = reader.ReadByte();
                    if (magicByte != Constants.MagicByte)
                    {
                        // may change in the future.
                        throw new InvalidDataException($"magic byte should be {Constants.MagicByte}, not {magicByte}");
                    }
                    var writerId = IPAddress.NetworkToHostOrder(reader.ReadInt32());

                    DatumReader <GenericRecord> datumReader;
                    await deserializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);

                    try
                    {
                        datumReaderBySchemaId.TryGetValue(writerId, out datumReader);
                        if (datumReader == null)
                        {
                            // TODO: If any of this cache fills up, this is probably an
                            // indication of misuse of the deserializer. Ideally we would do
                            // something more sophisticated than the below + not allow
                            // the misuse to keep happening without warning.
                            if (datumReaderBySchemaId.Count > schemaRegistryClient.MaxCachedSchemas)
                            {
                                datumReaderBySchemaId.Clear();
                            }

                            var writerSchemaJson = await schemaRegistryClient.GetSchemaAsync(writerId).ConfigureAwait(continueOnCapturedContext: false);

                            var writerSchema = global::Avro.Schema.Parse(writerSchemaJson);

                            datumReader = new GenericReader <GenericRecord>(writerSchema, writerSchema);
                            datumReaderBySchemaId[writerId] = datumReader;
                        }
                    }
                    finally
                    {
                        deserializeMutex.Release();
                    }

                    // The stream is positioned just past the 5-byte header at this point.
                    return datumReader.Read(default(GenericRecord), new BinaryDecoder(stream));
                }
            }
            catch (AggregateException e) when (e.InnerException != null)
            {
                // Surface the inner exception without discarding its original stack trace.
                System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(e.InnerException).Throw();
                throw; // Unreachable; keeps the compiler's flow analysis satisfied.
            }
        }
        /// <summary>
        /// Deserializes an Avro payload framed as one magic byte, a 4-byte big-endian writer
        /// schema id and the Avro binary body, into an instance of <typeparamref name="T"/>.
        /// </summary>
        /// <param name="topic">Not used; only the schema id embedded in the data is needed.</param>
        /// <param name="array">The framed message bytes.</param>
        /// <returns>The value read using the writer schema and <c>ReaderSchema</c>.</returns>
        /// <exception cref="InvalidDataException">
        /// Thrown when the data is shorter than the 5-byte framing or the magic byte is wrong.
        /// </exception>
        public async Task <T> Deserialize(string topic, byte[] array)
        {
            try
            {
                // Note: topic is not necessary for deserialization (or knowing if it's a key
                // or value) only the schema id is needed.

                if (array.Length < 5)
                {
                    throw new InvalidDataException($"Expecting data framing of length 5 bytes or more but total data size is {array.Length} bytes");
                }

                using (var stream = new MemoryStream(array))
                using (var reader = new BinaryReader(stream))
                {
                    var magicByte = reader.ReadByte();
                    if (magicByte != Constants.MagicByte)
                    {
                        throw new InvalidDataException($"Expecting data with Confluent Schema Registry framing. Magic byte was {array[0]}, expecting {Constants.MagicByte}");
                    }
                    var writerId = IPAddress.NetworkToHostOrder(reader.ReadInt32());

                    DatumReader <T> datumReader;
                    await deserializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);

                    try
                    {
                        datumReaderBySchemaId.TryGetValue(writerId, out datumReader);
                        if (datumReader == null)
                        {
                            // Crude eviction: drop every cached reader once the limit is exceeded.
                            if (datumReaderBySchemaId.Count > schemaRegistryClient.MaxCachedSchemas)
                            {
                                datumReaderBySchemaId.Clear();
                            }

                            var writerSchemaJson = await schemaRegistryClient.GetSchemaAsync(writerId).ConfigureAwait(continueOnCapturedContext: false);

                            var writerSchema = global::Avro.Schema.Parse(writerSchemaJson.SchemaString);

                            datumReader = new SpecificReader <T>(writerSchema, ReaderSchema);
                            datumReaderBySchemaId[writerId] = datumReader;
                        }
                    }
                    finally
                    {
                        deserializeMutex.Release();
                    }

                    // The stream is positioned just past the 5-byte header at this point.
                    return datumReader.Read(default(T), new BinaryDecoder(stream));
                }
            }
            catch (AggregateException e) when (e.InnerException != null)
            {
                // Surface the inner exception without discarding its original stack trace.
                System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(e.InnerException).Throw();
                throw; // Unreachable; keeps the compiler's flow analysis satisfied.
            }
        }
Example #4
0
        public async Task <object> DeserializeAsync(ReadOnlyMemory <byte> data, bool isNull, SerializationContext context)
        {
            try
            {
                // Note: topic is not necessary for deserialization (or knowing if it's a key
                // or value) only the schema id is needed.

                using (var stream = new MemoryStream(data.ToArray()))
                    using (var reader = new BinaryReader(stream))
                    {
                        var magicByte = reader.ReadByte();
                        if (magicByte != ConfluentConstants.MagicByte)
                        {
                            // may change in the future.
                            throw new InvalidDataException($"magic byte should be {ConfluentConstants.MagicByte}, not {magicByte}");
                        }

                        var writerId = IPAddress.NetworkToHostOrder(reader.ReadInt32());

                        DatumReader <object> datumReader;

                        await _deserializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);

                        try
                        {
                            _datumReaderBySchemaId.TryGetValue(writerId, out datumReader);

                            if (datumReader == null)
                            {
                                if (_datumReaderBySchemaId.Count > _schemaRegistryClient.MaxCachedSchemas)
                                {
                                    _datumReaderBySchemaId.Clear();
                                }

                                var writerSchemaJson = await _schemaRegistryClient.GetSchemaAsync(writerId).ConfigureAwait(continueOnCapturedContext: false);

                                var writerSchema = global::Avro.Schema.Parse(writerSchemaJson);

                                // Get the ReaderSchema From The Local TopicSubjectSchemaCache
                                var readerSchema = _cache.GetSchema(writerSchema);

                                datumReader = new SpecificReader <object>(writerSchema, readerSchema);

                                _datumReaderBySchemaId[writerId] = datumReader;
                            }
                        }
                        finally
                        {
                            _deserializeMutex.Release();
                        }

                        return(datumReader.Read(default, new BinaryDecoder(stream)));
        /// <summary>
        /// Deserializes an Avro payload framed as one magic byte, a 4-byte big-endian writer
        /// schema id and the Avro binary body, into an instance of <typeparamref name="T"/>.
        /// </summary>
        /// <param name="topic">Not used; only the schema id embedded in the data is needed.</param>
        /// <param name="array">The framed message bytes.</param>
        /// <returns>The value read using the writer schema and <c>ReaderSchema</c>.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="array"/> is null.</exception>
        /// <exception cref="DeserializationException">
        /// Thrown when the data is shorter than the 5-byte framing or the magic byte is wrong.
        /// </exception>
        public async Task <T> Deserialize(string topic, byte[] array)
        {
            try
            {
                // Note: topic is not necessary for deserialization (or knowing if it's a key
                // or value) only the schema id is needed.

                if (array == null)
                {
                    throw new ArgumentNullException(nameof(array));
                }

                // Reject truncated framing up front instead of failing inside the reader.
                if (array.Length < 5)
                {
                    throw new DeserializationException($"Expecting data framing of length 5 bytes or more but total data size is {array.Length} bytes");
                }

                using (var stream = new MemoryStream(array))
                using (var reader = new BinaryReader(stream))
                {
                    var magicByte = reader.ReadByte();
                    if (magicByte != Constants.MagicByte)
                    {
                        // may change in the future.
                        throw new DeserializationException($"magic byte should be {Constants.MagicByte}, not {magicByte}");
                    }
                    var writerId = IPAddress.NetworkToHostOrder(reader.ReadInt32());

                    DatumReader <T> datumReader;
                    await deserializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);

                    try
                    {
                        datumReaderBySchemaId.TryGetValue(writerId, out datumReader);
                        if (datumReader == null)
                        {
                            // Crude eviction: drop every cached reader once the limit is exceeded.
                            if (datumReaderBySchemaId.Count > schemaRegistryClient.MaxCachedSchemas)
                            {
                                datumReaderBySchemaId.Clear();
                            }

                            var writerSchemaJson = await schemaRegistryClient.GetSchemaAsync(writerId).ConfigureAwait(continueOnCapturedContext: false);

                            var writerSchema = global::Avro.Schema.Parse(writerSchemaJson);

                            datumReader = new SpecificReader <T>(writerSchema, ReaderSchema);
                            datumReaderBySchemaId[writerId] = datumReader;
                        }
                    }
                    finally
                    {
                        deserializeMutex.Release();
                    }

                    // The stream is positioned just past the 5-byte header at this point.
                    return datumReader.Read(default(T), new BinaryDecoder(stream));
                }
            }
            catch (AggregateException e) when (e.InnerException != null)
            {
                // Surface the inner exception without discarding its original stack trace.
                System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(e.InnerException).Throw();
                throw; // Unreachable; keeps the compiler's flow analysis satisfied.
            }
        }
        /// <summary>
        /// Initializes a deserializer that resolves schemas through a Schema Registry client.
        /// </summary>
        /// <param name="registryClient">
        /// The client used to look schemas up by ID. It is captured by the resolver delegate and
        /// is not disposed by this instance.
        /// </param>
        /// <param name="deserializerBuilder">
        /// Optional builder forwarded to the chained constructor (builds deserialization
        /// functions for C# types); when omitted, the default builder is used.
        /// </param>
        /// <param name="schemaReader">
        /// Optional JSON schema reader forwarded to the chained constructor (converts registry
        /// schemas into abstract representations); when omitted, the default reader is used.
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="registryClient"/> is null.
        /// </exception>
        public AsyncSchemaRegistryDeserializer(
            ISchemaRegistryClient registryClient,
            IBinaryDeserializerBuilder deserializerBuilder = null,
            IJsonSchemaReader schemaReader = null)
            : this(deserializerBuilder, schemaReader)
        {
            if (registryClient is null)
            {
                throw new ArgumentNullException(nameof(registryClient));
            }

            // Every schema lookup is delegated to the supplied client.
            _resolve = schemaId => registryClient.GetSchemaAsync(schemaId);
        }
        /// <summary>
        ///     Deserialize a byte array in to an instance of type
        ///     <see cref="ISpecificRecord" />. This is done by finding the
        ///     SchemaId from the provided <paramref name="data"/>. If the
        ///     schema has not been seen before the <see cref="ISchemaRegistryClient"/>
        ///     is used to download the schema. The schema is used to attempt
        ///     to load type information based on the namespace and name.
        ///     After getting the Namespace and Name of the SpecificRecord
        ///     the type is loaded and a concrete instance of
        ///     <see cref="AvroDeserializer{T}"/> is constructed and cached.
        ///     Using the cached deserializer the data is then deserialized
        ///     to an instance of <see cref="ISpecificRecord" />
        /// </summary>
        /// <param name="data">
        ///     The raw byte data to deserialize: one magic byte (0), a
        ///     4-byte big-endian schema id, then the serialized record.
        /// </param>
        /// <param name="isNull">
        ///     True if this is a null value. Not consulted by this
        ///     implementation; the inner deserializer is always called
        ///     with <c>false</c>.
        /// </param>
        /// <param name="context">
        ///     Context relevant to the deserialize operation.
        /// </param>
        /// <returns>
        ///     A <see cref="System.Threading.Tasks.Task" /> that completes
        ///     with the deserialized value.
        /// </returns>
        /// <exception cref="System.IO.InvalidDataException">
        ///     Thrown when <paramref name="data"/> does not have a length of
        ///     at least 5 bytes, when its first byte is not 0, or when the
        ///     schema's namespace + name does not match a defined type.
        /// </exception>
        public async Task <ISpecificRecord> DeserializeAsync(ReadOnlyMemory <byte> data, bool isNull, SerializationContext context)
        {
            // Only the 5-byte header is inspected here, so read it in place rather than
            // copying the whole payload into a new array.
            if (data.Length < 5)
            {
                throw new InvalidDataException($"Expecting data framing of length 5 bytes or more but total data size is {data.Length} bytes");
            }

            byte magicByte = data.Span[0];
            if (magicByte != 0)
            {
                throw new InvalidDataException($"Expecting data with Confluent Schema Registry framing. Magic byte was {magicByte}, expecting {0}");
            }

            // The schema id is stored big-endian in bytes 1..4.
            var schemaId = System.Buffers.Binary.BinaryPrimitives.ReadInt32BigEndian(data.Span.Slice(1, 4));

            if (!_deserializerCache.TryGetValue(schemaId, out var deserializer))
            {
                var schema = await _schemaRegistryClient.GetSchemaAsync(schemaId).ConfigureAwait(false);

                var schemaJson      = JObject.Parse(schema.SchemaString);
                var recordNamespace = schemaJson.SelectToken("namespace")?.Value<string>();
                var recordName      = schemaJson.SelectToken("name")?.Value<string>();

                // "namespace" is optional; without it the name alone identifies the type.
                var typeString = string.IsNullOrEmpty(recordNamespace)
                    ? recordName
                    : $"{recordNamespace}.{recordName}";
                var recordType = string.IsNullOrEmpty(typeString) ? null : Type.GetType(typeString);

                if (recordType == null)
                {
                    throw new InvalidDataException($"Deserialization failure, type {typeString} cannot be found");
                }

                var deserializerType = typeof(AvroDeserializer <>).MakeGenericType(recordType);

                dynamic created = Activator.CreateInstance(deserializerType, _schemaRegistryClient, _config);

                // Indexer assignment (not Add) so that a second caller that missed the cache
                // for the same id while awaiting does not fail with a duplicate-key error.
                // NOTE(review): the cache itself is not synchronized here - confirm whether
                // concurrent calls are expected.
                _deserializerCache[schemaId] = created;
                deserializer = created;
            }

            return (ISpecificRecord)await deserializer.DeserializeAsync(data, false, context);
        }
Example #8
0
        /// <summary>
        /// Synchronously deserializes a framed Avro payload (magic byte, 4-byte network-order
        /// schema id, Avro body) into an instance of <typeparamref name="T"/>.
        /// </summary>
        /// <param name="topic">Not used; the schema id inside the data is all that is needed.</param>
        /// <param name="array">The framed message bytes.</param>
        /// <returns>The value read using the writer schema and <c>ReaderSchema</c>.</returns>
        /// <exception cref="InvalidDataException">Thrown when the magic byte is wrong.</exception>
        public T Deserialize(string topic, byte[] array)
        {
            using (var payload = new MemoryStream(array))
            using (var framing = new BinaryReader(payload))
            {
                var leadingByte = framing.ReadByte();
                if (leadingByte != Constants.MagicByte)
                {
                    // The expected value may change in the future.
                    throw new InvalidDataException($"magic byte should be 0, not {leadingByte}");
                }

                var schemaId = IPAddress.NetworkToHostOrder(framing.ReadInt32());

                DatumReader <T> cachedReader;
                lock (deserializeLockObj)
                {
                    var found = datumReaderBySchemaId.TryGetValue(schemaId, out cachedReader) && cachedReader != null;
                    if (!found)
                    {
                        // Crude eviction: drop every cached reader once the limit is exceeded.
                        var cacheIsFull = datumReaderBySchemaId.Count > schemaRegistryClient.MaxCachedSchemas;
                        if (cacheIsFull)
                        {
                            datumReaderBySchemaId.Clear();
                        }

                        // Blocks on the registry lookup while holding the lock.
                        var schemaJson = schemaRegistryClient.GetSchemaAsync(schemaId)
                                         .ConfigureAwait(false)
                                         .GetAwaiter()
                                         .GetResult();
                        var writerSchema = Avro.Schema.Parse(schemaJson);

                        cachedReader = new SpecificReader <T>(writerSchema, ReaderSchema);
                        datumReaderBySchemaId[schemaId] = cachedReader;
                    }
                }

                // The stream is positioned just past the 5-byte header at this point.
                var decoder = new BinaryDecoder(payload);
                return cachedReader.Read(default(T), decoder);
            }
        }
Example #9
0
        /// <inheritdoc/>
        /// <remarks>
        /// Expects one magic byte, a 4-byte big-endian writer schema id and the Avro body.
        /// When the writer schema has references and is a union, the writer schema is rebuilt
        /// as a JSON array of the referenced schemas, each of which must be a record.
        /// </remarks>
        public async Task <GenericRecord> Deserialize(string topic, byte[] array)
        {
            try
            {
                // Note: topic is not necessary for deserialization (or knowing if it's a key
                // or value) only the schema id is needed.

                if (array.Length < 5)
                {
                    throw new InvalidDataException($"Expecting data framing of length 5 bytes or more but total data size is {array.Length} bytes");
                }

                using (var stream = new MemoryStream(array))
                using (var reader = new BinaryReader(stream))
                {
                    var magicByte = reader.ReadByte();
                    if (magicByte != Constants.MagicByte)
                    {
                        throw new InvalidDataException($"Expecting data with Confluent Schema Registry framing. Magic byte was {array[0]}, expecting {Constants.MagicByte}");
                    }
                    var writerId = IPAddress.NetworkToHostOrder(reader.ReadInt32());

                    DatumReader <GenericRecord> datumReader;
                    await deserializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);

                    try
                    {
                        datumReaderBySchemaId.TryGetValue(writerId, out datumReader);
                        if (datumReader == null)
                        {
                            // TODO: If any of this cache fills up, this is probably an
                            // indication of misuse of the deserializer. Ideally we would do
                            // something more sophisticated than the below + not allow
                            // the misuse to keep happening without warning.
                            if (datumReaderBySchemaId.Count > schemaRegistryClient.MaxCachedSchemas)
                            {
                                datumReaderBySchemaId.Clear();
                            }

                            var writerSchemaResult = await schemaRegistryClient.GetSchemaAsync(writerId).ConfigureAwait(continueOnCapturedContext: false);

                            if (writerSchemaResult.SchemaType != SchemaType.Avro)
                            {
                                // Interpolated so the actual schema type appears in the message.
                                throw new InvalidOperationException($"Expecting writer schema to have type Avro, not {writerSchemaResult.SchemaType}");
                            }

                            Avro.Schema writerSchema = null;
                            if (writerSchemaResult.References.Any() && IsUnion(writerSchemaResult.SchemaString))
                            {
                                StringBuilder schemaBuilder = new StringBuilder();
                                schemaBuilder.Append("[");
                                foreach (var refSchema in writerSchemaResult.References)
                                {
                                    var regSchema = await schemaRegistryClient.GetRegisteredSchemaAsync(refSchema.Subject,
                                                                                                        refSchema.Version)
                                                    .ConfigureAwait(continueOnCapturedContext: false);

                                    Avro.Schema schema = Avro.Schema.Parse(regSchema.SchemaString);

                                    if (schema.Tag != Avro.Schema.Type.Record)
                                    {
                                        throw new NotSupportedException("Only union schemas containing references to a record are supported for now");
                                    }

                                    // Separate entries by position (anything already follows
                                    // the opening bracket) rather than by comparing each
                                    // reference with the last one.
                                    if (schemaBuilder.Length > 1)
                                    {
                                        schemaBuilder.Append(", ");
                                    }

                                    schemaBuilder.Append(regSchema.SchemaString);
                                }

                                schemaBuilder.Append("]");

                                writerSchema = global::Avro.Schema.Parse(schemaBuilder.ToString());
                            }
                            else
                            {
                                writerSchema = global::Avro.Schema.Parse(writerSchemaResult.SchemaString);
                            }

                            datumReader = new GenericReader <GenericRecord>(writerSchema, writerSchema);
                            datumReaderBySchemaId[writerId] = datumReader;
                        }
                    }
                    finally
                    {
                        deserializeMutex.Release();
                    }

                    // The stream is positioned just past the 5-byte header at this point.
                    return datumReader.Read(default(GenericRecord), new BinaryDecoder(stream));
                }
            }
            catch (AggregateException e) when (e.InnerException != null)
            {
                // Surface the inner exception without discarding its original stack trace.
                System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(e.InnerException).Throw();
                throw; // Unreachable; keeps the compiler's flow analysis satisfied.
            }
        }
        /// <summary>
        /// Deserializes an Avro payload framed as one magic byte, a 4-byte big-endian writer
        /// schema id and the Avro binary body, into an instance of <typeparamref name="T"/>.
        /// When <typeparamref name="T"/> implements <see cref="ISpecificRecord"/>, a fresh
        /// instance of <typeparamref name="T"/> is passed to the reader as the reuse object.
        /// </summary>
        /// <param name="topic">Not used; only the schema id embedded in the data is needed.</param>
        /// <param name="array">The framed message bytes.</param>
        /// <returns>The value read using the writer schema and <c>ReaderSchema</c>.</returns>
        /// <exception cref="InvalidDataException">
        /// Thrown when the data is shorter than the 5-byte framing or the magic byte is wrong.
        /// </exception>
        public async Task <T> Deserialize(string topic, byte[] array)
        {
            try
            {
                // Note: topic is not necessary for deserialization (or knowing if it's a key
                // or value) only the schema id is needed.

                if (array.Length < 5)
                {
                    throw new InvalidDataException($"Expecting data framing of length 5 bytes or more but total data size is {array.Length} bytes");
                }

                using (var stream = new MemoryStream(array))
                using (var reader = new BinaryReader(stream))
                {
                    var magicByte = reader.ReadByte();
                    if (magicByte != Constants.MagicByte)
                    {
                        throw new InvalidDataException($"Expecting data with Confluent Schema Registry framing. Magic byte was {array[0]}, expecting {Constants.MagicByte}");
                    }
                    var writerId = IPAddress.NetworkToHostOrder(reader.ReadInt32());

                    DatumReader <T> datumReader;
                    await deserializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);

                    try
                    {
                        datumReaderBySchemaId.TryGetValue(writerId, out datumReader);
                        if (datumReader == null)
                        {
                            // Crude eviction: drop every cached reader once the limit is exceeded.
                            if (datumReaderBySchemaId.Count > schemaRegistryClient.MaxCachedSchemas)
                            {
                                datumReaderBySchemaId.Clear();
                            }

                            var writerSchemaJson = await schemaRegistryClient.GetSchemaAsync(writerId).ConfigureAwait(continueOnCapturedContext: false);

                            var writerSchema = global::Avro.Schema.Parse(writerSchemaJson.SchemaString);

                            datumReader = new SpecificReader <T>(writerSchema, ReaderSchema);
                            datumReaderBySchemaId[writerId] = datumReader;
                        }
                    }
                    finally
                    {
                        deserializeMutex.Release();
                    }

                    if (typeof(ISpecificRecord).IsAssignableFrom(typeof(T)))
                    {
                        // This is a generic deserializer and it knows the type that needs to be deserialized into.
                        // Passing default(T) will result in a null value and that will force the datum reader to
                        // use the schema namespace and name provided in the schema, which may not match (T).
                        var reuse = Activator.CreateInstance <T>();
                        return datumReader.Read(reuse, new BinaryDecoder(stream));
                    }

                    return datumReader.Read(default(T), new BinaryDecoder(stream));
                }
            }
            catch (AggregateException e) when (e.InnerException != null)
            {
                // Surface the inner exception without discarding its original stack trace.
                System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(e.InnerException).Throw();
                throw; // Unreachable; keeps the compiler's flow analysis satisfied.
            }
        }
 /// <summary>
 /// Fetches the schema with the given ID from the registry and builds a deserializer for it.
 /// </summary>
 /// <param name="id">
 /// The ID of the schema that should be used to deserialize data.
 /// </param>
 /// <returns>
 /// A task that completes with the result of the <c>Build</c> overload that takes the ID and
 /// the retrieved schema.
 /// </returns>
 /// <exception cref="UnsupportedTypeException">
 /// Thrown when the type is incompatible with the retrieved schema.
 /// </exception>
 public virtual async Task <IDeserializer <T> > Build <T>(int id)
 {
     // Resolve the schema first, then hand off to the schema-aware overload.
     var schema = await RegistryClient.GetSchemaAsync(id);

     return Build <T>(id, schema);
 }