Beispiel #1
0
        /// <summary>
        ///     Registers (or, when <paramref name="autoRegisterSchema" /> is false, looks up) the schema of
        ///     every file that <paramref name="fd" /> depends on and returns a reference to each of them.
        ///     The dependencies of each dependency are processed first, recursively.
        /// </summary>
        /// <param name="fd">
        ///     The file descriptor whose <c>Dependencies</c> are processed.
        /// </param>
        /// <param name="context">
        ///     The serialization context handed to the reference subject name strategy.
        /// </param>
        /// <param name="autoRegisterSchema">
        ///     If true, each dependency is registered via <c>RegisterSchemaAsync</c>; otherwise it is
        ///     looked up via <c>GetSchemaIdAsync</c>.
        /// </param>
        /// <param name="skipKnownTypes">
        ///     If true, dependencies whose file name starts with "google/protobuf/" are skipped.
        /// </param>
        /// <returns>
        ///     One <c>SchemaReference</c> per processed dependency, in dependency order.
        /// </returns>
        /// <remarks>
        ///     note: protobuf does not support circular file references, so this possibility isn't considered.
        /// </remarks>
        private async Task<List<SchemaReference>> RegisterOrGetReferences(FileDescriptor fd, SerializationContext context, bool autoRegisterSchema, bool skipKnownTypes)
        {
            var tasks = new List<Task<SchemaReference>>();

            for (int i = 0; i < fd.Dependencies.Count; ++i)
            {
                FileDescriptor fileDescriptor = fd.Dependencies[i];

                // File names are identifiers, not linguistic text: compare ordinally so the result
                // does not depend on the current culture.
                if (skipKnownTypes && fileDescriptor.Name.StartsWith("google/protobuf/", StringComparison.Ordinal))
                {
                    continue;
                }

                Func<FileDescriptor, Task<SchemaReference>> t = async (FileDescriptor dependency) =>
                {
                    // References of the dependency itself must exist before its schema can be built.
                    var dependencyReferences = await RegisterOrGetReferences(dependency, context, autoRegisterSchema, skipKnownTypes).ConfigureAwait(continueOnCapturedContext: false);

                    var subject = referenceSubjectNameStrategy(context, dependency.Name);
                    var schema = new Schema(dependency.SerializedData.ToBase64(), dependencyReferences, SchemaType.Protobuf);

                    // The returned id is not needed here; the call is made for its effect
                    // (registering the schema, or failing if it cannot be found).
                    if (autoRegisterSchema)
                    {
                        await schemaRegistryClient.RegisterSchemaAsync(subject, schema).ConfigureAwait(continueOnCapturedContext: false);
                    }
                    else
                    {
                        await schemaRegistryClient.GetSchemaIdAsync(subject, schema).ConfigureAwait(continueOnCapturedContext: false);
                    }

                    // The reference needs the version under which the schema is registered.
                    var registeredDependentSchema = await schemaRegistryClient.LookupSchemaAsync(subject, schema, true).ConfigureAwait(continueOnCapturedContext: false);

                    return new SchemaReference(dependency.Name, subject, registeredDependentSchema.Version);
                };
                tasks.Add(t(fileDescriptor));
            }

            // WhenAll yields the results in the same order as the tasks were supplied.
            SchemaReference[] references = await Task.WhenAll(tasks).ConfigureAwait(continueOnCapturedContext: false);

            return new List<SchemaReference>(references);
        }
Beispiel #2
0
        /// <summary>
        ///     Registers (or, when <paramref name="autoRegisterSchema" /> is false, looks up) the schema of
        ///     every file that <paramref name="fd" /> depends on and returns a reference to each of them.
        ///     The dependencies of each dependency are processed first, recursively.
        /// </summary>
        /// <param name="fd">
        ///     The file descriptor whose <c>Dependencies</c> are processed.
        /// </param>
        /// <param name="context">
        ///     The serialization context handed to the reference subject name strategy.
        /// </param>
        /// <param name="autoRegisterSchema">
        ///     If true, each dependency is registered via <c>RegisterSchemaAsync</c>; otherwise it is
        ///     looked up via <c>GetSchemaIdAsync</c>.
        /// </param>
        /// <returns>
        ///     One <c>SchemaReference</c> per dependency, in dependency order.
        /// </returns>
        /// <remarks>
        ///     note: protobuf does not support circular file references, so this possibility isn't considered.
        /// </remarks>
        private async Task<List<SchemaReference>> RegisterOrGetReferences(FileDescriptor fd, SerializationContext context, bool autoRegisterSchema)
        {
            // Each task returns its own reference instead of appending to a shared list:
            // the continuations may run concurrently and List<T>.Add is not thread-safe.
            var tasks = new Task<SchemaReference>[fd.Dependencies.Count];

            for (int i = 0; i < fd.Dependencies.Count; ++i)
            {
                var dependency = fd.Dependencies[i];
                Func<Task<SchemaReference>> t = async () =>
                {
                    // References of the dependency itself must exist before its schema can be built.
                    var dependencyReferences = await RegisterOrGetReferences(dependency, context, autoRegisterSchema).ConfigureAwait(continueOnCapturedContext: false);

                    var subject = referenceSubjectNameStrategy(context, dependency.Name);
                    var schema = new Schema(dependency.SerializedData.ToBase64(), dependencyReferences, SchemaType.Protobuf);

                    // The returned id is not needed here; the call is made for its effect
                    // (registering the schema, or failing if it cannot be found).
                    if (autoRegisterSchema)
                    {
                        await schemaRegistryClient.RegisterSchemaAsync(subject, schema).ConfigureAwait(continueOnCapturedContext: false);
                    }
                    else
                    {
                        await schemaRegistryClient.GetSchemaIdAsync(subject, schema).ConfigureAwait(continueOnCapturedContext: false);
                    }

                    // The reference needs the version under which the schema is registered.
                    var registeredDependentSchema = await schemaRegistryClient.LookupSchemaAsync(subject, schema, true).ConfigureAwait(continueOnCapturedContext: false);

                    return new SchemaReference(dependency.Name, subject, registeredDependentSchema.Version);
                };
                tasks[i] = t();
            }

            // WhenAll yields the results in the same order as the tasks were supplied.
            SchemaReference[] references = await Task.WhenAll(tasks).ConfigureAwait(continueOnCapturedContext: false);

            return new List<SchemaReference>(references);
        }
        /// <summary>
        /// Initializes a serializer whose schema registration and resolution are backed by a
        /// Schema Registry client.
        /// </summary>
        /// <param name="registryClient">
        /// The client used for Schema Registry operations. (The client will not be disposed.)
        /// </param>
        /// <param name="registerAutomatically">
        /// Whether schemas that match the type being serialized are registered automatically.
        /// </param>
        /// <param name="schemaBuilder">
        /// The schema builder used to build a schema for a C# type when registering automatically.
        /// If none is provided, the default schema builder will be used.
        /// </param>
        /// <param name="schemaReader">
        /// The JSON schema reader used to convert schemas received from the registry into abstract
        /// representations. If none is provided, the default schema reader will be used.
        /// </param>
        /// <param name="schemaWriter">
        /// The JSON schema writer used to convert abstract schema representations when registering
        /// automatically. If none is provided, the default schema writer will be used.
        /// </param>
        /// <param name="serializerBuilder">
        /// The serializer builder used to build serialization functions for C# types. If none is
        /// provided, the default serializer builder will be used.
        /// </param>
        /// <param name="subjectNameBuilder">
        /// A function that determines the subject name given the topic name and a component type
        /// (key or value). If none is provided, the default "{topic name}-{component}" naming
        /// convention will be used.
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when the registry client is null.
        /// </exception>
        public AsyncSchemaRegistrySerializer(
            ISchemaRegistryClient registryClient,
            bool registerAutomatically = false,
            Abstract.ISchemaBuilder schemaBuilder = null,
            IJsonSchemaReader schemaReader = null,
            IJsonSchemaWriter schemaWriter = null,
            IBinarySerializerBuilder serializerBuilder = null,
            Func<SerializationContext, string> subjectNameBuilder = null)
            : this(registerAutomatically, schemaBuilder, schemaReader, schemaWriter, serializerBuilder, subjectNameBuilder)
        {
            // Note: the chained constructor has already run by the time this check executes.
            if (registryClient is null)
            {
                throw new ArgumentNullException(nameof(registryClient));
            }

            // Route registration and resolution through the supplied client.
            _resolve = subjectName => registryClient.GetLatestSchemaAsync(subjectName);
            _register = (subjectName, schemaJson) => registryClient.RegisterSchemaAsync(subjectName, schemaJson);
        }
Beispiel #4
0
        /// <summary>
        /// Initializes a serializer whose schema registration and resolution are backed by a
        /// Schema Registry client.
        /// </summary>
        /// <param name="registryClient">
        /// The client used for Schema Registry operations. (The client will not be disposed.)
        /// </param>
        /// <param name="registerAutomatically">
        /// Whether schemas that match the type being serialized are registered automatically.
        /// </param>
        /// <param name="schemaBuilder">
        /// The schema builder used to create a schema for a C# type when registering automatically.
        /// If none is provided, a new <c>Abstract.SchemaBuilder</c> is used.
        /// </param>
        /// <param name="schemaReader">
        /// The JSON schema reader used to convert schemas received from the registry into abstract
        /// representations. If none is provided, a new <c>JsonSchemaReader</c> is used.
        /// </param>
        /// <param name="schemaWriter">
        /// The JSON schema writer used to convert abstract schema representations when registering
        /// automatically. If none is provided, a new <c>JsonSchemaWriter</c> is used.
        /// </param>
        /// <param name="serializerBuilder">
        /// The serializer builder used to build serialization functions for C# types. If none
        /// is provided, a new <c>BinarySerializerBuilder</c> is used.
        /// </param>
        /// <param name="subjectNameBuilder">
        /// A function that determines the subject name given the topic name and a component type
        /// (key or value). If none is provided, the default "{topic name}-{component}" naming
        /// convention will be used.
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when the registry client is null.
        /// </exception>
        public AsyncSchemaRegistrySerializer(
            ISchemaRegistryClient registryClient,
            bool registerAutomatically = false,
            Abstract.ISchemaBuilder? schemaBuilder = null,
            IJsonSchemaReader? schemaReader = null,
            IJsonSchemaWriter? schemaWriter = null,
            IBinarySerializerBuilder? serializerBuilder = null,
            Func<SerializationContext, string>? subjectNameBuilder = null)
        {
            if (registryClient is null)
            {
                throw new ArgumentNullException(nameof(registryClient));
            }

            // Configuration: every optional collaborator falls back to a default instance.
            RegisterAutomatically = registerAutomatically;
            SchemaBuilder = schemaBuilder ?? new Abstract.SchemaBuilder();
            SchemaReader = schemaReader ?? new JsonSchemaReader();
            SchemaWriter = schemaWriter ?? new JsonSchemaWriter();
            SerializerBuilder = serializerBuilder ?? new BinarySerializerBuilder();
            SubjectNameBuilder = subjectNameBuilder
                ?? (context => $"{context.Topic}-{(context.Component == MessageComponentType.Key ? "key" : "value")}");

            // Internal state: serializer cache plus the registry-backed delegates.
            _cache = new ConcurrentDictionary<string, Task<Func<T, byte[]>>>();
            _register = (subjectName, schemaJson) => registryClient.RegisterSchemaAsync(subjectName, schemaJson);
            _resolve = subjectName => registryClient.GetLatestSchemaAsync(subjectName);
        }
        /// <summary>
        ///     Serializes <paramref name="data" /> with the Avro writer and prefixes the payload with
        ///     the magic byte followed by the writer schema id in network byte order. The first call
        ///     for a given topic registers the writer schema (or fetches its id) and blocks until
        ///     that registry call completes.
        /// </summary>
        /// <param name="topic">
        ///     The topic associated with the data; used to construct the subject name.
        /// </param>
        /// <param name="data">
        ///     The object to serialize.
        /// </param>
        /// <returns>
        ///     The framed, serialized bytes.
        /// </returns>
        public byte[] Serialize(string topic, T data)
        {
            lock (serializeLockObj)
            {
                var firstUseOfTopic = !topicsRegistered.Contains(topic);
                if (firstUseOfTopic)
                {
                    string subject;
                    if (isKey)
                    {
                        subject = schemaRegistryClient.ConstructKeySubjectName(topic);
                    }
                    else
                    {
                        subject = schemaRegistryClient.ConstructValueSubjectName(topic);
                    }

                    // first usage: register/get schema to check compatibility
                    if (autoRegisterSchema)
                    {
                        writerSchemaId = schemaRegistryClient.RegisterSchemaAsync(subject, writerSchemaString).ConfigureAwait(continueOnCapturedContext: false).GetAwaiter().GetResult();
                    }
                    else
                    {
                        writerSchemaId = schemaRegistryClient.GetSchemaIdAsync(subject, writerSchemaString).ConfigureAwait(continueOnCapturedContext: false).GetAwaiter().GetResult();
                    }

                    topicsRegistered.Add(topic);
                }
            }

            using (var stream = new MemoryStream(initialBufferSize))
            using (var writer = new BinaryWriter(stream))
            {
                // Frame layout: magic byte, schema id (network byte order), Avro payload.
                stream.WriteByte(Constants.MagicByte);

                var networkOrderSchemaId = IPAddress.HostToNetworkOrder(writerSchemaId.Value);
                writer.Write(networkOrderSchemaId);

                var encoder = new BinaryEncoder(stream);
                avroWriter.Write(data, encoder);

                // TODO: maybe change the ISerializer interface so that this copy isn't necessary.
                return stream.ToArray();
            }
        }
Beispiel #6
0
        /// <summary>
        /// Initializes a serializer whose schema registration and resolution are backed by a
        /// Schema Registry client.
        /// </summary>
        /// <param name="registryClient">
        /// The client used for Schema Registry operations. (The client will not be disposed.)
        /// </param>
        /// <param name="registerAutomatically">
        /// When to automatically register schemas that match the type being serialized.
        /// </param>
        /// <param name="schemaBuilder">
        /// The schema builder used to create a schema for a C# type when registering automatically.
        /// If none is provided, a new <c>Abstract.SchemaBuilder</c> is used.
        /// </param>
        /// <param name="schemaReader">
        /// The JSON schema reader used to convert schemas received from the registry into abstract
        /// representations. If none is provided, a new <c>JsonSchemaReader</c> is used.
        /// </param>
        /// <param name="schemaWriter">
        /// The JSON schema writer used to convert abstract schema representations when registering
        /// automatically. If none is provided, a new <c>JsonSchemaWriter</c> is used.
        /// </param>
        /// <param name="serializerBuilder">
        /// The serializer builder used to build serialization functions for C# types. If none
        /// is provided, a new <c>BinarySerializerBuilder</c> is used.
        /// </param>
        /// <param name="subjectNameBuilder">
        /// A function that determines the subject name given the topic name and a component type
        /// (key or value). If none is provided, the default "{topic name}-{component}" naming
        /// convention will be used.
        /// </param>
        /// <param name="tombstoneBehavior">
        /// The behavior of the serializer on tombstone records.
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when the registry client is null.
        /// </exception>
        /// <exception cref="UnsupportedTypeException">
        /// Thrown when a tombstone behavior other than <c>TombstoneBehavior.None</c> is requested
        /// but the default value of <typeparamref name="T" /> is not null.
        /// </exception>
        public AsyncSchemaRegistrySerializer(
            ISchemaRegistryClient registryClient,
            AutomaticRegistrationBehavior registerAutomatically = AutomaticRegistrationBehavior.Never,
            Abstract.ISchemaBuilder schemaBuilder = null,
            IJsonSchemaReader schemaReader = null,
            IJsonSchemaWriter schemaWriter = null,
            IBinarySerializerBuilder serializerBuilder = null,
            Func<SerializationContext, string> subjectNameBuilder = null,
            TombstoneBehavior tombstoneBehavior = TombstoneBehavior.None)
        {
            if (registryClient is null)
            {
                throw new ArgumentNullException(nameof(registryClient));
            }

            // Tombstone handling is only accepted for types whose default value is null.
            var tombstonesRequested = tombstoneBehavior != TombstoneBehavior.None;
            if (tombstonesRequested && default(T) != null)
            {
                throw new UnsupportedTypeException(typeof(T), $"{typeof(T)} cannot represent tombstone values.");
            }

            // Configuration: every optional collaborator falls back to a default instance.
            RegisterAutomatically = registerAutomatically;
            SchemaBuilder = schemaBuilder ?? new Abstract.SchemaBuilder();
            SchemaReader = schemaReader ?? new JsonSchemaReader();
            SchemaWriter = schemaWriter ?? new JsonSchemaWriter();
            SerializerBuilder = serializerBuilder ?? new BinarySerializerBuilder();
            SubjectNameBuilder = subjectNameBuilder
                ?? (context => $"{context.Topic}-{(context.Component == MessageComponentType.Key ? "key" : "value")}");
            TombstoneBehavior = tombstoneBehavior;

            // Internal state: serializer cache plus the registry-backed delegates.
            _cache = new Dictionary<string, Task<Func<T, byte[]>>>();
            _register = (subjectName, schemaJson) => registryClient.RegisterSchemaAsync(subjectName, schemaJson);
            _resolve = subjectName => registryClient.GetLatestSchemaAsync(subjectName);
        }
        /// <summary>
        ///     Serializes <paramref name="data" /> with the Avro writer and prefixes the payload with
        ///     the magic byte followed by the writer schema id in network byte order. The first call
        ///     for a given subject registers the writer schema (or fetches its id).
        /// </summary>
        /// <param name="topic">
        ///     The topic associated with the data; used to derive the subject name.
        /// </param>
        /// <param name="data">
        ///     The object to serialize. If it is an <c>ISpecificRecord</c> with a record schema, the
        ///     schema's full name is also passed to the subject name logic.
        /// </param>
        /// <param name="isKey">
        ///     Whether the data represents a message key (true) or a message value (false).
        /// </param>
        /// <returns>
        ///     The framed, serialized bytes.
        /// </returns>
        public async Task<byte[]> Serialize(string topic, T data, bool isKey)
        {
            try
            {
                // Acquired outside the inner try so the mutex is only released if it was taken.
                await serializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);

                try
                {
                    string fullname = null;
                    if (data is ISpecificRecord && ((ISpecificRecord)data).Schema is Avro.RecordSchema)
                    {
                        fullname = ((Avro.RecordSchema)((ISpecificRecord)data).Schema).Fullname;
                    }

                    string subject = this.subjectNameStrategy != null
                                     // use the subject name strategy specified in the serializer config if available.
                        ? this.subjectNameStrategy(new SerializationContext(isKey ? MessageComponentType.Key : MessageComponentType.Value, topic), fullname)
                                     // else fall back to the deprecated config from (or default as currently supplied by) SchemaRegistry.
                        : isKey
                            ? schemaRegistryClient.ConstructKeySubjectName(topic, fullname)
                            : schemaRegistryClient.ConstructValueSubjectName(topic, fullname);

                    if (!subjectsRegistered.Contains(subject))
                    {
                        // first usage: register/get schema to check compatibility
                        writerSchemaId = autoRegisterSchema
                            ? await schemaRegistryClient.RegisterSchemaAsync(subject, writerSchemaString).ConfigureAwait(continueOnCapturedContext: false)
                            : await schemaRegistryClient.GetSchemaIdAsync(subject, writerSchemaString).ConfigureAwait(continueOnCapturedContext: false);

                        subjectsRegistered.Add(subject);
                    }
                }
                finally
                {
                    serializeMutex.Release();
                }

                using (var stream = new MemoryStream(initialBufferSize))
                using (var writer = new BinaryWriter(stream))
                {
                    // Frame layout: magic byte, schema id (network byte order), Avro payload.
                    stream.WriteByte(Constants.MagicByte);

                    writer.Write(IPAddress.HostToNetworkOrder(writerSchemaId.Value));
                    avroWriter.Write(data, new BinaryEncoder(stream));

                    // TODO: maybe change the ISerializer interface so that this copy isn't necessary.
                    return stream.ToArray();
                }
            }
            catch (AggregateException e) when (e.InnerException != null)
            {
                // Surface the underlying exception with its original stack trace intact.
                // An AggregateException without an inner exception is left to propagate as-is
                // (the filter does not match) rather than turning into a NullReferenceException.
                System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(e.InnerException).Throw();
                throw; // not reached; Throw() never returns, but the compiler cannot know that.
            }
        }
        /// <summary>
        ///     Serializes <paramref name="data" /> with the Avro writer and prefixes the payload with
        ///     the magic byte followed by the writer schema id in network byte order. The first call
        ///     for a given subject registers the writer schema (or fetches its id).
        /// </summary>
        /// <param name="topic">
        ///     The topic associated with the data; combined with the full name of
        ///     <typeparamref name="T" /> to construct the subject name.
        /// </param>
        /// <param name="data">
        ///     The object to serialize.
        /// </param>
        /// <param name="isKey">
        ///     Whether the data represents a message key (true) or a message value (false).
        /// </param>
        /// <returns>
        ///     The framed, serialized bytes.
        /// </returns>
        public async Task<byte[]> Serialize(string topic, T data, bool isKey)
        {
            try
            {
                // Acquired outside the inner try so the mutex is only released if it was taken.
                await serializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);

                try
                {
                    string subject = isKey
                        ? schemaRegistryClient.ConstructKeySubjectName(topic, typeof(T).FullName)
                        : schemaRegistryClient.ConstructValueSubjectName(topic, typeof(T).FullName);

                    if (!subjectsRegistered.Contains(subject))
                    {
                        // first usage: register/get schema to check compatibility
                        writerSchemaId = autoRegisterSchema
                            ? await schemaRegistryClient.RegisterSchemaAsync(subject, writerSchemaString).ConfigureAwait(continueOnCapturedContext: false)
                            : await schemaRegistryClient.GetSchemaIdAsync(subject, writerSchemaString).ConfigureAwait(continueOnCapturedContext: false);

                        subjectsRegistered.Add(subject);
                    }
                }
                finally
                {
                    serializeMutex.Release();
                }

                using (var stream = new MemoryStream(initialBufferSize))
                using (var writer = new BinaryWriter(stream))
                {
                    // Frame layout: magic byte, schema id (network byte order), Avro payload.
                    stream.WriteByte(Constants.MagicByte);

                    writer.Write(IPAddress.HostToNetworkOrder(writerSchemaId.Value));
                    avroWriter.Write(data, new BinaryEncoder(stream));

                    // TODO: maybe change the ISerializer interface so that this copy isn't necessary.
                    return stream.ToArray();
                }
            }
            catch (AggregateException e) when (e.InnerException != null)
            {
                // Surface the underlying exception with its original stack trace intact.
                // An AggregateException without an inner exception is left to propagate as-is
                // (the filter does not match) rather than turning into a NullReferenceException.
                System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(e.InnerException).Throw();
                throw; // not reached; Throw() never returns, but the compiler cannot know that.
            }
        }
        /// <summary>
        /// Builds a serializer for a specific schema.
        /// </summary>
        /// <typeparam name="T">
        /// The type to be serialized.
        /// </typeparam>
        /// <param name="subject">
        /// The subject of the schema that should be used to serialize data. The latest version of
        /// the subject will be resolved.
        /// </param>
        /// <param name="registerAutomatically">
        /// Whether to automatically register a schema that matches <typeparamref name="T" /> if
        /// the subject cannot be found (registry error code 40401) or if the latest schema is
        /// incompatible with <typeparamref name="T" />.
        /// </param>
        /// <returns>
        /// A serializer built from either the latest registered schema or the newly registered one.
        /// </returns>
        /// <exception cref="UnsupportedTypeException">
        /// Thrown when the type is incompatible with the retrieved schema or a matching schema
        /// cannot be generated.
        /// </exception>
        public async Task<ISerializer<T>> Build<T>(string subject, bool registerAutomatically = false)
        {
            try
            {
                // Library code: do not resume on the caller's synchronization context, so callers
                // that block on the returned task cannot deadlock on it.
                var schema = await RegistryClient.GetLatestSchemaAsync(subject).ConfigureAwait(false);

                return Build<T>(schema.Id, schema.SchemaString);
            }
            catch (Exception e) when (registerAutomatically && (
                                          (e is SchemaRegistryException sre && sre.ErrorCode == 40401) ||
                                          (e is UnsupportedTypeException)
                                          ))
            {
                // Fall back to generating a schema from T and registering it under the subject.
                var schema = _schemaBuilder.BuildSchema<T>();
                var json = _schemaWriter.Write(schema);

                var id = await RegistryClient.RegisterSchemaAsync(subject, json).ConfigureAwait(false);

                return Build<T>(id, json);
            }
        }
Beispiel #10
0
        /// <summary>
        ///     Serializes <paramref name="data" /> with the Avro specific writer and prefixes the
        ///     payload with the magic byte followed by the writer schema id in network byte order.
        ///     The writer schema is read lazily from the public static <c>_SCHEMA</c> field of
        ///     <typeparamref name="T" />; the first call for a given subject registers the schema
        ///     (or fetches its id).
        /// </summary>
        /// <param name="topic">
        ///     The topic associated with the data; used to derive the subject name.
        /// </param>
        /// <param name="data">
        ///     The object to serialize.
        /// </param>
        /// <param name="isKey">
        ///     Whether the data represents a message key (true) or a message value (false).
        /// </param>
        /// <returns>
        ///     The framed, serialized bytes.
        /// </returns>
        /// <exception cref="Exception">
        ///     Thrown when no schema id is available after the registration step.
        /// </exception>
        public async Task<byte[]> Serialize(string topic, T data, bool isKey)
        {
            try
            {
                // Acquired outside the inner try so the mutex is only released if it was taken.
                await _serializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);

                try
                {
                    // Lazily resolve the writer schema. This happens while holding the mutex so
                    // that no concurrent caller can observe the schema without its string form
                    // and writer. _writerSchema is assigned last: if anything above it throws,
                    // the next call retries the whole initialization.
                    // NOTE(review): GetField returns null when T has no public static _SCHEMA
                    // field, which surfaces here as a NullReferenceException.
                    if (_writerSchema == null)
                    {
                        var writerSchema = (global::Avro.Schema)typeof(T).GetField("_SCHEMA", BindingFlags.Public | BindingFlags.Static).GetValue(null);
                        _writerSchemaString = writerSchema.ToString();
                        _avroWriter = new SpecificWriter<T>(writerSchema);
                        _writerSchema = writerSchema;
                    }

                    var subject = isKey ? SubjectNameFactory.KeySubjectNameFrom<T>(topic) : SubjectNameFactory.ValueSubjectNameFrom<T>(topic);

                    if (!_subjectsRegistered.Contains(subject))
                    {
                        // first usage: register/get schema to check compatibility
                        _writerSchemaId = _autoRegisterSchema
                            ? await _schemaRegistryClient.RegisterSchemaAsync(subject, _writerSchemaString).ConfigureAwait(continueOnCapturedContext: false)
                            : await _schemaRegistryClient.GetSchemaIdAsync(subject, _writerSchemaString).ConfigureAwait(continueOnCapturedContext: false);

                        _subjectsRegistered.Add(subject);
                    }
                }
                finally
                {
                    _serializeMutex.Release();
                }

                if (_writerSchemaId.HasValue == false)
                {
                    throw new Exception("Not SchemaId Available For Message");
                }

                using (var stream = new MemoryStream(_initialBufferSize))
                using (var writer = new BinaryWriter(stream))
                {
                    // Frame layout: magic byte, schema id (network byte order), Avro payload.
                    stream.WriteByte(ConfluentConstants.MagicByte);

                    writer.Write(IPAddress.HostToNetworkOrder(_writerSchemaId.Value));
                    _avroWriter.Write(data, new BinaryEncoder(stream));

                    // TODO: maybe change the ISerializer interface so that this copy isn't necessary.
                    return stream.ToArray();
                }
            }
            catch (AggregateException e) when (e.InnerException != null)
            {
                // Surface the underlying exception with its original stack trace intact.
                // An AggregateException without an inner exception does not match the filter
                // and propagates unchanged.
                System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(e.InnerException).Throw();
                throw; // not reached; Throw() never returns, but the compiler cannot know that.
            }
        }
Beispiel #11
0
        /// <summary>
        ///     Serialize GenericRecord instance to a byte array in Avro format. The serialized
        ///     data is preceded by a "magic byte" (1 byte) and the id of the schema as registered
        ///     in Confluent's Schema Registry (4 bytes, network byte order). This call may block or throw
        ///     on first use for a particular topic during schema registration.
        /// </summary>
        /// <param name="topic">
        ///     The topic associated with the data.
        /// </param>
        /// <param name="data">
        ///     The object to serialize.
        /// </param>
        /// <returns>
        ///     <paramref name="data" /> serialized as a byte array.
        /// </returns>
        public byte[] Serialize(string topic, GenericRecord data)
        {
            int schemaId;

            Avro.RecordSchema writerSchema;
            lock (serializeLockObj)
            {
                // TODO: If any of these caches fills up, this is probably an
                // indication of misuse of the serializer. Ideally we would do
                // something more sophisticated than the below + not allow
                // the misuse to keep happening without warning.
                if (knownSchemas.Count > schemaRegistryClient.MaxCachedSchemas ||
                    registeredSchemas.Count > schemaRegistryClient.MaxCachedSchemas ||
                    schemaIds.Count > schemaRegistryClient.MaxCachedSchemas)
                {
                    knownSchemas.Clear();
                    registeredSchemas.Clear();
                    schemaIds.Clear();
                }

                // Determine a schema string corresponding to the schema object.
                // TODO: It would be more efficient to use a hash function based
                // on the instance reference, not the implementation provided by
                // Schema.
                writerSchema = data.Schema;
                string writerSchemaString;
                if (!knownSchemas.TryGetValue(writerSchema, out writerSchemaString))
                {
                    writerSchemaString = writerSchema.ToString();
                    knownSchemas.Add(writerSchema, writerSchemaString);
                }

                // Verify schema compatibility (& register as required) + get the
                // id corresponding to the schema.
                // TODO: Again, the hash functions in use below are potentially
                // slow since writerSchemaString is potentially long. It would be
                // better to use hash functions based on the writerSchemaString
                // object reference, not value.
                string subject = this.isKey
                    ? schemaRegistryClient.ConstructKeySubjectName(topic)
                    : schemaRegistryClient.ConstructValueSubjectName(topic);
                var subjectSchemaPair = new KeyValuePair<string, string>(subject, writerSchemaString);
                if (!registeredSchemas.Contains(subjectSchemaPair))
                {
                    // first usage: register/get schema to check compatibility
                    var newSchemaId = autoRegisterSchema
                        ? schemaRegistryClient.RegisterSchemaAsync(subject, writerSchemaString).ConfigureAwait(false).GetAwaiter().GetResult()
                        : schemaRegistryClient.GetSchemaIdAsync(subject, writerSchemaString).ConfigureAwait(false).GetAwaiter().GetResult();

                    // schemaIds is keyed by schema string only, whereas registeredSchemas is keyed
                    // by (subject, schema string). The same schema used with a second subject
                    // therefore reaches this point with the key already present, so assign through
                    // the indexer instead of Add, which would throw on the duplicate key.
                    schemaIds[writerSchemaString] = newSchemaId;

                    registeredSchemas.Add(subjectSchemaPair);
                }
                schemaId = schemaIds[writerSchemaString];
            }

            using (var stream = new MemoryStream(initialBufferSize))
            using (var writer = new BinaryWriter(stream))
            {
                // Frame layout: magic byte, schema id (network byte order), Avro payload.
                stream.WriteByte(Constants.MagicByte);
                writer.Write(IPAddress.HostToNetworkOrder(schemaId));
                new GenericWriter<GenericRecord>(writerSchema)
                    .Write(data, new BinaryEncoder(stream));
                return stream.ToArray();
            }
        }
Beispiel #12
0
        /// <summary>
        ///     Serialize GenericRecord instance to a byte array in Avro format. The serialized
        ///     data is preceded by a "magic byte" (1 byte) and the id of the schema as registered
        ///     in Confluent's Schema Registry (4 bytes, network byte order). This call may block or throw
        ///     on first use for a particular topic during schema registration.
        /// </summary>
        /// <param name="topic">
        ///     The topic associated with the data.
        /// </param>
        /// <param name="data">
        ///     The object to serialize.
        /// </param>
        /// <param name="isKey">
        ///     whether or not the data represents a message key.
        /// </param>
        /// <returns>
        ///     <paramref name="data" /> serialized as a byte array.
        /// </returns>
        public async Task <byte[]> Serialize(string topic, GenericRecord data, bool isKey)
        {
            try
            {
                int schemaId;
                global::Avro.Schema writerSchema;
                await serializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);

                try
                {
                    // TODO: If any of these caches fills up, this is probably an
                    // indication of misuse of the serializer. Ideally we would do
                    // something more sophisticated than the below + not allow
                    // the misuse to keep happening without warning.
                    if (knownSchemas.Count > schemaRegistryClient.MaxCachedSchemas ||
                        registeredSchemas.Count > schemaRegistryClient.MaxCachedSchemas ||
                        schemaIds.Count > schemaRegistryClient.MaxCachedSchemas)
                    {
                        knownSchemas.Clear();
                        registeredSchemas.Clear();
                        schemaIds.Clear();
                    }

                    // Determine a schema string corresponding to the schema object.
                    // TODO: It would be more efficient to use a hash function based
                    // on the instance reference, not the implementation provided by
                    // Schema.
                    writerSchema = data.Schema;
                    string writerSchemaString = null;
                    if (knownSchemas.ContainsKey(writerSchema))
                    {
                        writerSchemaString = knownSchemas[writerSchema];
                    }
                    else
                    {
                        writerSchemaString = writerSchema.ToString();
                        knownSchemas.Add(writerSchema, writerSchemaString);
                    }

                    // Verify schema compatibility (& register as required) + get the
                    // id corresponding to the schema.

                    // TODO: Again, the hash functions in use below are potentially
                    // slow since writerSchemaString is potentially long. It would be
                    // better to use hash functions based on the writerSchemaString
                    // object reference, not value.

                    string subject = this.subjectNameStrategy != null
                                     // use the subject name strategy specified in the serializer config if available.
                        ? this.subjectNameStrategy(new SerializationContext(isKey ? MessageComponentType.Key : MessageComponentType.Value, topic), data.Schema.Fullname)
                                     // else fall back to the deprecated config from (or default as currently supplied by) SchemaRegistry.
                        : isKey
                            ? schemaRegistryClient.ConstructKeySubjectName(topic, data.Schema.Fullname)
                            : schemaRegistryClient.ConstructValueSubjectName(topic, data.Schema.Fullname);

                    var subjectSchemaPair = new KeyValuePair <string, string>(subject, writerSchemaString);
                    if (!registeredSchemas.Contains(subjectSchemaPair))
                    {
                        int newSchemaId;
                        // first usage: register/get schema to check compatibility
                        if (autoRegisterSchema)
                        {
                            newSchemaId = await schemaRegistryClient.RegisterSchemaAsync(subject, writerSchemaString).ConfigureAwait(continueOnCapturedContext: false);
                        }
                        // https://www.confluent.io/blog/multiple-event-types-in-the-same-kafka-topic/
                        else if (useLatestSchema)
                        {
                            RegisteredSchema regSchema = await schemaRegistryClient.GetLatestSchemaAsync(subject)
                                                         .ConfigureAwait(continueOnCapturedContext: false);

                            //Do we have an Avro union with schema references
                            if (regSchema.References.Any() && IsUnion(regSchema.SchemaString))
                            {
                                RegisteredSchema registeredRefSchema = null;
                                StringBuilder    schemaBuilder       = new StringBuilder();
                                schemaBuilder.Append("[");
                                //We need to loop the schema references and perform a schema registry lookup
                                // in order to check compability with referencced schema
                                foreach (var refSchemaString in regSchema.References)
                                {
                                    registeredRefSchema = await schemaRegistryClient.GetRegisteredSchemaAsync(refSchemaString.Subject,
                                                                                                              refSchemaString.Version)
                                                          .ConfigureAwait(continueOnCapturedContext: false);

                                    Avro.Schema refSchema = Avro.Schema.Parse(registeredRefSchema.SchemaString);

                                    if (refSchema.Tag != Avro.Schema.Type.Record)
                                    {
                                        throw new NotSupportedException("Only union schemas containing references to a record are supported for now");
                                    }

                                    schemaBuilder.Append($"{registeredRefSchema.SchemaString}");
                                    if (regSchema.References.Last() != refSchemaString)
                                    {
                                        schemaBuilder.Append(",");
                                    }
                                }

                                schemaBuilder.Append("]");
                                unionSchemas[writerSchema] = global::Avro.Schema.Parse(schemaBuilder.ToString());
                                newSchemaId = regSchema.Id;
                                // subjectSchemaPair = new KeyValuePair<string, string>(subject, writerSchema.ToString());
                            }
                            else
                            {
                                newSchemaId = await schemaRegistryClient.GetSchemaIdAsync(subject, writerSchemaString)
                                              .ConfigureAwait(continueOnCapturedContext: false);
                            }
                        }
                        else
                        {
                            newSchemaId = await schemaRegistryClient.GetSchemaIdAsync(subject, writerSchemaString).ConfigureAwait(continueOnCapturedContext: false);
                        }

                        if (!schemaIds.ContainsKey(writerSchemaString))
                        {
                            schemaIds.Add(writerSchemaString, newSchemaId);
                        }
                        else if (schemaIds[writerSchemaString] != newSchemaId)
                        {
                            schemaIds.Clear();
                            registeredSchemas.Clear();
                            throw new KafkaException(new Error(isKey ? ErrorCode.Local_KeySerialization : ErrorCode.Local_ValueSerialization, $"Duplicate schema registration encountered: Schema ids {schemaIds[writerSchemaString]} and {newSchemaId} are associated with the same schema."));
                        }

                        registeredSchemas.Add(subjectSchemaPair);
                    }

                    schemaId = schemaIds[writerSchemaString];
                }
                finally
                {
                    serializeMutex.Release();
                }

                Avro.Schema unionSchema;
                if (unionSchemas.TryGetValue(writerSchema, out unionSchema))
                {
                    writerSchema = unionSchema;
                }

                using (var stream = new MemoryStream(initialBufferSize))
                    using (var writer = new BinaryWriter(stream))
                    {
                        stream.WriteByte(Constants.MagicByte);
                        writer.Write(IPAddress.HostToNetworkOrder(schemaId));
                        new GenericWriter <GenericRecord>(writerSchema)
                        .Write(data, new BinaryEncoder(stream));
                        return(stream.ToArray());
                    }
            }
            catch (AggregateException e)
            {
                throw e.InnerException;
            }
        }
        /// <summary>
        ///     Serialize GenericRecord instance to a byte array in Avro format. The serialized
        ///     data is preceded by a "magic byte" (1 byte) and the id of the schema as registered
        ///     in Confluent's Schema Registry (4 bytes, network byte order). This call may block or throw
        ///     on first use for a particular topic during schema registration.
        /// </summary>
        /// <param name="topic">
        ///     The topic associated with the data.
        /// </param>
        /// <param name="data">
        ///     The object to serialize.
        /// </param>
        /// <param name="isKey">
        ///     whether or not the data represents a message key.
        /// </param>
        /// <returns>
        ///     <paramref name="data" /> serialized as a byte array.
        /// </returns>
        /// <exception cref="KafkaException">
        ///     Thrown when the schema id obtained from the registry differs from the id
        ///     already cached for the same schema string.
        /// </exception>
        public async Task <byte[]> Serialize(string topic, GenericRecord data, bool isKey)
        {
            try
            {
                int schemaId;
                global::Avro.RecordSchema writerSchema;

                // All cache reads/writes below happen while holding serializeMutex.
                await serializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);

                try
                {
                    // TODO: If any of these caches fills up, this is probably an
                    // indication of misuse of the serializer. Ideally we would do
                    // something more sophisticated than the below + not allow
                    // the misuse to keep happening without warning.
                    if (knownSchemas.Count > schemaRegistryClient.MaxCachedSchemas ||
                        registeredSchemas.Count > schemaRegistryClient.MaxCachedSchemas ||
                        schemaIds.Count > schemaRegistryClient.MaxCachedSchemas)
                    {
                        knownSchemas.Clear();
                        registeredSchemas.Clear();
                        schemaIds.Clear();
                    }

                    // Determine a schema string corresponding to the schema object.
                    // TODO: It would be more efficient to use a hash function based
                    // on the instance reference, not the implementation provided by
                    // Schema.
                    writerSchema = data.Schema;
                    string writerSchemaString;
                    if (!knownSchemas.TryGetValue(writerSchema, out writerSchemaString))
                    {
                        writerSchemaString = writerSchema.ToString();
                        knownSchemas.Add(writerSchema, writerSchemaString);
                    }

                    // Verify schema compatibility (& register as required) + get the
                    // id corresponding to the schema.
                    // TODO: Again, the hash functions in use below are potentially
                    // slow since writerSchemaString is potentially long. It would be
                    // better to use hash functions based on the writerSchemaString
                    // object reference, not value.
                    string subject = isKey
                        ? schemaRegistryClient.ConstructKeySubjectName(topic)
                        : schemaRegistryClient.ConstructValueSubjectName(topic);

                    var subjectSchemaPair = new KeyValuePair <string, string>(subject, writerSchemaString);
                    if (!registeredSchemas.Contains(subjectSchemaPair))
                    {
                        int newSchemaId;
                        // first usage: register/get schema to check compatibility
                        if (autoRegisterSchema)
                        {
                            newSchemaId = await schemaRegistryClient.RegisterSchemaAsync(subject, writerSchemaString).ConfigureAwait(continueOnCapturedContext: false);
                        }
                        else
                        {
                            newSchemaId = await schemaRegistryClient.GetSchemaIdAsync(subject, writerSchemaString).ConfigureAwait(continueOnCapturedContext: false);
                        }

                        int cachedSchemaId;
                        if (!schemaIds.TryGetValue(writerSchemaString, out cachedSchemaId))
                        {
                            schemaIds.Add(writerSchemaString, newSchemaId);
                        }
                        else if (cachedSchemaId != newSchemaId)
                        {
                            // The cached id is captured above, before the caches are cleared:
                            // reading schemaIds[writerSchemaString] after Clear() would throw
                            // KeyNotFoundException and hide the intended KafkaException.
                            schemaIds.Clear();
                            registeredSchemas.Clear();
                            throw new KafkaException(new Error(isKey ? ErrorCode.Local_KeySerialization : ErrorCode.Local_ValueSerialization, $"Duplicate schema registration encountered: Schema ids {cachedSchemaId} and {newSchemaId} are associated with the same schema."));
                        }

                        registeredSchemas.Add(subjectSchemaPair);
                    }
                    schemaId = schemaIds[writerSchemaString];
                }
                finally
                {
                    serializeMutex.Release();
                }

                // Framing: magic byte, then the schema id, then the Avro binary payload.
                using (var stream = new MemoryStream(initialBufferSize))
                using (var writer = new BinaryWriter(stream))
                {
                    stream.WriteByte(Constants.MagicByte);
                    writer.Write(IPAddress.HostToNetworkOrder(schemaId));
                    new GenericWriter <GenericRecord>(writerSchema)
                    .Write(data, new BinaryEncoder(stream));
                    return(stream.ToArray());
                }
            }
            catch (AggregateException e)
            {
                // Surface the underlying failure rather than the AggregateException wrapper.
                throw e.InnerException;
            }
        }
Beispiel #14
0
        /// <summary>
        ///     Serialize an instance of type <typeparamref name="T"/> to a UTF8 encoded JSON
        ///     representation. The serialized data is preceded by:
        ///       1. A "magic byte" (1 byte) that identifies this as a message with
        ///          Confluent Platform framing.
        ///       2. The id of the schema as registered in Confluent's Schema Registry
        ///          (4 bytes, network byte order).
        ///     This call may block or throw on first use for a particular topic during
        ///     schema registration / verification.
        /// </summary>
        /// <param name="value">
        ///     The value to serialize.
        /// </param>
        /// <param name="context">
        ///     Context relevant to the serialize operation.
        /// </param>
        /// <returns>
        ///     A <see cref="System.Threading.Tasks.Task" /> that completes with
        ///     <paramref name="value" /> serialized as a byte array, or with null
        ///     when <paramref name="value" /> is null.
        /// </returns>
        /// <exception cref="InvalidDataException">
        ///     Thrown when the JSON produced for <paramref name="value" /> fails validation
        ///     against the serializer's schema; the message lists the offending property paths.
        /// </exception>
        public async Task <byte[]> SerializeAsync(T value, SerializationContext context)
        {
            if (value == null)
            {
                return(null);
            }

            var serializedString = Newtonsoft.Json.JsonConvert.SerializeObject(value);
            var validationResult = validator.Validate(serializedString, this.schema);

            if (validationResult.Count > 0)
            {
                // Join the paths first, then append the closing bracket. Appending "]" to the
                // Select(...) result inside the Join call would stringify the enumerable itself
                // and drop the actual paths from the message.
                throw new InvalidDataException("Schema validation failed for properties: [" + string.Join(", ", validationResult.Select(r => r.Path)) + "]");
            }

            try
            {
                // subjectsRegistered and schemaId are only touched while holding serializeMutex.
                await serializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);

                try
                {
                    string subject = this.subjectNameStrategy != null
                                     // use the subject name strategy specified in the serializer config if available.
                        ? this.subjectNameStrategy(context, this.schemaFullname)
                                     // else fall back to the deprecated config from (or default as currently supplied by) SchemaRegistry.
                        : context.Component == MessageComponentType.Key
                            ? schemaRegistryClient.ConstructKeySubjectName(context.Topic, this.schemaFullname)
                            : schemaRegistryClient.ConstructValueSubjectName(context.Topic, this.schemaFullname);

                    if (!subjectsRegistered.Contains(subject))
                    {
                        // first usage: register/get schema to check compatibility
                        schemaId = autoRegisterSchema
                            ? await schemaRegistryClient.RegisterSchemaAsync(subject, new Schema(this.schemaText, EmptyReferencesList, SchemaType.Json))
                                   .ConfigureAwait(continueOnCapturedContext: false)
                            : await schemaRegistryClient.GetSchemaIdAsync(subject, new Schema(this.schemaText, EmptyReferencesList, SchemaType.Json))
                                   .ConfigureAwait(continueOnCapturedContext: false);

                        // TODO: It may be better to fail fast if conflicting values for schemaId are seen here.

                        subjectsRegistered.Add(subject);
                    }
                }
                finally
                {
                    serializeMutex.Release();
                }

                // Framing: magic byte, then the schema id, then the UTF8 JSON payload.
                using (var stream = new MemoryStream(initialBufferSize))
                using (var writer = new BinaryWriter(stream))
                {
                    stream.WriteByte(Constants.MagicByte);
                    writer.Write(IPAddress.HostToNetworkOrder(schemaId.Value));
                    writer.Write(System.Text.Encoding.UTF8.GetBytes(serializedString));
                    return(stream.ToArray());
                }
            }
            catch (AggregateException e)
            {
                // Surface the underlying failure rather than the AggregateException wrapper.
                throw e.InnerException;
            }
        }