/// <summary>
        /// Initializes a serializer that performs its Schema Registry operations through the
        /// supplied client.
        /// </summary>
        /// <param name="registryClient">
        /// The client used for Schema Registry operations. (The client will not be disposed.)
        /// </param>
        /// <param name="registerAutomatically">
        /// Whether to automatically register schemas that match the type being serialized.
        /// </param>
        /// <param name="schemaBuilder">
        /// The schema builder used to build a schema for a C# type when registering
        /// automatically. If none is provided, the default schema builder will be used.
        /// </param>
        /// <param name="schemaReader">
        /// The JSON schema reader used to convert schemas received from the registry into
        /// abstract representations. If none is provided, the default schema reader will be used.
        /// </param>
        /// <param name="schemaWriter">
        /// The JSON schema writer used to convert abstract schema representations when
        /// registering automatically. If none is provided, the default schema writer will be used.
        /// </param>
        /// <param name="serializerBuilder">
        /// The serializer builder used to build serialization functions for C# types. If none is
        /// provided, the default serializer builder will be used.
        /// </param>
        /// <param name="subjectNameBuilder">
        /// A function that determines the subject name given the topic name and a component type
        /// (key or value). If none is provided, the default "{topic name}-{component}" naming
        /// convention will be used.
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="registryClient" /> is null.
        /// </exception>
        public AsyncSchemaRegistrySerializer(
            ISchemaRegistryClient registryClient,
            bool registerAutomatically = false,
            Abstract.ISchemaBuilder schemaBuilder = null,
            IJsonSchemaReader schemaReader = null,
            IJsonSchemaWriter schemaWriter = null,
            IBinarySerializerBuilder serializerBuilder = null,
            Func<SerializationContext, string> subjectNameBuilder = null)
            : this(
                registerAutomatically,
                schemaBuilder,
                schemaReader,
                schemaWriter,
                serializerBuilder,
                subjectNameBuilder)
        {
            // The chained constructor has already run; the client is only validated here
            // because it is only needed for the two delegates below.
            if (registryClient is null)
            {
                throw new ArgumentNullException(nameof(registryClient));
            }

            // Route registration and resolution through the caller-owned client.
            _register = (subjectName, schemaJson) => registryClient.RegisterSchemaAsync(subjectName, schemaJson);
            _resolve = subjectName => registryClient.GetLatestSchemaAsync(subjectName);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Initializes a serializer that performs its Schema Registry operations through the
        /// supplied client.
        /// </summary>
        /// <param name="registryClient">
        /// The client to use for Schema Registry operations. (The client will not be disposed.)
        /// </param>
        /// <param name="registerAutomatically">
        /// Whether to automatically register schemas that match the type being serialized.
        /// </param>
        /// <param name="schemaBuilder">
        /// The schema builder to use to create a schema for a C# type when registering
        /// automatically. When null, a new <c>Abstract.SchemaBuilder</c> is used.
        /// </param>
        /// <param name="schemaReader">
        /// The JSON schema reader to use to convert schemas received from the registry into
        /// abstract representations. When null, a new <c>JsonSchemaReader</c> is used.
        /// </param>
        /// <param name="schemaWriter">
        /// The JSON schema writer to use to convert abstract schema representations when
        /// registering automatically. When null, a new <c>JsonSchemaWriter</c> is used.
        /// </param>
        /// <param name="serializerBuilder">
        /// The serializer builder to use to build serialization functions for C# types. When
        /// null, a new <c>BinarySerializerBuilder</c> is used.
        /// </param>
        /// <param name="subjectNameBuilder">
        /// A function that determines the subject name given the topic name and a component type
        /// (key or value). When null, the "{topic name}-key" / "{topic name}-value" naming
        /// convention is used.
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="registryClient" /> is null.
        /// </exception>
        public AsyncSchemaRegistrySerializer(
            ISchemaRegistryClient registryClient,
            bool registerAutomatically = false,
            Abstract.ISchemaBuilder? schemaBuilder = null,
            IJsonSchemaReader? schemaReader = null,
            IJsonSchemaWriter? schemaWriter = null,
            IBinarySerializerBuilder? serializerBuilder = null,
            Func<SerializationContext, string>? subjectNameBuilder = null)
        {
            if (registryClient is null)
            {
                throw new ArgumentNullException(nameof(registryClient));
            }

            // Fall back to the "{topic}-key" / "{topic}-value" convention when the caller
            // does not supply a subject naming function.
            if (subjectNameBuilder == null)
            {
                subjectNameBuilder = context =>
                    $"{context.Topic}-{(context.Component == MessageComponentType.Key ? "key" : "value")}";
            }

            RegisterAutomatically = registerAutomatically;
            SchemaBuilder = schemaBuilder ?? new Abstract.SchemaBuilder();
            SchemaReader = schemaReader ?? new JsonSchemaReader();
            SchemaWriter = schemaWriter ?? new JsonSchemaWriter();
            SerializerBuilder = serializerBuilder ?? new BinarySerializerBuilder();
            SubjectNameBuilder = subjectNameBuilder;

            // Cache of serialization delegates keyed by string, plus the two registry
            // operations routed through the caller-owned client.
            _cache = new ConcurrentDictionary<string, Task<Func<T, byte[]>>>();
            _register = (subjectName, schemaJson) => registryClient.RegisterSchemaAsync(subjectName, schemaJson);
            _resolve = subjectName => registryClient.GetLatestSchemaAsync(subjectName);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Initializes a serializer that performs its Schema Registry operations through the
        /// supplied client.
        /// </summary>
        /// <param name="registryClient">
        /// The client to use for Schema Registry operations. (The client will not be disposed.)
        /// </param>
        /// <param name="registerAutomatically">
        /// When to automatically register schemas that match the type being serialized.
        /// </param>
        /// <param name="schemaBuilder">
        /// The schema builder to use to create a schema for a C# type when registering
        /// automatically. When null, a new <c>Abstract.SchemaBuilder</c> is used.
        /// </param>
        /// <param name="schemaReader">
        /// The JSON schema reader to use to convert schemas received from the registry into
        /// abstract representations. When null, a new <c>JsonSchemaReader</c> is used.
        /// </param>
        /// <param name="schemaWriter">
        /// The JSON schema writer to use to convert abstract schema representations when
        /// registering automatically. When null, a new <c>JsonSchemaWriter</c> is used.
        /// </param>
        /// <param name="serializerBuilder">
        /// The serializer builder to use to build serialization functions for C# types. When
        /// null, a new <c>BinarySerializerBuilder</c> is used.
        /// </param>
        /// <param name="subjectNameBuilder">
        /// A function that determines the subject name given the topic name and a component type
        /// (key or value). When null, the "{topic name}-key" / "{topic name}-value" naming
        /// convention is used.
        /// </param>
        /// <param name="tombstoneBehavior">
        /// The behavior of the serializer on tombstone records.
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="registryClient" /> is null.
        /// </exception>
        /// <exception cref="UnsupportedTypeException">
        /// Thrown when a tombstone behavior other than <c>TombstoneBehavior.None</c> is requested
        /// but the default value of <typeparamref name="T" /> is not null.
        /// </exception>
        public AsyncSchemaRegistrySerializer(
            ISchemaRegistryClient registryClient,
            AutomaticRegistrationBehavior registerAutomatically = AutomaticRegistrationBehavior.Never,
            Abstract.ISchemaBuilder schemaBuilder = null,
            IJsonSchemaReader schemaReader = null,
            IJsonSchemaWriter schemaWriter = null,
            IBinarySerializerBuilder serializerBuilder = null,
            Func<SerializationContext, string> subjectNameBuilder = null,
            TombstoneBehavior tombstoneBehavior = TombstoneBehavior.None)
        {
            if (registryClient is null)
            {
                throw new ArgumentNullException(nameof(registryClient));
            }

            // Tombstone handling is rejected for any T whose default value is not null.
            var defaultIsNull = default(T) == null;

            if (!defaultIsNull && tombstoneBehavior != TombstoneBehavior.None)
            {
                throw new UnsupportedTypeException(typeof(T), $"{typeof(T)} cannot represent tombstone values.");
            }

            // Fall back to the "{topic}-key" / "{topic}-value" convention when the caller
            // does not supply a subject naming function.
            if (subjectNameBuilder == null)
            {
                subjectNameBuilder = context =>
                    $"{context.Topic}-{(context.Component == MessageComponentType.Key ? "key" : "value")}";
            }

            RegisterAutomatically = registerAutomatically;
            SchemaBuilder = schemaBuilder ?? new Abstract.SchemaBuilder();
            SchemaReader = schemaReader ?? new JsonSchemaReader();
            SchemaWriter = schemaWriter ?? new JsonSchemaWriter();
            SerializerBuilder = serializerBuilder ?? new BinarySerializerBuilder();
            SubjectNameBuilder = subjectNameBuilder;
            TombstoneBehavior = tombstoneBehavior;

            // Cache of serialization delegates keyed by string, plus the two registry
            // operations routed through the caller-owned client.
            _cache = new Dictionary<string, Task<Func<T, byte[]>>>();
            _register = (subjectName, schemaJson) => registryClient.RegisterSchemaAsync(subjectName, schemaJson);
            _resolve = subjectName => registryClient.GetLatestSchemaAsync(subjectName);
        }
        /// <summary>
        /// Builds a serializer for a specific schema.
        /// </summary>
        /// <param name="subject">
        /// The subject of the schema that should be used to serialize data. The latest version of
        /// the subject will be resolved.
        /// </param>
        /// <param name="registerAutomatically">
        /// Whether to automatically register a schema that matches <typeparamref name="T" /> if
        /// the subject does not exist or its latest schema is incompatible with
        /// <typeparamref name="T" />.
        /// </param>
        /// <returns>
        /// A task that completes with a serializer built from either the resolved schema or the
        /// newly registered one.
        /// </returns>
        /// <exception cref="UnsupportedTypeException">
        /// Thrown when the type is incompatible with the retrieved schema or a matching schema
        /// cannot be generated.
        /// </exception>
        public async Task<ISerializer<T>> Build<T>(string subject, bool registerAutomatically = false)
        {
            // Schema Registry error code reported when the requested subject does not exist.
            const int SubjectNotFoundErrorCode = 40401;

            try
            {
                // Library code: do not resume on the caller's synchronization context, so
                // callers that block on the returned task cannot deadlock.
                var schema = await RegistryClient.GetLatestSchemaAsync(subject).ConfigureAwait(false);

                return Build<T>(schema.Id, schema.SchemaString);
            }
            catch (Exception e) when (registerAutomatically && (
                (e is SchemaRegistryException sre && sre.ErrorCode == SubjectNotFoundErrorCode) ||
                e is UnsupportedTypeException))
            {
                // Either the subject is missing or its latest schema does not fit T: generate
                // a schema from T, register it, and build the serializer against that instead.
                var schema = _schemaBuilder.BuildSchema<T>();
                var json = _schemaWriter.Write(schema);

                var id = await RegistryClient.RegisterSchemaAsync(subject, json).ConfigureAwait(false);

                return Build<T>(id, json);
            }
        }
Exemplo n.º 5
0
        /// <summary>
        ///     Serializes a <see cref="GenericRecord" /> to a byte array in Avro format. The
        ///     serialized data is preceded by a "magic byte" (1 byte) and the id of the schema as
        ///     registered in Confluent's Schema Registry (4 bytes, network byte order). This call
        ///     may block or throw on first use for a particular topic during schema registration.
        /// </summary>
        /// <param name="topic">
        ///     The topic associated with the data.
        /// </param>
        /// <param name="data">
        ///     The object to serialize.
        /// </param>
        /// <param name="isKey">
        ///     Whether or not the data represents a message key.
        /// </param>
        /// <returns>
        ///     <paramref name="data" /> serialized as a byte array.
        /// </returns>
        /// <exception cref="KafkaException">
        ///     Thrown when the registry reports a schema id that differs from the id already
        ///     cached for the same schema string.
        /// </exception>
        /// <exception cref="NotSupportedException">
        ///     Thrown when the latest schema is a union with references and one of the referenced
        ///     schemas is not a record.
        /// </exception>
        public async Task<byte[]> Serialize(string topic, GenericRecord data, bool isKey)
        {
            try
            {
                int schemaId;
                global::Avro.Schema writerSchema;
                await serializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);

                try
                {
                    // TODO: If any of these caches fills up, this is probably an
                    // indication of misuse of the serializer. Ideally we would do
                    // something more sophisticated than the below + not allow
                    // the misuse to keep happening without warning.
                    if (knownSchemas.Count > schemaRegistryClient.MaxCachedSchemas ||
                        registeredSchemas.Count > schemaRegistryClient.MaxCachedSchemas ||
                        schemaIds.Count > schemaRegistryClient.MaxCachedSchemas)
                    {
                        knownSchemas.Clear();
                        registeredSchemas.Clear();
                        schemaIds.Clear();
                    }

                    // Determine a schema string corresponding to the schema object.
                    // TODO: It would be more efficient to use a hash function based
                    // on the instance reference, not the implementation provided by
                    // Schema.
                    writerSchema = data.Schema;
                    string writerSchemaString;
                    if (!knownSchemas.TryGetValue(writerSchema, out writerSchemaString))
                    {
                        writerSchemaString = writerSchema.ToString();
                        knownSchemas.Add(writerSchema, writerSchemaString);
                    }

                    // Verify schema compatibility (& register as required) + get the
                    // id corresponding to the schema.

                    // TODO: Again, the hash functions in use below are potentially
                    // slow since writerSchemaString is potentially long. It would be
                    // better to use hash functions based on the writerSchemaString
                    // object reference, not value.

                    string subject = this.subjectNameStrategy != null
                        // use the subject name strategy specified in the serializer config if available.
                        ? this.subjectNameStrategy(new SerializationContext(isKey ? MessageComponentType.Key : MessageComponentType.Value, topic), data.Schema.Fullname)
                        // else fall back to the deprecated config from (or default as currently supplied by) SchemaRegistry.
                        : isKey
                            ? schemaRegistryClient.ConstructKeySubjectName(topic, data.Schema.Fullname)
                            : schemaRegistryClient.ConstructValueSubjectName(topic, data.Schema.Fullname);

                    var subjectSchemaPair = new KeyValuePair<string, string>(subject, writerSchemaString);
                    if (!registeredSchemas.Contains(subjectSchemaPair))
                    {
                        int newSchemaId;
                        // first usage: register/get schema to check compatibility
                        if (autoRegisterSchema)
                        {
                            newSchemaId = await schemaRegistryClient.RegisterSchemaAsync(subject, writerSchemaString).ConfigureAwait(continueOnCapturedContext: false);
                        }
                        // https://www.confluent.io/blog/multiple-event-types-in-the-same-kafka-topic/
                        else if (useLatestSchema)
                        {
                            RegisteredSchema regSchema = await schemaRegistryClient.GetLatestSchemaAsync(subject)
                                .ConfigureAwait(continueOnCapturedContext: false);

                            // Do we have an Avro union with schema references?
                            if (regSchema.References.Any() && IsUnion(regSchema.SchemaString))
                            {
                                // Look up every referenced schema in the registry and assemble
                                // them into a JSON array, i.e. an Avro union of the referenced
                                // record schemas.
                                var schemaBuilder = new StringBuilder();
                                schemaBuilder.Append("[");

                                // Empty before the first element, "," before each later one.
                                var separator = string.Empty;
                                foreach (var schemaReference in regSchema.References)
                                {
                                    RegisteredSchema registeredRefSchema = await schemaRegistryClient
                                        .GetRegisteredSchemaAsync(schemaReference.Subject, schemaReference.Version)
                                        .ConfigureAwait(continueOnCapturedContext: false);

                                    global::Avro.Schema refSchema = global::Avro.Schema.Parse(registeredRefSchema.SchemaString);

                                    if (refSchema.Tag != global::Avro.Schema.Type.Record)
                                    {
                                        throw new NotSupportedException("Only union schemas containing references to a record are supported for now");
                                    }

                                    schemaBuilder.Append(separator);
                                    schemaBuilder.Append(registeredRefSchema.SchemaString);
                                    separator = ",";
                                }

                                schemaBuilder.Append("]");
                                unionSchemas[writerSchema] = global::Avro.Schema.Parse(schemaBuilder.ToString());
                                newSchemaId = regSchema.Id;
                            }
                            else
                            {
                                newSchemaId = await schemaRegistryClient.GetSchemaIdAsync(subject, writerSchemaString)
                                    .ConfigureAwait(continueOnCapturedContext: false);
                            }
                        }
                        else
                        {
                            newSchemaId = await schemaRegistryClient.GetSchemaIdAsync(subject, writerSchemaString).ConfigureAwait(continueOnCapturedContext: false);
                        }

                        int knownSchemaId;
                        if (!schemaIds.TryGetValue(writerSchemaString, out knownSchemaId))
                        {
                            schemaIds.Add(writerSchemaString, newSchemaId);
                        }
                        else if (knownSchemaId != newSchemaId)
                        {
                            // The cached id was captured above, before the caches are cleared.
                            // Reading schemaIds[...] after Clear() would throw
                            // KeyNotFoundException and mask the error reported here.
                            schemaIds.Clear();
                            registeredSchemas.Clear();
                            throw new KafkaException(new Error(isKey ? ErrorCode.Local_KeySerialization : ErrorCode.Local_ValueSerialization, $"Duplicate schema registration encountered: Schema ids {knownSchemaId} and {newSchemaId} are associated with the same schema."));
                        }

                        registeredSchemas.Add(subjectSchemaPair);
                    }

                    schemaId = schemaIds[writerSchemaString];

                    // unionSchemas is written while the mutex is held (see above), so read
                    // it under the same mutex rather than after releasing it.
                    global::Avro.Schema unionSchema;
                    if (unionSchemas.TryGetValue(writerSchema, out unionSchema))
                    {
                        writerSchema = unionSchema;
                    }
                }
                finally
                {
                    serializeMutex.Release();
                }

                using (var stream = new MemoryStream(initialBufferSize))
                using (var writer = new BinaryWriter(stream))
                {
                    // Framing: magic byte, then the schema id in network byte order, then
                    // the Avro-encoded record.
                    stream.WriteByte(Constants.MagicByte);
                    writer.Write(IPAddress.HostToNetworkOrder(schemaId));
                    new GenericWriter<GenericRecord>(writerSchema)
                        .Write(data, new BinaryEncoder(stream));
                    return stream.ToArray();
                }
            }
            catch (AggregateException e)
            {
                // Surface the underlying failure rather than the aggregate wrapper.
                throw e.InnerException;
            }
        }
Exemplo n.º 6
0
        /// <summary>
        ///     Serializes an instance of type <typeparamref name="T"/> to a byte array
        ///     in Protobuf format. The serialized data is preceded by:
        ///       1. A "magic byte" (1 byte) that identifies this as a message with
        ///          Confluent Platform framing.
        ///       2. The id of the schema as registered in Confluent's Schema Registry
        ///          (4 bytes, network byte order).
        ///       3. A size-prefixed array of indices that identify the specific message
        ///          type in the schema (a given schema can contain many message types
        ///          and they can be nested). Size and indices are unsigned varints. The
        ///          common case where the message type is the first message in the schema
        ///          (i.e. index data would be [1,0]) is encoded as simply a single 0 byte
        ///          as an optimization.
        ///     This call may block or throw on first use for a particular topic during
        ///     schema registration / verification.
        /// </summary>
        /// <param name="value">
        ///     The value to serialize. A null value yields a null result.
        /// </param>
        /// <param name="context">
        ///     Context relevant to the serialize operation.
        /// </param>
        /// <returns>
        ///     A <see cref="System.Threading.Tasks.Task" /> that completes with
        ///     <paramref name="value" /> serialized as a byte array.
        /// </returns>
        public async Task<byte[]> SerializeAsync(T value, SerializationContext context)
        {
            if (value == null)
            {
                return null;
            }

            try
            {
                // The message index array is computed once, on first use, and then reused.
                if (indexArray == null)
                {
                    indexArray = createIndexArray(value.Descriptor, useDeprecatedFormat);
                }

                string messageName = value.Descriptor.FullName;

                await serializeMutex.WaitAsync().ConfigureAwait(false);

                try
                {
                    string subject;
                    if (subjectNameStrategy != null)
                    {
                        // use the subject name strategy specified in the serializer config if available.
                        subject = subjectNameStrategy(context, messageName);
                    }
                    else if (context.Component == MessageComponentType.Key)
                    {
                        // else fall back to the deprecated config from (or default as currently supplied by) SchemaRegistry.
                        subject = schemaRegistryClient.ConstructKeySubjectName(context.Topic, messageName);
                    }
                    else
                    {
                        subject = schemaRegistryClient.ConstructValueSubjectName(context.Topic, messageName);
                    }

                    if (!subjectsRegistered.Contains(subject))
                    {
                        if (useLatestVersion)
                        {
                            var latest = await schemaRegistryClient.GetLatestSchemaAsync(subject)
                                .ConfigureAwait(false);

                            schemaId = latest.Id;
                        }
                        else
                        {
                            var references = await RegisterOrGetReferences(
                                    value.Descriptor.File, context, autoRegisterSchema, skipKnownTypes)
                                .ConfigureAwait(false);

                            var protobufSchema = new Schema(
                                value.Descriptor.File.SerializedData.ToBase64(),
                                references,
                                SchemaType.Protobuf);

                            // first usage: register/get schema to check compatibility
                            if (autoRegisterSchema)
                            {
                                schemaId = await schemaRegistryClient.RegisterSchemaAsync(subject, protobufSchema)
                                    .ConfigureAwait(false);
                            }
                            else
                            {
                                schemaId = await schemaRegistryClient.GetSchemaIdAsync(subject, protobufSchema)
                                    .ConfigureAwait(false);
                            }

                            // note: different values for schemaId should never be seen here.
                            // TODO: but fail fast may be better here.
                        }

                        subjectsRegistered.Add(subject);
                    }
                }
                finally
                {
                    serializeMutex.Release();
                }

                using (var stream = new MemoryStream(initialBufferSize))
                using (var writer = new BinaryWriter(stream))
                {
                    // Framing: magic byte, schema id in network byte order, message index
                    // array, then the Protobuf-encoded message.
                    stream.WriteByte(Constants.MagicByte);
                    writer.Write(IPAddress.HostToNetworkOrder(schemaId.Value));
                    writer.Write(indexArray);
                    value.WriteTo(stream);
                    return stream.ToArray();
                }
            }
            catch (AggregateException e)
            {
                // Surface the underlying failure rather than the aggregate wrapper.
                throw e.InnerException;
            }
        }
        /// <summary>
        /// Builds a deserializer for a specific schema.
        /// </summary>
        /// <param name="subject">
        /// The subject of the schema that should be used to deserialize data. The latest version
        /// of the subject will be resolved.
        /// </param>
        /// <returns>
        /// A task that completes with a deserializer built from the id and schema string of the
        /// latest schema registered under <paramref name="subject" />.
        /// </returns>
        /// <exception cref="UnsupportedTypeException">
        /// Thrown when the type is incompatible with the retrieved schema.
        /// </exception>
        public virtual async Task<IDeserializer<T>> Build<T>(string subject)
        {
            // Library code: do not resume on the caller's synchronization context, so
            // callers that block on the returned task cannot deadlock.
            var schema = await RegistryClient.GetLatestSchemaAsync(subject).ConfigureAwait(false);

            return Build<T>(schema.Id, schema.SchemaString);
        }
Exemplo n.º 8
0
        /// <summary>
        ///     Serializes an instance of type <typeparamref name="T"/> to a UTF8 encoded JSON
        ///     representation. The serialized data is preceded by:
        ///       1. A "magic byte" (1 byte) that identifies this as a message with
        ///          Confluent Platform framing.
        ///       2. The id of the schema as registered in Confluent's Schema Registry
        ///          (4 bytes, network byte order).
        ///     This call may block or throw on first use for a particular topic during
        ///     schema registration / verification.
        /// </summary>
        /// <param name="value">
        ///     The value to serialize. A null value yields a null result.
        /// </param>
        /// <param name="context">
        ///     Context relevant to the serialize operation.
        /// </param>
        /// <returns>
        ///     A <see cref="System.Threading.Tasks.Task" /> that completes with
        ///     <paramref name="value" /> serialized as a byte array.
        /// </returns>
        /// <exception cref="InvalidDataException">
        ///     Thrown when the JSON produced for <paramref name="value" /> fails validation
        ///     against the serializer's schema.
        /// </exception>
        public async Task<byte[]> SerializeAsync(T value, SerializationContext context)
        {
            if (value == null)
            {
                return null;
            }

            // Produce and validate the JSON up front, before any registry work is done.
            var json = Newtonsoft.Json.JsonConvert.SerializeObject(value, this.jsonSchemaGeneratorSettings?.ActualSerializerSettings);
            var validationErrors = validator.Validate(json, this.schema);

            if (validationErrors.Count > 0)
            {
                var failedPaths = string.Join(", ", validationErrors.Select(error => error.Path));
                throw new InvalidDataException("Schema validation failed for properties: [" + failedPaths + "]");
            }

            try
            {
                await serializeMutex.WaitAsync().ConfigureAwait(false);

                try
                {
                    string subject;
                    if (this.subjectNameStrategy != null)
                    {
                        // use the subject name strategy specified in the serializer config if available.
                        subject = this.subjectNameStrategy(context, this.schemaFullname);
                    }
                    else if (context.Component == MessageComponentType.Key)
                    {
                        // else fall back to the deprecated config from (or default as currently supplied by) SchemaRegistry.
                        subject = schemaRegistryClient.ConstructKeySubjectName(context.Topic, this.schemaFullname);
                    }
                    else
                    {
                        subject = schemaRegistryClient.ConstructValueSubjectName(context.Topic, this.schemaFullname);
                    }

                    if (!subjectsRegistered.Contains(subject))
                    {
                        if (useLatestVersion)
                        {
                            var latest = await schemaRegistryClient.GetLatestSchemaAsync(subject)
                                .ConfigureAwait(false);

                            schemaId = latest.Id;
                        }
                        else
                        {
                            var jsonSchema = new Schema(this.schemaText, EmptyReferencesList, SchemaType.Json);

                            // first usage: register/get schema to check compatibility
                            if (autoRegisterSchema)
                            {
                                schemaId = await schemaRegistryClient.RegisterSchemaAsync(subject, jsonSchema, normalizeSchemas)
                                    .ConfigureAwait(false);
                            }
                            else
                            {
                                schemaId = await schemaRegistryClient.GetSchemaIdAsync(subject, jsonSchema, normalizeSchemas)
                                    .ConfigureAwait(false);
                            }

                            // TODO: It may be better to fail fast if conflicting values for schemaId are seen here.
                        }

                        subjectsRegistered.Add(subject);
                    }
                }
                finally
                {
                    serializeMutex.Release();
                }

                using (var stream = new MemoryStream(initialBufferSize))
                using (var writer = new BinaryWriter(stream))
                {
                    // Framing: magic byte, schema id in network byte order, then the UTF-8
                    // bytes of the JSON document.
                    stream.WriteByte(Constants.MagicByte);
                    writer.Write(IPAddress.HostToNetworkOrder(schemaId.Value));
                    writer.Write(System.Text.Encoding.UTF8.GetBytes(json));
                    return stream.ToArray();
                }
            }
            catch (AggregateException e)
            {
                // Surface the underlying failure rather than the aggregate wrapper.
                throw e.InnerException;
            }
        }