Beispiel #1
0
        /// <inheritdoc/>
        /// <remarks>
        ///     The payload is expected to start with the magic byte followed by a
        ///     4 byte schema id in network byte order; the remaining bytes are decoded
        ///     using a reader built from the writer schema registered under that id.
        ///     Readers are cached per schema id and the cache is reset once it grows
        ///     beyond <c>schemaRegistryClient.MaxCachedSchemas</c> entries.
        /// </remarks>
        /// <exception cref="InvalidDataException">
        ///     The payload is shorter than 5 bytes or does not start with the magic byte.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        ///     The writer schema registered under the embedded id is not an Avro schema.
        /// </exception>
        /// <exception cref="NotSupportedException">
        ///     The writer schema is a union with references and one of the referenced
        ///     schemas is not a record.
        /// </exception>
        public async Task<GenericRecord> Deserialize(string topic, byte[] array)
        {
            try
            {
                // Note: topic is not necessary for deserialization (or knowing if it's a key
                // or value) only the schema id is needed.

                if (array.Length < 5)
                {
                    throw new InvalidDataException($"Expecting data framing of length 5 bytes or more but total data size is {array.Length} bytes");
                }

                using (var stream = new MemoryStream(array))
                using (var reader = new BinaryReader(stream))
                {
                    var magicByte = reader.ReadByte();
                    if (magicByte != Constants.MagicByte)
                    {
                        throw new InvalidDataException($"Expecting data with Confluent Schema Registry framing. Magic byte was {array[0]}, expecting {Constants.MagicByte}");
                    }

                    // The schema id is framed in network byte order; convert it to host order.
                    var writerId = IPAddress.NetworkToHostOrder(reader.ReadInt32());

                    DatumReader<GenericRecord> datumReader;
                    await deserializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);

                    try
                    {
                        datumReaderBySchemaId.TryGetValue(writerId, out datumReader);
                        if (datumReader == null)
                        {
                            // TODO: If any of this cache fills up, this is probably an
                            // indication of misuse of the deserializer. Ideally we would do
                            // something more sophisticated than the below + not allow
                            // the misuse to keep happening without warning.
                            if (datumReaderBySchemaId.Count > schemaRegistryClient.MaxCachedSchemas)
                            {
                                datumReaderBySchemaId.Clear();
                            }

                            var writerSchemaResult = await schemaRegistryClient.GetSchemaAsync(writerId).ConfigureAwait(continueOnCapturedContext: false);

                            if (writerSchemaResult.SchemaType != SchemaType.Avro)
                            {
                                throw new InvalidOperationException($"Expecting writer schema to have type Avro, not {writerSchemaResult.SchemaType}");
                            }

                            Avro.Schema writerSchema = null;
                            if (writerSchemaResult.References.Any() && IsUnion(writerSchemaResult.SchemaString))
                            {
                                // The registered schema is a union of references: resolve every
                                // reference and assemble an inline JSON union ("[ {...}, {...} ]")
                                // out of the referenced schema strings.
                                StringBuilder schemaBuilder = new StringBuilder();
                                schemaBuilder.Append("[");
                                bool isFirstReference = true;
                                foreach (var refSchema in writerSchemaResult.References)
                                {
                                    var regSchema = await schemaRegistryClient.GetRegisteredSchemaAsync(refSchema.Subject,
                                                                                                        refSchema.Version)
                                                    .ConfigureAwait(continueOnCapturedContext: false);

                                    Avro.Schema schema = Avro.Schema.Parse(regSchema.SchemaString);

                                    if (schema.Tag != Avro.Schema.Type.Record)
                                    {
                                        throw new NotSupportedException("Only union schemas containing references to a record are supported for now");
                                    }

                                    // Separate entries with a flag rather than comparing against the
                                    // last reference, so a repeated reference cannot drop a comma.
                                    if (!isFirstReference)
                                    {
                                        schemaBuilder.Append(", ");
                                    }

                                    schemaBuilder.Append(regSchema.SchemaString);
                                    isFirstReference = false;
                                }

                                schemaBuilder.Append("]");

                                writerSchema = global::Avro.Schema.Parse(schemaBuilder.ToString());
                            }
                            else
                            {
                                writerSchema = global::Avro.Schema.Parse(writerSchemaResult.SchemaString);
                            }

                            // The writer schema is used as the reader schema as well.
                            datumReader = new GenericReader<GenericRecord>(writerSchema, writerSchema);
                            datumReaderBySchemaId[writerId] = datumReader;
                        }
                    }
                    finally
                    {
                        deserializeMutex.Release();
                    }

                    // The stream is positioned just past the 5 byte framing header here.
                    return datumReader.Read(default(GenericRecord), new BinaryDecoder(stream));
                }
            }
            catch (AggregateException e)
            {
                // Surface the underlying failure rather than the aggregate wrapper.
                throw e.InnerException;
            }
        }
Beispiel #2
0
        /// <summary>
        ///     Serialize GenericRecord instance to a byte array in Avro format. The serialized
        ///     data is preceded by a "magic byte" (1 byte) and the id of the schema as registered
        ///     in Confluent's Schema Registry (4 bytes, network byte order). This call may block or throw
        ///     on first use for a particular topic during schema registration.
        /// </summary>
        /// <param name="topic">
        ///     The topic associated with the data.
        /// </param>
        /// <param name="data">
        ///     The object to serialize.
        /// </param>
        /// <param name="isKey">
        ///     whether or not the data represents a message key.
        /// </param>
        /// <returns>
        ///     <paramref name="data" /> serialized as a byte array.
        /// </returns>
        /// <exception cref="KafkaException">
        ///     The schema id obtained for the writer schema differs from the id already
        ///     cached for the same schema string.
        /// </exception>
        /// <exception cref="NotSupportedException">
        ///     The latest registered schema is a union with references and one of the
        ///     referenced schemas is not a record.
        /// </exception>
        public async Task<byte[]> Serialize(string topic, GenericRecord data, bool isKey)
        {
            try
            {
                int schemaId;
                global::Avro.Schema encodingSchema;
                await serializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);

                try
                {
                    // TODO: If any of these caches fills up, this is probably an
                    // indication of misuse of the serializer. Ideally we would do
                    // something more sophisticated than the below + not allow
                    // the misuse to keep happening without warning.
                    if (knownSchemas.Count > schemaRegistryClient.MaxCachedSchemas ||
                        registeredSchemas.Count > schemaRegistryClient.MaxCachedSchemas ||
                        schemaIds.Count > schemaRegistryClient.MaxCachedSchemas)
                    {
                        knownSchemas.Clear();
                        registeredSchemas.Clear();
                        schemaIds.Clear();
                    }

                    // Determine a schema string corresponding to the schema object.
                    // TODO: It would be more efficient to use a hash function based
                    // on the instance reference, not the implementation provided by
                    // Schema.
                    global::Avro.Schema writerSchema = data.Schema;
                    string writerSchemaString;
                    if (!knownSchemas.TryGetValue(writerSchema, out writerSchemaString))
                    {
                        writerSchemaString = writerSchema.ToString();
                        knownSchemas.Add(writerSchema, writerSchemaString);
                    }

                    // Verify schema compatibility (& register as required) + get the
                    // id corresponding to the schema.

                    // TODO: Again, the hash functions in use below are potentially
                    // slow since writerSchemaString is potentially long. It would be
                    // better to use hash functions based on the writerSchemaString
                    // object reference, not value.

                    string subject = this.subjectNameStrategy != null
                                     // use the subject name strategy specified in the serializer config if available.
                        ? this.subjectNameStrategy(new SerializationContext(isKey ? MessageComponentType.Key : MessageComponentType.Value, topic), data.Schema.Fullname)
                                     // else fall back to the deprecated config from (or default as currently supplied by) SchemaRegistry.
                        : isKey
                            ? schemaRegistryClient.ConstructKeySubjectName(topic, data.Schema.Fullname)
                            : schemaRegistryClient.ConstructValueSubjectName(topic, data.Schema.Fullname);

                    var subjectSchemaPair = new KeyValuePair<string, string>(subject, writerSchemaString);
                    if (!registeredSchemas.Contains(subjectSchemaPair))
                    {
                        int newSchemaId;
                        // first usage: register/get schema to check compatibility
                        if (autoRegisterSchema)
                        {
                            newSchemaId = await schemaRegistryClient.RegisterSchemaAsync(subject, writerSchemaString).ConfigureAwait(continueOnCapturedContext: false);
                        }
                        // https://www.confluent.io/blog/multiple-event-types-in-the-same-kafka-topic/
                        else if (useLatestSchema)
                        {
                            RegisteredSchema regSchema = await schemaRegistryClient.GetLatestSchemaAsync(subject)
                                                         .ConfigureAwait(continueOnCapturedContext: false);

                            // Do we have an Avro union with schema references?
                            if (regSchema.References.Any() && IsUnion(regSchema.SchemaString))
                            {
                                StringBuilder schemaBuilder = new StringBuilder();
                                schemaBuilder.Append("[");
                                bool isFirstReference = true;

                                // We need to loop over the schema references and perform a schema
                                // registry lookup for each one in order to build the union of the
                                // referenced schemas.
                                foreach (var schemaReference in regSchema.References)
                                {
                                    RegisteredSchema registeredRefSchema = await schemaRegistryClient.GetRegisteredSchemaAsync(schemaReference.Subject,
                                                                                                                               schemaReference.Version)
                                                                           .ConfigureAwait(continueOnCapturedContext: false);

                                    Avro.Schema refSchema = Avro.Schema.Parse(registeredRefSchema.SchemaString);

                                    if (refSchema.Tag != Avro.Schema.Type.Record)
                                    {
                                        throw new NotSupportedException("Only union schemas containing references to a record are supported for now");
                                    }

                                    // Separate entries with a flag rather than comparing against the
                                    // last reference, so a repeated reference cannot drop a comma.
                                    if (!isFirstReference)
                                    {
                                        schemaBuilder.Append(",");
                                    }

                                    schemaBuilder.Append(registeredRefSchema.SchemaString);
                                    isFirstReference = false;
                                }

                                schemaBuilder.Append("]");
                                unionSchemas[writerSchema] = global::Avro.Schema.Parse(schemaBuilder.ToString());
                                newSchemaId = regSchema.Id;
                            }
                            else
                            {
                                newSchemaId = await schemaRegistryClient.GetSchemaIdAsync(subject, writerSchemaString)
                                              .ConfigureAwait(continueOnCapturedContext: false);
                            }
                        }
                        else
                        {
                            newSchemaId = await schemaRegistryClient.GetSchemaIdAsync(subject, writerSchemaString).ConfigureAwait(continueOnCapturedContext: false);
                        }

                        int cachedSchemaId;
                        if (!schemaIds.TryGetValue(writerSchemaString, out cachedSchemaId))
                        {
                            schemaIds.Add(writerSchemaString, newSchemaId);
                        }
                        else if (cachedSchemaId != newSchemaId)
                        {
                            // The cached id was read before the caches are cleared: looking it up
                            // in schemaIds after Clear() would fail with a missing-key error
                            // instead of raising the intended KafkaException.
                            schemaIds.Clear();
                            registeredSchemas.Clear();
                            throw new KafkaException(new Error(isKey ? ErrorCode.Local_KeySerialization : ErrorCode.Local_ValueSerialization, $"Duplicate schema registration encountered: Schema ids {cachedSchemaId} and {newSchemaId} are associated with the same schema."));
                        }

                        registeredSchemas.Add(subjectSchemaPair);
                    }

                    schemaId = schemaIds[writerSchemaString];

                    // unionSchemas is written while the mutex is held (above), so it is read
                    // under the mutex as well. When a union was built for this writer schema,
                    // the record is encoded against that union instead of its own schema.
                    global::Avro.Schema unionSchema;
                    encodingSchema = unionSchemas.TryGetValue(writerSchema, out unionSchema)
                        ? unionSchema
                        : writerSchema;
                }
                finally
                {
                    serializeMutex.Release();
                }

                using (var stream = new MemoryStream(initialBufferSize))
                using (var writer = new BinaryWriter(stream))
                {
                    // Framing: magic byte, then the schema id in network byte order,
                    // then the Avro binary encoding of the record.
                    stream.WriteByte(Constants.MagicByte);
                    writer.Write(IPAddress.HostToNetworkOrder(schemaId));
                    new GenericWriter<GenericRecord>(encodingSchema)
                        .Write(data, new BinaryEncoder(stream));
                    return stream.ToArray();
                }
            }
            catch (AggregateException e)
            {
                // Surface the underlying failure rather than the aggregate wrapper.
                throw e.InnerException;
            }
        }