/// <summary>
/// Recursively registers (or looks up) the schema of every file that <paramref name="fd" />
/// depends on and returns one schema reference per processed dependency.
/// </summary>
/// <param name="fd">The file descriptor whose dependencies are processed.</param>
/// <param name="context">The serialization context handed to the reference subject name strategy.</param>
/// <param name="autoRegisterSchema">
/// When true, each dependency is registered with <c>RegisterSchemaAsync</c>; otherwise its id is
/// requested with <c>GetSchemaIdAsync</c>.
/// </param>
/// <param name="skipKnownTypes">
/// When true, dependencies whose file name starts with "google/protobuf/" are skipped.
/// </param>
/// <returns>
/// The references of the processed dependencies, in the order they appear in <c>fd.Dependencies</c>.
/// </returns>
/// <remarks>
/// note: protobuf does not support circular file references, so this possibility isn't considered.
/// </remarks>
private async Task<List<SchemaReference>> RegisterOrGetReferences(FileDescriptor fd, SerializationContext context, bool autoRegisterSchema, bool skipKnownTypes)
{
    Func<FileDescriptor, Task<SchemaReference>> resolveDependency = async dependency =>
    {
        // Dependencies of the dependency must be known before its own schema can be built.
        var dependencyReferences = await RegisterOrGetReferences(dependency, context, autoRegisterSchema, skipKnownTypes).ConfigureAwait(continueOnCapturedContext: false);
        var subject = referenceSubjectNameStrategy(context, dependency.Name);
        var schema = new Schema(dependency.SerializedData.ToBase64(), dependencyReferences, SchemaType.Protobuf);

        // The returned id is not needed here; the call is made for its registration / verification effect.
        if (autoRegisterSchema)
        {
            await schemaRegistryClient.RegisterSchemaAsync(subject, schema).ConfigureAwait(continueOnCapturedContext: false);
        }
        else
        {
            await schemaRegistryClient.GetSchemaIdAsync(subject, schema).ConfigureAwait(continueOnCapturedContext: false);
        }

        var registeredDependentSchema = await schemaRegistryClient.LookupSchemaAsync(subject, schema, true).ConfigureAwait(continueOnCapturedContext: false);
        return new SchemaReference(dependency.Name, subject, registeredDependentSchema.Version);
    };

    var tasks = new List<Task<SchemaReference>>();
    for (int i = 0; i < fd.Dependencies.Count; ++i)
    {
        FileDescriptor fileDescriptor = fd.Dependencies[i];

        // File names are identifiers, not linguistic text, so compare them ordinally.
        if (skipKnownTypes && fileDescriptor.Name.StartsWith("google/protobuf/", StringComparison.Ordinal))
        {
            continue;
        }

        tasks.Add(resolveDependency(fileDescriptor));
    }

    // WhenAll yields the results in the same order as the tasks were supplied.
    SchemaReference[] references = await Task.WhenAll(tasks).ConfigureAwait(continueOnCapturedContext: false);
    return new List<SchemaReference>(references);
}
/// <summary>
/// Recursively registers (or looks up) the schema of every file that <paramref name="fd" />
/// depends on and returns one schema reference per dependency.
/// </summary>
/// <param name="fd">The file descriptor whose dependencies are processed.</param>
/// <param name="context">The serialization context handed to the reference subject name strategy.</param>
/// <param name="autoRegisterSchema">
/// When true, each dependency is registered with <c>RegisterSchemaAsync</c>; otherwise its id is
/// requested with <c>GetSchemaIdAsync</c>.
/// </param>
/// <returns>
/// The references of all dependencies, in the order they appear in <c>fd.Dependencies</c>.
/// </returns>
/// <remarks>
/// note: protobuf does not support circular file references, so this possibility isn't considered.
/// </remarks>
private async Task<List<SchemaReference>> RegisterOrGetReferences(FileDescriptor fd, SerializationContext context, bool autoRegisterSchema)
{
    // Each task returns its own reference instead of appending to a shared list: the
    // continuations may run concurrently on different threads, and List<T>.Add is not
    // safe for concurrent writers.
    Func<FileDescriptor, Task<SchemaReference>> resolveDependency = async dependency =>
    {
        // Dependencies of the dependency must be known before its own schema can be built.
        var dependencyReferences = await RegisterOrGetReferences(dependency, context, autoRegisterSchema).ConfigureAwait(continueOnCapturedContext: false);
        var subject = referenceSubjectNameStrategy(context, dependency.Name);
        var schema = new Schema(dependency.SerializedData.ToBase64(), dependencyReferences, SchemaType.Protobuf);

        // The returned id is not needed here; the call is made for its registration / verification effect.
        if (autoRegisterSchema)
        {
            await schemaRegistryClient.RegisterSchemaAsync(subject, schema).ConfigureAwait(continueOnCapturedContext: false);
        }
        else
        {
            await schemaRegistryClient.GetSchemaIdAsync(subject, schema).ConfigureAwait(continueOnCapturedContext: false);
        }

        var registeredDependentSchema = await schemaRegistryClient.LookupSchemaAsync(subject, schema, true).ConfigureAwait(continueOnCapturedContext: false);
        return new SchemaReference(dependency.Name, subject, registeredDependentSchema.Version);
    };

    var tasks = new List<Task<SchemaReference>>(fd.Dependencies.Count);
    for (int i = 0; i < fd.Dependencies.Count; ++i)
    {
        tasks.Add(resolveDependency(fd.Dependencies[i]));
    }

    // WhenAll yields the results in the same order as the tasks were supplied.
    SchemaReference[] references = await Task.WhenAll(tasks).ConfigureAwait(continueOnCapturedContext: false);
    return new List<SchemaReference>(references);
}
/// <summary>
/// Creates a serializer that performs its Schema Registry operations through the supplied client.
/// </summary>
/// <param name="registryClient">
/// A client to use for Schema Registry operations. (The client will not be disposed.)
/// </param>
/// <param name="registerAutomatically">
/// Whether to automatically register schemas that match the type being serialized.
/// </param>
/// <param name="schemaBuilder">
/// A schema builder (used to build a schema for a C# type when registering automatically).
/// If none is provided, the default schema builder will be used.
/// </param>
/// <param name="schemaReader">
/// A JSON schema reader (used to convert schemas received from the registry into abstract
/// representations). If none is provided, the default schema reader will be used.
/// </param>
/// <param name="schemaWriter">
/// A JSON schema writer (used to convert abstract schema representations when registering
/// automatically). If none is provided, the default schema writer will be used.
/// </param>
/// <param name="serializerBuilder">
/// A serializer builder (used to build serialization functions for C# types). If none is
/// provided, the default serializer builder will be used.
/// </param>
/// <param name="subjectNameBuilder">
/// A function that determines the subject name given the topic name and a component type
/// (key or value). If none is provided, the default "{topic name}-{component}" naming
/// convention will be used.
/// </param>
/// <exception cref="ArgumentNullException">
/// Thrown when the registry client is null.
/// </exception>
public AsyncSchemaRegistrySerializer(
    ISchemaRegistryClient registryClient,
    bool registerAutomatically = false,
    Abstract.ISchemaBuilder schemaBuilder = null,
    IJsonSchemaReader schemaReader = null,
    IJsonSchemaWriter schemaWriter = null,
    IBinarySerializerBuilder serializerBuilder = null,
    Func<SerializationContext, string> subjectNameBuilder = null
) : this(
    registerAutomatically,
    schemaBuilder,
    schemaReader,
    schemaWriter,
    serializerBuilder,
    subjectNameBuilder
)
{
    if (registryClient is null)
    {
        throw new ArgumentNullException(nameof(registryClient));
    }

    // Route both registry operations through the client; only the delegates are stored.
    _resolve = subjectName => registryClient.GetLatestSchemaAsync(subjectName);
    _register = (subjectName, schemaJson) => registryClient.RegisterSchemaAsync(subjectName, schemaJson);
}
/// <summary>
/// Creates a serializer that performs its Schema Registry operations through the supplied client.
/// </summary>
/// <param name="registryClient">
/// The client to use for Schema Registry operations. (The client will not be disposed.)
/// </param>
/// <param name="registerAutomatically">
/// Whether to automatically register schemas that match the type being serialized.
/// </param>
/// <param name="schemaBuilder">
/// The schema builder to use to create a schema for a C# type when registering automatically.
/// If none is provided, a new <c>Abstract.SchemaBuilder</c> is used.
/// </param>
/// <param name="schemaReader">
/// The JSON schema reader to use to convert schemas received from the registry into abstract
/// representations. If none is provided, a new <c>JsonSchemaReader</c> is used.
/// </param>
/// <param name="schemaWriter">
/// The JSON schema writer to use to convert abstract schema representations when registering
/// automatically. If none is provided, a new <c>JsonSchemaWriter</c> is used.
/// </param>
/// <param name="serializerBuilder">
/// The serializer builder to use to build serialization functions for C# types. If none
/// is provided, a new <c>BinarySerializerBuilder</c> is used.
/// </param>
/// <param name="subjectNameBuilder">
/// A function that determines the subject name given the topic name and a component type
/// (key or value). If none is provided, the default "{topic name}-{component}" naming
/// convention will be used.
/// </param>
/// <exception cref="ArgumentNullException">
/// Thrown when the registry client is null.
/// </exception>
public AsyncSchemaRegistrySerializer(
    ISchemaRegistryClient registryClient,
    bool registerAutomatically = false,
    Abstract.ISchemaBuilder? schemaBuilder = null,
    IJsonSchemaReader? schemaReader = null,
    IJsonSchemaWriter? schemaWriter = null,
    IBinarySerializerBuilder? serializerBuilder = null,
    Func<SerializationContext, string>? subjectNameBuilder = null
)
{
    if (registryClient is null)
    {
        throw new ArgumentNullException(nameof(registryClient));
    }

    // Registry access goes through these two delegates; the client itself is only captured.
    _resolve = subjectName => registryClient.GetLatestSchemaAsync(subjectName);
    _register = (subjectName, schemaJson) => registryClient.RegisterSchemaAsync(subjectName, schemaJson);
    _cache = new ConcurrentDictionary<string, Task<Func<T, byte[]>>>();

    // Every optional collaborator falls back to its default implementation.
    RegisterAutomatically = registerAutomatically;
    SchemaBuilder = schemaBuilder ?? new Abstract.SchemaBuilder();
    SchemaReader = schemaReader ?? new JsonSchemaReader();
    SchemaWriter = schemaWriter ?? new JsonSchemaWriter();
    SerializerBuilder = serializerBuilder ?? new BinarySerializerBuilder();
    SubjectNameBuilder = subjectNameBuilder
        ?? (c => $"{c.Topic}-{(c.Component == MessageComponentType.Key ? "key" : "value")}");
}
/// <summary>
/// Serializes <paramref name="data" /> as a byte array consisting of the magic byte, the
/// writer schema id in network byte order, and the Avro-encoded payload.
/// </summary>
/// <param name="topic">
/// The topic associated with the data; used to derive the key or value subject name.
/// </param>
/// <param name="data">
/// The object to serialize.
/// </param>
/// <returns>
/// <paramref name="data" /> serialized as a byte array.
/// </returns>
/// <remarks>
/// The first call for a given topic blocks on a Schema Registry request while holding the
/// serializer lock.
/// </remarks>
public byte[] Serialize(string topic, T data)
{
    lock (serializeLockObj)
    {
        bool alreadyRegistered = topicsRegistered.Contains(topic);
        if (!alreadyRegistered)
        {
            string subject;
            if (isKey)
            {
                subject = schemaRegistryClient.ConstructKeySubjectName(topic);
            }
            else
            {
                subject = schemaRegistryClient.ConstructValueSubjectName(topic);
            }

            // first usage: register/get schema to check compatibility
            if (autoRegisterSchema)
            {
                writerSchemaId = schemaRegistryClient.RegisterSchemaAsync(subject, writerSchemaString)
                    .ConfigureAwait(continueOnCapturedContext: false)
                    .GetAwaiter()
                    .GetResult();
            }
            else
            {
                writerSchemaId = schemaRegistryClient.GetSchemaIdAsync(subject, writerSchemaString)
                    .ConfigureAwait(continueOnCapturedContext: false)
                    .GetAwaiter()
                    .GetResult();
            }

            topicsRegistered.Add(topic);
        }
    }

    using (var stream = new MemoryStream(initialBufferSize))
    using (var writer = new BinaryWriter(stream))
    {
        // Framing: magic byte, then the schema id in network byte order, then the Avro payload.
        stream.WriteByte(Constants.MagicByte);
        int networkOrderSchemaId = IPAddress.HostToNetworkOrder(writerSchemaId.Value);
        writer.Write(networkOrderSchemaId);
        avroWriter.Write(data, new BinaryEncoder(stream));

        // TODO: maybe change the ISerializer interface so that this copy isn't necessary.
        byte[] payload = stream.ToArray();
        return payload;
    }
}
/// <summary>
/// Creates a serializer that performs its Schema Registry operations through the supplied client.
/// </summary>
/// <param name="registryClient">
/// The client to use for Schema Registry operations. (The client will not be disposed.)
/// </param>
/// <param name="registerAutomatically">
/// When to automatically register schemas that match the type being serialized.
/// </param>
/// <param name="schemaBuilder">
/// The schema builder to use to create a schema for a C# type when registering automatically.
/// If none is provided, a new <c>Abstract.SchemaBuilder</c> is used.
/// </param>
/// <param name="schemaReader">
/// The JSON schema reader to use to convert schemas received from the registry into abstract
/// representations. If none is provided, a new <c>JsonSchemaReader</c> is used.
/// </param>
/// <param name="schemaWriter">
/// The JSON schema writer to use to convert abstract schema representations when registering
/// automatically. If none is provided, a new <c>JsonSchemaWriter</c> is used.
/// </param>
/// <param name="serializerBuilder">
/// The serializer builder to use to build serialization functions for C# types. If none
/// is provided, a new <c>BinarySerializerBuilder</c> is used.
/// </param>
/// <param name="subjectNameBuilder">
/// A function that determines the subject name given the topic name and a component type
/// (key or value). If none is provided, the default "{topic name}-{component}" naming
/// convention will be used.
/// </param>
/// <param name="tombstoneBehavior">
/// The behavior of the serializer on tombstone records.
/// </param>
/// <exception cref="ArgumentNullException">
/// Thrown when the registry client is null.
/// </exception>
/// <exception cref="UnsupportedTypeException">
/// Thrown when a tombstone behavior other than <c>None</c> is requested for a type whose
/// default value is not null.
/// </exception>
public AsyncSchemaRegistrySerializer(
    ISchemaRegistryClient registryClient,
    AutomaticRegistrationBehavior registerAutomatically = AutomaticRegistrationBehavior.Never,
    Abstract.ISchemaBuilder schemaBuilder = null,
    IJsonSchemaReader schemaReader = null,
    IJsonSchemaWriter schemaWriter = null,
    IBinarySerializerBuilder serializerBuilder = null,
    Func<SerializationContext, string> subjectNameBuilder = null,
    TombstoneBehavior tombstoneBehavior = TombstoneBehavior.None
)
{
    if (registryClient is null)
    {
        throw new ArgumentNullException(nameof(registryClient));
    }

    if (tombstoneBehavior != TombstoneBehavior.None)
    {
        // A tombstone is a null value, so T must be able to hold null.
        if (default(T) != null)
        {
            throw new UnsupportedTypeException(typeof(T), $"{typeof(T)} cannot represent tombstone values.");
        }
    }

    // Registry access goes through these two delegates; the client itself is only captured.
    _resolve = subjectName => registryClient.GetLatestSchemaAsync(subjectName);
    _register = (subjectName, schemaJson) => registryClient.RegisterSchemaAsync(subjectName, schemaJson);
    _cache = new Dictionary<string, Task<Func<T, byte[]>>>();

    // Every optional collaborator falls back to its default implementation.
    RegisterAutomatically = registerAutomatically;
    SchemaBuilder = schemaBuilder ?? new Abstract.SchemaBuilder();
    SchemaReader = schemaReader ?? new JsonSchemaReader();
    SchemaWriter = schemaWriter ?? new JsonSchemaWriter();
    SerializerBuilder = serializerBuilder ?? new BinarySerializerBuilder();
    SubjectNameBuilder = subjectNameBuilder
        ?? (c => $"{c.Topic}-{(c.Component == MessageComponentType.Key ? "key" : "value")}");
    TombstoneBehavior = tombstoneBehavior;
}
/// <summary>
/// Serializes <paramref name="data" /> as a byte array consisting of the magic byte, the
/// writer schema id in network byte order, and the Avro-encoded payload. The first call for
/// a given subject contacts Schema Registry to register the writer schema or to obtain its id.
/// </summary>
/// <param name="topic">
/// The topic associated with the data.
/// </param>
/// <param name="data">
/// The object to serialize.
/// </param>
/// <param name="isKey">
/// Whether or not the data represents a message key.
/// </param>
/// <returns>
/// A task that completes with <paramref name="data" /> serialized as a byte array.
/// </returns>
public async Task<byte[]> Serialize(string topic, T data, bool isKey)
{
    try
    {
        await serializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);
        try
        {
            // The record's full name is only available for specific records backed by a record schema.
            var recordSchema = (data as ISpecificRecord)?.Schema as Avro.RecordSchema;
            string fullname = recordSchema?.Fullname;

            string subject = this.subjectNameStrategy != null
                // use the subject name strategy specified in the serializer config if available.
                ? this.subjectNameStrategy(new SerializationContext(isKey ? MessageComponentType.Key : MessageComponentType.Value, topic), fullname)
                // else fall back to the deprecated config from (or default as currently supplied by) SchemaRegistry.
                : isKey
                    ? schemaRegistryClient.ConstructKeySubjectName(topic, fullname)
                    : schemaRegistryClient.ConstructValueSubjectName(topic, fullname);

            if (!subjectsRegistered.Contains(subject))
            {
                // first usage: register/get schema to check compatibility
                writerSchemaId = autoRegisterSchema
                    ? await schemaRegistryClient.RegisterSchemaAsync(subject, writerSchemaString).ConfigureAwait(continueOnCapturedContext: false)
                    : await schemaRegistryClient.GetSchemaIdAsync(subject, writerSchemaString).ConfigureAwait(continueOnCapturedContext: false);

                subjectsRegistered.Add(subject);
            }
        }
        finally
        {
            serializeMutex.Release();
        }

        using (var stream = new MemoryStream(initialBufferSize))
        using (var writer = new BinaryWriter(stream))
        {
            stream.WriteByte(Constants.MagicByte);
            writer.Write(IPAddress.HostToNetworkOrder(writerSchemaId.Value));
            avroWriter.Write(data, new BinaryEncoder(stream));

            // TODO: maybe change the ISerializer interface so that this copy isn't necessary.
            return stream.ToArray();
        }
    }
    catch (AggregateException e) when (e.InnerException != null)
    {
        // Surface the underlying exception without resetting its stack trace. An
        // AggregateException without an inner exception is left to propagate unchanged.
        System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(e.InnerException).Throw();
        throw; // unreachable: Throw() never returns, but the compiler cannot know that.
    }
}
/// <summary>
/// Serializes <paramref name="data" /> as a byte array consisting of the magic byte, the
/// writer schema id in network byte order, and the Avro-encoded payload. The first call for
/// a given subject contacts Schema Registry to register the writer schema or to obtain its id.
/// </summary>
/// <param name="topic">
/// The topic associated with the data.
/// </param>
/// <param name="data">
/// The object to serialize.
/// </param>
/// <param name="isKey">
/// Whether or not the data represents a message key.
/// </param>
/// <returns>
/// A task that completes with <paramref name="data" /> serialized as a byte array.
/// </returns>
public async Task<byte[]> Serialize(string topic, T data, bool isKey)
{
    try
    {
        await serializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);
        try
        {
            string subject = isKey
                ? schemaRegistryClient.ConstructKeySubjectName(topic, typeof(T).FullName)
                : schemaRegistryClient.ConstructValueSubjectName(topic, typeof(T).FullName);

            if (!subjectsRegistered.Contains(subject))
            {
                // first usage: register/get schema to check compatibility
                writerSchemaId = autoRegisterSchema
                    ? await schemaRegistryClient.RegisterSchemaAsync(subject, writerSchemaString).ConfigureAwait(continueOnCapturedContext: false)
                    : await schemaRegistryClient.GetSchemaIdAsync(subject, writerSchemaString).ConfigureAwait(continueOnCapturedContext: false);

                subjectsRegistered.Add(subject);
            }
        }
        finally
        {
            serializeMutex.Release();
        }

        using (var stream = new MemoryStream(initialBufferSize))
        using (var writer = new BinaryWriter(stream))
        {
            stream.WriteByte(Constants.MagicByte);
            writer.Write(IPAddress.HostToNetworkOrder(writerSchemaId.Value));
            avroWriter.Write(data, new BinaryEncoder(stream));

            // TODO: maybe change the ISerializer interface so that this copy isn't necessary.
            return stream.ToArray();
        }
    }
    catch (AggregateException e) when (e.InnerException != null)
    {
        // Surface the underlying exception without resetting its stack trace. An
        // AggregateException without an inner exception is left to propagate unchanged.
        System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(e.InnerException).Throw();
        throw; // unreachable: Throw() never returns, but the compiler cannot know that.
    }
}
/// <summary>
/// Builds a serializer for a specific schema.
/// </summary>
/// <param name="subject">
/// The subject of the schema that should be used to serialize data. The latest version of
/// the subject will be resolved.
/// </param>
/// <param name="registerAutomatically">
/// Whether to automatically register a schema that matches <typeparamref name="T" /> if
/// one does not already exist.
/// </param>
/// <returns>
/// A task that completes with a serializer built from either the latest schema of the
/// subject or, on the automatic registration path, the newly registered schema.
/// </returns>
/// <exception cref="UnsupportedTypeException">
/// Thrown when the type is incompatible with the retrieved schema or a matching schema
/// cannot be generated.
/// </exception>
public async Task<ISerializer<T>> Build<T>(string subject, bool registerAutomatically = false)
{
    try
    {
        // Library code: do not resume on the caller's synchronization context.
        var schema = await RegistryClient.GetLatestSchemaAsync(subject).ConfigureAwait(false);

        return Build<T>(schema.Id, schema.SchemaString);
    }
    catch (Exception e) when (registerAutomatically && (
        // 40401 is documented by Schema Registry as "subject not found".
        (e is SchemaRegistryException sre && sre.ErrorCode == 40401) ||
        (e is UnsupportedTypeException)
    ))
    {
        // Either nothing is registered for the subject or the latest schema does not fit T:
        // generate a schema from T and register it.
        var schema = _schemaBuilder.BuildSchema<T>();
        var json = _schemaWriter.Write(schema);
        var id = await RegistryClient.RegisterSchemaAsync(subject, json).ConfigureAwait(false);

        return Build<T>(id, json);
    }
}
/// <summary>
/// Serializes <paramref name="data" /> as a byte array consisting of the magic byte, the
/// writer schema id in network byte order, and the Avro-encoded payload. The writer schema is
/// read lazily from the public static <c>_SCHEMA</c> field of <typeparamref name="T" />, and the
/// first call for a given subject contacts Schema Registry to register it or to obtain its id.
/// </summary>
/// <param name="topic">
/// The topic associated with the data.
/// </param>
/// <param name="data">
/// The object to serialize.
/// </param>
/// <param name="isKey">
/// Whether or not the data represents a message key.
/// </param>
/// <returns>
/// A task that completes with <paramref name="data" /> serialized as a byte array.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Thrown when <typeparamref name="T" /> does not declare a public static <c>_SCHEMA</c> field.
/// </exception>
public async Task<byte[]> Serialize(string topic, T data, bool isKey)
{
    try
    {
        await _serializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);
        try
        {
            // Lazily resolve the writer schema. This happens under the mutex so that no
            // caller can observe _writerSchema set while _avroWriter is still null.
            if (_writerSchema == null)
            {
                var schemaField = typeof(T).GetField("_SCHEMA", BindingFlags.Public | BindingFlags.Static);
                if (schemaField == null)
                {
                    throw new InvalidOperationException($"Type {typeof(T)} does not declare a public static _SCHEMA field.");
                }

                var resolvedSchema = (global::Avro.Schema)schemaField.GetValue(null);
                _writerSchemaString = resolvedSchema.ToString();
                _avroWriter = new SpecificWriter<T>(resolvedSchema);

                // Assigned last: a non-null _writerSchema marks initialization as complete.
                _writerSchema = resolvedSchema;
            }

            var subject = isKey
                ? SubjectNameFactory.KeySubjectNameFrom<T>(topic)
                : SubjectNameFactory.ValueSubjectNameFrom<T>(topic);

            if (!_subjectsRegistered.Contains(subject))
            {
                // first usage: register/get schema to check compatibility
                _writerSchemaId = _autoRegisterSchema
                    ? await _schemaRegistryClient.RegisterSchemaAsync(subject, _writerSchemaString).ConfigureAwait(continueOnCapturedContext: false)
                    : await _schemaRegistryClient.GetSchemaIdAsync(subject, _writerSchemaString).ConfigureAwait(continueOnCapturedContext: false);

                _subjectsRegistered.Add(subject);
            }
        }
        finally
        {
            _serializeMutex.Release();
        }

        if (_writerSchemaId.HasValue == false)
        {
            throw new Exception("Not SchemaId Available For Message");
        }

        using (var stream = new MemoryStream(_initialBufferSize))
        using (var writer = new BinaryWriter(stream))
        {
            stream.WriteByte(ConfluentConstants.MagicByte);
            writer.Write(IPAddress.HostToNetworkOrder(_writerSchemaId.Value));
            _avroWriter.Write(data, new BinaryEncoder(stream));

            // TODO: maybe change the ISerializer interface so that this copy isn't necessary.
            return stream.ToArray();
        }
    }
    catch (AggregateException e) when (e.InnerException != null)
    {
        // Surface the underlying exception without resetting its stack trace. An
        // AggregateException without an inner exception is left to propagate unchanged.
        System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(e.InnerException).Throw();
        throw; // unreachable: Throw() never returns, but the compiler cannot know that.
    }
}
/// <summary>
/// Serialize GenericRecord instance to a byte array in Avro format. The serialized
/// data is preceded by a "magic byte" (1 byte) and the id of the schema as registered
/// in Confluent's Schema Registry (4 bytes, network byte order). This call may block or throw
/// on first use for a particular topic during schema registration.
/// </summary>
/// <param name="topic">
/// The topic associated with the data.
/// </param>
/// <param name="data">
/// The object to serialize.
/// </param>
/// <returns>
/// <paramref name="data" /> serialized as a byte array.
/// </returns>
public byte[] Serialize(string topic, GenericRecord data)
{
    int schemaId;
    Avro.RecordSchema writerSchema;
    lock (serializeLockObj)
    {
        // TODO: If any of these caches fills up, this is probably an
        // indication of misuse of the serializer. Ideally we would do
        // something more sophisticated than the below + not allow
        // the misuse to keep happening without warning.
        if (knownSchemas.Count > schemaRegistryClient.MaxCachedSchemas ||
            registeredSchemas.Count > schemaRegistryClient.MaxCachedSchemas ||
            schemaIds.Count > schemaRegistryClient.MaxCachedSchemas)
        {
            knownSchemas.Clear();
            registeredSchemas.Clear();
            schemaIds.Clear();
        }

        // Determine a schema string corresponding to the schema object.
        // TODO: It would be more efficient to use a hash function based
        // on the instance reference, not the implementation provided by
        // Schema.
        writerSchema = data.Schema;
        string writerSchemaString;
        if (!knownSchemas.TryGetValue(writerSchema, out writerSchemaString))
        {
            writerSchemaString = writerSchema.ToString();
            knownSchemas.Add(writerSchema, writerSchemaString);
        }

        // Verify schema compatibility (& register as required) + get the
        // id corresponding to the schema.
        // TODO: Again, the hash functions in use below are potentially
        // slow since writerSchemaString is potentially long. It would be
        // better to use hash functions based on the writerSchemaString
        // object reference, not value.
        string subject = this.isKey
            ? schemaRegistryClient.ConstructKeySubjectName(topic)
            : schemaRegistryClient.ConstructValueSubjectName(topic);
        var subjectSchemaPair = new KeyValuePair<string, string>(subject, writerSchemaString);
        if (!registeredSchemas.Contains(subjectSchemaPair))
        {
            // first usage: register/get schema to check compatibility
            int registeredId = autoRegisterSchema
                ? schemaRegistryClient.RegisterSchemaAsync(subject, writerSchemaString).ConfigureAwait(false).GetAwaiter().GetResult()
                : schemaRegistryClient.GetSchemaIdAsync(subject, writerSchemaString).ConfigureAwait(false).GetAwaiter().GetResult();

            // Stored through the indexer rather than Add: the same schema string may already
            // be cached from another subject (e.g. a second topic), and Add would then throw
            // an ArgumentException for the duplicate key.
            schemaIds[writerSchemaString] = registeredId;

            registeredSchemas.Add(subjectSchemaPair);
        }

        schemaId = schemaIds[writerSchemaString];
    }

    using (var stream = new MemoryStream(initialBufferSize))
    using (var writer = new BinaryWriter(stream))
    {
        stream.WriteByte(Constants.MagicByte);
        writer.Write(IPAddress.HostToNetworkOrder(schemaId));
        new GenericWriter<GenericRecord>(writerSchema)
            .Write(data, new BinaryEncoder(stream));
        return stream.ToArray();
    }
}
/// <summary>
/// Serialize GenericRecord instance to a byte array in Avro format. The serialized
/// data is preceded by a "magic byte" (1 byte) and the id of the schema as registered
/// in Confluent's Schema Registry (4 bytes, network byte order). This call may block or throw
/// on first use for a particular topic during schema registration.
/// </summary>
/// <param name="topic">
/// The topic associated with the data.
/// </param>
/// <param name="data">
/// The object to serialize.
/// </param>
/// <param name="isKey">
/// whether or not the data represents a message key.
/// </param>
/// <returns>
/// <paramref name="data" /> serialized as a byte array.
/// </returns>
/// <exception cref="KafkaException">
/// Thrown when two different schema ids are encountered for the same schema string.
/// </exception>
/// <exception cref="NotSupportedException">
/// Thrown when the latest schema is a union that references a schema that is not a record.
/// </exception>
public async Task<byte[]> Serialize(string topic, GenericRecord data, bool isKey)
{
    try
    {
        int schemaId;
        global::Avro.Schema writerSchema;
        await serializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);
        try
        {
            // TODO: If any of these caches fills up, this is probably an
            // indication of misuse of the serializer. Ideally we would do
            // something more sophisticated than the below + not allow
            // the misuse to keep happening without warning.
            if (knownSchemas.Count > schemaRegistryClient.MaxCachedSchemas ||
                registeredSchemas.Count > schemaRegistryClient.MaxCachedSchemas ||
                schemaIds.Count > schemaRegistryClient.MaxCachedSchemas)
            {
                knownSchemas.Clear();
                registeredSchemas.Clear();
                schemaIds.Clear();
            }

            // Determine a schema string corresponding to the schema object.
            // TODO: It would be more efficient to use a hash function based
            // on the instance reference, not the implementation provided by
            // Schema.
            writerSchema = data.Schema;
            string writerSchemaString;
            if (!knownSchemas.TryGetValue(writerSchema, out writerSchemaString))
            {
                writerSchemaString = writerSchema.ToString();
                knownSchemas.Add(writerSchema, writerSchemaString);
            }

            // Verify schema compatibility (& register as required) + get the
            // id corresponding to the schema.
            // TODO: Again, the hash functions in use below are potentially
            // slow since writerSchemaString is potentially long. It would be
            // better to use hash functions based on the writerSchemaString
            // object reference, not value.
            string subject = this.subjectNameStrategy != null
                // use the subject name strategy specified in the serializer config if available.
                ? this.subjectNameStrategy(new SerializationContext(isKey ? MessageComponentType.Key : MessageComponentType.Value, topic), data.Schema.Fullname)
                // else fall back to the deprecated config from (or default as currently supplied by) SchemaRegistry.
                : isKey
                    ? schemaRegistryClient.ConstructKeySubjectName(topic, data.Schema.Fullname)
                    : schemaRegistryClient.ConstructValueSubjectName(topic, data.Schema.Fullname);
            var subjectSchemaPair = new KeyValuePair<string, string>(subject, writerSchemaString);
            if (!registeredSchemas.Contains(subjectSchemaPair))
            {
                int newSchemaId;
                // first usage: register/get schema to check compatibility
                if (autoRegisterSchema)
                {
                    newSchemaId = await schemaRegistryClient.RegisterSchemaAsync(subject, writerSchemaString).ConfigureAwait(continueOnCapturedContext: false);
                }
                // https://www.confluent.io/blog/multiple-event-types-in-the-same-kafka-topic/
                else if (useLatestSchema)
                {
                    RegisteredSchema regSchema = await schemaRegistryClient.GetLatestSchemaAsync(subject)
                        .ConfigureAwait(continueOnCapturedContext: false);

                    // Do we have an Avro union with schema references?
                    if (regSchema.References.Any() && IsUnion(regSchema.SchemaString))
                    {
                        // Look up every referenced schema in the registry and collect the
                        // schema strings so that the union can be rebuilt from them.
                        var referencedSchemaStrings = new List<string>();
                        foreach (var reference in regSchema.References)
                        {
                            RegisteredSchema registeredRefSchema = await schemaRegistryClient.GetRegisteredSchemaAsync(reference.Subject, reference.Version)
                                .ConfigureAwait(continueOnCapturedContext: false);

                            Avro.Schema refSchema = Avro.Schema.Parse(registeredRefSchema.SchemaString);
                            if (refSchema.Tag != Avro.Schema.Type.Record)
                            {
                                throw new NotSupportedException("Only union schemas containing references to a record are supported for now");
                            }

                            referencedSchemaStrings.Add(registeredRefSchema.SchemaString);
                        }

                        // Joining places the separators by position, so the result stays valid
                        // even if the same reference is listed more than once.
                        unionSchemas[writerSchema] = global::Avro.Schema.Parse("[" + string.Join(",", referencedSchemaStrings) + "]");
                        newSchemaId = regSchema.Id;
                    }
                    else
                    {
                        newSchemaId = await schemaRegistryClient.GetSchemaIdAsync(subject, writerSchemaString)
                            .ConfigureAwait(continueOnCapturedContext: false);
                    }
                }
                else
                {
                    newSchemaId = await schemaRegistryClient.GetSchemaIdAsync(subject, writerSchemaString).ConfigureAwait(continueOnCapturedContext: false);
                }

                int cachedSchemaId;
                if (!schemaIds.TryGetValue(writerSchemaString, out cachedSchemaId))
                {
                    schemaIds.Add(writerSchemaString, newSchemaId);
                }
                else if (cachedSchemaId != newSchemaId)
                {
                    // The cached id was read before the caches are cleared; reading it from
                    // schemaIds afterwards would fail with a KeyNotFoundException and mask
                    // the error reported below.
                    schemaIds.Clear();
                    registeredSchemas.Clear();
                    throw new KafkaException(new Error(
                        isKey ? ErrorCode.Local_KeySerialization : ErrorCode.Local_ValueSerialization,
                        $"Duplicate schema registration encountered: Schema ids {cachedSchemaId} and {newSchemaId} are associated with the same schema."));
                }

                registeredSchemas.Add(subjectSchemaPair);
            }

            schemaId = schemaIds[writerSchemaString];
        }
        finally
        {
            serializeMutex.Release();
        }

        // When a union schema was recorded for this writer schema, encode with the union instead.
        Avro.Schema unionSchema;
        if (unionSchemas.TryGetValue(writerSchema, out unionSchema))
        {
            writerSchema = unionSchema;
        }

        using (var stream = new MemoryStream(initialBufferSize))
        using (var writer = new BinaryWriter(stream))
        {
            stream.WriteByte(Constants.MagicByte);
            writer.Write(IPAddress.HostToNetworkOrder(schemaId));
            new GenericWriter<GenericRecord>(writerSchema)
                .Write(data, new BinaryEncoder(stream));
            return stream.ToArray();
        }
    }
    catch (AggregateException e) when (e.InnerException != null)
    {
        // Surface the underlying exception without resetting its stack trace. An
        // AggregateException without an inner exception is left to propagate unchanged.
        System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(e.InnerException).Throw();
        throw; // unreachable: Throw() never returns, but the compiler cannot know that.
    }
}
/// <summary>
/// Serialize GenericRecord instance to a byte array in Avro format. The serialized
/// data is preceded by a "magic byte" (1 byte) and the id of the schema as registered
/// in Confluent's Schema Registry (4 bytes, network byte order). This call may block or throw
/// on first use for a particular topic during schema registration.
/// </summary>
/// <param name="topic">
/// The topic associated with the data.
/// </param>
/// <param name="data">
/// The object to serialize.
/// </param>
/// <param name="isKey">
/// whether or not the data represents a message key.
/// </param>
/// <returns>
/// <paramref name="data" /> serialized as a byte array.
/// </returns>
/// <exception cref="KafkaException">
/// Thrown when two different schema ids are encountered for the same schema string.
/// </exception>
public async Task<byte[]> Serialize(string topic, GenericRecord data, bool isKey)
{
    try
    {
        int schemaId;
        global::Avro.RecordSchema writerSchema;
        await serializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);
        try
        {
            // TODO: If any of these caches fills up, this is probably an
            // indication of misuse of the serializer. Ideally we would do
            // something more sophisticated than the below + not allow
            // the misuse to keep happening without warning.
            if (knownSchemas.Count > schemaRegistryClient.MaxCachedSchemas ||
                registeredSchemas.Count > schemaRegistryClient.MaxCachedSchemas ||
                schemaIds.Count > schemaRegistryClient.MaxCachedSchemas)
            {
                knownSchemas.Clear();
                registeredSchemas.Clear();
                schemaIds.Clear();
            }

            // Determine a schema string corresponding to the schema object.
            // TODO: It would be more efficient to use a hash function based
            // on the instance reference, not the implementation provided by
            // Schema.
            writerSchema = data.Schema;
            string writerSchemaString;
            if (!knownSchemas.TryGetValue(writerSchema, out writerSchemaString))
            {
                writerSchemaString = writerSchema.ToString();
                knownSchemas.Add(writerSchema, writerSchemaString);
            }

            // Verify schema compatibility (& register as required) + get the
            // id corresponding to the schema.
            // TODO: Again, the hash functions in use below are potentially
            // slow since writerSchemaString is potentially long. It would be
            // better to use hash functions based on the writerSchemaString
            // object reference, not value.
            string subject = isKey
                ? schemaRegistryClient.ConstructKeySubjectName(topic)
                : schemaRegistryClient.ConstructValueSubjectName(topic);
            var subjectSchemaPair = new KeyValuePair<string, string>(subject, writerSchemaString);
            if (!registeredSchemas.Contains(subjectSchemaPair))
            {
                // first usage: register/get schema to check compatibility
                int newSchemaId = autoRegisterSchema
                    ? await schemaRegistryClient.RegisterSchemaAsync(subject, writerSchemaString).ConfigureAwait(continueOnCapturedContext: false)
                    : await schemaRegistryClient.GetSchemaIdAsync(subject, writerSchemaString).ConfigureAwait(continueOnCapturedContext: false);

                int cachedSchemaId;
                if (!schemaIds.TryGetValue(writerSchemaString, out cachedSchemaId))
                {
                    schemaIds.Add(writerSchemaString, newSchemaId);
                }
                else if (cachedSchemaId != newSchemaId)
                {
                    // The cached id was read before the caches are cleared; reading it from
                    // schemaIds afterwards would fail with a KeyNotFoundException and mask
                    // the error reported below.
                    schemaIds.Clear();
                    registeredSchemas.Clear();
                    throw new KafkaException(new Error(
                        isKey ? ErrorCode.Local_KeySerialization : ErrorCode.Local_ValueSerialization,
                        $"Duplicate schema registration encountered: Schema ids {cachedSchemaId} and {newSchemaId} are associated with the same schema."));
                }

                registeredSchemas.Add(subjectSchemaPair);
            }

            schemaId = schemaIds[writerSchemaString];
        }
        finally
        {
            serializeMutex.Release();
        }

        using (var stream = new MemoryStream(initialBufferSize))
        using (var writer = new BinaryWriter(stream))
        {
            stream.WriteByte(Constants.MagicByte);
            writer.Write(IPAddress.HostToNetworkOrder(schemaId));
            new GenericWriter<GenericRecord>(writerSchema)
                .Write(data, new BinaryEncoder(stream));
            return stream.ToArray();
        }
    }
    catch (AggregateException e) when (e.InnerException != null)
    {
        // Surface the underlying exception without resetting its stack trace. An
        // AggregateException without an inner exception is left to propagate unchanged.
        System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(e.InnerException).Throw();
        throw; // unreachable: Throw() never returns, but the compiler cannot know that.
    }
}
/// <summary>
/// Serialize an instance of type <typeparamref name="T"/> to a UTF8 encoded JSON
/// representation. The serialized data is preceded by:
///   1. A "magic byte" (1 byte) that identifies this as a message with
///      Confluent Platform framing.
///   2. The id of the schema as registered in Confluent's Schema Registry
///      (4 bytes, network byte order).
/// This call may block or throw on first use for a particular topic during
/// schema registration / verification.
/// </summary>
/// <param name="value">
/// The value to serialize.
/// </param>
/// <param name="context">
/// Context relevant to the serialize operation.
/// </param>
/// <returns>
/// A <see cref="System.Threading.Tasks.Task" /> that completes with
/// <paramref name="value" /> serialized as a byte array, or with null when
/// <paramref name="value" /> is null.
/// </returns>
/// <exception cref="InvalidDataException">
/// Thrown when the JSON representation of <paramref name="value" /> fails schema validation.
/// </exception>
public async Task<byte[]> SerializeAsync(T value, SerializationContext context)
{
    if (value == null)
    {
        return null;
    }

    var serializedString = Newtonsoft.Json.JsonConvert.SerializeObject(value);
    var validationResult = validator.Validate(serializedString, this.schema);
    if (validationResult.Count > 0)
    {
        // Join the offending paths first, then close the bracket. Appending "]" to the
        // sequence itself would put the sequence's type name in the message instead.
        throw new InvalidDataException("Schema validation failed for properties: [" + string.Join(", ", validationResult.Select(r => r.Path)) + "]");
    }

    try
    {
        await serializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);
        try
        {
            string subject = this.subjectNameStrategy != null
                // use the subject name strategy specified in the serializer config if available.
                ? this.subjectNameStrategy(context, this.schemaFullname)
                // else fall back to the deprecated config from (or default as currently supplied by) SchemaRegistry.
                : context.Component == MessageComponentType.Key
                    ? schemaRegistryClient.ConstructKeySubjectName(context.Topic, this.schemaFullname)
                    : schemaRegistryClient.ConstructValueSubjectName(context.Topic, this.schemaFullname);

            if (!subjectsRegistered.Contains(subject))
            {
                // first usage: register/get schema to check compatibility
                schemaId = autoRegisterSchema
                    ? await schemaRegistryClient.RegisterSchemaAsync(subject, new Schema(this.schemaText, EmptyReferencesList, SchemaType.Json))
                        .ConfigureAwait(continueOnCapturedContext: false)
                    : await schemaRegistryClient.GetSchemaIdAsync(subject, new Schema(this.schemaText, EmptyReferencesList, SchemaType.Json))
                        .ConfigureAwait(continueOnCapturedContext: false);

                // TODO: It may be better to fail fast if conflicting values for schemaId are seen here.

                subjectsRegistered.Add(subject);
            }
        }
        finally
        {
            serializeMutex.Release();
        }

        using (var stream = new MemoryStream(initialBufferSize))
        using (var writer = new BinaryWriter(stream))
        {
            stream.WriteByte(Constants.MagicByte);
            writer.Write(IPAddress.HostToNetworkOrder(schemaId.Value));
            writer.Write(System.Text.Encoding.UTF8.GetBytes(serializedString));
            return stream.ToArray();
        }
    }
    catch (AggregateException e) when (e.InnerException != null)
    {
        // Surface the underlying exception without resetting its stack trace. An
        // AggregateException without an inner exception is left to propagate unchanged.
        System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(e.InnerException).Throw();
        throw; // unreachable: Throw() never returns, but the compiler cannot know that.
    }
}