/// <summary>
///     Serialize an instance of type <typeparamref name="T"/> to a byte array.
///     The output is a magic byte (1 byte), followed by the writer schema id
///     (network byte order), followed by the data as written by the Avro writer.
///     On first use for a topic, this call blocks while the writer schema is
///     registered with (or looked up in) Schema Registry.
/// </summary>
/// <param name="topic">
///     The topic associated with the data.
/// </param>
/// <param name="data">
///     The object to serialize.
/// </param>
/// <returns>
///     <paramref name="data" /> serialized as a byte array.
/// </returns>
public byte[] Serialize(string topic, T data)
{
    lock (serializeLockObj)
    {
        bool alreadyRegistered = topicsRegistered.Contains(topic);
        if (!alreadyRegistered)
        {
            string subject;
            if (isKey)
            {
                subject = schemaRegistryClient.ConstructKeySubjectName(topic);
            }
            else
            {
                subject = schemaRegistryClient.ConstructValueSubjectName(topic);
            }

            // First usage: register/get the schema to check compatibility.
            // The interface is synchronous, so the async call is waited on here.
            if (autoRegisterSchema)
            {
                writerSchemaId = schemaRegistryClient
                    .RegisterSchemaAsync(subject, writerSchemaString)
                    .ConfigureAwait(continueOnCapturedContext: false)
                    .GetAwaiter()
                    .GetResult();
            }
            else
            {
                writerSchemaId = schemaRegistryClient
                    .GetSchemaIdAsync(subject, writerSchemaString)
                    .ConfigureAwait(continueOnCapturedContext: false)
                    .GetAwaiter()
                    .GetResult();
            }

            topicsRegistered.Add(topic);
        }
    }

    using (var stream = new MemoryStream(initialBufferSize))
    using (var writer = new BinaryWriter(stream))
    {
        // Framing: magic byte, then schema id, then the payload.
        stream.WriteByte(Constants.MagicByte);
        writer.Write(IPAddress.HostToNetworkOrder(writerSchemaId.Value));

        var encoder = new BinaryEncoder(stream);
        avroWriter.Write(data, encoder);

        // TODO: maybe change the ISerializer interface so that this copy isn't necessary.
        return stream.ToArray();
    }
}
/// <summary>
///     Serialize an instance of type <typeparamref name="T"/> to a byte array.
///     The output is a magic byte (1 byte), followed by the writer schema id
///     (network byte order), followed by the data as written by the Avro writer.
///     On first use for a subject, the writer schema is registered with (or
///     looked up in) Schema Registry.
/// </summary>
/// <param name="topic">
///     The topic associated with the data.
/// </param>
/// <param name="data">
///     The object to serialize.
/// </param>
/// <param name="isKey">
///     Whether or not the data represents a message key.
/// </param>
/// <returns>
///     A task that completes with <paramref name="data" /> serialized as a byte array.
/// </returns>
public async Task<byte[]> Serialize(string topic, T data, bool isKey)
{
    try
    {
        await serializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);
        try
        {
            // A record full name is only available when the data is a specific
            // record whose schema is a record schema; otherwise it stays null.
            string fullname = null;
            var specificRecord = data as ISpecificRecord;
            if (specificRecord != null)
            {
                var recordSchema = specificRecord.Schema as Avro.RecordSchema;
                if (recordSchema != null)
                {
                    fullname = recordSchema.Fullname;
                }
            }

            string subject;
            if (this.subjectNameStrategy != null)
            {
                // use the subject name strategy specified in the serializer config if available.
                var componentType = isKey ? MessageComponentType.Key : MessageComponentType.Value;
                subject = this.subjectNameStrategy(new SerializationContext(componentType, topic), fullname);
            }
            else if (isKey)
            {
                // else fall back to the deprecated config from (or default as currently supplied by) SchemaRegistry.
                subject = schemaRegistryClient.ConstructKeySubjectName(topic, fullname);
            }
            else
            {
                subject = schemaRegistryClient.ConstructValueSubjectName(topic, fullname);
            }

            bool alreadyRegistered = subjectsRegistered.Contains(subject);
            if (!alreadyRegistered)
            {
                // first usage: register/get schema to check compatibility
                if (autoRegisterSchema)
                {
                    writerSchemaId = await schemaRegistryClient
                        .RegisterSchemaAsync(subject, writerSchemaString)
                        .ConfigureAwait(continueOnCapturedContext: false);
                }
                else
                {
                    writerSchemaId = await schemaRegistryClient
                        .GetSchemaIdAsync(subject, writerSchemaString)
                        .ConfigureAwait(continueOnCapturedContext: false);
                }

                subjectsRegistered.Add(subject);
            }
        }
        finally
        {
            serializeMutex.Release();
        }

        using (var stream = new MemoryStream(initialBufferSize))
        using (var writer = new BinaryWriter(stream))
        {
            // Framing: magic byte, then schema id, then the payload.
            stream.WriteByte(Constants.MagicByte);
            writer.Write(IPAddress.HostToNetworkOrder(writerSchemaId.Value));

            var encoder = new BinaryEncoder(stream);
            avroWriter.Write(data, encoder);

            // TODO: maybe change the ISerializer interface so that this copy isn't necessary.
            return stream.ToArray();
        }
    }
    catch (AggregateException e)
    {
        // Surface the wrapped exception rather than the aggregate.
        throw e.InnerException;
    }
}
/// <summary>
///     Serialize an instance of type <typeparamref name="T"/> to a byte array.
///     The output is a magic byte (1 byte), followed by the writer schema id
///     (network byte order), followed by the data as written by the Avro writer.
///     The subject name is built from the topic and the full name of
///     <typeparamref name="T"/>. On first use for a subject, the writer schema is
///     registered with (or looked up in) Schema Registry.
/// </summary>
/// <param name="topic">
///     The topic associated with the data.
/// </param>
/// <param name="data">
///     The object to serialize.
/// </param>
/// <param name="isKey">
///     Whether or not the data represents a message key.
/// </param>
/// <returns>
///     A task that completes with <paramref name="data" /> serialized as a byte array.
/// </returns>
public async Task<byte[]> Serialize(string topic, T data, bool isKey)
{
    try
    {
        await serializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);
        try
        {
            string typeFullName = typeof(T).FullName;

            string subject;
            if (isKey)
            {
                subject = schemaRegistryClient.ConstructKeySubjectName(topic, typeFullName);
            }
            else
            {
                subject = schemaRegistryClient.ConstructValueSubjectName(topic, typeFullName);
            }

            bool alreadyRegistered = subjectsRegistered.Contains(subject);
            if (!alreadyRegistered)
            {
                // first usage: register/get schema to check compatibility
                if (autoRegisterSchema)
                {
                    writerSchemaId = await schemaRegistryClient
                        .RegisterSchemaAsync(subject, writerSchemaString)
                        .ConfigureAwait(continueOnCapturedContext: false);
                }
                else
                {
                    writerSchemaId = await schemaRegistryClient
                        .GetSchemaIdAsync(subject, writerSchemaString)
                        .ConfigureAwait(continueOnCapturedContext: false);
                }

                subjectsRegistered.Add(subject);
            }
        }
        finally
        {
            serializeMutex.Release();
        }

        using (var stream = new MemoryStream(initialBufferSize))
        using (var writer = new BinaryWriter(stream))
        {
            // Framing: magic byte, then schema id, then the payload.
            stream.WriteByte(Constants.MagicByte);
            writer.Write(IPAddress.HostToNetworkOrder(writerSchemaId.Value));

            var encoder = new BinaryEncoder(stream);
            avroWriter.Write(data, encoder);

            // TODO: maybe change the ISerializer interface so that this copy isn't necessary.
            return stream.ToArray();
        }
    }
    catch (AggregateException e)
    {
        // Surface the wrapped exception rather than the aggregate.
        throw e.InnerException;
    }
}
/// <summary>
///     Serialize GenericRecord instance to a byte array in Avro format. The serialized
///     data is preceded by a "magic byte" (1 byte) and the id of the schema as registered
///     in Confluent's Schema Registry (4 bytes, network byte order). This call may block or throw
///     on first use for a particular topic during schema registration.
/// </summary>
/// <param name="topic">
///     The topic associated with the data.
/// </param>
/// <param name="data">
///     The object to serialize.
/// </param>
/// <returns>
///     <paramref name="data" /> serialized as a byte array.
/// </returns>
public byte[] Serialize(string topic, GenericRecord data)
{
    int schemaId;
    Avro.RecordSchema writerSchema;

    lock (serializeLockObj)
    {
        // TODO: If any of these caches fills up, this is probably an
        // indication of misuse of the serializer. Ideally we would do
        // something more sophisticated than the below + not allow
        // the misuse to keep happening without warning.
        if (knownSchemas.Count > schemaRegistryClient.MaxCachedSchemas ||
            registeredSchemas.Count > schemaRegistryClient.MaxCachedSchemas ||
            schemaIds.Count > schemaRegistryClient.MaxCachedSchemas)
        {
            knownSchemas.Clear();
            registeredSchemas.Clear();
            schemaIds.Clear();
        }

        // Determine a schema string corresponding to the schema object.
        // TODO: It would be more efficient to use a hash function based
        // on the instance reference, not the implementation provided by
        // Schema.
        writerSchema = data.Schema;
        string writerSchemaString;
        if (!knownSchemas.TryGetValue(writerSchema, out writerSchemaString))
        {
            writerSchemaString = writerSchema.ToString();
            knownSchemas.Add(writerSchema, writerSchemaString);
        }

        // Verify schema compatibility (& register as required) + get the
        // id corresponding to the schema.
        // TODO: Again, the hash functions in use below are potentially
        // slow since writerSchemaString is potentially long. It would be
        // better to use hash functions based on the writerSchemaString
        // object reference, not value.
        string subject = this.isKey
            ? schemaRegistryClient.ConstructKeySubjectName(topic)
            : schemaRegistryClient.ConstructValueSubjectName(topic);
        var subjectSchemaPair = new KeyValuePair<string, string>(subject, writerSchemaString);

        if (!registeredSchemas.Contains(subjectSchemaPair))
        {
            // first usage: register/get schema to check compatibility
            int registeredId;
            if (autoRegisterSchema)
            {
                registeredId = schemaRegistryClient
                    .RegisterSchemaAsync(subject, writerSchemaString)
                    .ConfigureAwait(false)
                    .GetAwaiter()
                    .GetResult();
            }
            else
            {
                registeredId = schemaRegistryClient
                    .GetSchemaIdAsync(subject, writerSchemaString)
                    .ConfigureAwait(false)
                    .GetAwaiter()
                    .GetResult();
            }

            // The same schema string may already be cached from a different
            // subject (e.g. the same schema used with another topic). Assign via
            // the indexer rather than Add so that case does not fail on a
            // duplicate key; the most recently returned id is kept.
            schemaIds[writerSchemaString] = registeredId;
            registeredSchemas.Add(subjectSchemaPair);
        }

        schemaId = schemaIds[writerSchemaString];
    }

    using (var stream = new MemoryStream(initialBufferSize))
    using (var writer = new BinaryWriter(stream))
    {
        // Framing: magic byte, then schema id, then the Avro-encoded record.
        stream.WriteByte(Constants.MagicByte);
        writer.Write(IPAddress.HostToNetworkOrder(schemaId));
        new GenericWriter<GenericRecord>(writerSchema)
            .Write(data, new BinaryEncoder(stream));
        return stream.ToArray();
    }
}
/// <summary>
///     Serialize GenericRecord instance to a byte array in Avro format. The serialized
///     data is preceded by a "magic byte" (1 byte) and the id of the schema as registered
///     in Confluent's Schema Registry (4 bytes, network byte order). This call may block or throw
///     on first use for a particular topic during schema registration.
/// </summary>
/// <param name="topic">
///     The topic associated with the data.
/// </param>
/// <param name="data">
///     The object to serialize.
/// </param>
/// <param name="isKey">
///     whether or not the data represents a message key.
/// </param>
/// <returns>
///     <paramref name="data" /> serialized as a byte array.
/// </returns>
/// <exception cref="NotSupportedException">
///     The latest registered schema is a union with a reference to a schema
///     that is not a record.
/// </exception>
/// <exception cref="KafkaException">
///     Two different schema ids were returned for the same schema string.
/// </exception>
public async Task<byte[]> Serialize(string topic, GenericRecord data, bool isKey)
{
    try
    {
        int schemaId;
        global::Avro.Schema writerSchema;

        await serializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);
        try
        {
            // TODO: If any of these caches fills up, this is probably an
            // indication of misuse of the serializer. Ideally we would do
            // something more sophisticated than the below + not allow
            // the misuse to keep happening without warning.
            if (knownSchemas.Count > schemaRegistryClient.MaxCachedSchemas ||
                registeredSchemas.Count > schemaRegistryClient.MaxCachedSchemas ||
                schemaIds.Count > schemaRegistryClient.MaxCachedSchemas)
            {
                knownSchemas.Clear();
                registeredSchemas.Clear();
                schemaIds.Clear();
            }

            // Determine a schema string corresponding to the schema object.
            // TODO: It would be more efficient to use a hash function based
            // on the instance reference, not the implementation provided by
            // Schema.
            writerSchema = data.Schema;
            string writerSchemaString;
            if (!knownSchemas.TryGetValue(writerSchema, out writerSchemaString))
            {
                writerSchemaString = writerSchema.ToString();
                knownSchemas.Add(writerSchema, writerSchemaString);
            }

            // Verify schema compatibility (& register as required) + get the
            // id corresponding to the schema.
            // TODO: Again, the hash functions in use below are potentially
            // slow since writerSchemaString is potentially long. It would be
            // better to use hash functions based on the writerSchemaString
            // object reference, not value.
            string subject = this.subjectNameStrategy != null
                // use the subject name strategy specified in the serializer config if available.
                ? this.subjectNameStrategy(new SerializationContext(isKey ? MessageComponentType.Key : MessageComponentType.Value, topic), data.Schema.Fullname)
                // else fall back to the deprecated config from (or default as currently supplied by) SchemaRegistry.
                : isKey
                    ? schemaRegistryClient.ConstructKeySubjectName(topic, data.Schema.Fullname)
                    : schemaRegistryClient.ConstructValueSubjectName(topic, data.Schema.Fullname);
            var subjectSchemaPair = new KeyValuePair<string, string>(subject, writerSchemaString);

            if (!registeredSchemas.Contains(subjectSchemaPair))
            {
                int newSchemaId;

                // first usage: register/get schema to check compatibility
                if (autoRegisterSchema)
                {
                    newSchemaId = await schemaRegistryClient
                        .RegisterSchemaAsync(subject, writerSchemaString)
                        .ConfigureAwait(continueOnCapturedContext: false);
                }
                // https://www.confluent.io/blog/multiple-event-types-in-the-same-kafka-topic/
                else if (useLatestSchema)
                {
                    RegisteredSchema regSchema = await schemaRegistryClient
                        .GetLatestSchemaAsync(subject)
                        .ConfigureAwait(continueOnCapturedContext: false);

                    // Do we have an Avro union with schema references?
                    if (regSchema.References.Any() && IsUnion(regSchema.SchemaString))
                    {
                        // Look up every referenced schema in the registry and
                        // assemble a union schema ("[s1,s2,...]") from them, in
                        // order to check compatibility with the referenced schemas.
                        var referencedSchemaStrings = new List<string>();
                        foreach (var schemaReference in regSchema.References)
                        {
                            RegisteredSchema registeredRefSchema = await schemaRegistryClient
                                .GetRegisteredSchemaAsync(schemaReference.Subject, schemaReference.Version)
                                .ConfigureAwait(continueOnCapturedContext: false);

                            Avro.Schema refSchema = Avro.Schema.Parse(registeredRefSchema.SchemaString);
                            if (refSchema.Tag != Avro.Schema.Type.Record)
                            {
                                throw new NotSupportedException("Only union schemas containing references to a record are supported for now");
                            }

                            referencedSchemaStrings.Add(registeredRefSchema.SchemaString);
                        }

                        // string.Join places separators between elements only, so
                        // no "is this the last element" comparison is needed.
                        string unionSchemaString = "[" + string.Join(",", referencedSchemaStrings) + "]";
                        unionSchemas[writerSchema] = global::Avro.Schema.Parse(unionSchemaString);
                        newSchemaId = regSchema.Id;
                    }
                    else
                    {
                        newSchemaId = await schemaRegistryClient
                            .GetSchemaIdAsync(subject, writerSchemaString)
                            .ConfigureAwait(continueOnCapturedContext: false);
                    }
                }
                else
                {
                    newSchemaId = await schemaRegistryClient
                        .GetSchemaIdAsync(subject, writerSchemaString)
                        .ConfigureAwait(continueOnCapturedContext: false);
                }

                int existingSchemaId;
                if (!schemaIds.TryGetValue(writerSchemaString, out existingSchemaId))
                {
                    schemaIds.Add(writerSchemaString, newSchemaId);
                }
                else if (existingSchemaId != newSchemaId)
                {
                    // The cached id is captured in existingSchemaId before the
                    // caches are cleared, so the error message can still report it.
                    schemaIds.Clear();
                    registeredSchemas.Clear();
                    throw new KafkaException(new Error(
                        isKey ? ErrorCode.Local_KeySerialization : ErrorCode.Local_ValueSerialization,
                        $"Duplicate schema registration encountered: Schema ids {existingSchemaId} and {newSchemaId} are associated with the same schema."));
                }

                registeredSchemas.Add(subjectSchemaPair);
            }

            schemaId = schemaIds[writerSchemaString];
        }
        finally
        {
            serializeMutex.Release();
        }

        // If a union schema was assembled for this writer schema, write with it.
        // NOTE(review): unionSchemas is written while holding serializeMutex but
        // read here after release - confirm its type tolerates concurrent access.
        Avro.Schema unionSchema;
        if (unionSchemas.TryGetValue(writerSchema, out unionSchema))
        {
            writerSchema = unionSchema;
        }

        using (var stream = new MemoryStream(initialBufferSize))
        using (var writer = new BinaryWriter(stream))
        {
            // Framing: magic byte, then schema id, then the Avro-encoded record.
            stream.WriteByte(Constants.MagicByte);
            writer.Write(IPAddress.HostToNetworkOrder(schemaId));
            new GenericWriter<GenericRecord>(writerSchema)
                .Write(data, new BinaryEncoder(stream));
            return stream.ToArray();
        }
    }
    catch (AggregateException e)
    {
        // Surface the wrapped exception rather than the aggregate.
        throw e.InnerException;
    }
}
/// <summary>
///     Serialize GenericRecord instance to a byte array in Avro format. The serialized
///     data is preceded by a "magic byte" (1 byte) and the id of the schema as registered
///     in Confluent's Schema Registry (4 bytes, network byte order). This call may block or throw
///     on first use for a particular topic during schema registration.
/// </summary>
/// <param name="topic">
///     The topic associated with the data.
/// </param>
/// <param name="data">
///     The object to serialize.
/// </param>
/// <param name="isKey">
///     whether or not the data represents a message key.
/// </param>
/// <returns>
///     <paramref name="data" /> serialized as a byte array.
/// </returns>
/// <exception cref="KafkaException">
///     Two different schema ids were returned for the same schema string.
/// </exception>
public async Task<byte[]> Serialize(string topic, GenericRecord data, bool isKey)
{
    try
    {
        int schemaId;
        global::Avro.RecordSchema writerSchema;

        await serializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);
        try
        {
            // TODO: If any of these caches fills up, this is probably an
            // indication of misuse of the serializer. Ideally we would do
            // something more sophisticated than the below + not allow
            // the misuse to keep happening without warning.
            if (knownSchemas.Count > schemaRegistryClient.MaxCachedSchemas ||
                registeredSchemas.Count > schemaRegistryClient.MaxCachedSchemas ||
                schemaIds.Count > schemaRegistryClient.MaxCachedSchemas)
            {
                knownSchemas.Clear();
                registeredSchemas.Clear();
                schemaIds.Clear();
            }

            // Determine a schema string corresponding to the schema object.
            // TODO: It would be more efficient to use a hash function based
            // on the instance reference, not the implementation provided by
            // Schema.
            writerSchema = data.Schema;
            string writerSchemaString;
            if (!knownSchemas.TryGetValue(writerSchema, out writerSchemaString))
            {
                writerSchemaString = writerSchema.ToString();
                knownSchemas.Add(writerSchema, writerSchemaString);
            }

            // Verify schema compatibility (& register as required) + get the
            // id corresponding to the schema.
            // TODO: Again, the hash functions in use below are potentially
            // slow since writerSchemaString is potentially long. It would be
            // better to use hash functions based on the writerSchemaString
            // object reference, not value.
            string subject = isKey
                ? schemaRegistryClient.ConstructKeySubjectName(topic)
                : schemaRegistryClient.ConstructValueSubjectName(topic);
            var subjectSchemaPair = new KeyValuePair<string, string>(subject, writerSchemaString);

            if (!registeredSchemas.Contains(subjectSchemaPair))
            {
                int newSchemaId;

                // first usage: register/get schema to check compatibility
                if (autoRegisterSchema)
                {
                    newSchemaId = await schemaRegistryClient
                        .RegisterSchemaAsync(subject, writerSchemaString)
                        .ConfigureAwait(continueOnCapturedContext: false);
                }
                else
                {
                    newSchemaId = await schemaRegistryClient
                        .GetSchemaIdAsync(subject, writerSchemaString)
                        .ConfigureAwait(continueOnCapturedContext: false);
                }

                int existingSchemaId;
                if (!schemaIds.TryGetValue(writerSchemaString, out existingSchemaId))
                {
                    schemaIds.Add(writerSchemaString, newSchemaId);
                }
                else if (existingSchemaId != newSchemaId)
                {
                    // The cached id is captured in existingSchemaId before the
                    // caches are cleared, so the error message can still report it.
                    schemaIds.Clear();
                    registeredSchemas.Clear();
                    throw new KafkaException(new Error(
                        isKey ? ErrorCode.Local_KeySerialization : ErrorCode.Local_ValueSerialization,
                        $"Duplicate schema registration encountered: Schema ids {existingSchemaId} and {newSchemaId} are associated with the same schema."));
                }

                registeredSchemas.Add(subjectSchemaPair);
            }

            schemaId = schemaIds[writerSchemaString];
        }
        finally
        {
            serializeMutex.Release();
        }

        using (var stream = new MemoryStream(initialBufferSize))
        using (var writer = new BinaryWriter(stream))
        {
            // Framing: magic byte, then schema id, then the Avro-encoded record.
            stream.WriteByte(Constants.MagicByte);
            writer.Write(IPAddress.HostToNetworkOrder(schemaId));
            new GenericWriter<GenericRecord>(writerSchema)
                .Write(data, new BinaryEncoder(stream));
            return stream.ToArray();
        }
    }
    catch (AggregateException e)
    {
        // Surface the wrapped exception rather than the aggregate.
        throw e.InnerException;
    }
}
/// <summary>
///     Serialize an instance of type <typeparamref name="T"/> to a byte array
///     in Protobuf format. The serialized data is preceded by:
///       1. A "magic byte" (1 byte) that identifies this as a message with
///          Confluent Platform framing.
///       2. The id of the schema as registered in Confluent's Schema Registry
///          (4 bytes, network byte order).
///       3. A size-prefixed array of indices that identify the specific message
///          type in the schema (a given schema can contain many message types
///          and they can be nested). Size and indices are unsigned varints. The
///          common case where the message type is the first message in the schema
///          (i.e. index data would be [1,0]) is encoded as simply a single 0 byte
///          as an optimization.
///     This call may block or throw on first use for a particular topic during
///     schema registration / verification.
/// </summary>
/// <param name="value">
///     The value to serialize.
/// </param>
/// <param name="context">
///     Context relevant to the serialize operation.
/// </param>
/// <returns>
///     A <see cref="System.Threading.Tasks.Task" /> that completes with
///     <paramref name="value" /> serialized as a byte array, or with null
///     when <paramref name="value" /> is null.
/// </returns>
public async Task<byte[]> SerializeAsync(T value, SerializationContext context)
{
    if (value == null)
    {
        return null;
    }

    try
    {
        // The index array is computed once and then reused.
        if (this.indexArray == null)
        {
            this.indexArray = createIndexArray(value.Descriptor, useDeprecatedFormat);
        }

        string fullname = value.Descriptor.FullName;

        await serializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);
        try
        {
            string subject;
            if (this.subjectNameStrategy != null)
            {
                // use the subject name strategy specified in the serializer config if available.
                subject = this.subjectNameStrategy(context, fullname);
            }
            else if (context.Component == MessageComponentType.Key)
            {
                // else fall back to the deprecated config from (or default as currently supplied by) SchemaRegistry.
                subject = schemaRegistryClient.ConstructKeySubjectName(context.Topic, fullname);
            }
            else
            {
                subject = schemaRegistryClient.ConstructValueSubjectName(context.Topic, fullname);
            }

            bool alreadyRegistered = subjectsRegistered.Contains(subject);
            if (!alreadyRegistered)
            {
                if (useLatestVersion)
                {
                    var latestSchema = await schemaRegistryClient
                        .GetLatestSchemaAsync(subject)
                        .ConfigureAwait(continueOnCapturedContext: false);
                    schemaId = latestSchema.Id;
                }
                else
                {
                    var references = await RegisterOrGetReferences(value.Descriptor.File, context, autoRegisterSchema, skipKnownTypes)
                        .ConfigureAwait(continueOnCapturedContext: false);

                    var protobufSchema = new Schema(
                        value.Descriptor.File.SerializedData.ToBase64(),
                        references,
                        SchemaType.Protobuf);

                    // first usage: register/get schema to check compatibility
                    if (autoRegisterSchema)
                    {
                        schemaId = await schemaRegistryClient
                            .RegisterSchemaAsync(subject, protobufSchema)
                            .ConfigureAwait(continueOnCapturedContext: false);
                    }
                    else
                    {
                        schemaId = await schemaRegistryClient
                            .GetSchemaIdAsync(subject, protobufSchema)
                            .ConfigureAwait(continueOnCapturedContext: false);
                    }

                    // note: different values for schemaId should never be seen here.
                    // TODO: but fail fast may be better here.
                }

                subjectsRegistered.Add(subject);
            }
        }
        finally
        {
            serializeMutex.Release();
        }

        using (var stream = new MemoryStream(initialBufferSize))
        using (var writer = new BinaryWriter(stream))
        {
            // Framing: magic byte, schema id, message index array, then the message.
            stream.WriteByte(Constants.MagicByte);
            writer.Write(IPAddress.HostToNetworkOrder(schemaId.Value));
            writer.Write(this.indexArray);
            value.WriteTo(stream);
            return stream.ToArray();
        }
    }
    catch (AggregateException e)
    {
        // Surface the wrapped exception rather than the aggregate.
        throw e.InnerException;
    }
}
/// <summary>
///     Serialize an instance of type <typeparamref name="T"/> to a UTF8 encoded JSON
///     representation. The serialized data is preceded by:
///       1. A "magic byte" (1 byte) that identifies this as a message with
///          Confluent Platform framing.
///       2. The id of the schema as registered in Confluent's Schema Registry
///          (4 bytes, network byte order).
///     This call may block or throw on first use for a particular topic during
///     schema registration / verification.
/// </summary>
/// <param name="value">
///     The value to serialize.
/// </param>
/// <param name="context">
///     Context relevant to the serialize operation.
/// </param>
/// <returns>
///     A <see cref="System.Threading.Tasks.Task" /> that completes with
///     <paramref name="value" /> serialized as a byte array, or with null
///     when <paramref name="value" /> is null.
/// </returns>
/// <exception cref="InvalidDataException">
///     The JSON produced for <paramref name="value" /> fails validation
///     against the serializer's schema.
/// </exception>
public async Task<byte[]> SerializeAsync(T value, SerializationContext context)
{
    if (value == null)
    {
        return null;
    }

    var serializedString = Newtonsoft.Json.JsonConvert.SerializeObject(value);
    var validationResult = validator.Validate(serializedString, this.schema);
    if (validationResult.Count > 0)
    {
        // Join the failing property paths first, then close the bracket, so the
        // message lists the paths rather than the enumerable's type name.
        throw new InvalidDataException(
            "Schema validation failed for properties: [" +
            string.Join(", ", validationResult.Select(r => r.Path)) +
            "]");
    }

    try
    {
        await serializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);
        try
        {
            string subject = this.subjectNameStrategy != null
                // use the subject name strategy specified in the serializer config if available.
                ? this.subjectNameStrategy(context, this.schemaFullname)
                // else fall back to the deprecated config from (or default as currently supplied by) SchemaRegistry.
                : context.Component == MessageComponentType.Key
                    ? schemaRegistryClient.ConstructKeySubjectName(context.Topic, this.schemaFullname)
                    : schemaRegistryClient.ConstructValueSubjectName(context.Topic, this.schemaFullname);

            if (!subjectsRegistered.Contains(subject))
            {
                // first usage: register/get schema to check compatibility
                schemaId = autoRegisterSchema
                    ? await schemaRegistryClient.RegisterSchemaAsync(subject, new Schema(this.schemaText, EmptyReferencesList, SchemaType.Json))
                        .ConfigureAwait(continueOnCapturedContext: false)
                    : await schemaRegistryClient.GetSchemaIdAsync(subject, new Schema(this.schemaText, EmptyReferencesList, SchemaType.Json))
                        .ConfigureAwait(continueOnCapturedContext: false);

                // TODO: It may be better to fail fast if conflicting values for schemaId are seen here.
                subjectsRegistered.Add(subject);
            }
        }
        finally
        {
            serializeMutex.Release();
        }

        using (var stream = new MemoryStream(initialBufferSize))
        using (var writer = new BinaryWriter(stream))
        {
            // Framing: magic byte, then schema id, then the UTF8 JSON payload.
            stream.WriteByte(Constants.MagicByte);
            writer.Write(IPAddress.HostToNetworkOrder(schemaId.Value));
            writer.Write(System.Text.Encoding.UTF8.GetBytes(serializedString));
            return stream.ToArray();
        }
    }
    catch (AggregateException e)
    {
        // Surface the wrapped exception rather than the aggregate.
        throw e.InnerException;
    }
}