/// <summary>
/// Reads "users.avro" twice using the schema stored in "user.avsc": first as
/// generated <c>User</c> objects, then as <c>GenericRecord</c> instances, and
/// prints every record to the console.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // File.ReadAllText opens, reads and closes the schema file in one call,
    // so no reader is left undisposed.
    string schema = File.ReadAllText("user.avsc");
    Avro.Schema avschema = Avro.Schema.Parse(schema);

    // Pass 1: read the container file as specific (generated) records.
    // The using block closes the file even if reading throws.
    using (Stream inStr = new FileStream("users.avro", FileMode.Open))
    {
        IFileReader<User> dataFileReader = DataFileReader<User>.OpenReader(inStr, avschema);
        while (dataFileReader.HasNext())
        {
            User record = dataFileReader.Next();
            Console.WriteLine("Specific Obj Read ==>" + record.name + ":" + record.favorite_color + ":" + record.favorite_number);
        }
    }

    // Pass 2: read the same file again as generic records.
    using (Stream inStr = new FileStream("users.avro", FileMode.Open))
    {
        IFileReader<GenericRecord> gdataFileReader = DataFileReader<GenericRecord>.OpenReader(inStr, avschema);
        while (gdataFileReader.HasNext())
        {
            GenericRecord grecord = gdataFileReader.Next();
            Console.WriteLine("Generic mode of read==>" + grecord["name"] + ":" + grecord["favorite_color"] + ":" + grecord["favorite_number"]);
        }
    }

    // Keep the console window open until the user confirms.
    Console.Write("Hit ENTER to Close:");
    Console.ReadLine();
}
/// <summary>
/// Determines the Avro writer schema for the type parameter <c>T</c>, then
/// creates the writer for it and caches the schema's string form.
/// </summary>
/// <exception cref="ArgumentException">
/// <c>T</c> is not int, bool, double, string, float, long, byte[], an
/// <c>ISpecificRecord</c> implementation or a <c>SpecificFixed</c> subclass.
/// </exception>
private void Initialize()
{
    var targetType = typeof(T);

    bool isGeneratedType =
        typeof(ISpecificRecord).IsAssignableFrom(targetType) ||
        targetType.IsSubclassOf(typeof(SpecificFixed));

    if (isGeneratedType)
    {
        // Generated types expose their schema through a public static _SCHEMA field.
        var schemaField = targetType.GetField("_SCHEMA", BindingFlags.Public | BindingFlags.Static);
        WriterSchema = (Avro.Schema)schemaField.GetValue(null);
    }
    else
    {
        // Map the supported .NET primitive onto the Avro primitive schema name.
        string primitiveName;
        if (targetType == typeof(int))
        {
            primitiveName = "int";
        }
        else if (targetType == typeof(bool))
        {
            primitiveName = "boolean";
        }
        else if (targetType == typeof(double))
        {
            primitiveName = "double";
        }
        else if (targetType == typeof(string))
        {
            // Note: a union with null would match the .NET string type more
            // exactly; plain "string" is kept for consistency with the Java
            // avro serializer.
            primitiveName = "string";
        }
        else if (targetType == typeof(float))
        {
            primitiveName = "float";
        }
        else if (targetType == typeof(long))
        {
            primitiveName = "long";
        }
        else if (targetType == typeof(byte[]))
        {
            // Note: a union with null would match the .NET byte[] type more
            // exactly; plain "bytes" is kept for consistency with the Java
            // avro serializer.
            primitiveName = "bytes";
        }
        else
        {
            throw new ArgumentException(
                $"{nameof(AvroSerializer<T>)} " +
                "only accepts type parameters of int, bool, double, string, float, " +
                "long, byte[], instances of ISpecificRecord and subclasses of SpecificFixed."
            );
        }

        WriterSchema = Avro.Schema.Parse(primitiveName);
    }

    avroWriter = new SpecificWriter<T>(WriterSchema);
    writerSchemaString = WriterSchema.ToString();
}
/// <summary>
/// Creates the underlying datum reader for <c>T</c>: a
/// <c>SpecificDatumReader</c> when <c>T</c> implements <c>ISpecificRecord</c>,
/// otherwise a <c>ReflectReader</c>.
/// </summary>
/// <param name="writeSchema">Schema passed as the first argument to the chosen reader.</param>
/// <param name="readSchema">Schema passed as the second argument to the chosen reader.</param>
public AvroReader(Avro.Schema writeSchema, Avro.Schema readSchema)
{
    if (!typeof(ISpecificRecord).IsAssignableFrom(typeof(T)))
    {
        _reader = new ReflectReader<T>(writeSchema, readSchema);
        return;
    }

    _reader = new SpecificDatumReader<T>(writeSchema, readSchema);
}
/// <summary>
/// Stores the schema and creates the underlying datum writer for <c>T</c>: a
/// <c>SpecificDatumWriter</c> when <c>T</c> implements <c>ISpecificRecord</c>,
/// otherwise a <c>ReflectWriter</c>.
/// </summary>
/// <param name="avroSchema">Schema the writer is created with.</param>
public AvroWriter(Avro.Schema avroSchema)
{
    _schema = avroSchema;

    if (!typeof(ISpecificRecord).IsAssignableFrom(typeof(T)))
    {
        _writer = new ReflectWriter<T>(_schema);
        return;
    }

    _writer = new SpecificDatumWriter<T>(avroSchema);
}
/// <summary>
/// Stores the schema registry client and determines the reader schema for
/// the type parameter <c>T</c>.
/// </summary>
/// <param name="schemaRegistryClient">Schema registry client kept for later use.</param>
/// <exception cref="ArgumentException">
/// <c>T</c> is not an <c>ISpecificRecord</c> implementation, int, bool,
/// double, string, float, long, byte[] or <c>Null</c>.
/// </exception>
public SpecificDeserializerImpl(ISchemaRegistryClient schemaRegistryClient)
{
    this.schemaRegistryClient = schemaRegistryClient;

    var targetType = typeof(T);

    if (typeof(ISpecificRecord).IsAssignableFrom(targetType))
    {
        // Generated record types expose their schema through a public static _SCHEMA field.
        var schemaField = targetType.GetField("_SCHEMA", BindingFlags.Public | BindingFlags.Static);
        ReaderSchema = (Avro.Schema)schemaField.GetValue(null);
        return;
    }

    // Map the supported type onto the Avro primitive schema name.
    string primitiveName;
    if (targetType == typeof(int))
    {
        primitiveName = "int";
    }
    else if (targetType == typeof(bool))
    {
        primitiveName = "boolean";
    }
    else if (targetType == typeof(double))
    {
        primitiveName = "double";
    }
    else if (targetType == typeof(string))
    {
        primitiveName = "string";
    }
    else if (targetType == typeof(float))
    {
        primitiveName = "float";
    }
    else if (targetType == typeof(long))
    {
        primitiveName = "long";
    }
    else if (targetType == typeof(byte[]))
    {
        primitiveName = "bytes";
    }
    else if (targetType == typeof(Null))
    {
        primitiveName = "null";
    }
    else
    {
        throw new ArgumentException(
            $"{nameof(AvroDeserializer<T>)} " +
            "only accepts type parameters of int, bool, double, string, float, " +
            "long, byte[], instances of ISpecificRecord and subclasses of SpecificFixed."
        );
    }

    ReaderSchema = Avro.Schema.Parse(primitiveName);
}
/// <inheritdoc/>
public async Task<GenericRecord> Deserialize(string topic, byte[] array)
{
    try
    {
        // Note: topic is not necessary for deserialization (or knowing if it's a key
        // or value) only the schema id is needed.

        // Framing is 1 magic byte + 4 schema id bytes, so anything shorter is invalid.
        if (array.Length < 5)
        {
            throw new InvalidDataException($"Expecting data framing of length 5 bytes or more but total data size is {array.Length} bytes");
        }

        using (var stream = new MemoryStream(array))
        using (var reader = new BinaryReader(stream))
        {
            var magicByte = reader.ReadByte();
            if (magicByte != Constants.MagicByte)
            {
                throw new InvalidDataException($"Expecting data with Confluent Schema Registry framing. Magic byte was {array[0]}, expecting {Constants.MagicByte}");
            }

            // The schema id is stored in network byte order.
            var writerId = IPAddress.NetworkToHostOrder(reader.ReadInt32());

            DatumReader<GenericRecord> datumReader;
            await deserializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);
            try
            {
                datumReaderBySchemaId.TryGetValue(writerId, out datumReader);
                if (datumReader == null)
                {
                    // TODO: If any of this cache fills up, this is probably an
                    // indication of misuse of the deserializer. Ideally we would do
                    // something more sophisticated than the below + not allow
                    // the misuse to keep happening without warning.
                    if (datumReaderBySchemaId.Count > schemaRegistryClient.MaxCachedSchemas)
                    {
                        datumReaderBySchemaId.Clear();
                    }

                    var writerSchemaResult = await schemaRegistryClient.GetSchemaAsync(writerId).ConfigureAwait(continueOnCapturedContext: false);
                    if (writerSchemaResult.SchemaType != SchemaType.Avro)
                    {
                        // Interpolated so the actual schema type appears in the message.
                        throw new InvalidOperationException($"Expecting writer schema to have type Avro, not {writerSchemaResult.SchemaType}");
                    }

                    Avro.Schema writerSchema = null;
                    if (writerSchemaResult.References.Any() && IsUnion(writerSchemaResult.SchemaString))
                    {
                        // Build a JSON union "[schema1, schema2, ...]" from the referenced schemas.
                        StringBuilder schemaBuilder = new StringBuilder();
                        schemaBuilder.Append("[");
                        foreach (var refSchema in writerSchemaResult.References)
                        {
                            var regSchema = await schemaRegistryClient.GetRegisteredSchemaAsync(refSchema.Subject, refSchema.Version)
                                .ConfigureAwait(continueOnCapturedContext: false);

                            Avro.Schema schema = Avro.Schema.Parse(regSchema.SchemaString);
                            if (schema.Tag != Avro.Schema.Type.Record)
                            {
                                throw new NotSupportedException("Only union schemas containing references to a record are supported for now");
                            }

                            // Separate from the previous element; the builder holds only
                            // the opening "[" until the first schema has been appended.
                            if (schemaBuilder.Length > 1)
                            {
                                schemaBuilder.Append(", ");
                            }
                            schemaBuilder.Append(regSchema.SchemaString);
                        }
                        schemaBuilder.Append("]");

                        writerSchema = global::Avro.Schema.Parse(schemaBuilder.ToString());
                    }
                    else
                    {
                        writerSchema = global::Avro.Schema.Parse(writerSchemaResult.SchemaString);
                    }

                    // The writer schema is used as the reader schema as well.
                    datumReader = new GenericReader<GenericRecord>(writerSchema, writerSchema);
                    datumReaderBySchemaId[writerId] = datumReader;
                }
            }
            finally
            {
                deserializeMutex.Release();
            }

            // The stream is positioned just past the 5 framing bytes at this point.
            return datumReader.Read(default(GenericRecord), new BinaryDecoder(stream));
        }
    }
    catch (AggregateException e)
    {
        // Surface the underlying exception rather than the aggregate wrapper.
        throw e.InnerException;
    }
}
/// <summary>
///     Serialize GenericRecord instance to a byte array in Avro format. The serialized
///     data is preceded by a "magic byte" (1 byte) and the id of the schema as registered
///     in Confluent's Schema Registry (4 bytes, network byte order). This call may block or throw
///     on first use for a particular topic during schema registration.
/// </summary>
/// <param name="topic">
///     The topic associated with the data.
/// </param>
/// <param name="data">
///     The object to serialize.
/// </param>
/// <param name="isKey">
///     whether or not the data represents a message key.
/// </param>
/// <returns>
///     <paramref name="data" /> serialized as a byte array.
/// </returns>
public async Task<byte[]> Serialize(string topic, GenericRecord data, bool isKey)
{
    try
    {
        int schemaId;
        global::Avro.Schema writerSchema;
        await serializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);
        try
        {
            // TODO: If any of these caches fills up, this is probably an
            // indication of misuse of the serializer. Ideally we would do
            // something more sophisticated than the below + not allow
            // the misuse to keep happening without warning.
            if (knownSchemas.Count > schemaRegistryClient.MaxCachedSchemas ||
                registeredSchemas.Count > schemaRegistryClient.MaxCachedSchemas ||
                schemaIds.Count > schemaRegistryClient.MaxCachedSchemas)
            {
                knownSchemas.Clear();
                registeredSchemas.Clear();
                schemaIds.Clear();
            }

            // Determine a schema string corresponding to the schema object.
            // TODO: It would be more efficient to use a hash function based
            // on the instance reference, not the implementation provided by
            // Schema.
            writerSchema = data.Schema;
            string writerSchemaString;
            if (!knownSchemas.TryGetValue(writerSchema, out writerSchemaString))
            {
                writerSchemaString = writerSchema.ToString();
                knownSchemas.Add(writerSchema, writerSchemaString);
            }

            // Verify schema compatibility (& register as required) + get the
            // id corresponding to the schema.

            // TODO: Again, the hash functions in use below are potentially
            // slow since writerSchemaString is potentially long. It would be
            // better to use hash functions based on the writerSchemaString
            // object reference, not value.

            string subject = this.subjectNameStrategy != null
                // use the subject name strategy specified in the serializer config if available.
                ? this.subjectNameStrategy(new SerializationContext(isKey ? MessageComponentType.Key : MessageComponentType.Value, topic), data.Schema.Fullname)
                // else fall back to the deprecated config from (or default as currently supplied by) SchemaRegistry.
                : isKey
                    ? schemaRegistryClient.ConstructKeySubjectName(topic, data.Schema.Fullname)
                    : schemaRegistryClient.ConstructValueSubjectName(topic, data.Schema.Fullname);

            var subjectSchemaPair = new KeyValuePair<string, string>(subject, writerSchemaString);
            if (!registeredSchemas.Contains(subjectSchemaPair))
            {
                int newSchemaId;

                // first usage: register/get schema to check compatibility
                if (autoRegisterSchema)
                {
                    newSchemaId = await schemaRegistryClient.RegisterSchemaAsync(subject, writerSchemaString).ConfigureAwait(continueOnCapturedContext: false);
                }
                // https://www.confluent.io/blog/multiple-event-types-in-the-same-kafka-topic/
                else if (useLatestSchema)
                {
                    RegisteredSchema regSchema = await schemaRegistryClient.GetLatestSchemaAsync(subject)
                        .ConfigureAwait(continueOnCapturedContext: false);

                    // Do we have an Avro union with schema references?
                    if (regSchema.References.Any() && IsUnion(regSchema.SchemaString))
                    {
                        StringBuilder schemaBuilder = new StringBuilder();
                        schemaBuilder.Append("[");

                        // Loop over the schema references and look each one up in the
                        // schema registry to build the union of the referenced schemas.
                        foreach (var refSchemaString in regSchema.References)
                        {
                            RegisteredSchema registeredRefSchema = await schemaRegistryClient.GetRegisteredSchemaAsync(refSchemaString.Subject, refSchemaString.Version)
                                .ConfigureAwait(continueOnCapturedContext: false);

                            Avro.Schema refSchema = Avro.Schema.Parse(registeredRefSchema.SchemaString);
                            if (refSchema.Tag != Avro.Schema.Type.Record)
                            {
                                throw new NotSupportedException("Only union schemas containing references to a record are supported for now");
                            }

                            // Separate from the previous element; the builder holds only
                            // the opening "[" until the first schema has been appended.
                            if (schemaBuilder.Length > 1)
                            {
                                schemaBuilder.Append(",");
                            }
                            schemaBuilder.Append(registeredRefSchema.SchemaString);
                        }

                        schemaBuilder.Append("]");
                        unionSchemas[writerSchema] = global::Avro.Schema.Parse(schemaBuilder.ToString());
                        newSchemaId = regSchema.Id;
                    }
                    else
                    {
                        newSchemaId = await schemaRegistryClient.GetSchemaIdAsync(subject, writerSchemaString)
                            .ConfigureAwait(continueOnCapturedContext: false);
                    }
                }
                else
                {
                    newSchemaId = await schemaRegistryClient.GetSchemaIdAsync(subject, writerSchemaString).ConfigureAwait(continueOnCapturedContext: false);
                }

                // Read the cached id BEFORE the caches are cleared below, so the
                // error message can still report it.
                int existingSchemaId;
                if (!schemaIds.TryGetValue(writerSchemaString, out existingSchemaId))
                {
                    schemaIds.Add(writerSchemaString, newSchemaId);
                }
                else if (existingSchemaId != newSchemaId)
                {
                    schemaIds.Clear();
                    registeredSchemas.Clear();
                    throw new KafkaException(new Error(
                        isKey ? ErrorCode.Local_KeySerialization : ErrorCode.Local_ValueSerialization,
                        $"Duplicate schema registration encountered: Schema ids {existingSchemaId} and {newSchemaId} are associated with the same schema."));
                }

                registeredSchemas.Add(subjectSchemaPair);
            }

            schemaId = schemaIds[writerSchemaString];
        }
        finally
        {
            serializeMutex.Release();
        }

        // If a union schema was built for this writer schema, write with the union.
        // NOTE(review): unionSchemas is read here after the mutex is released but
        // written while it is held - confirm the collection tolerates that.
        Avro.Schema unionSchema;
        if (unionSchemas.TryGetValue(writerSchema, out unionSchema))
        {
            writerSchema = unionSchema;
        }

        using (var stream = new MemoryStream(initialBufferSize))
        using (var writer = new BinaryWriter(stream))
        {
            // Framing: magic byte, then the schema id in network byte order.
            stream.WriteByte(Constants.MagicByte);
            writer.Write(IPAddress.HostToNetworkOrder(schemaId));

            new GenericWriter<GenericRecord>(writerSchema)
                .Write(data, new BinaryEncoder(stream));

            return stream.ToArray();
        }
    }
    catch (AggregateException e)
    {
        // Surface the underlying exception rather than the aggregate wrapper.
        throw e.InnerException;
    }
}
/// <summary>
/// Reads Avro records from <paramref name="input"/> and yields one read-only
/// row per record, filling each column of the output schema from the record
/// field with the same name.
/// </summary>
/// <param name="input">Unstructured input wrapped in a stream for the Avro file reader.</param>
/// <param name="output">Row whose schema drives the column lookup and which receives the values.</param>
/// <returns>A lazily produced sequence of <c>output.AsReadOnly()</c>, one per Avro record.</returns>
/// <exception cref="Exception">
/// A column is missing from the record, or holds a non-null value of the wrong
/// type, while <c>_ignoreColumnMismatches</c> is false.
/// </exception>
public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
{
    // Optional explicitly configured schema; stays null when none was supplied.
    Avro.Schema explicitSchema = null;
    if (!string.IsNullOrWhiteSpace(_avroSchema))
    {
        explicitSchema = Avro.Schema.Parse(_avroSchema);
    }

    IFileReader<GenericRecord> avroReader = null;
    using (var avroStream = new UnstructuredReaderAvroWrapper(input))
    {
        // First try the schema reported by the file reader itself, when requested.
        var hasFileSchema = false;
        if (_mapToInternalSchema)
        {
            avroReader = DataFileReader<GenericRecord>.OpenReader(avroStream);
            var fileSchema = avroReader.GetSchema();
            hasFileSchema = fileSchema != null;
        }

        // Otherwise rewind and open the file with the configured schema.
        if (!hasFileSchema)
        {
            avroStream.Position = 0;
            avroReader = DataFileReader<GenericRecord>.OpenReader(avroStream, explicitSchema);
        }

        while (avroReader != null && avroReader.HasNext())
        {
            var record = avroReader.Next();

            foreach (var column in output.Schema)
            {
                // Column has no matching field in the record.
                if (!record.TryGetValue(column.Name, out var value))
                {
                    if (!_ignoreColumnMismatches)
                    {
                        var fieldsString = string.Join(", ", record.Schema.Fields.Select(f => f.Name));
                        throw new Exception($"Column mismatch. Output schema column {column.Name} does not exist in avro schema fields: [{fieldsString}]");
                    }

                    output.Set(column.Name, column.DefaultValue);
                    continue;
                }

                // Value is directly usable for the column type.
                if (column.Type.IsInstanceOfType(value))
                {
                    output.Set(column.Name, value);
                    continue;
                }

                // A non-null value of the wrong type is an error unless mismatches are ignored.
                if (value != null && !_ignoreColumnMismatches)
                {
                    throw new Exception($"Column type mismatch. Output column {column.Name} of type {column.Type} is not an instance of avro file type {value.GetType()}");
                }

                output.Set(column.Name, column.DefaultValue);
            }

            yield return output.AsReadOnly();
        }
    }
}
/// <summary>
/// Stores the serializer settings, determines the Avro writer schema for the
/// type parameter <c>T</c>, and creates the writer for it.
/// </summary>
/// <param name="schemaRegistryClient">Schema registry client kept for later use.</param>
/// <param name="autoRegisterSchema">Stored auto-registration setting.</param>
/// <param name="initialBufferSize">Stored initial buffer size.</param>
/// <param name="isKey">Stored flag telling whether this serializer handles message keys.</param>
/// <exception cref="ArgumentException">
/// <c>T</c> is not an <c>ISpecificRecord</c> implementation, int, bool,
/// double, string, float, long, byte[] or <c>Null</c>.
/// </exception>
public SpecificSerializerImpl(
    ISchemaRegistryClient schemaRegistryClient,
    bool autoRegisterSchema,
    int initialBufferSize,
    bool isKey)
{
    this.schemaRegistryClient = schemaRegistryClient;
    this.autoRegisterSchema = autoRegisterSchema;
    this.initialBufferSize = initialBufferSize;
    this.isKey = isKey;

    var targetType = typeof(T);

    if (typeof(ISpecificRecord).IsAssignableFrom(targetType))
    {
        // Generated record types expose their schema through a public static _SCHEMA field.
        var schemaField = targetType.GetField("_SCHEMA", BindingFlags.Public | BindingFlags.Static);
        writerSchema = (Avro.Schema)schemaField.GetValue(null);
    }
    else
    {
        // Map the supported type onto the Avro primitive schema name.
        string primitiveName;
        if (targetType == typeof(int))
        {
            primitiveName = "int";
        }
        else if (targetType == typeof(bool))
        {
            primitiveName = "boolean";
        }
        else if (targetType == typeof(double))
        {
            primitiveName = "double";
        }
        else if (targetType == typeof(string))
        {
            // Note: a union with null would match the .NET string type more
            // exactly; plain "string" is kept for consistency with the Java
            // avro serializer.
            primitiveName = "string";
        }
        else if (targetType == typeof(float))
        {
            primitiveName = "float";
        }
        else if (targetType == typeof(long))
        {
            primitiveName = "long";
        }
        else if (targetType == typeof(byte[]))
        {
            // Note: a union with null would match the .NET byte[] type more
            // exactly; plain "bytes" is kept for consistency with the Java
            // avro serializer.
            primitiveName = "bytes";
        }
        else if (targetType == typeof(Null))
        {
            primitiveName = "null";
        }
        else
        {
            throw new ArgumentException(
                $"{nameof(AvroSerializer<T>)} " +
                "only accepts type parameters of int, bool, double, string, float, " +
                "long, byte[], instances of ISpecificRecord and subclasses of SpecificFixed."
            );
        }

        writerSchema = Avro.Schema.Parse(primitiveName);
    }

    avroWriter = new SpecificWriter<T>(writerSchema);
    writerSchemaString = writerSchema.ToString();
}
/// <summary>
/// Creates a generic Avro record wrapper. The schema version and field list
/// are passed to the base constructor; the Avro schema and the underlying
/// record are stored on this instance.
/// </summary>
/// <param name="schemaVersion">Schema version bytes forwarded to the base class.</param>
/// <param name="schema">Avro schema stored on this instance.</param>
/// <param name="fields">Field list forwarded to the base class.</param>
/// <param name="record">Underlying Avro generic record stored on this instance.</param>
public GenericAvroRecord(
    byte[] schemaVersion,
    Avro.Schema schema,
    IList<Field> fields,
    Avro.Generic.GenericRecord record)
    : base(schemaVersion, fields)
{
    _record = record;
    _schema = schema;
}
/// <summary>
/// Creates a multi-version generic Avro reader. A new
/// <c>GenericAvroReader</c> built from <paramref name="readerSchema"/> is
/// handed to the base constructor along with the flag and the schema itself.
/// </summary>
/// <param name="useProvidedSchemaAsReaderSchema">Flag forwarded unchanged to the base class.</param>
/// <param name="readerSchema">Schema used for the new reader and forwarded to the base class.</param>
public MultiVersionGenericAvroReader(
    bool useProvidedSchemaAsReaderSchema,
    Avro.Schema readerSchema)
    : base(
        useProvidedSchemaAsReaderSchema,
        new GenericAvroReader(readerSchema),
        readerSchema)
{
}
/// <summary>
/// Initializes the reader. The schema reader and reader schema are passed to
/// the base constructor; the flag is stored on this instance.
/// </summary>
/// <param name="useProvidedSchemaAsReaderSchema">Flag stored on this instance.</param>
/// <param name="providerSchemaReader">Schema reader forwarded to the base class.</param>
/// <param name="readerSchema">Reader schema forwarded to the base class.</param>
protected internal AbstractMultiVersionGenericReader(
    bool useProvidedSchemaAsReaderSchema,
    ISchemaReader<IGenericRecord> providerSchemaReader,
    Avro.Schema readerSchema)
    : base(providerSchemaReader, readerSchema)
{
    this.useProvidedSchemaAsReaderSchema = useProvidedSchemaAsReaderSchema;
}
/// <summary>
/// Initializes the reader. The schema reader is passed to the base
/// constructor and the reader schema is stored in <c>ReaderSchema</c>.
/// </summary>
/// <param name="providerSchemaReader">Schema reader forwarded to the base class.</param>
/// <param name="readerSchema">Schema assigned to <c>ReaderSchema</c>.</param>
public AbstractMultiVersionAvroBaseReader(
    ISchemaReader<T> providerSchemaReader,
    Avro.Schema readerSchema)
    : base(providerSchemaReader)
{
    this.ReaderSchema = readerSchema;
}
/// <summary>
/// Initializes the schema from <paramref name="schemaInfo"/>: its
/// <c>Schema</c> bytes are decoded as UTF-8 text and parsed with
/// <c>SchemaUtils.ParseAvroSchema</c>.
/// </summary>
/// <param name="schemaInfo">Schema information; also forwarded to the base class.</param>
public AvroBaseStructSchema(ISchemaInfo schemaInfo)
    : base(schemaInfo)
{
    var schemaText = Encoding.UTF8.GetString(schemaInfo.Schema);
    schema = SchemaUtils.ParseAvroSchema(schemaText);
}