Пример #1
0
        /// <summary>
        ///     Reads "users.avro" twice using the schema stored in "user.avsc": first as
        ///     generated <c>User</c> objects, then as <c>GenericRecord</c> instances,
        ///     printing three fields of every record to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Dispose the reader so the schema file handle is released immediately.
            String schema;
            using (var schemaReader = new StreamReader("user.avsc"))
            {
                schema = schemaReader.ReadToEnd();
            }

            Avro.Schema avschema = Avro.Schema.Parse(schema);

            // Pass 1: decode into the generated User class. The data file is only
            // read, so it is opened read-only, and the using block guarantees the
            // handle is closed even if decoding throws.
            using (Stream inStr = new FileStream("users.avro", FileMode.Open, FileAccess.Read))
            {
                IFileReader <User> dataFileReader = DataFileReader <User> .OpenReader(inStr, avschema);

                while (dataFileReader.HasNext())
                {
                    User record = dataFileReader.Next();
                    Console.WriteLine("Specific Obj Read ==>" + record.name + ":" + record.favorite_color + ":" + record.favorite_number);
                }
            }

            // Pass 2: decode the same file into GenericRecord instances.
            using (Stream inStr = new FileStream("users.avro", FileMode.Open, FileAccess.Read))
            {
                IFileReader <GenericRecord> gdataFileReader = DataFileReader <GenericRecord> .OpenReader(inStr, avschema);

                while (gdataFileReader.HasNext())
                {
                    GenericRecord grecord = gdataFileReader.Next();
                    Console.WriteLine("Generic mode of read==>" + grecord["name"] + ":" + grecord["favorite_color"] + ":" + grecord["favorite_number"]);
                }
            }

            // Keep the console window open until the user confirms.
            Console.Write("Hit ENTER to Close:");
            Console.ReadLine();
        }
Пример #2
0
        /// <summary>
        ///     Determines the Avro writer schema for the type parameter <c>T</c>, then
        ///     creates the <c>SpecificWriter</c> for it and caches the schema's string form.
        /// </summary>
        /// <exception cref="ArgumentException">
        ///     <c>T</c> is not one of the supported primitive types, an
        ///     <c>ISpecificRecord</c> implementation or a <c>SpecificFixed</c> subclass.
        /// </exception>
        private void Initialize()
        {
            Type targetType = typeof(T);

            bool carriesOwnSchema = typeof(ISpecificRecord).IsAssignableFrom(targetType) ||
                                    targetType.IsSubclassOf(typeof(SpecificFixed));

            if (carriesOwnSchema)
            {
                // These types expose their schema through a public static _SCHEMA field.
                var schemaField = typeof(T).GetField("_SCHEMA", BindingFlags.Public | BindingFlags.Static);
                WriterSchema = (Avro.Schema) schemaField.GetValue(null);
            }
            else
            {
                // Map the supported CLR types onto their Avro primitive names.
                //
                // Note: for string and byte[] it would arguably be better to use a
                // union with null, to exactly match the .NET types, however we don't
                // for consistency with the Java avro serializer.
                string avroTypeName =
                    targetType.Equals(typeof(int))    ? "int"
                  : targetType.Equals(typeof(bool))   ? "boolean"
                  : targetType.Equals(typeof(double)) ? "double"
                  : targetType.Equals(typeof(string)) ? "string"
                  : targetType.Equals(typeof(float))  ? "float"
                  : targetType.Equals(typeof(long))   ? "long"
                  : targetType.Equals(typeof(byte[])) ? "bytes"
                  : null;

                if (avroTypeName == null)
                {
                    throw new ArgumentException(
                              $"{nameof(AvroSerializer<T>)} " +
                              "only accepts type parameters of int, bool, double, string, float, " +
                              "long, byte[], instances of ISpecificRecord and subclasses of SpecificFixed."
                              );
                }

                WriterSchema = Avro.Schema.Parse(avroTypeName);
            }

            avroWriter         = new SpecificWriter <T>(WriterSchema);
            writerSchemaString = WriterSchema.ToString();
        }
Пример #3
0
        /// <summary>
        ///     Creates the underlying datum reader: a <c>SpecificDatumReader</c> when
        ///     <c>T</c> implements <c>ISpecificRecord</c>, otherwise a <c>ReflectReader</c>.
        /// </summary>
        /// <param name="writeSchema">Schema passed to the reader as the writer schema.</param>
        /// <param name="readSchema">Schema passed to the reader as the reader schema.</param>
        public AvroReader(Avro.Schema writeSchema, Avro.Schema readSchema)
        {
            bool isSpecificRecord = typeof(ISpecificRecord).IsAssignableFrom(typeof(T));

            if (!isSpecificRecord)
            {
                _reader = new ReflectReader <T>(writeSchema, readSchema);
                return;
            }

            _reader = new SpecificDatumReader <T>(writeSchema, readSchema);
        }
Пример #4
0
        /// <summary>
        ///     Stores the schema and creates the underlying datum writer: a
        ///     <c>SpecificDatumWriter</c> when <c>T</c> implements <c>ISpecificRecord</c>,
        ///     otherwise a <c>ReflectWriter</c>.
        /// </summary>
        /// <param name="avroSchema">Schema the writer is constructed with.</param>
        public AvroWriter(Avro.Schema avroSchema)
        {
            _schema = avroSchema;

            bool isSpecificRecord = typeof(ISpecificRecord).IsAssignableFrom(typeof(T));

            if (!isSpecificRecord)
            {
                _writer = new ReflectWriter <T>(_schema);
                return;
            }

            _writer = new SpecificDatumWriter <T>(avroSchema);
        }
        /// <summary>
        ///     Stores the schema registry client and determines the Avro reader schema
        ///     for the type parameter <c>T</c>.
        /// </summary>
        /// <param name="schemaRegistryClient">Client stored for later schema lookups.</param>
        /// <exception cref="ArgumentException">
        ///     <c>T</c> is neither an <c>ISpecificRecord</c> implementation nor one of
        ///     int, bool, double, string, float, long, byte[] or <c>Null</c>.
        /// </exception>
        public SpecificDeserializerImpl(ISchemaRegistryClient schemaRegistryClient)
        {
            this.schemaRegistryClient = schemaRegistryClient;

            if (typeof(ISpecificRecord).IsAssignableFrom(typeof(T)))
            {
                // ISpecificRecord types are read through their public static _SCHEMA field.
                ReaderSchema = (Avro.Schema) typeof(T).GetField("_SCHEMA", BindingFlags.Public | BindingFlags.Static).GetValue(null);
            }
            else if (typeof(T).Equals(typeof(int)))
            {
                ReaderSchema = Avro.Schema.Parse("int");
            }
            else if (typeof(T).Equals(typeof(bool)))
            {
                ReaderSchema = Avro.Schema.Parse("boolean");
            }
            else if (typeof(T).Equals(typeof(double)))
            {
                ReaderSchema = Avro.Schema.Parse("double");
            }
            else if (typeof(T).Equals(typeof(string)))
            {
                ReaderSchema = Avro.Schema.Parse("string");
            }
            else if (typeof(T).Equals(typeof(float)))
            {
                ReaderSchema = Avro.Schema.Parse("float");
            }
            else if (typeof(T).Equals(typeof(long)))
            {
                ReaderSchema = Avro.Schema.Parse("long");
            }
            else if (typeof(T).Equals(typeof(byte[])))
            {
                ReaderSchema = Avro.Schema.Parse("bytes");
            }
            else if (typeof(T).Equals(typeof(Null)))
            {
                ReaderSchema = Avro.Schema.Parse("null");
            }
            else
            {
                // The message lists exactly the types accepted by the branches above:
                // Null is accepted, SpecificFixed subclasses are not.
                throw new ArgumentException(
                          $"{nameof(AvroDeserializer<T>)} " +
                          "only accepts type parameters of int, bool, double, string, float, " +
                          "long, byte[], Null and instances of ISpecificRecord."
                          );
            }
        }
Пример #6
0
        /// <inheritdoc/>
        /// <remarks>
        ///     The payload must start with the magic byte followed by a 4-byte schema id
        ///     in network byte order. A datum reader is created per schema id and cached;
        ///     cache access is serialized through <c>deserializeMutex</c>.
        /// </remarks>
        public async Task <GenericRecord> Deserialize(string topic, byte[] array)
        {
            try
            {
                // Note: topic is not necessary for deserialization (or knowing if it's a key
                // or value) only the schema id is needed.

                if (array.Length < 5)
                {
                    throw new InvalidDataException($"Expecting data framing of length 5 bytes or more but total data size is {array.Length} bytes");
                }

                using (var stream = new MemoryStream(array))
                    using (var reader = new BinaryReader(stream))
                    {
                        var magicByte = reader.ReadByte();
                        if (magicByte != Constants.MagicByte)
                        {
                            throw new InvalidDataException($"Expecting data with Confluent Schema Registry framing. Magic byte was {array[0]}, expecting {Constants.MagicByte}");
                        }
                        // The schema id is stored big-endian; convert to host order.
                        var writerId = IPAddress.NetworkToHostOrder(reader.ReadInt32());

                        DatumReader <GenericRecord> datumReader;
                        await deserializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);

                        try
                        {
                            datumReaderBySchemaId.TryGetValue(writerId, out datumReader);
                            if (datumReader == null)
                            {
                                // TODO: If any of this cache fills up, this is probably an
                                // indication of misuse of the deserializer. Ideally we would do
                                // something more sophisticated than the below + not allow
                                // the misuse to keep happening without warning.
                                if (datumReaderBySchemaId.Count > schemaRegistryClient.MaxCachedSchemas)
                                {
                                    datumReaderBySchemaId.Clear();
                                }

                                var writerSchemaResult = await schemaRegistryClient.GetSchemaAsync(writerId).ConfigureAwait(continueOnCapturedContext: false);

                                if (writerSchemaResult.SchemaType != SchemaType.Avro)
                                {
                                    // Interpolated so the message reports the actual schema type.
                                    throw new InvalidOperationException($"Expecting writer schema to have type Avro, not {writerSchemaResult.SchemaType}");
                                }

                                Avro.Schema writerSchema = null;
                                if (writerSchemaResult.References.Any() && IsUnion(writerSchemaResult.SchemaString))
                                {
                                    // Build a union schema ("[s1, s2, ...]") out of the
                                    // schema strings of every referenced schema.
                                    StringBuilder schemaBuilder = new StringBuilder();
                                    schemaBuilder.Append("[");
                                    foreach (var refSchema in writerSchemaResult.References)
                                    {
                                        var regSchema = await schemaRegistryClient.GetRegisteredSchemaAsync(refSchema.Subject,
                                                                                                            refSchema.Version)
                                                        .ConfigureAwait(continueOnCapturedContext: false);

                                        Avro.Schema schema = Avro.Schema.Parse(regSchema.SchemaString);

                                        if (schema.Tag != Avro.Schema.Type.Record)
                                        {
                                            throw new NotSupportedException("Only union schemas containing references to a record are supported for now");
                                        }

                                        schemaBuilder.Append(regSchema.SchemaString);
                                        // Separate entries with a comma, except after the last one.
                                        if (writerSchemaResult.References.Last() != refSchema)
                                        {
                                            schemaBuilder.Append(", ");
                                        }
                                    }

                                    schemaBuilder.Append("]");

                                    writerSchema = global::Avro.Schema.Parse(schemaBuilder.ToString());
                                }
                                else
                                {
                                    writerSchema = global::Avro.Schema.Parse(writerSchemaResult.SchemaString);
                                }

                                // The writer schema is used as the reader schema as well.
                                datumReader = new GenericReader <GenericRecord>(writerSchema, writerSchema);
                                datumReaderBySchemaId[writerId] = datumReader;
                            }
                        }
                        finally
                        {
                            deserializeMutex.Release();
                        }

                        // The stream is positioned just past the 5-byte framing header.
                        return(datumReader.Read(default(GenericRecord), new BinaryDecoder(stream)));
                    }
            }
            catch (AggregateException e)
            {
                // Surface the underlying failure rather than the aggregate wrapper.
                throw e.InnerException;
            }
        }
Пример #7
0
        /// <summary>
        ///     Serialize GenericRecord instance to a byte array in Avro format. The serialized
        ///     data is preceded by a "magic byte" (1 byte) and the id of the schema as registered
        ///     in Confluent's Schema Registry (4 bytes, network byte order). This call may block or throw
        ///     on first use for a particular topic during schema registration.
        /// </summary>
        /// <param name="topic">
        ///     The topic associated with the data.
        /// </param>
        /// <param name="data">
        ///     The object to serialize.
        /// </param>
        /// <param name="isKey">
        ///     whether or not the data represents a message key.
        /// </param>
        /// <returns>
        ///     <paramref name="data" /> serialized as a byte array.
        /// </returns>
        /// <exception cref="KafkaException">
        ///     The schema id obtained from the registry differs from the id already
        ///     cached for the same schema string.
        /// </exception>
        /// <exception cref="NotSupportedException">
        ///     The latest registered schema is a union whose references include a
        ///     schema that is not a record.
        /// </exception>
        public async Task <byte[]> Serialize(string topic, GenericRecord data, bool isKey)
        {
            try
            {
                int schemaId;
                global::Avro.Schema writerSchema;
                await serializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);

                try
                {
                    // TODO: If any of these caches fills up, this is probably an
                    // indication of misuse of the serializer. Ideally we would do
                    // something more sophisticated than the below + not allow
                    // the misuse to keep happening without warning.
                    if (knownSchemas.Count > schemaRegistryClient.MaxCachedSchemas ||
                        registeredSchemas.Count > schemaRegistryClient.MaxCachedSchemas ||
                        schemaIds.Count > schemaRegistryClient.MaxCachedSchemas)
                    {
                        knownSchemas.Clear();
                        registeredSchemas.Clear();
                        schemaIds.Clear();
                    }

                    // Determine a schema string corresponding to the schema object.
                    // TODO: It would be more efficient to use a hash function based
                    // on the instance reference, not the implementation provided by
                    // Schema.
                    writerSchema = data.Schema;
                    string writerSchemaString;
                    if (!knownSchemas.TryGetValue(writerSchema, out writerSchemaString))
                    {
                        writerSchemaString = writerSchema.ToString();
                        knownSchemas.Add(writerSchema, writerSchemaString);
                    }

                    // Verify schema compatibility (& register as required) + get the
                    // id corresponding to the schema.

                    // TODO: Again, the hash functions in use below are potentially
                    // slow since writerSchemaString is potentially long. It would be
                    // better to use hash functions based on the writerSchemaString
                    // object reference, not value.

                    string subject = this.subjectNameStrategy != null
                                     // use the subject name strategy specified in the serializer config if available.
                        ? this.subjectNameStrategy(new SerializationContext(isKey ? MessageComponentType.Key : MessageComponentType.Value, topic), data.Schema.Fullname)
                                     // else fall back to the deprecated config from (or default as currently supplied by) SchemaRegistry.
                        : isKey
                            ? schemaRegistryClient.ConstructKeySubjectName(topic, data.Schema.Fullname)
                            : schemaRegistryClient.ConstructValueSubjectName(topic, data.Schema.Fullname);

                    var subjectSchemaPair = new KeyValuePair <string, string>(subject, writerSchemaString);
                    if (!registeredSchemas.Contains(subjectSchemaPair))
                    {
                        int newSchemaId;
                        // first usage: register/get schema to check compatibility
                        if (autoRegisterSchema)
                        {
                            newSchemaId = await schemaRegistryClient.RegisterSchemaAsync(subject, writerSchemaString).ConfigureAwait(continueOnCapturedContext: false);
                        }
                        // https://www.confluent.io/blog/multiple-event-types-in-the-same-kafka-topic/
                        else if (useLatestSchema)
                        {
                            RegisteredSchema regSchema = await schemaRegistryClient.GetLatestSchemaAsync(subject)
                                                         .ConfigureAwait(continueOnCapturedContext: false);

                            // Do we have an Avro union with schema references?
                            if (regSchema.References.Any() && IsUnion(regSchema.SchemaString))
                            {
                                StringBuilder schemaBuilder = new StringBuilder();
                                schemaBuilder.Append("[");
                                // We need to loop over the schema references and perform a schema
                                // registry lookup in order to check compatibility with each
                                // referenced schema.
                                foreach (var refSchemaString in regSchema.References)
                                {
                                    RegisteredSchema registeredRefSchema = await schemaRegistryClient.GetRegisteredSchemaAsync(refSchemaString.Subject,
                                                                                                                               refSchemaString.Version)
                                                                           .ConfigureAwait(continueOnCapturedContext: false);

                                    Avro.Schema refSchema = Avro.Schema.Parse(registeredRefSchema.SchemaString);

                                    if (refSchema.Tag != Avro.Schema.Type.Record)
                                    {
                                        throw new NotSupportedException("Only union schemas containing references to a record are supported for now");
                                    }

                                    schemaBuilder.Append(registeredRefSchema.SchemaString);
                                    // Separate entries with a comma, except after the last one.
                                    if (regSchema.References.Last() != refSchemaString)
                                    {
                                        schemaBuilder.Append(",");
                                    }
                                }

                                schemaBuilder.Append("]");
                                // Remember the union schema to write with; the id sent on the
                                // wire is that of the latest registered (union) schema.
                                unionSchemas[writerSchema] = global::Avro.Schema.Parse(schemaBuilder.ToString());
                                newSchemaId = regSchema.Id;
                            }
                            else
                            {
                                newSchemaId = await schemaRegistryClient.GetSchemaIdAsync(subject, writerSchemaString)
                                              .ConfigureAwait(continueOnCapturedContext: false);
                            }
                        }
                        else
                        {
                            newSchemaId = await schemaRegistryClient.GetSchemaIdAsync(subject, writerSchemaString).ConfigureAwait(continueOnCapturedContext: false);
                        }

                        int cachedSchemaId;
                        if (!schemaIds.TryGetValue(writerSchemaString, out cachedSchemaId))
                        {
                            schemaIds.Add(writerSchemaString, newSchemaId);
                        }
                        else if (cachedSchemaId != newSchemaId)
                        {
                            // The cached id was captured above, before the caches are
                            // cleared: reading schemaIds[...] after Clear() would throw
                            // KeyNotFoundException instead of the intended KafkaException.
                            schemaIds.Clear();
                            registeredSchemas.Clear();
                            throw new KafkaException(new Error(isKey ? ErrorCode.Local_KeySerialization : ErrorCode.Local_ValueSerialization, $"Duplicate schema registration encountered: Schema ids {cachedSchemaId} and {newSchemaId} are associated with the same schema."));
                        }

                        registeredSchemas.Add(subjectSchemaPair);
                    }

                    schemaId = schemaIds[writerSchemaString];
                }
                finally
                {
                    serializeMutex.Release();
                }

                // NOTE(review): unionSchemas is written above while holding serializeMutex
                // but read here without it - confirm the collection is safe for that.
                Avro.Schema unionSchema;
                if (unionSchemas.TryGetValue(writerSchema, out unionSchema))
                {
                    writerSchema = unionSchema;
                }

                using (var stream = new MemoryStream(initialBufferSize))
                    using (var writer = new BinaryWriter(stream))
                    {
                        // Framing: magic byte, then the schema id in network byte order,
                        // then the Avro-encoded record.
                        stream.WriteByte(Constants.MagicByte);
                        writer.Write(IPAddress.HostToNetworkOrder(schemaId));
                        new GenericWriter <GenericRecord>(writerSchema)
                        .Write(data, new BinaryEncoder(stream));
                        return(stream.ToArray());
                    }
            }
            catch (AggregateException e)
            {
                // Surface the underlying failure rather than the aggregate wrapper.
                throw e.InnerException;
            }
        }
Пример #8
0
        /// <summary>
        ///     Reads Avro records from <paramref name="input"/> and yields one output row
        ///     per record, copying each output column from the record field of the same name.
        /// </summary>
        /// <param name="input">Unstructured input wrapped as a stream for the Avro file reader.</param>
        /// <param name="output">Row that is populated and yielded (read-only) for every record.</param>
        /// <returns>A lazily evaluated sequence with one row per Avro record.</returns>
        /// <exception cref="Exception">
        ///     A column is missing from the record, or holds a non-null value of a
        ///     different type, while <c>_ignoreColumnMismatches</c> is false.
        /// </exception>
        public override IEnumerable <IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            // Only parse the configured schema when one was actually supplied.
            Avro.Schema avschema = string.IsNullOrWhiteSpace(_avroSchema)
                ? null
                : Avro.Schema.Parse(_avroSchema);

            using (var stream = new UnstructuredReaderAvroWrapper(input))
            {
                IFileReader <GenericRecord> fileReader = null;
                bool schemaFromFile = false;

                if (_mapToInternalSchema)
                {
                    // Try the schema reported by the file reader first.
                    fileReader     = DataFileReader <GenericRecord> .OpenReader(stream);
                    schemaFromFile = fileReader.GetSchema() != null;
                }

                if (!schemaFromFile)
                {
                    // Rewind and open with the configured schema instead.
                    stream.Position = 0;
                    fileReader      = DataFileReader <GenericRecord> .OpenReader(stream, avschema);
                }

                while (fileReader?.HasNext() == true)
                {
                    var avroRecord = fileReader.Next();

                    foreach (var column in output.Schema)
                    {
                        bool fieldPresent = avroRecord.TryGetValue(column.Name, out var fieldValue);

                        if (!fieldPresent)
                        {
                            if (!_ignoreColumnMismatches)
                            {
                                var fieldsString = string.Join(", ", avroRecord.Schema.Fields.Select(field => field.Name));
                                throw new Exception($"Column mismatch. Output schema column {column.Name} does not exist in avro schema fields: [{fieldsString}]");
                            }

                            output.Set(column.Name, column.DefaultValue);
                            continue;
                        }

                        if (column.Type.IsInstanceOfType(fieldValue))
                        {
                            output.Set(column.Name, fieldValue);
                            continue;
                        }

                        // Type mismatch: tolerated for null values or when mismatches are ignored.
                        if (fieldValue != null && !_ignoreColumnMismatches)
                        {
                            throw new Exception($"Column type mismatch. Output column {column.Name} of type {column.Type} is not an instance of avro file type {fieldValue.GetType()}");
                        }

                        output.Set(column.Name, column.DefaultValue);
                    }

                    yield return(output.AsReadOnly());
                }
            }
        }
        /// <summary>
        ///     Stores the serializer settings, determines the Avro writer schema for the
        ///     type parameter <c>T</c>, and creates the <c>SpecificWriter</c> for it.
        /// </summary>
        /// <param name="schemaRegistryClient">Client stored for later schema registry calls.</param>
        /// <param name="autoRegisterSchema">Stored flag controlling automatic schema registration.</param>
        /// <param name="initialBufferSize">Stored initial buffer size.</param>
        /// <param name="isKey">Stored flag indicating whether message keys are serialized.</param>
        /// <exception cref="ArgumentException">
        ///     <c>T</c> is neither an <c>ISpecificRecord</c> implementation nor one of
        ///     int, bool, double, string, float, long, byte[] or <c>Null</c>.
        /// </exception>
        public SpecificSerializerImpl(
            ISchemaRegistryClient schemaRegistryClient,
            bool autoRegisterSchema,
            int initialBufferSize,
            bool isKey)
        {
            this.schemaRegistryClient = schemaRegistryClient;
            this.autoRegisterSchema   = autoRegisterSchema;
            this.initialBufferSize    = initialBufferSize;
            this.isKey = isKey;

            Type writerType = typeof(T);

            if (typeof(ISpecificRecord).IsAssignableFrom(writerType))
            {
                // ISpecificRecord types are read through their public static _SCHEMA field.
                writerSchema = (Avro.Schema) typeof(T).GetField("_SCHEMA", BindingFlags.Public | BindingFlags.Static).GetValue(null);
            }
            else if (writerType.Equals(typeof(int)))
            {
                writerSchema = Avro.Schema.Parse("int");
            }
            else if (writerType.Equals(typeof(bool)))
            {
                writerSchema = Avro.Schema.Parse("boolean");
            }
            else if (writerType.Equals(typeof(double)))
            {
                writerSchema = Avro.Schema.Parse("double");
            }
            else if (writerType.Equals(typeof(string)))
            {
                // Note: It would arguably be better to make this a union with null, to
                // exactly match the .NET string type, however we don't for consistency
                // with the Java avro serializer.
                writerSchema = Avro.Schema.Parse("string");
            }
            else if (writerType.Equals(typeof(float)))
            {
                writerSchema = Avro.Schema.Parse("float");
            }
            else if (writerType.Equals(typeof(long)))
            {
                writerSchema = Avro.Schema.Parse("long");
            }
            else if (writerType.Equals(typeof(byte[])))
            {
                // Note: It would arguably be better to make this a union with null, to
                // exactly match the .NET byte[] type, however we don't for consistency
                // with the Java avro serializer.
                writerSchema = Avro.Schema.Parse("bytes");
            }
            else if (writerType.Equals(typeof(Null)))
            {
                writerSchema = Avro.Schema.Parse("null");
            }
            else
            {
                // The message lists exactly the types accepted by the branches above:
                // Null is accepted, SpecificFixed subclasses are not.
                throw new ArgumentException(
                          $"{nameof(AvroSerializer<T>)} " +
                          "only accepts type parameters of int, bool, double, string, float, " +
                          "long, byte[], Null and instances of ISpecificRecord."
                          );
            }

            avroWriter         = new SpecificWriter <T>(writerSchema);
            writerSchemaString = writerSchema.ToString();
        }
Пример #10
0
 /// <summary>
 ///     Creates a record wrapper: the schema version and fields go to the base
 ///     constructor, the Avro schema and underlying record are stored here.
 /// </summary>
 /// <param name="schemaVersion">Schema version forwarded to the base constructor.</param>
 /// <param name="schema">Avro schema stored in <c>_schema</c>.</param>
 /// <param name="fields">Fields forwarded to the base constructor.</param>
 /// <param name="record">Avro generic record stored in <c>_record</c>.</param>
 public GenericAvroRecord(byte[] schemaVersion, Avro.Schema schema, IList <Field> fields, Avro.Generic.GenericRecord record) : base(schemaVersion, fields)
 {
     this._record = record;
     this._schema = schema;
 }
 /// <summary>
 ///     Creates the reader by forwarding to the base constructor the flag, a new
 ///     <c>GenericAvroReader</c> built from <paramref name="readerSchema"/>, and
 ///     <paramref name="readerSchema"/> itself. No further initialization is done here.
 /// </summary>
 /// <param name="useProvidedSchemaAsReaderSchema">Flag passed through to the base constructor.</param>
 /// <param name="readerSchema">Avro schema used for the new <c>GenericAvroReader</c> and passed to the base constructor.</param>
 public MultiVersionGenericAvroReader(bool useProvidedSchemaAsReaderSchema, Avro.Schema readerSchema) : base(useProvidedSchemaAsReaderSchema, new GenericAvroReader(readerSchema), readerSchema)
 {
 }
 /// <summary>
 ///     Forwards the schema reader and reader schema to the base constructor and
 ///     stores the <paramref name="useProvidedSchemaAsReaderSchema"/> flag.
 /// </summary>
 /// <param name="useProvidedSchemaAsReaderSchema">Flag stored in the field of the same name.</param>
 /// <param name="providerSchemaReader">Schema reader passed to the base constructor.</param>
 /// <param name="readerSchema">Avro schema passed to the base constructor.</param>
 protected internal AbstractMultiVersionGenericReader(bool useProvidedSchemaAsReaderSchema, ISchemaReader <IGenericRecord> providerSchemaReader, Avro.Schema readerSchema) : base(providerSchemaReader, readerSchema)
 {
     this.useProvidedSchemaAsReaderSchema = useProvidedSchemaAsReaderSchema;
 }
 /// <summary>
 ///     Forwards the schema reader to the base constructor and stores the reader
 ///     schema in <c>ReaderSchema</c>.
 /// </summary>
 /// <param name="providerSchemaReader">Schema reader passed to the base constructor.</param>
 /// <param name="readerSchema">Avro schema assigned to <c>ReaderSchema</c>.</param>
 public AbstractMultiVersionAvroBaseReader(ISchemaReader <T> providerSchemaReader, Avro.Schema readerSchema) : base(providerSchemaReader)
 {
     ReaderSchema = readerSchema;
 }
Пример #14
0
 /// <summary>
 ///     Passes <paramref name="schemaInfo"/> to the base constructor, then decodes
 ///     its <c>Schema</c> member as UTF-8 text and parses that text with
 ///     <c>SchemaUtils.ParseAvroSchema</c>.
 /// </summary>
 /// <param name="schemaInfo">Schema information whose <c>Schema</c> member holds the schema definition.</param>
 public AvroBaseStructSchema(ISchemaInfo schemaInfo) : base(schemaInfo)
 {
     var schemaText = Encoding.UTF8.GetString(schemaInfo.Schema);
     schema = SchemaUtils.ParseAvroSchema(schemaText);
 }