示例#1
0
        /// <summary>
        /// Converts one input row into an Avro <c>GenericRecord</c> and appends it to the Avro file writer.
        /// </summary>
        /// <param name="input">Row whose columns are copied, by name, into the Avro record.</param>
        /// <param name="output">Writer whose <c>BaseStream</c> receives the Avro data.</param>
        public override void Output(IRow input, IUnstructuredWriter output)
        {
            // The schema and the file writer are created once, on the first row, and reused afterwards.
            if (_fileWriter == null)
            {
                _avSchema = Schema.Parse(_avroSchema) as RecordSchema;
                _fileWriter = DataFileWriter<GenericRecord>.OpenWriter(
                    new GenericDatumWriter<GenericRecord>(_avSchema),
                    output.BaseStream);
            }

            var avroRecord = new GenericRecord(_avSchema);

            foreach (var column in input.Schema)
            {
                object value = input.Get<object>(column.Name);

                // SqlArray<T> values are flattened to a plain object[] before being stored in the record.
                bool isSqlArray = value != null
                                  && value.GetType().IsGenericType
                                  && value.GetType().GetGenericTypeDefinition() == typeof(SqlArray<>);
                if (isSqlArray)
                {
                    value = ((System.Collections.IEnumerable)value).Cast<object>().ToArray();
                }

                avroRecord.Add(column.Name, value);
            }

            _fileWriter.Append(avroRecord);
        }
示例#2
0
        /// <summary>
        /// Reads every record from "twitter.avro" using the schema in "twitter.avsc", re-encodes each
        /// record to raw Avro binary, wraps the bytes in a <c>Message</c>, then signs and verifies
        /// each message.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            var messages = new List<Message>();

            //Read & parse the avro schema
            var schema = Avro.Schema.Parse(File.ReadAllText("twitter.avsc"));

            //Open a file reader on the avro binary file with data; dispose it so the file handle is released
            using (var dataFileReader = Avro.File.DataFileReader<GenericRecord>.OpenReader("twitter.avro", schema))
            {
                // The datum writer depends only on the schema, so one instance serves every record.
                var writer = new GenericDatumWriter<GenericRecord>(schema);

                while (dataFileReader.HasNext())
                {
                    var tweet = dataFileReader.Next();

                    using (var iostr = new MemoryStream())
                    {
                        Avro.IO.Encoder e = new BinaryEncoder(iostr);
                        writer.Write(tweet, e);
                        var record = iostr.ToArray();
                        messages.Add(new Message(record));
                    }
                }
            }

            foreach (var item in messages)
            {
                var envelope = Sign(item);
                envelope.Verify();
            }
        }
示例#3
0
        /// <summary>
        /// Builds an Avro data-file writer over the stream, using the value definition's schema and
        /// the requested codec, then positions the stream at its end.
        /// </summary>
        /// <param name="valueDef">Value definition forwarded to the base constructor; its schema drives the datum writer.</param>
        /// <param name="stream">File stream forwarded to the base constructor.</param>
        /// <param name="codec">Codec type used to create the data-file codec.</param>
        public AvroWriter(IAvroFileValueDef<V> valueDef, FileStream stream, Codec.Type codec) : base(valueDef, stream)
        {
            // The stream must be writable (presumably CheckArgument throws otherwise — confirm in Preconditions).
            Preconditions.CheckArgument(Stream.CanWrite);

            mWriter = DataFileWriter<GenericRecord>.OpenWriter(
                new GenericDatumWriter<GenericRecord>(ValueDef.Schema),
                Stream,
                Codec.CreateCodec(codec));

            // Move to the end of the stream after the writer has been opened.
            Stream.Position = Stream.Length;
        }
示例#4
0
        /// <summary>
        /// Encodes <paramref name="value"/> with a <c>GenericDatumWriter</c> built for
        /// <paramref name="ws"/> and asserts that the produced bytes match <paramref name="expected"/>.
        /// </summary>
        /// <typeparam name="T">Type of the value being serialized.</typeparam>
        /// <param name="expected">Bytes the serialization is expected to produce.</param>
        /// <param name="value">Value to serialize.</param>
        /// <param name="ws">Writer schema.</param>
        private static void checkAlternateSerializers<T>(byte[] expected, T value, Schema ws)
        {
            var buffer = new MemoryStream();
            new GenericDatumWriter<T>(ws).Write(value, new BinaryEncoder(buffer));

            byte[] actual = buffer.ToArray();

            // Length is checked first, then the content.
            Assert.AreEqual(expected.Length, actual.Length);
            Assert.True(expected.SequenceEqual(actual));
        }
        /// <summary>
        /// Converts the model into an Avro-serialized byte array. This assumes that the schema and the
        /// model types are the same; otherwise an exception is thrown.
        /// </summary>
        /// <typeparam name="T">Type of the model being serialized.</typeparam>
        /// <param name="toTransform">Model instance to serialize.</param>
        /// <param name="schema">Avro schema used to build the datum writer; also forwarded to <c>GetGenericRecord</c>.</param>
        /// <param name="schemaString">Schema text forwarded to <c>GetGenericRecord</c>.</param>
        /// <returns>The binary-encoded bytes of the generic record built from <paramref name="toTransform"/>.</returns>
        public byte[] Format<T>(T toTransform, Schema schema, string schemaString)
        {
            // schemaString is only forwarded to GetGenericRecord; it is not parsed here because
            // the parsed result was never used.
            var datumWriter = new GenericDatumWriter<GenericRecord>(schema);

            using (var memoryStream = new MemoryStream())
            {
                Encoder encoder = new BinaryEncoder(memoryStream);
                datumWriter.Write(GetGenericRecord(toTransform, schema, schemaString), encoder);
                return memoryStream.ToArray();
            }
        }
示例#6
0
        /// <summary>
        /// Binary-encodes the same record <c>_numberOfRecordsInAvro</c> times, back to back, and
        /// returns the resulting bytes.
        /// </summary>
        /// <param name="record">Record to encode; its own schema is used for the datum writer.</param>
        /// <returns>The concatenated binary encoding of the repeated record.</returns>
        private byte[] GenericRecordsToAvro(GenericRecord record)
        {
            using (var buffer = new MemoryStream())
            {
                var datumWriter = new GenericDatumWriter<GenericRecord>(record.Schema);
                var binaryEncoder = new BinaryEncoder(buffer);

                int written = 0;
                while (written < _numberOfRecordsInAvro)
                {
                    datumWriter.Write(record, binaryEncoder);
                    written++;
                }

                // Flush the encoder before taking a snapshot of the buffer.
                binaryEncoder.Flush();

                return buffer.ToArray();
            }
        }
示例#7
0
        public override IEnumerable <(string, TimeSpan)> Run()
        {
            var stream = new MemoryStream();

            var reader = new GenericDatumReader <T>(Schema, Schema);
            var writer = new GenericDatumWriter <T>(Schema);

            using (stream)
            {
                var encoder = new BinaryEncoder(stream);

                foreach (var value in Values)
                {
                    writer.Write(value, encoder);
                }
            }

            var count = Values.Length;
            var size  = stream.ToArray().Length *Iterations / count;

            stream = new MemoryStream(size);

            using (stream)
            {
                var decoder = new BinaryDecoder(stream);
                var encoder = new BinaryEncoder(stream);

                var stopwatch = new Stopwatch();
                stopwatch.Start();

                for (int i = 0; i < Iterations; i++)
                {
                    writer.Write(Values[i % count], encoder);
                }

                stopwatch.Stop();
                yield return("serialization", stopwatch.Elapsed);

                stopwatch.Reset();
                stream.Position = 0;
                stopwatch.Start();

                for (int i = 0; i < Iterations; i++)
                {
                    reader.Read(default, decoder);
示例#8
0
        /// <summary>
        /// Round-trips a generated <c>Dataset</c> in both directions between AvroConvert's headless
        /// format and the Apache Avro generic reader/writer, asserting after each direction that the
        /// result compares equal to the original.
        /// </summary>
        public static void Invoke()
        {
            // Arrange: a generated dataset plus its schema, as text and as a parsed Apache schema.
            var fixture = new Fixture();
            Dataset dataset = fixture.Create<Dataset>();

            var schema = AvroConvert.GenerateSchema(typeof(Dataset));
            Schema apacheSchema = Schema.Parse(schema);

            // Direction 1: serialize with AvroConvert, deserialize with Apache Avro.
            var avroConvertBytes = AvroConvert.SerializeHeadless(dataset, schema);

            Dataset fromApache;
            using (var input = new MemoryStream(avroConvertBytes))
            {
                var genericReader = new GenericDatumReader<GenericRecord>(apacheSchema, apacheSchema);
                var binaryDecoder = new BinaryDecoder(input);
                GenericRecord decoded = genericReader.Read(null, binaryDecoder);
                fromApache = ApacheAvroHelpers.Decreate<Dataset>(decoded);
            }

            // NOTE(review): relies on Dataset's == semantics, which are not visible here.
            Contract.Assert(dataset == fromApache);

            // Direction 2: serialize with Apache Avro, deserialize with AvroConvert.
            var apacheBuffer = new MemoryStream();
            var binaryEncoder = new BinaryEncoder(apacheBuffer);
            var genericWriter = new GenericDatumWriter<GenericRecord>(apacheSchema);

            genericWriter.Write(ApacheAvroHelpers.Create(dataset, apacheSchema), binaryEncoder);

            var apacheBytes = apacheBuffer.ToArray();
            var fromAvroConvert = AvroConvert.DeserializeHeadless<Dataset>(apacheBytes);

            Contract.Assert(dataset == fromAvroConvert);
        }
示例#9
0
        /// <summary>
        /// Opens the shared Avro data-file writer on <paramref name="outputFilePath"/> with a
        /// Deflate codec, creating or appending to the file depending on <paramref name="mode"/>.
        /// </summary>
        /// <param name="outputFilePath">Path of the file to write.</param>
        /// <param name="mode">Overwrite opens the file with FileMode.Create; Append opens it with FileMode.Append.</param>
        /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="mode"/> is neither Overwrite nor Append.</exception>
        private static void BuildDataFileWriter(string outputFilePath, Mode mode)
        {
            var datumWriter = new GenericDatumWriter<GenericRecord>(_avroSchema);
            Codec codec = Codec.CreateCodec(Codec.Type.Deflate);

            // The two supported modes differ only in how the file is opened.
            FileMode fileMode;
            switch (mode)
            {
            case Mode.Overwrite:
                fileMode = FileMode.Create;
                break;

            case Mode.Append:
                // NOTE(review): confirm that opening a fresh writer over an existing Avro file
                // yields a readable file — appending may need dedicated writer support.
                fileMode = FileMode.Append;
                break;

            default:
                throw new ArgumentOutOfRangeException(nameof(mode));
            }

            _dataFileWriter = (DataFileWriter<GenericRecord>) DataFileWriter<GenericRecord>.OpenWriter(
                datumWriter, new FileStream(outputFilePath, fileMode), codec);
        }
示例#10
0
        /// <summary>
        /// Times serialization and deserialization of <paramref name="datasets"/> with Apache Avro,
        /// AvroConvert (headless and Gzip) and AvroConvert vNext (headless and Gzip), and records
        /// the elapsed milliseconds and the serialized sizes.
        /// </summary>
        /// <param name="datasets">Datasets to serialize in each benchmark phase.</param>
        /// <param name="schema">
        /// Schema text passed to the headless AvroConvert calls. The Apache Avro phases generate
        /// their own schema from <c>typeof(Dataset)</c>.
        /// </param>
        /// <returns>A <c>BenchmarkResult</c> holding the per-phase timings and the payload sizes.</returns>
        private static BenchmarkResult RunBenchmark(Dataset[] datasets, string schema)
        {
            var result = new BenchmarkResult();

            // A single stopwatch is restarted after each phase, so every reading covers one phase only.
            Stopwatch stopwatch = Stopwatch.StartNew();


            //Serialize Apache.Avro
            MemoryStream apacheAvroSerializeStream = new MemoryStream();
            var          encoder      = new BinaryEncoder(apacheAvroSerializeStream);
            var          apacheSchema = Schema.Parse(AvroConvert.GenerateSchema(typeof(Dataset)));

            var apacheWriter = new GenericDatumWriter<GenericRecord>(apacheSchema);

            foreach (var dataset in datasets)
            {
                apacheWriter.Write(ApacheAvroHelpers.Create(dataset, apacheSchema), encoder);
            }

            var apacheAvro = apacheAvroSerializeStream.ToArray();

            result.ApacheAvroSerializeTime = stopwatch.ElapsedMilliseconds;
            stopwatch.Restart();

            //Deserialize Apache.Avro

            List<Dataset> apacheResult = new List<Dataset>();

            using (var ms = new MemoryStream(apacheAvro))
            {
                apacheSchema = Schema.Parse(AvroConvert.GenerateSchema(typeof(Dataset)));
                var apacheReader = new GenericDatumReader<GenericRecord>(apacheSchema, apacheSchema);
                var decoder      = new BinaryDecoder(ms);

                // One record is read back for each dataset that was written.
                foreach (var dataset in datasets)
                {
                    apacheResult.Add(ApacheAvroHelpers.Decreate<Dataset>(apacheReader.Read(null, decoder)));
                }
            }
            result.ApacheAvroDeserializeTime = stopwatch.ElapsedMilliseconds;
            stopwatch.Restart();


            //Serialize AvroConvert Headerless
            var avroHeadless = AvroConvert.SerializeHeadless(datasets, schema);

            result.AvroConvertHeadlessSerializeTime = stopwatch.ElapsedMilliseconds;
            stopwatch.Restart();

            //Deserialize AvroConvert Headerless
            AvroConvert.DeserializeHeadless<List<Dataset>>(avroHeadless, schema);
            result.AvroConvertHeadlessDeserializeTime = stopwatch.ElapsedMilliseconds;
            stopwatch.Restart();


            //Serialize AvroConvert Gzip
            var avroGzip = AvroConvert.Serialize(datasets, CodecType.GZip);

            result.AvroConvertGzipSerializeTime = stopwatch.ElapsedMilliseconds;
            stopwatch.Restart();

            //Deserialize AvroConvert Gzip
            AvroConvert.Deserialize<Dataset[]>(avroGzip);
            result.AvroConvertGzipDeserializeTime = stopwatch.ElapsedMilliseconds;
            stopwatch.Restart();


            //Serialize AvroConvert vNext
            var newAvro = AvroConvertToUpdate.AvroConvert.SerializeHeadless(datasets, schema);

            result.AvroConvertVNextHeadlessSerializeTime = stopwatch.ElapsedMilliseconds;
            stopwatch.Restart();

            //Deserialize AvroConvert vNext
            AvroConvertToUpdate.AvroConvert.DeserializeHeadless<Dataset[]>(newAvro, schema);
            result.AvroConvertVNextHeadlessDeserializeTime = stopwatch.ElapsedMilliseconds;
            // Restart (not Stop): a stopped stopwatch would leave the Gzip serialization below
            // untimed and report the previous reading again.
            stopwatch.Restart();

            //Serialize AvroConvert vNext Gzip
            var newAvroGzip = AvroConvertToUpdate.AvroConvert.Serialize(datasets, AvroConvertToUpdate.Codec.CodecType.GZip);

            result.AvroConvertVNextGzipSerializeTime = stopwatch.ElapsedMilliseconds;
            stopwatch.Restart();

            //Deserialize AvroConvert vNext Gzip
            AvroConvertToUpdate.AvroConvert.Deserialize<Dataset[]>(newAvroGzip);
            result.AvroConvertVNextGzipDeserializeTime = stopwatch.ElapsedMilliseconds;
            stopwatch.Stop();


            //Size
            result.ApacheAvroSize           = apacheAvro.Length;
            result.AvroConvertHeadlessSize  = avroHeadless.Length;
            result.AvroConvertGzipSize      = avroGzip.Length;
            result.AvroConvertVNextSize     = newAvro.Length;
            result.AvroConvertVNextGzipSize = newAvroGzip.Length;

            return(result);
        }
示例#11
0
        /// <summary>
        /// Builds one fully populated record for the schema stored at <paramref name="schemaPath"/>
        /// and writes it to <paramref name="outputDir"/> once per supported codec.
        /// </summary>
        /// <param name="schemaPath">Path of the file containing the Avro schema text.</param>
        /// <param name="outputDir">Directory that receives the generated .avro files.</param>
        static void GenerateInteropData(string schemaPath, string outputDir)
        {
            RecordSchema schema;
            using (var schemaFile = new StreamReader(schemaPath))
            {
                schema = Schema.Parse(schemaFile.ReadToEnd()) as RecordSchema;
            }

            // mapField: a map of two small records, each carrying only a label.
            var mapSchema      = schema.Fields.Find(f => f.Name == "mapField").Schema as MapSchema;
            var mapValueSchema = mapSchema.ValueSchema as RecordSchema;
            var firstMapEntry  = new GenericRecord(mapValueSchema);
            var secondMapEntry = new GenericRecord(mapValueSchema);
            firstMapEntry.Add("label", "a");
            secondMapEntry.Add("label", "cee");

            var mapFieldValue = new Dictionary<string, GenericRecord>();
            mapFieldValue.Add("a", firstMapEntry);
            mapFieldValue.Add("bee", secondMapEntry);

            // enumField and fixedField values are built from their own field schemas.
            var enumSchema     = schema.Fields.Find(f => f.Name == "enumField").Schema as EnumSchema;
            var enumFieldValue = new GenericEnum(enumSchema, "C");

            var fixedFieldValue = new GenericFixed(
                schema.Fields.Find(f => f.Name == "fixedField").Schema as FixedSchema,
                Encoding.ASCII.GetBytes("1019181716151413"));

            // recordField: a node with a single childless child, both using the same schema.
            var nodeSchema = schema.Fields.Find(f => f.Name == "recordField").Schema as RecordSchema;
            var outerNode  = new GenericRecord(nodeSchema);
            var innerNode  = new GenericRecord(nodeSchema);
            innerNode.Add("label", "inner");
            innerNode.Add("children", new GenericRecord[0]);
            outerNode.Add("label", "blah");
            outerNode.Add("children", new GenericRecord[] { innerNode });

            var record = new GenericRecord(schema);
            record.Add("intField", 12);
            record.Add("longField", 15234324L);
            record.Add("stringField", "hey");
            record.Add("boolField", true);
            record.Add("floatField", 1234.0f);
            record.Add("doubleField", -1234.0);
            record.Add("bytesField", Encoding.UTF8.GetBytes("12312adf"));
            record.Add("nullField", null);
            record.Add("arrayField", new double[] { 5.0, 0.0, 12.0 });
            record.Add("mapField", mapFieldValue);
            record.Add("unionField", 12.0);
            record.Add("enumField", enumFieldValue);
            record.Add("fixedField", fixedFieldValue);
            record.Add("recordField", outerNode);

            var datumWriter = new GenericDatumWriter<GenericRecord>(schema);

            foreach (var codecName in InteropDataConstants.SupportedCodecNames)
            {
                // The null codec uses the bare file name; every other codec gets a name suffix.
                string fileName = codecName != DataFileConstants.NullCodec
                    ? string.Format("csharp_{0}.avro", codecName)
                    : "csharp.avro";

                var targetPath = Path.Combine(outputDir, fileName);
                var codec      = Codec.CreateCodecFromString(codecName);

                using (var fileWriter = DataFileWriter<GenericRecord>.OpenWriter(datumWriter, targetPath, codec))
                {
                    fileWriter.Append(record);
                }
            }
        }
示例#12
0
 /// <summary>
 /// Sets up a generic datum writer for <paramref name="schema"/> together with an in-memory
 /// stream and a binary encoder that writes into that stream.
 /// </summary>
 /// <param name="schema">Record schema the datum writer is created for.</param>
 public GenericAvroWriter(Avro.RecordSchema schema)
 {
     // The encoder is bound to the memory stream, so the stream has to exist first.
     _byteArrayOutputStream = new MemoryStream();
     _encoder = new BinaryEncoder(_byteArrayOutputStream);

     _writer = new GenericDatumWriter<GenericRecord>(schema);
 }