Exemple #1
0
        /// <summary>
        /// Serializes <paramref name="value"/> once with a <see cref="GenericDatumWriter{T}"/> built for
        /// <paramref name="ws"/> and asserts that the produced bytes equal <paramref name="expected"/>.
        /// </summary>
        /// <typeparam name="T">Type of the datum being written.</typeparam>
        /// <param name="expected">The byte sequence the serialized output must match.</param>
        /// <param name="value">The datum to serialize.</param>
        /// <param name="ws">The writer schema handed to the datum writer.</param>
        private static void checkAlternateSerializers <T>(byte[] expected, T value, Schema ws)
        {
            using (var ms = new MemoryStream())
            {
                var writer = new GenericDatumWriter <T>(ws);
                var e      = new BinaryEncoder(ms);

                // Write the datum exactly once: a second write would append a duplicate
                // encoding to the stream, so any non-empty encoding would fail the
                // comparison against `expected` below.
                writer.Write(value, e);
                var output = ms.ToArray();

                Assert.AreEqual(expected.Length, output.Length);
                Assert.True(expected.SequenceEqual(output));
            }
        }
Exemple #2
0
        /// <summary>
        /// Reads every record from "twitter.avro" using the schema in "twitter.avsc", re-encodes each
        /// record to Avro binary, wraps the bytes in a <c>Message</c>, and then signs and verifies
        /// every message.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            var messages = new List <Message>();

            //Read & parse the avro schema
            var schema = Avro.Schema.Parse(File.ReadAllText("twitter.avsc"));

            // The writer depends only on the schema, so a single instance serves every record.
            var writer = new GenericDatumWriter <GenericRecord>(schema);

            //Open a file reader on the avro binary file with data.
            // The reader is disposed when the loop finishes so the underlying file is released.
            using (var dataFileReader = Avro.File.DataFileReader <GenericRecord> .OpenReader("twitter.avro", schema))
            {
                while (dataFileReader.HasNext())
                {
                    var tweet = dataFileReader.Next();

                    using (var iostr = new MemoryStream())
                    {
                        Avro.IO.Encoder e = new BinaryEncoder(iostr);
                        writer.Write(tweet, e);
                        messages.Add(new Message(iostr.ToArray()));
                    }
                }
            }

            foreach (var item in messages)
            {
                var envelope = Sign(item);
                envelope.Verify();
            }
        }
Exemple #3
0
        /// <summary>
        /// Times Avro binary serialization and deserialization of <c>Values</c> using generic datum
        /// reader/writer instances built from <c>Schema</c>. After the timed write loop it yields a
        /// ("serialization", elapsed) pair; a timed read loop over the same stream follows.
        /// </summary>
        public override IEnumerable <(string, TimeSpan)> Run()
        {
            var stream = new MemoryStream();

            var reader = new GenericDatumReader <T>(Schema, Schema);
            var writer = new GenericDatumWriter <T>(Schema);

            // Untimed pass: encode every value once so the encoded size can be measured.
            using (stream)
            {
                var encoder = new BinaryEncoder(stream);

                foreach (var value in Values)
                {
                    writer.Write(value, encoder);
                }
            }

            // Scale the bytes of one pass over Values to Iterations writes (integer arithmetic).
            // Note: ToArray() is called after the using block above has disposed the stream.
            var count = Values.Length;
            var size  = stream.ToArray().Length *Iterations / count;

            // Use the estimate as the initial capacity of the stream used for the timed loops.
            stream = new MemoryStream(size);

            using (stream)
            {
                // Decoder and encoder share the same stream: it is written first, then rewound and read.
                var decoder = new BinaryDecoder(stream);
                var encoder = new BinaryEncoder(stream);

                var stopwatch = new Stopwatch();
                stopwatch.Start();

                // Timed write loop: cycles through Values until Iterations writes are done.
                for (int i = 0; i < Iterations; i++)
                {
                    writer.Write(Values[i % count], encoder);
                }

                stopwatch.Stop();
                yield return("serialization", stopwatch.Elapsed);

                // Clear the timer and rewind the stream before starting the timed read loop.
                stopwatch.Reset();
                stream.Position = 0;
                stopwatch.Start();

                for (int i = 0; i < Iterations; i++)
                {
                    // Read one datum back from the stream; the result is not stored.
                    reader.Read(default, decoder);
        /// <summary>
        /// Converts a model into an Avro-serialized byte array. The model is turned into a generic
        /// record via <c>GetGenericRecord</c> and written with a datum writer created for
        /// <paramref name="schema"/>. The schema and the model type are expected to match; otherwise
        /// an exception is thrown.
        /// </summary>
        /// <typeparam name="T">Type of the model being serialized.</typeparam>
        /// <param name="toTransform">The model instance to serialize.</param>
        /// <param name="schema">Schema used to create the datum writer; also forwarded to <c>GetGenericRecord</c>.</param>
        /// <param name="schemaString">Schema text; parsed here and also forwarded to <c>GetGenericRecord</c>.</param>
        /// <returns>The bytes written to the in-memory stream by the binary encoder.</returns>
        public byte[] Format <T>(T toTransform, Schema schema, string schemaString)
        {
            var recordWriter = new GenericDatumWriter <GenericRecord>(schema);

            // NOTE(review): the parsed result is never read in this method; the call is kept so
            // that parsing of schemaString still happens at this point.
            var parsedSchema = Schema.Parse(schemaString) as RecordSchema;

            using (var buffer = new MemoryStream())
            {
                Encoder binaryEncoder = new BinaryEncoder(buffer);
                var     genericRecord = GetGenericRecord(toTransform, schema, schemaString);

                recordWriter.Write(genericRecord, binaryEncoder);

                return(buffer.ToArray());
            }
        }
Exemple #5
0
        /// <summary>
        /// Encodes <paramref name="record"/> into an in-memory Avro binary buffer, repeating the write
        /// <c>_numberOfRecordsInAvro</c> times, and returns the resulting bytes.
        /// </summary>
        /// <param name="record">The record to encode; its own schema is used to create the writer.</param>
        /// <returns>The content of the in-memory stream after the encoder has been flushed.</returns>
        private byte[] GenericRecordsToAvro(GenericRecord record)
        {
            using (var buffer = new MemoryStream())
            {
                var datumWriter   = new GenericDatumWriter <GenericRecord>(record.Schema);
                var binaryEncoder = new BinaryEncoder(buffer);

                int written = 0;
                while (written < _numberOfRecordsInAvro)
                {
                    datumWriter.Write(record, binaryEncoder);
                    written++;
                }

                // Flush before snapshotting the stream content.
                binaryEncoder.Flush();

                byte[] payload = buffer.ToArray();
                return(payload);
            }
        }
Exemple #6
0
 /// <summary>
 /// Serializes the Avro record wrapped by <paramref name="message"/> through the instance's
 /// writer and encoder and returns the bytes accumulated in the instance's output stream.
 /// The whole operation runs under a lock on this instance.
 /// </summary>
 /// <param name="message">The record to serialize; it is cast to <c>GenericAvroRecord</c>.</param>
 /// <returns>A copy of the output stream's content after the encoder has been flushed.</returns>
 /// <exception cref="SchemaSerializationException">Wraps any exception raised while casting, writing, flushing, or copying.</exception>
 public byte[] Write(IGenericRecord message)
 {
     lock (this)
     {
         try
         {
             var avroRecord = ((GenericAvroRecord)message).AvroRecord;

             _writer.Write(avroRecord, _encoder);
             _encoder.Flush();

             return(_byteArrayOutputStream.ToArray());
         }
         catch (Exception cause)
         {
             throw new SchemaSerializationException(cause);
         }
         finally
         {
             // Truncate the stream to length 0 whether or not serialization succeeded.
             _byteArrayOutputStream.SetLength(0);
         }
     }
 }
        /// <summary>
        /// Round-trips a fixture-generated <c>Dataset</c> between AvroConvert and Apache Avro in both
        /// directions. After each direction it calls <c>Contract.Assert</c> on
        /// <c>dataset == result</c>.
        /// </summary>
        public static void Invoke()
        {
            //Arrange
            Dataset dataset = new Fixture().Create <Dataset>();

            var    schemaText   = AvroConvert.GenerateSchema(typeof(Dataset));
            Schema parsedSchema = Schema.Parse(schemaText);


            //AvroConvert to Apache
            var headlessBytes = AvroConvert.SerializeHeadless(dataset, schemaText);

            Dataset fromApache;

            using (var input = new MemoryStream(headlessBytes))
            {
                var datumReader   = new GenericDatumReader <GenericRecord>(parsedSchema, parsedSchema);
                var binaryDecoder = new BinaryDecoder(input);

                fromApache = ApacheAvroHelpers.Decreate <Dataset>(datumReader.Read(null, binaryDecoder));
            }

            Contract.Assert(dataset == fromApache);


            //Apache to AvroConvert
            var output        = new MemoryStream();
            var binaryEncoder = new BinaryEncoder(output);
            var datumWriter   = new GenericDatumWriter <GenericRecord>(parsedSchema);

            datumWriter.Write(ApacheAvroHelpers.Create(dataset, parsedSchema), binaryEncoder);

            var fromAvroConvert = AvroConvert.DeserializeHeadless <Dataset>(output.ToArray());

            Contract.Assert(dataset == fromAvroConvert);
        }
Exemple #8
0
        /// <summary>
        /// Runs one benchmark pass over <paramref name="datasets"/>, timing serialization and
        /// deserialization with Apache.Avro, AvroConvert (headless and GZip) and AvroConvert vNext
        /// (headless and GZip), and records the serialized sizes.
        /// </summary>
        /// <param name="datasets">The data to serialize and deserialize in every phase.</param>
        /// <param name="schema">Schema text passed to the headless AvroConvert calls.</param>
        /// <returns>A <c>BenchmarkResult</c> holding the elapsed milliseconds of each phase and the size of each serialized form.</returns>
        private static BenchmarkResult RunBenchmark(Dataset[] datasets, string schema)
        {
            var result = new BenchmarkResult();

            // A single stopwatch is restarted after every phase, so each recorded time covers
            // only the statements between two consecutive Restart() calls.
            Stopwatch stopwatch = Stopwatch.StartNew();


            //Serialize Apache.Avro
            // Note: schema generation and parsing happen inside this timed region.
            MemoryStream apacheAvroSerializeStream = new MemoryStream();
            var          encoder      = new BinaryEncoder(apacheAvroSerializeStream);
            var          apacheSchema = Schema.Parse(AvroConvert.GenerateSchema(typeof(Dataset)));

            var apacheWriter = new GenericDatumWriter <GenericRecord>(apacheSchema);

            foreach (var dataset in datasets)
            {
                apacheWriter.Write(ApacheAvroHelpers.Create(dataset, apacheSchema), encoder);
            }

            var apacheAvro = apacheAvroSerializeStream.ToArray();

            result.ApacheAvroSerializeTime = stopwatch.ElapsedMilliseconds;
            stopwatch.Restart();

            //Deserialize Apache.Avro

            List <Dataset> apacheResult = new List <Dataset>();

            using (var ms = new MemoryStream(apacheAvro))
            {
                apacheSchema = Schema.Parse(AvroConvert.GenerateSchema(typeof(Dataset)));
                var apacheReader = new GenericDatumReader <GenericRecord>(apacheSchema, apacheSchema);
                var decoder      = new BinaryDecoder(ms);

                // One record was written per dataset, so read back the same number of records.
                foreach (var dataset in datasets)
                {
                    apacheResult.Add(ApacheAvroHelpers.Decreate <Dataset>(apacheReader.Read(null, decoder)));
                }
            }
            result.ApacheAvroDeserializeTime = stopwatch.ElapsedMilliseconds;
            stopwatch.Restart();


            //Serialize AvroConvert Headerless
            var avroHeadless = AvroConvert.SerializeHeadless(datasets, schema);

            result.AvroConvertHeadlessSerializeTime = stopwatch.ElapsedMilliseconds;
            stopwatch.Restart();

            //Deserialize AvroConvert Headerless
            AvroConvert.DeserializeHeadless <List <Dataset> >(avroHeadless, schema);
            result.AvroConvertHeadlessDeserializeTime = stopwatch.ElapsedMilliseconds;
            stopwatch.Restart();


            //Serialize AvroConvert Gzip
            var avroGzip = AvroConvert.Serialize(datasets, CodecType.GZip);

            result.AvroConvertGzipSerializeTime = stopwatch.ElapsedMilliseconds;
            stopwatch.Restart();

            //Deserialize AvroConvert Gzip
            AvroConvert.Deserialize <Dataset[]>(avroGzip);
            result.AvroConvertGzipDeserializeTime = stopwatch.ElapsedMilliseconds;
            stopwatch.Restart();


            //Serialize AvroConvert vNext
            var newAvro = AvroConvertToUpdate.AvroConvert.SerializeHeadless(datasets, schema);

            result.AvroConvertVNextHeadlessSerializeTime = stopwatch.ElapsedMilliseconds;
            stopwatch.Restart();

            //Deserialize AvroConvert vNext
            AvroConvertToUpdate.AvroConvert.DeserializeHeadless <Dataset[]>(newAvro, schema);
            result.AvroConvertVNextHeadlessDeserializeTime = stopwatch.ElapsedMilliseconds;
            // Restart (not Stop) here: a stopped stopwatch would make the next phase report the
            // elapsed time of this phase instead of the GZip serialization time.
            stopwatch.Restart();

            //Serialize AvroConvert vNext Gzip
            var newAvroGzip = AvroConvertToUpdate.AvroConvert.Serialize(datasets, AvroConvertToUpdate.Codec.CodecType.GZip);

            result.AvroConvertVNextGzipSerializeTime = stopwatch.ElapsedMilliseconds;
            stopwatch.Restart();

            //Deserialize AvroConvert vNext Gzip
            AvroConvertToUpdate.AvroConvert.Deserialize <Dataset[]>(newAvroGzip);
            result.AvroConvertVNextGzipDeserializeTime = stopwatch.ElapsedMilliseconds;
            stopwatch.Stop();


            //Size
            result.ApacheAvroSize           = apacheAvro.Length;
            result.AvroConvertHeadlessSize  = avroHeadless.Length;
            result.AvroConvertGzipSize      = avroGzip.Length;
            result.AvroConvertVNextSize     = newAvro.Length;
            result.AvroConvertVNextGzipSize = newAvroGzip.Length;

            return(result);
        }