/// <summary>
/// Serializes <paramref name="value"/> twice with a <see cref="GenericDatumWriter{T}"/>
/// driven by schema <paramref name="ws"/> and asserts the concatenated bytes equal
/// <paramref name="expected"/> exactly (length first, then content).
/// Writing the same value twice checks the writer is reusable between calls.
/// </summary>
/// <typeparam name="T">Datum type being serialized.</typeparam>
/// <param name="expected">Expected byte stream for two consecutive writes.</param>
/// <param name="value">Value to serialize.</param>
/// <param name="ws">Writer schema.</param>
private static void checkAlternateSerializers<T>(byte[] expected, T value, Schema ws)
{
    // Dispose the stream deterministically; flush the encoder before snapshotting,
    // matching the flush-before-ToArray pattern used elsewhere in this file.
    using (var ms = new MemoryStream())
    {
        var writer = new GenericDatumWriter<T>(ws);
        var e = new BinaryEncoder(ms);
        writer.Write(value, e);
        writer.Write(value, e);
        e.Flush();
        var output = ms.ToArray();
        Assert.AreEqual(expected.Length, output.Length);
        Assert.True(expected.SequenceEqual(output));
    }
}
/// <summary>
/// Reads Avro-encoded records from "twitter.avro" (schema from "twitter.avsc"),
/// re-serializes each record to headerless Avro bytes wrapped in a Message,
/// then signs and verifies every message.
/// </summary>
static void Main(string[] args)
{
    var messages = new List<Message>();

    // Read & parse the Avro schema.
    var schema = Avro.Schema.Parse(File.ReadAllText("twitter.avsc"));

    // The writer is reused across records instead of being rebuilt per iteration.
    var writer = new GenericDatumWriter<GenericRecord>(schema);

    // Open a file reader on the Avro binary file with data; dispose it so the
    // underlying file handle is released (the original leaked it).
    using (var dataFileReader = Avro.File.DataFileReader<GenericRecord>.OpenReader("twitter.avro", schema))
    {
        while (dataFileReader.HasNext())
        {
            var tweet = dataFileReader.Next();
            using (var iostr = new MemoryStream())
            {
                Avro.IO.Encoder e = new BinaryEncoder(iostr);
                writer.Write(tweet, e);
                e.Flush();
                messages.Add(new Message(iostr.ToArray()));
            }
        }
    }

    foreach (var item in messages)
    {
        var envelope = Sign(item);
        envelope.Verify();
    }
}
public override IEnumerable <(string, TimeSpan)> Run() { var stream = new MemoryStream(); var reader = new GenericDatumReader <T>(Schema, Schema); var writer = new GenericDatumWriter <T>(Schema); using (stream) { var encoder = new BinaryEncoder(stream); foreach (var value in Values) { writer.Write(value, encoder); } } var count = Values.Length; var size = stream.ToArray().Length *Iterations / count; stream = new MemoryStream(size); using (stream) { var decoder = new BinaryDecoder(stream); var encoder = new BinaryEncoder(stream); var stopwatch = new Stopwatch(); stopwatch.Start(); for (int i = 0; i < Iterations; i++) { writer.Write(Values[i % count], encoder); } stopwatch.Stop(); yield return("serialization", stopwatch.Elapsed); stopwatch.Reset(); stream.Position = 0; stopwatch.Start(); for (int i = 0; i < Iterations; i++) { reader.Read(default, decoder);
/// <summary>
/// Converts the model into an Avro-serialized byte stream. This assumes that the schema
/// and the model type match; otherwise an exception is thrown during writing.
/// </summary>
/// <typeparam name="T">Type of the model being serialized.</typeparam>
/// <param name="toTransform">Model instance to serialize.</param>
/// <param name="schema">Avro schema driving the writer.</param>
/// <param name="schemaString">Raw schema text passed through to GetGenericRecord.</param>
/// <returns>The Avro binary representation of the model.</returns>
public byte[] Format<T>(T toTransform, Schema schema, string schemaString)
{
    // NOTE: the original also parsed schemaString into an unused local
    // (`schemap`); that dead work has been removed.
    byte[] serializedModel;
    var datumWriter = new GenericDatumWriter<GenericRecord>(schema);
    using (var memoryStream = new MemoryStream())
    {
        Encoder encoder = new BinaryEncoder(memoryStream);
        datumWriter.Write(GetGenericRecord(toTransform, schema, schemaString), encoder);
        encoder.Flush(); // ensure all encoded bytes are in the stream before copying
        serializedModel = memoryStream.ToArray();
    }
    return serializedModel;
}
/// <summary>
/// Encodes the same GenericRecord <c>_numberOfRecordsInAvro</c> times into one
/// headerless Avro byte stream and returns the resulting bytes.
/// </summary>
/// <param name="record">Record to encode repeatedly; its own schema drives the writer.</param>
/// <returns>Concatenated Avro binary encodings of the record.</returns>
private byte[] GenericRecordsToAvro(GenericRecord record)
{
    using (var buffer = new MemoryStream())
    {
        var datumWriter = new GenericDatumWriter<GenericRecord>(record.Schema);
        var binaryEncoder = new BinaryEncoder(buffer);
        for (int recordIndex = 0; recordIndex < _numberOfRecordsInAvro; recordIndex++)
        {
            datumWriter.Write(record, binaryEncoder);
        }
        binaryEncoder.Flush();
        return buffer.ToArray();
    }
}
/// <summary>
/// Serializes an Avro generic record to bytes using the shared writer/encoder/stream.
/// The shared output stream is truncated (not reallocated) in the finally block so
/// the next call starts from an empty buffer even after a failure.
/// NOTE(review): <c>lock (this)</c> is an anti-pattern (external code could take the
/// same lock); a private readonly lock object would be preferable, but that needs a
/// new field outside this method, so it is left unchanged here.
/// </summary>
/// <param name="message">Record to serialize; must actually be a GenericAvroRecord —
/// the cast failure, like any other exception here, is wrapped below.</param>
/// <returns>The Avro-encoded bytes of the record.</returns>
/// <exception cref="SchemaSerializationException">Wraps any exception thrown during the cast, write, or flush.</exception>
public byte[] Write(IGenericRecord message) { lock (this) { try { _writer.Write(((GenericAvroRecord)message).AvroRecord, _encoder); _encoder.Flush(); var bytes = _byteArrayOutputStream.ToArray(); return(bytes); } catch (Exception e) { throw new SchemaSerializationException(e); } finally { _byteArrayOutputStream.SetLength(0); } } }
/// <summary>
/// Cross-library round-trip check: data serialized by AvroConvert must deserialize
/// correctly with Apache.Avro, and vice versa, for a fixture-generated Dataset.
/// </summary>
public static void Invoke()
{
    // Arrange
    var fixture = new Fixture();
    Dataset dataset = fixture.Create<Dataset>();
    var schema = AvroConvert.GenerateSchema(typeof(Dataset));
    Schema apacheSchema = Schema.Parse(schema);

    // AvroConvert -> Apache
    var avroConvertSerialized = AvroConvert.SerializeHeadless(dataset, schema);
    Dataset apacheDeserialized;
    using (var ms = new MemoryStream(avroConvertSerialized))
    {
        var apacheReader = new GenericDatumReader<GenericRecord>(apacheSchema, apacheSchema);
        var decoder = new BinaryDecoder(ms);
        apacheDeserialized = ApacheAvroHelpers.Decreate<Dataset>(apacheReader.Read(null, decoder));
    }
    Contract.Assert(dataset == apacheDeserialized);

    // Apache -> AvroConvert
    byte[] apacheSerialized;
    // Dispose the stream and flush the encoder before snapshotting
    // (the original leaked the stream and never flushed).
    using (var apacheAvroSerializeStream = new MemoryStream())
    {
        var encoder = new BinaryEncoder(apacheAvroSerializeStream);
        var apacheWriter = new GenericDatumWriter<GenericRecord>(apacheSchema);
        apacheWriter.Write(ApacheAvroHelpers.Create(dataset, apacheSchema), encoder);
        encoder.Flush();
        apacheSerialized = apacheAvroSerializeStream.ToArray();
    }
    var avroConvertDeserialized = AvroConvert.DeserializeHeadless<Dataset>(apacheSerialized);
    Contract.Assert(dataset == avroConvertDeserialized);
}
/// <summary>
/// Benchmarks Apache.Avro against AvroConvert (current and vNext) for
/// serialization/deserialization wall-clock time and payload size.
/// </summary>
/// <param name="datasets">Sample records to serialize.</param>
/// <param name="schema">AvroConvert schema string for the Dataset type.</param>
/// <returns>Collected timings (ms) and payload sizes (bytes).</returns>
private static BenchmarkResult RunBenchmark(Dataset[] datasets, string schema)
{
    var result = new BenchmarkResult();
    Stopwatch stopwatch = Stopwatch.StartNew();

    // Serialize Apache.Avro
    var apacheSchema = Schema.Parse(AvroConvert.GenerateSchema(typeof(Dataset)));
    byte[] apacheAvro;
    using (var apacheAvroSerializeStream = new MemoryStream())
    {
        var encoder = new BinaryEncoder(apacheAvroSerializeStream);
        var apacheWriter = new GenericDatumWriter<GenericRecord>(apacheSchema);
        foreach (var dataset in datasets)
        {
            apacheWriter.Write(ApacheAvroHelpers.Create(dataset, apacheSchema), encoder);
        }
        encoder.Flush(); // make sure every encoded byte reaches the buffer before copying
        apacheAvro = apacheAvroSerializeStream.ToArray();
    }
    result.ApacheAvroSerializeTime = stopwatch.ElapsedMilliseconds;
    stopwatch.Restart();

    // Deserialize Apache.Avro
    // Reuse the already-parsed schema so this timing covers only deserialization
    // (the original re-parsed the schema inside the timed region).
    var apacheResult = new List<Dataset>();
    using (var ms = new MemoryStream(apacheAvro))
    {
        var apacheReader = new GenericDatumReader<GenericRecord>(apacheSchema, apacheSchema);
        var decoder = new BinaryDecoder(ms);
        foreach (var dataset in datasets)
        {
            apacheResult.Add(ApacheAvroHelpers.Decreate<Dataset>(apacheReader.Read(null, decoder)));
        }
    }
    result.ApacheAvroDeserializeTime = stopwatch.ElapsedMilliseconds;
    stopwatch.Restart();

    // Serialize AvroConvert headless
    var avroHeadless = AvroConvert.SerializeHeadless(datasets, schema);
    result.AvroConvertHeadlessSerializeTime = stopwatch.ElapsedMilliseconds;
    stopwatch.Restart();

    // Deserialize AvroConvert headless
    AvroConvert.DeserializeHeadless<List<Dataset>>(avroHeadless, schema);
    result.AvroConvertHeadlessDeserializeTime = stopwatch.ElapsedMilliseconds;
    stopwatch.Restart();

    // Serialize AvroConvert GZip
    var avroGzip = AvroConvert.Serialize(datasets, CodecType.GZip);
    result.AvroConvertGzipSerializeTime = stopwatch.ElapsedMilliseconds;
    stopwatch.Restart();

    // Deserialize AvroConvert GZip
    AvroConvert.Deserialize<Dataset[]>(avroGzip);
    result.AvroConvertGzipDeserializeTime = stopwatch.ElapsedMilliseconds;
    stopwatch.Restart();

    // Serialize AvroConvert vNext headless
    var newAvro = AvroConvertToUpdate.AvroConvert.SerializeHeadless(datasets, schema);
    result.AvroConvertVNextHeadlessSerializeTime = stopwatch.ElapsedMilliseconds;
    stopwatch.Restart();

    // Deserialize AvroConvert vNext headless
    AvroConvertToUpdate.AvroConvert.DeserializeHeadless<Dataset[]>(newAvro, schema);
    result.AvroConvertVNextHeadlessDeserializeTime = stopwatch.ElapsedMilliseconds;
    // BUG FIX: the original called stopwatch.Stop() here, so the vNext GZip
    // serialize time below recorded a stale elapsed value. Restart to time it.
    stopwatch.Restart();

    // Serialize AvroConvert vNext GZip
    var newAvroGzip = AvroConvertToUpdate.AvroConvert.Serialize(datasets, AvroConvertToUpdate.Codec.CodecType.GZip);
    result.AvroConvertVNextGzipSerializeTime = stopwatch.ElapsedMilliseconds;
    stopwatch.Restart();

    // Deserialize AvroConvert vNext GZip
    AvroConvertToUpdate.AvroConvert.Deserialize<Dataset[]>(newAvroGzip);
    result.AvroConvertVNextGzipDeserializeTime = stopwatch.ElapsedMilliseconds;
    stopwatch.Stop();

    // Payload sizes
    result.ApacheAvroSize = apacheAvro.Length;
    result.AvroConvertHeadlessSize = avroHeadless.Length;
    result.AvroConvertGzipSize = avroGzip.Length;
    result.AvroConvertVNextSize = newAvro.Length;
    result.AvroConvertVNextGzipSize = newAvroGzip.Length;

    return result;
}