/// <summary>
/// Serializes one input row into the Avro output file, lazily creating the
/// schema and file writer on the first call.
/// </summary>
/// <param name="input">Row whose columns are copied into an Avro record.</param>
/// <param name="output">Unstructured writer whose base stream receives the Avro file.</param>
public override void Output(IRow input, IUnstructuredWriter output)
{
    if (_fileWriter == null)
    {
        _avSchema = Schema.Parse(_avroSchema) as RecordSchema;
        var datumWriter = new GenericDatumWriter<GenericRecord>(_avSchema);
        _fileWriter = DataFileWriter<GenericRecord>.OpenWriter(datumWriter, output.BaseStream);
    }

    var avroRecord = new GenericRecord(_avSchema);
    foreach (var column in input.Schema)
    {
        var value = input.Get<object>(column.Name);
        if (value != null)
        {
            var valueType = value.GetType();
            // SqlArray<T> values are materialized as object[] before handing them
            // to the Avro writer.
            var isSqlArray = valueType.IsGenericType
                             && valueType.GetGenericTypeDefinition() == typeof(SqlArray<>);
            if (isSqlArray)
            {
                value = ((System.Collections.IEnumerable)value).Cast<object>().ToArray();
            }
        }
        avroRecord.Add(column.Name, value);
    }

    _fileWriter.Append(avroRecord);
}
/// <summary>
/// Reads every record from "twitter.avro", re-encodes each one to raw Avro
/// bytes, then signs and verifies each resulting message.
/// </summary>
static void Main(string[] args)
{
    var messages = new List<Message>();

    // Read & parse the avro schema
    var schema = Avro.Schema.Parse(File.ReadAllText("twitter.avsc"));

    // FIX: the writer is stateless between records, so create it once instead of
    // once per loop iteration. (An unused GenericDatumReader local was also removed.)
    var writer = new GenericDatumWriter<GenericRecord>(schema);

    // Open a file reader on the avro binary file with data; dispose it when done.
    using (var dataFileReader = Avro.File.DataFileReader<GenericRecord>.OpenReader("twitter.avro", schema))
    {
        while (dataFileReader.HasNext())
        {
            var tweet = dataFileReader.Next();
            using (var iostr = new MemoryStream())
            {
                var e = new BinaryEncoder(iostr);
                writer.Write(tweet, e);
                e.Flush(); // ensure any buffered bytes reach the stream before snapshotting
                var record = iostr.ToArray();
                messages.Add(new Message(record));
            }
        }
    }

    foreach (var item in messages)
    {
        var envelope = Sign(item);
        envelope.Verify();
    }
}
/// <summary>
/// Creates an Avro writer that appends records to the supplied file stream
/// using the requested compression codec. The stream must be writable.
/// </summary>
/// <param name="valueDef">Definition providing the record schema.</param>
/// <param name="stream">Destination file stream.</param>
/// <param name="codec">Compression codec to apply to data blocks.</param>
public AvroWriter(IAvroFileValueDef<V> valueDef, FileStream stream, Codec.Type codec)
    : base(valueDef, stream)
{
    Preconditions.CheckArgument(Stream.CanWrite);
    var writer = new GenericDatumWriter<GenericRecord>(ValueDef.Schema);
    mWriter = DataFileWriter<GenericRecord>.OpenWriter(writer, Stream, Codec.CreateCodec(codec));
    // Seek to the end so subsequent writes append rather than overwrite.
    Stream.Position = Stream.Length;
}
/// <summary>
/// Serializes <paramref name="value"/> with the writer schema
/// <paramref name="ws"/> and asserts the produced bytes equal
/// <paramref name="expected"/> exactly (length and content).
/// </summary>
private static void checkAlternateSerializers<T>(byte[] expected, T value, Schema ws)
{
    var ms = new MemoryStream();
    var writer = new GenericDatumWriter<T>(ws);
    writer.Write(value, new BinaryEncoder(ms));

    var output = ms.ToArray();
    Assert.AreEqual(expected.Length, output.Length);
    Assert.True(expected.SequenceEqual(output));
}
/// <summary>
/// Converts the model into an Avro-serialized byte stream. This assumes that the schema and the
/// model types are the same, otherwise it will throw an exception.
/// </summary>
/// <typeparam name="T">Model type being serialized.</typeparam>
/// <param name="toTransform">Model instance to serialize.</param>
/// <param name="schema">Parsed Avro schema describing the model.</param>
/// <param name="schemaString">Raw schema JSON, forwarded to <c>GetGenericRecord</c>.</param>
/// <returns>The Avro binary representation of the model.</returns>
public byte[] Format<T>(T toTransform, Schema schema, string schemaString)
{
    byte[] serializedModel;
    var datumWriter = new GenericDatumWriter<GenericRecord>(schema);
    // FIX: removed a dead local that re-parsed schemaString into an unused
    // RecordSchema on every call.
    using (var memoryStream = new MemoryStream())
    {
        Encoder encoder = new BinaryEncoder(memoryStream);
        datumWriter.Write(GetGenericRecord(toTransform, schema, schemaString), encoder);
        serializedModel = memoryStream.ToArray();
    }

    return serializedModel;
}
/// <summary>
/// Encodes the same record <c>_numberOfRecordsInAvro</c> times into one
/// headerless Avro byte buffer and returns the bytes.
/// </summary>
/// <param name="record">Record to encode repeatedly; its own schema is used.</param>
private byte[] GenericRecordsToAvro(GenericRecord record)
{
    using (var buffer = new MemoryStream())
    {
        var datumWriter = new GenericDatumWriter<GenericRecord>(record.Schema);
        var encoder = new BinaryEncoder(buffer);

        int remaining = _numberOfRecordsInAvro;
        while (remaining-- > 0)
        {
            datumWriter.Write(record, encoder);
        }

        encoder.Flush();
        return buffer.ToArray();
    }
}
// Benchmark: serializes Values once to size a buffer, then measures Iterations
// round trips of write (and, presumably, read) against the same schema, yielding
// ("serialization", elapsed) etc. as results.
// NOTE(review): this method appears TRUNCATED in this view — the tail of the
// deserialization loop and the closing braces are not visible here; confirm the
// remainder before editing.
public override IEnumerable <(string, TimeSpan)> Run() { var stream = new MemoryStream(); var reader = new GenericDatumReader <T>(Schema, Schema); var writer = new GenericDatumWriter <T>(Schema); using (stream) { var encoder = new BinaryEncoder(stream); foreach (var value in Values) { writer.Write(value, encoder); } } var count = Values.Length; var size = stream.ToArray().Length *Iterations / count; stream = new MemoryStream(size); using (stream) { var decoder = new BinaryDecoder(stream); var encoder = new BinaryEncoder(stream); var stopwatch = new Stopwatch(); stopwatch.Start(); for (int i = 0; i < Iterations; i++) { writer.Write(Values[i % count], encoder); } stopwatch.Stop(); yield return("serialization", stopwatch.Elapsed); stopwatch.Reset(); stream.Position = 0; stopwatch.Start(); for (int i = 0; i < Iterations; i++) { reader.Read(default, decoder);
/// <summary>
/// Round-trip compatibility check: a dataset serialized by AvroConvert must be
/// readable by Apache.Avro, and one serialized by Apache.Avro must be readable
/// by AvroConvert. Fails via Contract.Assert on any mismatch.
/// </summary>
public static void Invoke()
{
    // Arrange
    var fixture = new Fixture();
    Dataset dataset = fixture.Create<Dataset>();
    var schema = AvroConvert.GenerateSchema(typeof(Dataset));
    Schema apacheSchema = Schema.Parse(schema);

    // AvroConvert to Apache
    var avroConvertSerialized = AvroConvert.SerializeHeadless(dataset, schema);
    Dataset apacheDeserialized;
    using (var ms = new MemoryStream(avroConvertSerialized))
    {
        var apacheReader = new GenericDatumReader<GenericRecord>(apacheSchema, apacheSchema);
        var decoder = new BinaryDecoder(ms);
        apacheDeserialized = ApacheAvroHelpers.Decreate<Dataset>(apacheReader.Read(null, decoder));
    }
    Contract.Assert(dataset == apacheDeserialized);

    // Apache to AvroConvert
    var apacheStream = new MemoryStream();
    var apacheWriter = new GenericDatumWriter<GenericRecord>(apacheSchema);
    apacheWriter.Write(ApacheAvroHelpers.Create(dataset, apacheSchema), new BinaryEncoder(apacheStream));
    var apacheSerialized = apacheStream.ToArray();

    var avroConvertDeserialized = AvroConvert.DeserializeHeadless<Dataset>(apacheSerialized);
    Contract.Assert(dataset == avroConvertDeserialized);
}
/// <summary>
/// (Re)creates the shared data-file writer for the given path with Deflate
/// compression, either truncating the file (Overwrite) or appending (Append).
/// </summary>
/// <param name="outputFilePath">Destination Avro file path.</param>
/// <param name="mode">Whether to overwrite or append to an existing file.</param>
/// <exception cref="ArgumentOutOfRangeException">Unknown <paramref name="mode"/>.</exception>
private static void BuildDataFileWriter(string outputFilePath, Mode mode)
{
    var datumWriter = new GenericDatumWriter<GenericRecord>(_avroSchema);
    Codec codec = Codec.CreateCodec(Codec.Type.Deflate);

    // Map the requested mode onto the FileMode once; the writer is opened the
    // same way in both cases.
    FileMode fileMode;
    switch (mode)
    {
        case Mode.Overwrite:
            fileMode = FileMode.Create;
            break;
        case Mode.Append:
            fileMode = FileMode.Append;
            break;
        default:
            throw new ArgumentOutOfRangeException(nameof(mode));
    }

    _dataFileWriter = (DataFileWriter<GenericRecord>)DataFileWriter<GenericRecord>.OpenWriter(
        datumWriter, new FileStream(outputFilePath, fileMode), codec);
}
/// <summary>
/// Benchmarks serialize/deserialize round trips of <paramref name="datasets"/>
/// across Apache.Avro, AvroConvert (headless and gzip) and the vNext
/// AvroConvert build, recording elapsed milliseconds and payload sizes.
/// </summary>
/// <param name="datasets">Sample records pushed through each serializer.</param>
/// <param name="schema">Avro schema JSON used by the headless AvroConvert paths.</param>
/// <returns>Collected timings and payload sizes.</returns>
private static BenchmarkResult RunBenchmark(Dataset[] datasets, string schema)
{
    var result = new BenchmarkResult();
    Stopwatch stopwatch = Stopwatch.StartNew();

    //Serialize Apache.Avro
    MemoryStream apacheAvroSerializeStream = new MemoryStream();
    var encoder = new BinaryEncoder(apacheAvroSerializeStream);
    var apacheSchema = Schema.Parse(AvroConvert.GenerateSchema(typeof(Dataset)));
    var apacheWriter = new GenericDatumWriter<GenericRecord>(apacheSchema);
    foreach (var dataset in datasets)
    {
        apacheWriter.Write(ApacheAvroHelpers.Create(dataset, apacheSchema), encoder);
    }
    var apacheAvro = apacheAvroSerializeStream.ToArray();
    result.ApacheAvroSerializeTime = stopwatch.ElapsedMilliseconds;
    stopwatch.Restart();

    //Deserialize Apache.Avro
    List<Dataset> apacheResult = new List<Dataset>();
    using (var ms = new MemoryStream(apacheAvro))
    {
        apacheSchema = Schema.Parse(AvroConvert.GenerateSchema(typeof(Dataset)));
        var apacheReader = new GenericDatumReader<GenericRecord>(apacheSchema, apacheSchema);
        var decoder = new BinaryDecoder(ms);
        foreach (var dataset in datasets)
        {
            apacheResult.Add(ApacheAvroHelpers.Decreate<Dataset>(apacheReader.Read(null, decoder)));
        }
    }
    result.ApacheAvroDeserializeTime = stopwatch.ElapsedMilliseconds;
    stopwatch.Restart();

    //Serialize AvroConvert Headerless
    var avroHeadless = AvroConvert.SerializeHeadless(datasets, schema);
    result.AvroConvertHeadlessSerializeTime = stopwatch.ElapsedMilliseconds;
    stopwatch.Restart();

    //Deserialize AvroConvert Headerless
    AvroConvert.DeserializeHeadless<List<Dataset>>(avroHeadless, schema);
    result.AvroConvertHeadlessDeserializeTime = stopwatch.ElapsedMilliseconds;
    stopwatch.Restart();

    //Serialize AvroConvert Gzip
    var avroGzip = AvroConvert.Serialize(datasets, CodecType.GZip);
    result.AvroConvertGzipSerializeTime = stopwatch.ElapsedMilliseconds;
    stopwatch.Restart();

    //Deserialize AvroConvert Gzip
    AvroConvert.Deserialize<Dataset[]>(avroGzip);
    result.AvroConvertGzipDeserializeTime = stopwatch.ElapsedMilliseconds;
    stopwatch.Restart();

    //Serialize AvroConvert vNext
    var newAvro = AvroConvertToUpdate.AvroConvert.SerializeHeadless(datasets, schema);
    result.AvroConvertVNextHeadlessSerializeTime = stopwatch.ElapsedMilliseconds;
    stopwatch.Restart();

    //Deserialize AvroConvert vNext
    AvroConvertToUpdate.AvroConvert.DeserializeHeadless<Dataset[]>(newAvro, schema);
    result.AvroConvertVNextHeadlessDeserializeTime = stopwatch.ElapsedMilliseconds;
    // BUG FIX: this previously called stopwatch.Stop(), so the vNext gzip
    // serialize time below was read from a stopped watch (stale value) and the
    // subsequent timings were skewed. Restart keeps every phase measured alike.
    stopwatch.Restart();

    //Serialize AvroConvert vNext Gzip
    var newAvroGzip = AvroConvertToUpdate.AvroConvert.Serialize(datasets, AvroConvertToUpdate.Codec.CodecType.GZip);
    result.AvroConvertVNextGzipSerializeTime = stopwatch.ElapsedMilliseconds;
    stopwatch.Restart();

    //Deserialize AvroConvert vNext Gzip
    AvroConvertToUpdate.AvroConvert.Deserialize<Dataset[]>(newAvroGzip);
    result.AvroConvertVNextGzipDeserializeTime = stopwatch.ElapsedMilliseconds;
    stopwatch.Stop();

    //Size
    result.ApacheAvroSize = apacheAvro.Length;
    result.AvroConvertHeadlessSize = avroHeadless.Length;
    result.AvroConvertGzipSize = avroGzip.Length;
    result.AvroConvertVNextSize = newAvro.Length;
    result.AvroConvertVNextGzipSize = newAvroGzip.Length;

    return result;
}
// Writes the Avro cross-language interop test data: builds one GenericRecord
// that exercises every field kind in the interop schema (primitives, map,
// union, enum, fixed, recursive record) and emits it once per supported codec
// as csharp[_<codec>].avro under outputDir.
static void GenerateInteropData(string schemaPath, string outputDir)
{
    // Load and parse the interop schema from disk.
    RecordSchema schema = null;
    using (var reader = new StreamReader(schemaPath))
    {
        schema = Schema.Parse(reader.ReadToEnd()) as RecordSchema;
    }

    // mapField: two entries whose values are records with a single "label" field.
    var mapFieldSchema = (schema.Fields.Find(x => x.Name == "mapField").Schema as MapSchema).ValueSchema as RecordSchema;
    var mapFieldRecord0 = new GenericRecord(mapFieldSchema);
    var mapFieldRecord1 = new GenericRecord(mapFieldSchema);
    mapFieldRecord0.Add("label", "a");
    mapFieldRecord1.Add("label", "cee");
    var mapFieldValue = new Dictionary<string, GenericRecord>
    {
        { "a", mapFieldRecord0 }, { "bee", mapFieldRecord1 }
    };

    // enumField / fixedField: wrap the schema-declared enum symbol and a
    // 16-byte fixed value.
    var enumFieldValue = new GenericEnum(schema.Fields.Find(x => x.Name == "enumField").Schema as EnumSchema, "C");
    var fixedFieldValue = new GenericFixed(
        schema.Fields.Find(x => x.Name == "fixedField").Schema as FixedSchema,
        Encoding.ASCII.GetBytes("1019181716151413"));

    // recordField: a node with one child node (whose own children list is
    // empty), exercising the schema's recursive record type.
    var nodeSchema = schema.Fields.Find(x => x.Name == "recordField").Schema as RecordSchema;
    var recordFieldValue = new GenericRecord(nodeSchema);
    var innerRecordFieldValue = new GenericRecord(nodeSchema);
    innerRecordFieldValue.Add("label", "inner");
    innerRecordFieldValue.Add("children", new GenericRecord[] { });
    recordFieldValue.Add("label", "blah");
    recordFieldValue.Add("children", new GenericRecord[] { innerRecordFieldValue });

    // Assemble the top-level interop record, one value per schema field.
    GenericRecord record = new GenericRecord(schema);
    record.Add("intField", 12);
    record.Add("longField", 15234324L);
    record.Add("stringField", "hey");
    record.Add("boolField", true);
    record.Add("floatField", 1234.0f);
    record.Add("doubleField", -1234.0);
    record.Add("bytesField", Encoding.UTF8.GetBytes("12312adf"));
    record.Add("nullField", null);
    record.Add("arrayField", new double[] { 5.0, 0.0, 12.0 });
    record.Add("mapField", mapFieldValue);
    record.Add("unionField", 12.0);
    record.Add("enumField", enumFieldValue);
    record.Add("fixedField", fixedFieldValue);
    record.Add("recordField", recordFieldValue);

    var datumWriter = new GenericDatumWriter<GenericRecord>(schema);

    // Write the same record once per codec; the null codec uses the plain
    // "csharp.avro" name, all others get a "csharp_<codec>.avro" suffix.
    foreach (var codecName in InteropDataConstants.SupportedCodecNames)
    {
        var outputFile = "csharp.avro";
        if (codecName != DataFileConstants.NullCodec)
        {
            outputFile = string.Format("csharp_{0}.avro", codecName);
        }
        var outputPath = Path.Combine(outputDir, outputFile);
        var codec = Codec.CreateCodecFromString(codecName);
        using (var dataFileWriter = DataFileWriter<GenericRecord>.OpenWriter(datumWriter, outputPath, codec))
        {
            dataFileWriter.Append(record);
        }
    }
}
/// <summary>
/// Prepares a reusable writer pipeline for the given record schema:
/// datum writer -> binary encoder -> in-memory byte buffer.
/// </summary>
/// <param name="schema">Record schema the writer will encode against.</param>
public GenericAvroWriter(Avro.RecordSchema schema)
{
    var buffer = new MemoryStream();
    _writer = new GenericDatumWriter<GenericRecord>(schema);
    _byteArrayOutputStream = buffer;
    _encoder = new BinaryEncoder(buffer);
}