/// <summary>
/// U-SQL extractor: buffers the (non-seekable) input into memory, reads every
/// Avro record from the container file, copies each record's fields into the
/// output row and yields one row per record.
/// </summary>
/// <param name="input">Raw unstructured input split provided by U-SQL.</param>
/// <param name="output">Reusable row buffer whose schema drives the column copy.</param>
/// <returns>One read-only row per Avro record in the input.</returns>
public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
{
    var avschema = Avro.Schema.Parse(avroSchema);

    using (var ms = new MemoryStream())
    {
        // Avro's DataFileReader needs a seekable stream; copy the input first.
        CreateSeekableStream(input, ms);
        ms.Position = 0;

        var fileReader = DataFileReader<GenericRecord>.OpenReader(ms, avschema);
        while (fileReader.HasNext())
        {
            var avroRecord = fileReader.Next();

            foreach (var column in output.Schema)
            {
                if (avroRecord[column.Name] != null)
                {
                    output.Set(column.Name, avroRecord[column.Name]);
                }
                else
                {
                    output.Set<object>(column.Name, null);
                }
            }

            // BUG FIX: yield once per record, after ALL columns are populated.
            // Previously the yield was inside the column loop, emitting one
            // partially-updated row per column.
            yield return output.AsReadOnly();
        }
    }
}
/// <summary>
/// Builds a GenericRecord reader. When a writer schema is supplied, Avro
/// schema resolution maps writer data onto the reader schema; otherwise the
/// reader schema is used on both sides.
/// </summary>
/// <param name="writerSchema">Schema the data was written with, or null.</param>
/// <param name="readerSchema">Schema to present decoded records as; must be a record schema.</param>
/// <param name="schemaVersion">Opaque version bytes associated with the schema.</param>
public GenericAvroReader(Schema writerSchema, Schema readerSchema, byte[] schemaVersion)
{
    _schema = readerSchema;
    _schemaVersion = schemaVersion;
    _fields = ((Avro.RecordSchema)_schema).Fields
        .Select(f => new Field(f.Name, f.Pos))
        .ToList();

    _reader = writerSchema == null
        ? new GenericDatumReader<GenericRecord>(readerSchema, readerSchema)
        : new GenericDatumReader<GenericRecord>(writerSchema, readerSchema);

    // Optional offset property on the schema; default to 0 when absent.
    var offsetProperty = _schema.GetProperty(GenericAvroSchema.OFFSET_PROP);
    _offset = offsetProperty != null ? int.Parse(offsetProperty.ToString()) : 0;
}
/// <summary>
/// Verifies the RSA PKCS#1 signature over the SHA-256 hash of the message
/// payload; on success, additionally decodes the payload as an Avro record
/// (schema loaded from "twitter.avsc") and prints it.
/// </summary>
public void Verify()
{
    // BUG FIX: RSACryptoServiceProvider and SHA256 are IDisposable and were
    // previously leaked; dispose them deterministically.
    using (var rsa = new RSACryptoServiceProvider())
    using (var mySHA256 = SHA256.Create())
    {
        rsa.FromXmlString(publicKey);

        var rsaDeformatter = new RSAPKCS1SignatureDeformatter(rsa);
        rsaDeformatter.SetHashAlgorithm(hashAlgorithm);

        // The hash value that was signed.
        byte[] messageBytes = mySHA256.ComputeHash(message.value);

        if (rsaDeformatter.VerifySignature(messageBytes, System.Convert.FromBase64String(signature)))
        {
            Console.WriteLine("The signature is valid.");

            // Optionally decode the verified payload as an Avro record.
            var schema = Avro.Schema.Parse(File.ReadAllText("twitter.avsc"));
            var datumReader = new GenericDatumReader<GenericRecord>(schema, schema);
            using (var ms = new MemoryStream(message.value))
            {
                var decoder = new BinaryDecoder(ms);
                GenericRecord rec = datumReader.Read(null, decoder);
                System.Console.WriteLine(rec);
            }
        }
        else
        {
            Console.WriteLine("The signature is not valid.");
        }
    }
}
/// <summary>
/// Reads every record from "twitter.avro" (schema "twitter.avsc"),
/// re-serializes each record to headerless Avro bytes wrapped in a Message,
/// then signs and verifies each message.
/// </summary>
static void Main(string[] args)
{
    var messages = new List<Message>();

    // Read & parse the avro schema.
    var schema = Avro.Schema.Parse(File.ReadAllText("twitter.avsc"));
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema, schema);

    // The writer depends only on the schema — create it once instead of
    // once per record (it was previously re-allocated inside the loop).
    var writer = new GenericDatumWriter<GenericRecord>(schema);

    // Open a file reader on the avro binary file with data.
    var dataFileReader = Avro.File.DataFileReader<GenericRecord>.OpenReader("twitter.avro", schema);
    while (dataFileReader.HasNext())
    {
        var tweet = dataFileReader.Next();

        // Dispose the scratch stream per record (previously leaked).
        using (var iostr = new MemoryStream())
        {
            Avro.IO.Encoder e = new BinaryEncoder(iostr);
            writer.Write(tweet, e);
            messages.Add(new Message(iostr.ToArray()));
        }
    }

    foreach (var item in messages)
    {
        var envelope = Sign(item);
        envelope.Verify();
    }
}
/// <summary>
/// Deserializes one Avro record from the stream (resolving writerSchema
/// against readerSchema) and maps it onto the reflected object.
/// </summary>
/// <returns>The format.</returns>
/// <param name="stream">Stream positioned at the binary-encoded record.</param>
/// <param name="writerSchema">Writer schema.</param>
/// <param name="readerSchema">Reader schema; expected to be a record schema.</param>
/// <param name="reflectedObject">Target object populated from the decoded record.</param>
/// <typeparam name="T">The 1st type parameter.</typeparam>
public void Format<T>(Stream stream, Schema writerSchema, Schema readerSchema, ref T reflectedObject) where T : new()
{
    var datumReader = new GenericDatumReader<GenericRecord>(writerSchema, readerSchema);
    var decoder = new BinaryDecoder(stream);
    GenericRecord reuse = new GenericRecord(readerSchema as RecordSchema);

    // BUG FIX: DatumReader.Read may return a different instance than the
    // "reuse" record it was handed; the previous code discarded the return
    // value and deserialized from the (possibly untouched) pre-allocated
    // record. Use what Read actually returns.
    GenericRecord genericRecord = datumReader.Read(reuse, decoder);

    _deserializationStrategy.Deserialize(genericRecord, ref reflectedObject, writerSchema);
}
// Seeks the stream back to the serialized value and verifies that the
// generic deserializer reproduces the expected value, consuming the
// entire stream in the process.
private static void checkAlternateDeserializers<S>(S expected, Stream input, long startPos, Schema ws, Schema rs)
{
    input.Position = startPos;

    var datumReader = new GenericDatumReader<S>(ws, rs);
    Decoder binaryDecoder = new BinaryDecoder(input);
    S actual = datumReader.Read(default(S), binaryDecoder);

    Assert.AreEqual(input.Length, input.Position); // Ensure we have read everything.
    Assert.AreEqual(expected, actual);
}
// Seeks the stream back to the serialized sequence and verifies that the
// generic deserializer reproduces each expected value in order, consuming
// the entire stream in the process.
private static void checkAlternateDeserializers<S>(IEnumerable<S> expectations, Stream input, long startPos, Schema ws, Schema rs)
{
    input.Position = startPos;

    var datumReader = new GenericDatumReader<S>(ws, rs);
    Decoder binaryDecoder = new BinaryDecoder(input);

    foreach (var expected in expectations)
    {
        Assert.AreEqual(expected, Read(datumReader, binaryDecoder));
    }

    Assert.AreEqual(input.Length, input.Position); // Ensure we have read everything.
}
public override IEnumerable <(string, TimeSpan)> Run() { var stream = new MemoryStream(); var reader = new GenericDatumReader <T>(Schema, Schema); var writer = new GenericDatumWriter <T>(Schema); using (stream) { var encoder = new BinaryEncoder(stream); foreach (var value in Values) { writer.Write(value, encoder); } } var count = Values.Length; var size = stream.ToArray().Length *Iterations / count; stream = new MemoryStream(size); using (stream) { var decoder = new BinaryDecoder(stream); var encoder = new BinaryEncoder(stream); var stopwatch = new Stopwatch(); stopwatch.Start(); for (int i = 0; i < Iterations; i++) { writer.Write(Values[i % count], encoder); } stopwatch.Stop(); yield return("serialization", stopwatch.Elapsed); stopwatch.Reset(); stream.Position = 0; stopwatch.Start(); for (int i = 0; i < Iterations; i++) { reader.Read(default, decoder);
/// <summary>
/// Intended to parse a JSON-encoded Avro payload into a GenericRecord.
/// Currently unimplemented: always throws <see cref="NotImplementedException"/>.
/// </summary>
/// <param name="schema">Schema the JSON payload conforms to.</param>
/// <param name="json">JSON text to decode.</param>
/// <returns>Never returns; always throws.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public static GenericRecord Parse(Schema schema, string json)
{
    // NOTE(review): deliberately unimplemented — everything below this throw
    // is unreachable sketch code. The sketch uses a BinaryDecoder on UTF-8
    // JSON bytes, which would not decode JSON correctly; a JSON decoder
    // (see the commented-out line) would presumably be needed — confirm
    // against the Avro library in use before implementing.
    throw new NotImplementedException();
    using (var stream = new MemoryStream(Encoding.UTF8.GetBytes(json)))
    {
        try
        {
            var decoder = new BinaryDecoder(stream);
            //Decoder decoder = DecoderFactory.Get().JsonDecoder(schema, din);
            var reader = new GenericDatumReader<GenericRecord>(schema, schema);
            //var reader = new GenericDatumReader<GenericRecord>(schema);
            return ((GenericRecord)reader.Read(null, decoder));
        }
        catch (IOException ex)
        {
            // Wrap I/O failures in the engine's exception type.
            throw new EPException("Failed to parse json: " + ex.Message, ex);
        }
    }
}
/// <summary>
/// Decodes up to _numberOfRecordsInAvro GenericRecords from headerless
/// Avro bytes using the given record schema.
/// </summary>
/// <param name="avro">Headerless binary Avro payload.</param>
/// <param name="schema">Record schema used for both writer and reader sides.</param>
/// <returns>The decoded records, in stream order.</returns>
private IList<GenericRecord> AvroToGenericRecordsToAvro(byte[] avro, RecordSchema schema)
{
    using (var stream = new MemoryStream(avro))
    {
        var datumReader = new GenericDatumReader<GenericRecord>(schema, schema);
        var binaryDecoder = new BinaryDecoder(stream);

        var result = new List<GenericRecord>();
        for (int index = 0; index < _numberOfRecordsInAvro; index++)
        {
            var next = datumReader.Read(null, binaryDecoder);
            if (next == null)
            {
                break;
            }
            result.Add(next);
        }

        return result;
    }
}
/// <exception cref="System.IO.IOException"/>
/// <summary>
/// Opens the Avro data file described by <paramref name="status"/> and wires
/// up a JSON re-encoder so each datum can be streamed out as one line of
/// minimally-formatted JSON.
/// </summary>
public AvroFileInputStream(FileStatus status)
{
    pos = 0;
    buffer = new byte[0];
    // Reader with no explicit schema: the file's embedded writer schema is used.
    GenericDatumReader<object> reader = new GenericDatumReader<object>();
    FileContext fc = FileContext.GetFileContext(new Configuration());
    fileReader = DataFileReader.OpenReader(new AvroFSInput(fc, status.GetPath()), reader);
    Schema schema = fileReader.GetSchema();
    // Re-serialize each datum with the file's own schema, JSON-encoded.
    writer = new GenericDatumWriter<object>(schema);
    output = new ByteArrayOutputStream();
    JsonGenerator generator = new JsonFactory().CreateJsonGenerator(output, JsonEncoding.Utf8);
    // One root value per line: separate records with the platform line separator.
    MinimalPrettyPrinter prettyPrinter = new MinimalPrettyPrinter();
    prettyPrinter.SetRootValueSeparator(Runtime.GetProperty("line.separator"));
    generator.SetPrettyPrinter(prettyPrinter);
    encoder = EncoderFactory.Get().JsonEncoder(schema, generator);
}
/// <summary>
/// Interop round-trip check: a Dataset serialized by AvroConvert must be
/// readable by Apache.Avro, and vice versa, yielding an equal Dataset each way.
/// </summary>
public static void Invoke()
{
    //Arrange
    var fixture = new Fixture();
    Dataset dataset = fixture.Create<Dataset>();

    var schema = AvroConvert.GenerateSchema(typeof(Dataset));
    Schema apacheSchema = Schema.Parse(schema);

    //AvroConvert to Apache
    var avroConvertSerialized = AvroConvert.SerializeHeadless(dataset, schema);
    Dataset apacheDeserialized;
    using (var inputStream = new MemoryStream(avroConvertSerialized))
    {
        var apacheReader = new GenericDatumReader<GenericRecord>(apacheSchema, apacheSchema);
        var binaryDecoder = new BinaryDecoder(inputStream);
        apacheDeserialized = ApacheAvroHelpers.Decreate<Dataset>(apacheReader.Read(null, binaryDecoder));
    }
    Contract.Assert(dataset == apacheDeserialized);

    //Apache to AvroConvert
    MemoryStream apacheSerializeStream = new MemoryStream();
    var binaryEncoder = new BinaryEncoder(apacheSerializeStream);
    var apacheWriter = new GenericDatumWriter<GenericRecord>(apacheSchema);
    apacheWriter.Write(ApacheAvroHelpers.Create(dataset, apacheSchema), binaryEncoder);
    var apacheSerialized = apacheSerializeStream.ToArray();

    var avroConvertDeserialized = AvroConvert.DeserializeHeadless<Dataset>(apacheSerialized);
    Contract.Assert(dataset == avroConvertDeserialized);
}
/// <summary>
/// Benchmarks serialize/deserialize round-trips of the same datasets across
/// Apache.Avro, AvroConvert (current) and AvroConvert vNext, recording
/// per-phase wall-clock times and payload sizes.
/// </summary>
/// <param name="datasets">Data to serialize in every variant.</param>
/// <param name="schema">AvroConvert schema string for the headless variants.</param>
/// <returns>The populated timing/size result.</returns>
private static BenchmarkResult RunBenchmark(Dataset[] datasets, string schema)
{
    var result = new BenchmarkResult();
    Stopwatch stopwatch = Stopwatch.StartNew();

    //Serialize Apache.Avro
    MemoryStream apacheAvroSerializeStream = new MemoryStream();
    var encoder = new BinaryEncoder(apacheAvroSerializeStream);
    var apacheSchema = Schema.Parse(AvroConvert.GenerateSchema(typeof(Dataset)));
    var apacheWriter = new GenericDatumWriter<GenericRecord>(apacheSchema);
    foreach (var dataset in datasets)
    {
        apacheWriter.Write(ApacheAvroHelpers.Create(dataset, apacheSchema), encoder);
    }
    var apacheAvro = apacheAvroSerializeStream.ToArray();
    result.ApacheAvroSerializeTime = stopwatch.ElapsedMilliseconds;
    stopwatch.Restart();

    //Deserialize Apache.Avro
    List<Dataset> apacheResult = new List<Dataset>();
    using (var ms = new MemoryStream(apacheAvro))
    {
        apacheSchema = Schema.Parse(AvroConvert.GenerateSchema(typeof(Dataset)));
        var apacheReader = new GenericDatumReader<GenericRecord>(apacheSchema, apacheSchema);
        var decoder = new BinaryDecoder(ms);
        foreach (var dataset in datasets)
        {
            apacheResult.Add(ApacheAvroHelpers.Decreate<Dataset>(apacheReader.Read(null, decoder)));
        }
    }
    result.ApacheAvroDeserializeTime = stopwatch.ElapsedMilliseconds;
    stopwatch.Restart();

    //Serialize AvroConvert Headerless
    var avroHeadless = AvroConvert.SerializeHeadless(datasets, schema);
    result.AvroConvertHeadlessSerializeTime = stopwatch.ElapsedMilliseconds;
    stopwatch.Restart();

    //Deserialize AvroConvert Headerless
    AvroConvert.DeserializeHeadless<List<Dataset>>(avroHeadless, schema);
    result.AvroConvertHeadlessDeserializeTime = stopwatch.ElapsedMilliseconds;
    stopwatch.Restart();

    //Serialize AvroConvert Gzip
    var avroGzip = AvroConvert.Serialize(datasets, CodecType.GZip);
    result.AvroConvertGzipSerializeTime = stopwatch.ElapsedMilliseconds;
    stopwatch.Restart();

    //Deserialize AvroConvert Gzip
    AvroConvert.Deserialize<Dataset[]>(avroGzip);
    result.AvroConvertGzipDeserializeTime = stopwatch.ElapsedMilliseconds;
    stopwatch.Restart();

    //Serialize AvroConvert vNext
    var newAvro = AvroConvertToUpdate.AvroConvert.SerializeHeadless(datasets, schema);
    result.AvroConvertVNextHeadlessSerializeTime = stopwatch.ElapsedMilliseconds;
    stopwatch.Restart();

    //Deserialize AvroConvert vNext
    AvroConvertToUpdate.AvroConvert.DeserializeHeadless<Dataset[]>(newAvro, schema);
    result.AvroConvertVNextHeadlessDeserializeTime = stopwatch.ElapsedMilliseconds;

    // BUG FIX: this phase previously called stopwatch.Stop() here, so the
    // vNext Gzip serialize time recorded the frozen elapsed value of the
    // prior phase instead of being measured. Restart like every other phase.
    stopwatch.Restart();

    //Serialize AvroConvert vNext Gzip
    var newAvroGzip = AvroConvertToUpdate.AvroConvert.Serialize(datasets, AvroConvertToUpdate.Codec.CodecType.GZip);
    result.AvroConvertVNextGzipSerializeTime = stopwatch.ElapsedMilliseconds;
    stopwatch.Restart();

    //Deserialize AvroConvert vNext Gzip
    AvroConvertToUpdate.AvroConvert.Deserialize<Dataset[]>(newAvroGzip);
    result.AvroConvertVNextGzipDeserializeTime = stopwatch.ElapsedMilliseconds;
    stopwatch.Stop();

    //Size
    result.ApacheAvroSize = apacheAvro.Length;
    result.AvroConvertHeadlessSize = avroHeadless.Length;
    result.AvroConvertGzipSize = avroGzip.Length;
    result.AvroConvertVNextSize = newAvro.Length;
    result.AvroConvertVNextGzipSize = newAvroGzip.Length;

    return result;
}