Esempio n. 1
0
        /// <summary>
        /// Reads an Avro container file from <paramref name="input"/> and emits one row per Avro record.
        /// </summary>
        /// <param name="input">Unstructured input; handed to <c>CreateSeekableStream</c> to fill an in-memory stream.</param>
        /// <param name="output">Updatable row whose schema decides which Avro fields are copied.</param>
        /// <returns>A read-only snapshot of <paramref name="output"/> for every record in the file.</returns>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            var avschema = Avro.Schema.Parse(avroSchema);

            using (var ms = new MemoryStream())
            {
                CreateSeekableStream(input, ms);
                ms.Position = 0;

                // The file reader is disposable; release it when enumeration ends or is abandoned.
                using (var fileReader = DataFileReader<GenericRecord>.OpenReader(ms, avschema))
                {
                    while (fileReader.HasNext())
                    {
                        var avroRecord = fileReader.Next();

                        foreach (var column in output.Schema)
                        {
                            var value = avroRecord[column.Name];
                            if (value != null)
                            {
                                output.Set(column.Name, value);
                            }
                            else
                            {
                                output.Set<object>(column.Name, null);
                            }
                        }

                        // Emit the row only after every column has been populated:
                        // one output row per Avro record, not one per column.
                        yield return output.AsReadOnly();
                    }
                }
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Creates a reader that decodes generic Avro records into <paramref name="readerSchema"/>.
        /// </summary>
        /// <param name="writerSchema">Schema the data was written with; when null the reader schema is used for both sides.</param>
        /// <param name="readerSchema">Schema to read into; it is cast to a record schema to collect its fields.</param>
        /// <param name="schemaVersion">Schema version bytes, stored as-is.</param>
        public GenericAvroReader(Schema writerSchema, Schema readerSchema, byte[] schemaVersion)
        {
            _schema = readerSchema;

            var recordSchema = (Avro.RecordSchema)_schema;
            _fields = recordSchema.Fields.Select(f => new Field(f.Name, f.Pos)).ToList();
            _schemaVersion = schemaVersion;

            // Fall back to the reader schema when no writer schema was supplied.
            _reader = new GenericDatumReader<GenericRecord>(writerSchema ?? readerSchema, readerSchema);

            // The offset is optional schema metadata; it defaults to zero when absent.
            var offsetProperty = _schema.GetProperty(GenericAvroSchema.OFFSET_PROP);
            _offset = offsetProperty != null ? int.Parse(offsetProperty.ToString()) : 0;
        }
Esempio n. 3
0
        /// <summary>
        /// Verifies the Base64 signature of the message payload against the configured public key
        /// and writes the outcome to the console. When the signature is valid, the payload is also
        /// decoded as an Avro record using the schema in "twitter.avsc" and printed.
        /// </summary>
        public void Verify()
        {
            // Both crypto objects are disposable; release them deterministically.
            using (var rsa = new RSACryptoServiceProvider())
            using (var mySHA256 = SHA256.Create())
            {
                rsa.FromXmlString(publicKey);
                var rsaDeformatter = new RSAPKCS1SignatureDeformatter(rsa);

                // NOTE(review): the digest below is always SHA-256, so hashAlgorithm
                // presumably names SHA256 as well; confirm against the signing side.
                rsaDeformatter.SetHashAlgorithm(hashAlgorithm);

                // The hash value that was signed.
                byte[] messageBytes = mySHA256.ComputeHash(message.value);

                if (rsaDeformatter.VerifySignature(messageBytes, System.Convert.FromBase64String(signature)))
                {
                    Console.WriteLine("The signature is valid.");

                    // Optionally decode it
                    var schema      = Avro.Schema.Parse(File.ReadAllText("twitter.avsc"));
                    var datumReader = new GenericDatumReader<GenericRecord>(schema, schema);

                    using (var ms = new MemoryStream(message.value))
                    {
                        var           decoder = new BinaryDecoder(ms);
                        GenericRecord rec     = datumReader.Read(null, decoder);
                        System.Console.WriteLine(rec);
                    }
                }
                else
                {
                    Console.WriteLine("The signature is not valid.");
                }
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Reads every record from "twitter.avro", re-encodes each one as a standalone Avro binary
        /// payload, then signs and verifies each payload.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            var messages = new List<Message>();

            // Read & parse the avro schema
            var schema = Avro.Schema.Parse(File.ReadAllText("twitter.avsc"));

            // One writer serves every record; it only depends on the schema.
            var writer = new GenericDatumWriter<GenericRecord>(schema);

            // Open a file reader on the avro binary file with data and release it when done.
            using (var dataFileReader = Avro.File.DataFileReader<GenericRecord>.OpenReader("twitter.avro", schema))
            {
                while (dataFileReader.HasNext())
                {
                    var tweet = dataFileReader.Next();

                    using (var iostr = new MemoryStream())
                    {
                        Avro.IO.Encoder e = new BinaryEncoder(iostr);
                        writer.Write(tweet, e);
                        messages.Add(new Message(iostr.ToArray()));
                    }
                }
            }

            foreach (var item in messages)
            {
                var envelope = Sign(item);
                envelope.Verify();
            }
        }
        /// <summary>
        /// Decodes one binary-encoded Avro record from <paramref name="stream"/> and passes it to the
        /// deserialization strategy together with <paramref name="reflectedObject"/>.
        /// </summary>
        /// <param name="stream">Stream the binary decoder reads from.</param>
        /// <param name="writerSchema">Schema the data was written with; also forwarded to the strategy.</param>
        /// <param name="readerSchema">Schema to read into; must be a <see cref="RecordSchema"/>.</param>
        /// <param name="reflectedObject">Target object, passed by reference to the strategy.</param>
        /// <typeparam name="T">Type of the target object.</typeparam>
        /// <exception cref="System.ArgumentException">
        /// <paramref name="readerSchema"/> is null or not a record schema.
        /// </exception>
        public void Format<T>(Stream stream, Schema writerSchema, Schema readerSchema, ref T reflectedObject) where T : new()
        {
            // Fail fast with a clear message instead of a NullReferenceException deep inside the reader.
            var recordSchema = readerSchema as RecordSchema;
            if (recordSchema == null)
            {
                throw new System.ArgumentException("Reader schema must be a record schema.", nameof(readerSchema));
            }

            var datumReader = new GenericDatumReader<GenericRecord>(writerSchema, readerSchema);
            var decoder     = new BinaryDecoder(stream);

            // The reader may hand back a different instance than the one offered for reuse,
            // so always continue with the returned record.
            GenericRecord genericRecord = datumReader.Read(new GenericRecord(recordSchema), decoder);

            _deserializationStrategy.Deserialize(genericRecord, ref reflectedObject, writerSchema);
        }
Esempio n. 6
0
        /// <summary>
        /// Decodes a single value from <paramref name="input"/>, starting at <paramref name="startPos"/>,
        /// and asserts that the whole stream was consumed and that the value equals <paramref name="expected"/>.
        /// </summary>
        /// <param name="expected">Value the decoded result must equal.</param>
        /// <param name="input">Stream holding the encoded data.</param>
        /// <param name="startPos">Position to rewind the stream to before reading.</param>
        /// <param name="ws">Writer schema.</param>
        /// <param name="rs">Reader schema.</param>
        private static void checkAlternateDeserializers<S>(S expected, Stream input, long startPos, Schema ws, Schema rs)
        {
            input.Position = startPos;

            var datumReader = new GenericDatumReader<S>(ws, rs);
            S actual = datumReader.Read(default(S), new BinaryDecoder(input));

            // The read must have consumed the stream up to its end.
            Assert.AreEqual(input.Length, input.Position);
            Assert.AreEqual(expected, actual);
        }
Esempio n. 7
0
        /// <summary>
        /// Decodes consecutive values from <paramref name="input"/>, starting at <paramref name="startPos"/>,
        /// asserting each one against the matching entry of <paramref name="expectations"/>, and finally
        /// asserts that the whole stream was consumed.
        /// </summary>
        /// <param name="expectations">Expected values, in the order they appear in the stream.</param>
        /// <param name="input">Stream holding the encoded data.</param>
        /// <param name="startPos">Position to rewind the stream to before reading.</param>
        /// <param name="ws">Writer schema.</param>
        /// <param name="rs">Reader schema.</param>
        private static void checkAlternateDeserializers<S>(IEnumerable<S> expectations, Stream input, long startPos, Schema ws, Schema rs)
        {
            input.Position = startPos;

            var     datumReader   = new GenericDatumReader<S>(ws, rs);
            Decoder binaryDecoder = new BinaryDecoder(input);

            foreach (var wanted in expectations)
            {
                var actual = Read(datumReader, binaryDecoder);
                Assert.AreEqual(wanted, actual);
            }

            // Nothing may be left unread once every expectation has been checked.
            Assert.AreEqual(input.Length, input.Position);
        }
Esempio n. 8
0
        public override IEnumerable <(string, TimeSpan)> Run()
        {
            var stream = new MemoryStream();

            var reader = new GenericDatumReader <T>(Schema, Schema);
            var writer = new GenericDatumWriter <T>(Schema);

            using (stream)
            {
                var encoder = new BinaryEncoder(stream);

                foreach (var value in Values)
                {
                    writer.Write(value, encoder);
                }
            }

            var count = Values.Length;
            var size  = stream.ToArray().Length *Iterations / count;

            stream = new MemoryStream(size);

            using (stream)
            {
                var decoder = new BinaryDecoder(stream);
                var encoder = new BinaryEncoder(stream);

                var stopwatch = new Stopwatch();
                stopwatch.Start();

                for (int i = 0; i < Iterations; i++)
                {
                    writer.Write(Values[i % count], encoder);
                }

                stopwatch.Stop();
                yield return("serialization", stopwatch.Elapsed);

                stopwatch.Reset();
                stream.Position = 0;
                stopwatch.Start();

                for (int i = 0; i < Iterations; i++)
                {
                    reader.Read(default, decoder);
Esempio n. 9
0
        /// <summary>
        /// Parses a JSON document into a generic Avro record. Not implemented yet.
        /// </summary>
        /// <param name="schema">Schema the record should conform to.</param>
        /// <param name="json">JSON text to parse.</param>
        /// <returns>Never returns; the method always throws.</returns>
        /// <exception cref="NotImplementedException">Always thrown.</exception>
        public static GenericRecord Parse(Schema schema, string json)
        {
            // The former body after this throw was unreachable. It also fed the UTF-8 bytes of the
            // JSON text to a BinaryDecoder, which reads Avro binary encoding rather than JSON, so it
            // was removed instead of enabled. A real implementation needs a JSON-aware decoder
            // (the old code referenced DecoderFactory.Get().JsonDecoder(schema, ...) as the model).
            throw new NotImplementedException();
        }
Esempio n. 10
0
        /// <summary>
        /// Decodes up to <c>_numberOfRecordsInAvro</c> generic records from a binary Avro payload.
        /// </summary>
        /// <param name="avro">Binary-encoded records, back to back.</param>
        /// <param name="schema">Record schema used as both writer and reader schema.</param>
        /// <returns>The decoded records, in read order; reading stops early if the reader yields null.</returns>
        private IList<GenericRecord> AvroToGenericRecordsToAvro(byte[] avro, RecordSchema schema)
        {
            using (var source = new MemoryStream(avro))
            {
                var datumReader   = new GenericDatumReader<GenericRecord>(schema, schema);
                var binaryDecoder = new BinaryDecoder(source);
                var decoded       = new List<GenericRecord>();

                int readCount = 0;
                while (readCount < _numberOfRecordsInAvro)
                {
                    var current = datumReader.Read(null, binaryDecoder);
                    if (current == null)
                    {
                        break;
                    }

                    decoded.Add(current);
                    readCount++;
                }

                return decoded;
            }
        }
Esempio n. 11
0
            /// <summary>
            /// Opens the Avro file at the path of <paramref name="status"/> and prepares a JSON encoder
            /// bound to the file's schema, writing into an in-memory output buffer.
            /// </summary>
            /// <param name="status">File status whose path identifies the Avro file to open.</param>
            /// <exception cref="System.IO.IOException"/>
            public AvroFileInputStream(FileStatus status)
            {
                pos    = 0;
                buffer = new byte[0];

                // Open the data file through a default file context.
                var datumReader = new GenericDatumReader<object>();
                var fileContext = FileContext.GetFileContext(new Configuration());
                var avroInput   = new AvroFSInput(fileContext, status.GetPath());
                fileReader = DataFileReader.OpenReader(avroInput, datumReader);

                // The writer and encoder both use the schema stored in the file.
                Schema fileSchema = fileReader.GetSchema();
                writer = new GenericDatumWriter<object>(fileSchema);
                output = new ByteArrayOutputStream();

                JsonGenerator jsonGenerator = new JsonFactory().CreateJsonGenerator(output, JsonEncoding.Utf8);

                // Separate root values with the platform line separator.
                var printer = new MinimalPrettyPrinter();
                printer.SetRootValueSeparator(Runtime.GetProperty("line.separator"));
                jsonGenerator.SetPrettyPrinter(printer);

                encoder = EncoderFactory.Get().JsonEncoder(fileSchema, jsonGenerator);
            }
Esempio n. 12
0
        /// <summary>
        /// Round-trips a generated <c>Dataset</c> between AvroConvert and Apache Avro in both
        /// directions and asserts that each result equals the original.
        /// </summary>
        public static void Invoke()
        {
            //Arrange
            var     fixture = new Fixture();
            Dataset dataset = fixture.Create<Dataset>();

            var    schema       = AvroConvert.GenerateSchema(typeof(Dataset));
            Schema apacheSchema = Schema.Parse(schema);


            //AvroConvert to Apache
            var avroConvertSerialized = AvroConvert.SerializeHeadless(dataset, schema);

            Dataset apacheDeserialized;

            using (var ms = new MemoryStream(avroConvertSerialized))
            {
                var apacheReader = new GenericDatumReader<GenericRecord>(apacheSchema, apacheSchema);
                var decoder      = new BinaryDecoder(ms);
                apacheDeserialized = (ApacheAvroHelpers.Decreate<Dataset>(apacheReader.Read(null, decoder)));
            }

            // NOTE(review): this relies on Dataset defining value equality for ==; if it does not,
            // the comparison is by reference. Confirm against the Dataset type.
            Contract.Assert(dataset == apacheDeserialized);


            //Apache to AvroConvert
            byte[] apacheSerialized;

            // Dispose the serialization buffer once its bytes have been copied out.
            using (var apacheAvroSerializeStream = new MemoryStream())
            {
                var encoder      = new BinaryEncoder(apacheAvroSerializeStream);
                var apacheWriter = new GenericDatumWriter<GenericRecord>(apacheSchema);

                apacheWriter.Write(ApacheAvroHelpers.Create(dataset, apacheSchema), encoder);

                apacheSerialized = apacheAvroSerializeStream.ToArray();
            }

            var avroConvertDeserialized = AvroConvert.DeserializeHeadless<Dataset>(apacheSerialized);

            Contract.Assert(dataset == avroConvertDeserialized);
        }
Esempio n. 13
0
        /// <summary>
        /// Times serialization and deserialization of <paramref name="datasets"/> with Apache.Avro,
        /// AvroConvert (headless and GZip) and AvroConvert vNext (headless and GZip), and records the
        /// size of each serialized payload.
        /// </summary>
        /// <param name="datasets">Items to serialize and deserialize.</param>
        /// <param name="schema">Avro schema text passed to the headless AvroConvert calls.</param>
        /// <returns>Elapsed milliseconds per phase plus payload sizes in bytes.</returns>
        private static BenchmarkResult RunBenchmark(Dataset[] datasets, string schema)
        {
            var result = new BenchmarkResult();

            Stopwatch stopwatch = Stopwatch.StartNew();


            //Serialize Apache.Avro
            var    apacheSchema = Schema.Parse(AvroConvert.GenerateSchema(typeof(Dataset)));
            byte[] apacheAvro;

            // Dispose the serialization buffer once its bytes have been copied out.
            using (var apacheAvroSerializeStream = new MemoryStream())
            {
                var encoder      = new BinaryEncoder(apacheAvroSerializeStream);
                var apacheWriter = new GenericDatumWriter<GenericRecord>(apacheSchema);

                foreach (var dataset in datasets)
                {
                    apacheWriter.Write(ApacheAvroHelpers.Create(dataset, apacheSchema), encoder);
                }

                apacheAvro = apacheAvroSerializeStream.ToArray();
            }

            result.ApacheAvroSerializeTime = stopwatch.ElapsedMilliseconds;
            stopwatch.Restart();

            //Deserialize Apache.Avro

            List<Dataset> apacheResult = new List<Dataset>();

            using (var ms = new MemoryStream(apacheAvro))
            {
                // The schema is parsed again here so this phase pays the same setup cost as serialization.
                apacheSchema = Schema.Parse(AvroConvert.GenerateSchema(typeof(Dataset)));
                var apacheReader = new GenericDatumReader<GenericRecord>(apacheSchema, apacheSchema);
                var decoder      = new BinaryDecoder(ms);

                // One record was written per dataset, so read back exactly that many.
                foreach (var dataset in datasets)
                {
                    apacheResult.Add(ApacheAvroHelpers.Decreate<Dataset>(apacheReader.Read(null, decoder)));
                }
            }
            result.ApacheAvroDeserializeTime = stopwatch.ElapsedMilliseconds;
            stopwatch.Restart();


            //Serialize AvroConvert Headerless
            var avroHeadless = AvroConvert.SerializeHeadless(datasets, schema);

            result.AvroConvertHeadlessSerializeTime = stopwatch.ElapsedMilliseconds;
            stopwatch.Restart();

            //Deserialize AvroConvert Headerless
            AvroConvert.DeserializeHeadless<List<Dataset>>(avroHeadless, schema);
            result.AvroConvertHeadlessDeserializeTime = stopwatch.ElapsedMilliseconds;
            stopwatch.Restart();


            //Serialize AvroConvert Gzip
            var avroGzip = AvroConvert.Serialize(datasets, CodecType.GZip);

            result.AvroConvertGzipSerializeTime = stopwatch.ElapsedMilliseconds;
            stopwatch.Restart();

            //Deserialize AvroConvert Gzip
            AvroConvert.Deserialize<Dataset[]>(avroGzip);
            result.AvroConvertGzipDeserializeTime = stopwatch.ElapsedMilliseconds;
            stopwatch.Restart();


            //Serialize AvroConvert vNext
            var newAvro = AvroConvertToUpdate.AvroConvert.SerializeHeadless(datasets, schema);

            result.AvroConvertVNextHeadlessSerializeTime = stopwatch.ElapsedMilliseconds;
            stopwatch.Restart();

            //Deserialize AvroConvert vNext
            AvroConvertToUpdate.AvroConvert.DeserializeHeadless<Dataset[]>(newAvro, schema);
            result.AvroConvertVNextHeadlessDeserializeTime = stopwatch.ElapsedMilliseconds;
            // Restart (not Stop): the stopwatch must be running and reset for the next phase,
            // otherwise the GZip serialize time would just repeat the reading above.
            stopwatch.Restart();

            //Serialize AvroConvert vNext Gzip
            var newAvroGzip = AvroConvertToUpdate.AvroConvert.Serialize(datasets, AvroConvertToUpdate.Codec.CodecType.GZip);

            result.AvroConvertVNextGzipSerializeTime = stopwatch.ElapsedMilliseconds;
            stopwatch.Restart();

            //Deserialize AvroConvert vNext Gzip
            AvroConvertToUpdate.AvroConvert.Deserialize<Dataset[]>(newAvroGzip);
            result.AvroConvertVNextGzipDeserializeTime = stopwatch.ElapsedMilliseconds;
            stopwatch.Stop();


            //Size
            result.ApacheAvroSize           = apacheAvro.Length;
            result.AvroConvertHeadlessSize  = avroHeadless.Length;
            result.AvroConvertGzipSize      = avroGzip.Length;
            result.AvroConvertVNextSize     = newAvro.Length;
            result.AvroConvertVNextGzipSize = newAvroGzip.Length;

            return result;
        }