Example #1
0
        /// <summary>
        /// Round-trip test: writes dictionary-containing records with the Microsoft
        /// Avro sequential writer and reads them back with the Apache
        /// <see cref="DataFileReader{T}"/>, verifying the dictionary entries survive.
        /// </summary>
        public void SequentialWriter_MicrosoftWriterApacherReaderOfDictionary()
        {
            var expected = new List <ContainingDictionaryClass <string, string> >();

            for (var i = 0; i < 7; i++)
            {
                expected.Add(ContainingDictionaryClass <string, string> .Create(
                                 new Dictionary <string, string>
                {
                    { "testkey" + i, "testvalue" + i }
                }));
            }

            var w = AvroContainer.CreateWriter <ContainingDictionaryClass <string, string> >(this.resultStream, Codec.Deflate);

            // Block size of 2 forces multiple container blocks for the 7 records.
            using (var writer = new SequentialWriter <ContainingDictionaryClass <string, string> >(w, 2))
            {
                expected.ForEach(writer.Write);
            }

            this.resultStream.Seek(0, SeekOrigin.Begin);

            var reader = DataFileReader <GenericRecord> .OpenReader(this.resultStream);

            var actual = new List <GenericRecord>(reader);

            Assert.AreEqual(expected.Count, actual.Count);

            for (var i = 0; i < expected.Count; ++i)
            {
                var actualValue = actual[i]["Property"] as Dictionary <string, object>;
                Assert.IsNotNull(actualValue);

                // Fix: Assert.AreEqual takes (expected, actual); the original passed
                // them reversed, producing misleading failure messages.
                Assert.AreEqual(expected[i].Property["testkey" + i], actualValue["testkey" + i] as string);
            }
        }
Example #2
0
        /// <summary>
        /// Round-trip test: writes <see cref="ClassOfInt"/> records with the Microsoft
        /// Avro sequential writer and verifies the Apache reader yields matching values.
        /// </summary>
        public void SequentialWriter_MicrosoftWriterApacheReader()
        {
            var expected = new List <ClassOfInt>();

            for (var i = 0; i < 7; i++)
            {
                expected.Add(ClassOfInt.Create(true));
            }

            var w = AvroContainer.CreateWriter <ClassOfInt>(this.resultStream, Codec.Deflate);

            // Block size of 2 forces multiple container blocks for the 7 records.
            using (var writer = new SequentialWriter <ClassOfInt>(w, 2))
            {
                expected.ForEach(writer.Write);
            }

            this.resultStream.Seek(0, SeekOrigin.Begin);
            var reader = DataFileReader <GenericRecord> .OpenReader(this.resultStream);

            var actual = new List <GenericRecord>(reader);

            // Fix: assert the counts match (as the sibling dictionary test does);
            // otherwise the loop below silently passes when the reader returns
            // fewer records than were written.
            Assert.AreEqual(expected.Count, actual.Count);

            for (var i = 0; i < expected.Count; ++i)
            {
                Assert.AreEqual(expected[i].PrimitiveInt, actual[i]["PrimitiveInt"]);
            }
        }
Example #3
0
        /// <summary>
        /// Downloads the Event Hubs capture Avro blob, converts each captured event
        /// body into a table row, and batch-inserts the rows.
        /// </summary>
        public void Dump()
        {
            // Get the blob reference
            var storageAccount = CloudStorageAccount.Parse(StorageConnectionString);
            var blobClient     = storageAccount.CreateCloudBlobClient();

            // Fix: this declaration was commented out in the original, leaving the
            // `blob` variable used below undefined (the method could not compile).
            var blob = blobClient.GetBlobReferenceFromServer(new Uri(EventHubsCaptureAvroBlobUri));

            using (var dataTable = GetWindTurbineMetricsTable())
            {
                // Parse the Avro File
                using (var avroReader = DataFileReader <GenericRecord> .OpenReader(blob.OpenRead()))
                {
                    while (avroReader.HasNext())
                    {
                        GenericRecord r = avroReader.Next();

                        byte[] body = (byte[])r["Body"];
                        var    windTurbineMeasure = DeserializeToWindTurbineMeasure(body);

                        // Add the row to in memory table
                        AddWindTurbineMetricToTable(dataTable, windTurbineMeasure);
                    }
                }

                if (dataTable.Rows.Count > 0)
                {
                    BatchInsert(dataTable);
                }
            }
        }
        /// <summary>
        /// Streams the Event Hubs capture Avro blob, converts each captured event
        /// body into a table row, and batch-inserts the rows.
        /// </summary>
        public void Dump()
        {
            // Resolve the capture blob directly from its URI.
            BlobClient blob = new BlobClient(new Uri(EventHubsCaptureAvroBlobUri));

            using (var dataTable = GetWindTurbineMetricsTable())
            {
                // Walk the Avro container record by record.
                using (var avroReader = DataFileReader <GenericRecord> .OpenReader(blob.OpenRead()))
                {
                    while (avroReader.HasNext())
                    {
                        GenericRecord capturedRecord = avroReader.Next();

                        var payload = (byte[])capturedRecord["Body"];
                        var measure = DeserializeToWindTurbineMeasure(payload);

                        // Accumulate rows in memory before a single batch insert.
                        AddWindTurbineMetricToTable(dataTable, measure);
                    }
                }

                // Only touch the warehouse when at least one row was produced.
                if (dataTable.Rows.Count > 0)
                {
                    BatchInsert(dataTable);
                }
            }
        }
Example #5
0
        /// <summary>
        /// Reads every interop-generated Avro file in <paramref name="inputDir"/>
        /// (skipping unsupported codecs) and checks each file yields non-null records.
        /// </summary>
        public void TestInterop(string inputDir)
        {
            // Make the path absolute with respect to the test run directory.
            inputDir = Path.Combine(TestContext.CurrentContext.TestDirectory, inputDir);

            Assert.True(Directory.Exists(inputDir),
                        "Input directory does not exist. Run `build.sh interop-data-generate` first.");

            foreach (var avroFile in Directory.EnumerateFiles(inputDir, "*.avro"))
            {
                // File names look like "<name>_<codec>.avro"; skip codecs this
                // implementation does not support.
                var nameParts = Path.GetFileNameWithoutExtension(avroFile).Split('_');
                if (nameParts.Length > 1 && !InteropDataConstants.SupportedCodecNames.Contains(nameParts[1]))
                {
                    continue;
                }

                using (var reader = DataFileReader <GenericRecord> .OpenReader(avroFile))
                {
                    var recordCount = 0;

                    foreach (var record in reader.NextEntries)
                    {
                        Assert.IsNotNull(record);
                        recordCount++;
                    }

                    // Every interop file must contain at least one record.
                    Assert.AreNotEqual(0, recordCount);
                }
            }
        }
        /// <summary>
        /// Downloads the Event Hubs capture Avro blob into memory, converts each
        /// captured event body into a table row, and batch-inserts the rows.
        /// </summary>
        public void Dump()
        {
            // Get the blob reference
            BlobContainerClient blobContainer = new BlobContainerClient(StorageConnectionString, EventHubsCaptureAvroBlobContainer);
            BlobClient          blob          = blobContainer.GetBlobClient(EventHubsCaptureAvroBlobName);

            // Download the content to a memory stream
            using (Stream blobStream = new MemoryStream())
            {
                // Fix: the original called DownloadToAsync without awaiting it, so
                // the Avro parse below raced the (possibly incomplete) download.
                blob.DownloadTo(blobStream);

                // Fix: rewind the stream — DownloadTo leaves the position at the end,
                // so OpenReader would otherwise see an empty stream.
                blobStream.Position = 0;

                using (var dataTable = GetWindTurbineMetricsTable())
                {
                    // Parse the Avro File
                    using (var avroReader = DataFileReader <GenericRecord> .OpenReader(blobStream))
                    {
                        while (avroReader.HasNext())
                        {
                            GenericRecord r = avroReader.Next();

                            byte[] body = (byte[])r["Body"];
                            var    windTurbineMeasure = DeserializeToWindTurbineMeasure(body);

                            // Add the row to in memory table
                            AddWindTurbineMetricToTable(dataTable, windTurbineMeasure);
                        }
                    }

                    if (dataTable.Rows.Count > 0)
                    {
                        BatchInsert(dataTable);
                    }
                }
            }
        }
Example #7
0
        /// <summary>
        /// Rebuilds the in-memory key → file-position index by scanning every record
        /// in the Avro file backing <paramref name="fileAccess"/>. The caller's stream
        /// position is restored afterwards.
        /// </summary>
        internal void Build(AvroFileAccess <V> fileAccess)
        {
            Preconditions.CheckNotNullArgument(fileAccess);
            Preconditions.CheckArgument(fileAccess.Stream.Name == FileName);
            Preconditions.CheckArgument(fileAccess.Stream.CanRead);

            // NOTE(review): the lock target (Index) is replaced inside the lock body
            // below — a concurrent caller entering after the swap would lock the NEW
            // dictionary while this thread still holds the OLD one. Confirm this is
            // only ever called single-threaded, or lock a dedicated gate object.
            lock (Index)
            {
                Index = new Dictionary <string, object>();
                long oldPosition = fileAccess.Stream.Position;
                try
                {
                    fileAccess.Stream.Position = 0;
                    // NOTE(review): the reader is never disposed — presumably because
                    // disposing it could close the shared stream; verify against the
                    // Avro library version in use.
                    IFileReader <GenericRecord> reader = DataFileReader <GenericRecord> .OpenReader(fileAccess.Stream, fileAccess.ValueDef.Schema);

                    while (reader.HasNext())
                    {
                        // PreviousSync() is the position of the current block's sync
                        // marker; storing it lets a later lookup Seek straight there.
                        long          position = reader.PreviousSync();
                        GenericRecord record   = reader.Next();
                        V             value    = fileAccess.ValueDef.GetValue(record);
                        Index.Add(fileAccess.ValueDef.GetValueKey(value), position);
                        IsIndexWritePending = true;
                    }
                }
                finally
                {
                    // Always restore the caller's stream position, even on failure.
                    fileAccess.Stream.Position = oldPosition;
                }
            }
        }
Example #8
0
        /// <summary>
        /// Writes a container with one metadata entry attached, reads it back, and
        /// verifies the metadata survives the round trip.
        /// </summary>
        public void TestMetaData(string key, object value, Codec.Type codecType, bool useTypeGetter)
        {
            // Arrange: a single record plus the metadata entry under test.
            object[]     rawValues = new object[] { new object[] { "John", 23 } };
            IList <Foo>  records   = MakeRecords(rawValues);
            MemoryStream dataFileOutputStream = new MemoryStream();

            Schema            schema      = Schema.Parse(specificSchema);
            DatumWriter <Foo> datumWriter = new SpecificWriter <Foo>(schema);

            // Act: write the container with the metadata set before any records.
            using (IFileWriter <Foo> dataFileWriter = DataFileWriter <Foo> .OpenWriter(datumWriter, dataFileOutputStream, Codec.CreateCodec(codecType)))
            {
                SetMetaData(dataFileWriter, key, value);
                foreach (Foo record in records)
                {
                    dataFileWriter.Append(record);
                }
            }

            MemoryStream dataFileInputStream = new MemoryStream(dataFileOutputStream.ToArray());

            // Assert: the header metadata reads back with the expected value.
            using (IFileReader <Foo> reader = DataFileReader <Foo> .OpenReader(dataFileInputStream))
            {
                Assert.IsTrue(ValidateMetaData(reader, key, value, useTypeGetter),
                              string.Format("Error validating header meta data for key: {0}, expected value: {1}", key, value));
            }
        }
Example #9
0
        /// <summary>
        /// Dumps the data from the Avro blob to the data warehouse (DW).
        /// Before running this, ensure that the DW has the required <see cref="TableName"/> table created.
        /// </summary>
        /// <remarks>
        /// NOTE(review): async void means callers cannot await completion or observe
        /// exceptions; consider returning Task unless an event-handler signature
        /// requires void — confirm against the call sites.
        /// </remarks>
        private static async void Dump(Uri fileUri)
        {
            // Get the blob reference
            var storageAccount = CloudStorageAccount.Parse(StorageConnectionString);
            var blobClient     = storageAccount.CreateCloudBlobClient();
            var blob           = await blobClient.GetBlobReferenceFromServerAsync(fileUri);

            using (var dataTable = GetWindTurbineMetricsTable())
            {
                // Parse the Avro file. Fix: the blob read stream was never disposed
                // in the original; the using block releases it deterministically.
                using (Stream blobStream = await blob.OpenReadAsync(null, null, null))
                using (var avroReader = DataFileReader <GenericRecord> .OpenReader(blobStream))
                {
                    while (avroReader.HasNext())
                    {
                        GenericRecord r = avroReader.Next();

                        byte[] body = (byte[])r["Body"];
                        var    windTurbineMeasure = DeserializeToWindTurbineMeasure(body);

                        // Add the row to in memory table
                        AddWindTurbineMetricToTable(dataTable, windTurbineMeasure);
                    }
                }

                if (dataTable.Rows.Count > 0)
                {
                    BatchInsert(dataTable);
                }
            }
        }
        /// <summary>
        /// Opens <paramref name="stream"/> as an Avro container (Event Hubs capture
        /// format) and lazily yields the records deserialized from each event body.
        /// </summary>
        /// <param name="stream">Stream containing Event Hubs capture Avro data.</param>
        /// <returns>Lazily-deserialized records from every captured event.</returns>
        public override IEnumerable <T> Deserialize(Stream stream)
        {
            IFileReader <GenericRecord> reader = null;

            try
            {
                reader = DataFileReader <GenericRecord> .OpenReader(stream);
            }
            catch (Exception e)
            {
                this.diagnostics.WriteError(
                    briefMessage: "Unable to open stream as avro. Please check if the stream is from eventhub capture. https://docs.microsoft.com/en-us/azure/event-hubs/event-hubs-capture-overview ",
                    detailedMessage: e.Message);
                throw;
            }

            // Fix: the original only disposed the reader after a complete enumeration,
            // leaking it when the caller abandoned the iterator or an exception was
            // thrown mid-iteration. try/finally around yields is legal and the finally
            // runs when the enumerator is disposed.
            try
            {
                foreach (GenericRecord genericRecord in reader.NextEntries)
                {
                    EventDataFromCapture eventData = this.ConvertToEventDataFromCapture(genericRecord);

                    // deserialize records from eventdata body.
                    foreach (T record in this.DeserializeEventData(eventData))
                    {
                        yield return(record);
                    }
                }
            }
            finally
            {
                reader.Dispose();
            }
        }
        /// <summary>
        /// Dumps the data from the Avro blob to the data warehouse (DW).
        /// Before running this, ensure that the DW has the required <see cref="TableName"/> table created.
        /// </summary>
        /// <remarks>
        /// NOTE(review): async void means callers cannot await completion or observe
        /// exceptions; consider returning Task unless an event-handler signature
        /// requires void — confirm against the call sites.
        /// </remarks>
        private static async void Dump(Uri fileUri)
        {
            // Get the blob reference
            BlobClient blob = new BlobClient(fileUri);

            using (var dataTable = GetWindTurbineMetricsTable())
            {
                // Parse the Avro file. Fix: the blob read stream was never disposed
                // in the original; the using block releases it deterministically.
                using (Stream blobStream = await blob.OpenReadAsync(null))
                using (var avroReader = DataFileReader <GenericRecord> .OpenReader(blobStream))
                {
                    while (avroReader.HasNext())
                    {
                        GenericRecord r = avroReader.Next();

                        byte[] body = (byte[])r["Body"];
                        var    windTurbineMeasure = DeserializeToWindTurbineMeasure(body);

                        // Add the row to in memory table
                        AddWindTurbineMetricToTable(dataTable, windTurbineMeasure);
                    }
                }

                if (dataTable.Rows.Count > 0)
                {
                    BatchInsert(dataTable);
                }
            }
        }
Example #12
0
        /// <summary>
        /// Demonstrates reading the same Avro file twice: once with the specific
        /// (strongly typed) reader and once with the generic reader.
        /// </summary>
        static void Main(string[] args)
        {
            // File.ReadAllText closes the file; the original StreamReader was leaked.
            String schema = File.ReadAllText("user.avsc");

            Avro.Schema avschema = Avro.Schema.Parse(schema);

            // Specific (strongly typed) read. The unused SpecificDatumReader local in
            // the original was dead code: OpenReader builds its own datum reader.
            // using ensures the stream is closed even when a read throws.
            using (Stream inStr = new FileStream("users.avro", FileMode.Open))
            {
                IFileReader <User> dataFileReader = DataFileReader <User> .OpenReader(inStr, avschema);

                while (dataFileReader.HasNext())
                {
                    User record = dataFileReader.Next();
                    Console.WriteLine("Specific Obj Read ==>" + record.name + ":" + record.favorite_color + ":" + record.favorite_number);
                }
            }

            // Generic read of the same file. Fix: the original never closed this
            // second stream.
            using (Stream inStr = new FileStream("users.avro", FileMode.Open))
            {
                IFileReader <GenericRecord> gdataFileReader = DataFileReader <GenericRecord> .OpenReader(inStr, avschema);

                while (gdataFileReader.HasNext())
                {
                    GenericRecord grecord = gdataFileReader.Next();
                    Console.WriteLine("Generic mode of read==>" + grecord["name"] + ":" + grecord["favorite_color"] + ":" + grecord["favorite_number"]);
                }
            }

            Console.Write("Hit ENTER to Close:");
            Console.ReadLine();
        }
Example #13
0
        /// <summary>
        /// U-SQL extractor: parses the input as an Avro container and yields one
        /// output row per Avro record, with each output column populated from the
        /// record field of the same name.
        /// </summary>
        public override IEnumerable <IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            var avschema = Avro.Schema.Parse(avroSchema);

            using (var ms = new MemoryStream())
            {
                // The Avro reader needs a seekable stream; buffer the input first.
                CreateSeekableStream(input, ms);
                ms.Position = 0;

                var fileReader = DataFileReader <GenericRecord> .OpenReader(ms, avschema);

                while (fileReader.HasNext())
                {
                    var avroRecord = fileReader.Next();

                    foreach (var column in output.Schema)
                    {
                        if (avroRecord[column.Name] != null)
                        {
                            output.Set(column.Name, avroRecord[column.Name]);
                        }
                        else
                        {
                            output.Set <object>(column.Name, null);
                        }
                    }

                    // Fix: yield once per RECORD, after all columns are populated.
                    // The original yielded inside the column loop, emitting a
                    // partially-updated duplicate row for every column of every record.
                    yield return(output.AsReadOnly());
                }
            }
        }
Example #14
0
        /// <summary>
        /// Dumps the data from the Avro blob to the data warehouse (DW).
        /// Before running this, ensure that the DW has the required <see cref="TableName"/> table created.
        /// </summary>
        private static void Dump(Uri fileUri)
        {
            // Resolve the capture blob from the configured storage account.
            var storageAccount = CloudStorageAccount.Parse(StorageConnectionString);
            var blobClient     = storageAccount.CreateCloudBlobClient();
            var blob           = blobClient.GetBlobReferenceFromServer(fileUri);

            using (var dataTable = GetStoveTempMetrics())
            {
                // Walk the Avro container record by record, turning each captured
                // event body into an in-memory table row.
                using (var avroReader = DataFileReader <GenericRecord> .OpenReader(blob.OpenRead()))
                {
                    while (avroReader.HasNext())
                    {
                        GenericRecord capturedRecord = avroReader.Next();

                        byte[] payload = (byte[])capturedRecord["Body"];
                        var    measure = DeserializeToStoveTempMeasure(payload);

                        AddStoveMetricsToTable(dataTable, measure);
                    }
                }

                // Only touch the warehouse when at least one row was produced.
                if (dataTable.Rows.Count > 0)
                {
                    BatchInsert(dataTable);
                }
            }
        }
        /// <summary>
        /// Reads an Event Hubs capture Avro stream and lazily yields one
        /// <see cref="EventData"/> per record, reconstructing the body, user
        /// properties, and system properties from the record's fields.
        /// </summary>
        /// <param name="stream">Stream containing capture-format Avro data.</param>
        /// <param name="partitionKey">Partition key stamped onto every event's system properties.</param>
        internal static IEnumerable <EventData> ReadAvroStreamToEventHubData(this Stream stream, string partitionKey)
        {
            using var reader = DataFileReader <GenericRecord> .OpenReader(stream);

            while (reader.HasNext())
            {
                GenericRecord genericAvroRecord = reader.Next();

                // Well-known capture fields, addressed by their EventData member names.
                var body            = genericAvroRecord.GetValue <byte[]>(nameof(EventData.Body));
                var sequenceNumber  = genericAvroRecord.GetValue <long>(nameof(EventData.SystemProperties.SequenceNumber));
                var enqueuedTimeUtc = genericAvroRecord.GetValue <string>(nameof(EventData.SystemProperties.EnqueuedTimeUtc)).ParseTime();
                var offset          = genericAvroRecord.GetValue <string>(nameof(EventData.SystemProperties.Offset));

                // Rebuild the system properties, then merge in the record's own
                // SystemProperties map entries.
                var systemPropertiesCollection = new EventData.SystemPropertiesCollection(
                    sequenceNumber: sequenceNumber, enqueuedTimeUtc: enqueuedTimeUtc,
                    offset: offset, partitionKey: partitionKey);
                genericAvroRecord
                .GetValue <Dictionary <string, object> >(nameof(EventData.SystemProperties))
                .Foreach(x => systemPropertiesCollection.Add(x.Key, x.Value));

                // Any remaining top-level schema field (other than Body/Properties/
                // SystemProperties, already handled above) is copied into the system
                // properties as-is.
                IEnumerator <Field> avroSchemaField = genericAvroRecord.Schema.GetEnumerator();
                while (avroSchemaField.MoveNext())
                {
                    var currentAvroSchemaField = avroSchemaField.Current;
                    var currentFieldName       = currentAvroSchemaField.Name;

                    if (currentFieldName == nameof(EventData.Body))
                    {
                        continue;
                    }
                    if (currentFieldName == nameof(EventData.Properties))
                    {
                        continue;
                    }
                    if (currentFieldName == nameof(EventData.SystemProperties))
                    {
                        continue;
                    }

                    if (genericAvroRecord.TryGetValue(currentFieldName, out object prop))
                    {
                        systemPropertiesCollection[currentFieldName] = prop;
                    }
                }

                EventData eventData = new(body)
                {
                    SystemProperties = systemPropertiesCollection
                };

                // User properties come from the record's Properties map.
                genericAvroRecord
                .GetValue <Dictionary <string, object> >(nameof(EventData.Properties))
                .Foreach(eventData.Properties.Add);

                yield return(eventData);
            }
        }
    }
Example #16
0
        // Disabled due to long runtime [TestCase(specificSchema, Codec.Type.Deflate, 1000, 588, 998)]
        /// <summary>
        /// Writes many record batches with explicit block syncs at two iteration
        /// indices, collects the sync-marker positions while reading, then verifies
        /// that Sync(0) and Seek(position) land on those same positions.
        /// </summary>
        public void TestSyncAndSeekPositions(string schemaStr, Codec.Type codecType, int iterations, int firstSyncPosition, int secondSyncPosition)
        {
            // Arrange: write `iterations` batches, forcing a block boundary at the
            // two requested iteration indices.
            IList <Foo>  records = MakeRecords(GetTestFooObject());
            MemoryStream dataFileOutputStream = new MemoryStream();

            Schema            schema = Schema.Parse(schemaStr);
            DatumWriter <Foo> writer = new SpecificWriter <Foo>(schema);

            using (IFileWriter <Foo> dataFileWriter = DataFileWriter <Foo> .OpenWriter(writer, dataFileOutputStream, Codec.CreateCodec(codecType)))
            {
                for (int iteration = 0; iteration < iterations; ++iteration)
                {
                    foreach (Foo record in records)
                    {
                        dataFileWriter.Append(record);
                    }

                    // Force out the current block at the two configured points.
                    if (iteration == firstSyncPosition || iteration == secondSyncPosition)
                    {
                        dataFileWriter.Sync();
                    }
                }
            }

            MemoryStream dataFileInputStream = new MemoryStream(dataFileOutputStream.ToArray());

            // Collect each distinct sync-marker position observed during a full read.
            IList <long> syncs = new List <long>();

            using (IFileReader <Foo> reader = DataFileReader <Foo> .OpenReader(dataFileInputStream))
            {
                long lastSeenSync = -1;

                foreach (Foo unused in reader.NextEntries)
                {
                    if (reader.PreviousSync() != lastSeenSync &&
                        reader.Tell() != reader.PreviousSync()) // EOF
                    {
                        lastSeenSync = reader.PreviousSync();
                        syncs.Add(lastSeenSync);
                    }
                }

                // Verify the collected positions by syncing/seeking back to them.
                reader.Sync(0); // first sync
                Assert.AreEqual(reader.PreviousSync(), syncs[0],
                                string.Format("Error syncing reader to position: {0}", syncs[0]));

                foreach (long sync in syncs) // the rest
                {
                    reader.Seek(sync);
                    Foo seeked = reader.Next();
                    Assert.IsNotNull(seeked, string.Format("Error seeking to sync position: {0}", sync));
                }
            }
        }
Example #17
0
        /// <summary>
        /// Opens the local weather.avro container, prints its header, then prints
        /// every record it contains.
        /// </summary>
        static void Main(string[] args)
        {
            using (var reader = DataFileReader <GenericRecord> .OpenReader(@"weather.avro"))
            {
                // Header first, then one line per record.
                WriteHeader(reader);

                foreach (var weatherRecord in reader.NextEntries)
                {
                    Print(weatherRecord);
                }
            }
        }
Example #18
0
        /// <summary>
        /// Reads every entry from <paramref name="input"/> as an Avro container and
        /// returns true when at least one record was read.
        /// </summary>
        /// <param name="input">Seekable stream positioned at the start of an Avro container.</param>
        /// <param name="value">Unused; present only to pin <typeparamref name="T"/> for type inference at call sites.</param>
        private bool ReadGeneric <T>(Stream input, T value)
        {
            // NOTE(review): the reader is deliberately left undisposed — disposing it
            // may close the caller-owned input stream; confirm against the Avro
            // library version in use.
            IFileReader <T> reader = DataFileReader <T> .OpenReader(input);

            IList <T> readFoos = new List <T>();

            foreach (T foo in reader.NextEntries)
            {
                readFoos.Add(foo);
            }

            // Fix: readFoos can never be null here; the original's null check was dead.
            return(readFoos.Count > 0);
        }
        /// <summary>
        /// Round-trip test: writes nested-type records in blocks of two with the
        /// Microsoft Avro writer and verifies the Apache reader reproduces both the
        /// top-level value and the nested reference (including nulls).
        /// </summary>
        public void Container_MicrosoftWriterApacherReaderOfNestedType()
        {
            var expected = new List <NestedClass>();

            for (var i = 0; i < 7; i++)
            {
                expected.Add(NestedClass.Create(true));
            }

            using (var memoryStream = new MemoryStream())
            {
                // Fix: using guarantees the writer is disposed (and the container
                // finalized) even when a write throws — the original only called
                // Dispose on the success path.
                using (var writer = AvroContainer.CreateWriter <NestedClass>(memoryStream, new AvroSerializerSettings {
                    Resolver = new AvroDataContractResolver(true)
                }, Codec.Deflate))
                {
                    // Write the records in blocks of two.
                    var i = 0;
                    while (i < expected.Count)
                    {
                        var block = writer.CreateBlockAsync().Result;
                        for (var j = 0; j < 2; j++)
                        {
                            if (i >= expected.Count)
                            {
                                break;
                            }
                            block.Write(expected[i]);
                            i++;
                        }
                        writer.WriteBlockAsync(block).Wait();
                    }
                }

                memoryStream.Seek(0, SeekOrigin.Begin);

                var reader = DataFileReader <GenericRecord> .OpenReader(memoryStream);

                var actual = new List <GenericRecord>(reader);

                for (var k = 0; k < expected.Count; ++k)
                {
                    Assert.Equal(expected[k].PrimitiveInt, actual[k]["PrimitiveInt"]);
                    if (expected[k].ClassOfIntReference == null)
                    {
                        Assert.Null(actual[k]["ClassOfIntReference"]);
                    }
                    else
                    {
                        Assert.Equal(expected[k].ClassOfIntReference.PrimitiveInt, (actual[k]["ClassOfIntReference"] as GenericRecord)["PrimitiveInt"]);
                    }
                }
            }
        }
        /// <summary>
        /// Round-trip test: writes dictionary-containing records in blocks of two
        /// with the Microsoft Avro writer and verifies the Apache reader reproduces
        /// the dictionary entries.
        /// </summary>
        public void Container_MicrosoftWriterApacherReaderOfDictionary()
        {
            var expected = new List <ContainingDictionaryClass <string, string> >();

            for (var i = 0; i < 7; i++)
            {
                expected.Add(ContainingDictionaryClass <string, string> .Create(
                                 new Dictionary <string, string>
                {
                    { "testkey" + i, "testvalue" + i }
                }));
            }

            using (var memoryStream = new MemoryStream())
            {
                // Fix: using guarantees the writer is disposed (and the container
                // finalized) even when a write throws — the original only called
                // Dispose on the success path.
                using (var writer = AvroContainer.CreateWriter <ContainingDictionaryClass <string, string> >(memoryStream, Codec.Deflate))
                {
                    // Write the records in blocks of two.
                    var i = 0;
                    while (i < expected.Count)
                    {
                        var block = writer.CreateBlockAsync().Result;
                        for (var j = 0; j < 2; j++)
                        {
                            if (i >= expected.Count)
                            {
                                break;
                            }
                            block.Write(expected[i]);
                            i++;
                        }
                        writer.WriteBlockAsync(block).Wait();
                    }
                }

                memoryStream.Seek(0, SeekOrigin.Begin);

                var reader = DataFileReader <GenericRecord> .OpenReader(memoryStream);

                var actual = new List <GenericRecord>(reader);

                Assert.Equal(expected.Count, actual.Count);

                for (var i = 0; i < expected.Count; ++i)
                {
                    var actualValue = actual[i]["Property"] as Dictionary <string, object>;

                    // Fix: assert the cast succeeded before dereferencing (as the
                    // sibling SequentialWriter test does); otherwise a type mismatch
                    // surfaces as a NullReferenceException instead of a test failure.
                    Assert.NotNull(actualValue);
                    Assert.Equal(actualValue["testkey" + i] as string, expected[i].Property["testkey" + i]);
                }
            }
        }
Example #21
0
        /// <summary>
        /// Opens the Avro data source and surfaces every container-level metadata
        /// entry as a property.
        /// </summary>
        public override void OpenDataSource(string sourceAddress, int rowsPerPage)
        {
            base.OpenDataSource(sourceAddress, rowsPerPage);

            dataFileReader = DataFileReader <GenericRecord> .OpenReader(fileStream);

            // Copy each metadata key/value pair from the container header.
            foreach (string metaKey in dataFileReader.GetMetaKeys())
            {
                SetProperty(metaKey, dataFileReader.GetMetaString(metaKey));
            }
        }
Example #22
0
        [TestCase(specificSchema, Codec.Type.Null, 0, 330)]   // 330
        /// <summary>
        /// Writes ten record batches with block syncs after the second and fifth,
        /// then syncs the reader to <paramref name="position"/> and verifies exactly
        /// <paramref name="expectedRecords"/> records remain readable.
        /// </summary>
        public void TestPartialRead(string schemaStr, Codec.Type codecType, int position, int expectedRecords)
        {
            // Arrange: build a container with multiple sync points.
            IList <Foo> records = MakeRecords(GetTestFooObject());

            MemoryStream dataFileOutputStream = new MemoryStream();

            Schema            schema = Schema.Parse(schemaStr);
            DatumWriter <Foo> writer = new SpecificWriter <Foo>(schema);

            using (IFileWriter <Foo> dataFileWriter = DataFileWriter <Foo> .OpenWriter(writer, dataFileOutputStream, Codec.CreateCodec(codecType)))
            {
                for (int batch = 0; batch < 10; ++batch)
                {
                    foreach (Foo record in records)
                    {
                        dataFileWriter.Append(record);
                    }

                    // Force block boundaries after the second and fifth batches.
                    if (batch == 1 || batch == 4)
                    {
                        dataFileWriter.Sync();
                    }
                }
            }

            MemoryStream dataFileInputStream = new MemoryStream(dataFileOutputStream.ToArray());

            // Act: sync past `position`, then read everything that follows.
            IList <Foo> readRecords = new List <Foo>();

            using (IFileReader <Foo> reader = DataFileReader <Foo> .OpenReader(dataFileInputStream))
            {
                reader.Sync(position);

                foreach (Foo record in reader.NextEntries)
                {
                    readRecords.Add(record);
                }
            }

            // Assert: the partial read returned exactly the expected record count.
            Assert.IsTrue((readRecords != null && readRecords.Count == expectedRecords),
                          string.Format("Error performing partial read after position: {0}", position));
        }
Example #23
0
        /// <summary>
        /// Yields the reader/writer factory combinations to exercise for generic
        /// datum handling: the reader chosen by default, and one built from an
        /// explicit <see cref="GenericDatumReader{T}"/> factory.
        /// </summary>
        private static IEnumerable <ReaderWriterPair <T> > GenericOptions <T>()
        {
            // Variant 1: let DataFileReader pick its default datum reader.
            var defaultReaderPair = new ReaderWriterPair <T>
            {
                CreateReader = (stream, schema) => DataFileReader <T> .OpenReader(stream, schema),
                CreateWriter = (stream, schema, codec) =>
                               DataFileWriter <T> .OpenWriter(new GenericWriter <T>(schema), stream, codec)
            };

            yield return(defaultReaderPair);

            // Variant 2: supply an explicit GenericDatumReader factory.
            var explicitReaderPair = new ReaderWriterPair <T>
            {
                CreateReader = (stream, schema) => DataFileReader <T> .OpenReader(stream, schema,
                                                                                  (ws, rs) => new GenericDatumReader <T>(ws, rs)),
                CreateWriter = (stream, schema, codec) =>
                               DataFileWriter <T> .OpenWriter(new GenericDatumWriter <T>(schema), stream, codec)
            };

            yield return(explicitReaderPair);
        }
Example #24
0
        /// <summary>
        /// Writes <paramref name="datum"/> to a temporary Avro file, reads it back,
        /// and prints a colored pass (✓) or fail (X) marker for the type.
        /// </summary>
        private static void WriteAndRead <T>(T datum)
            where T : ISpecificRecord
        {
            Console.Write($"{typeof(T).Name}");

            try
            {
                var tempFile = Path.GetTempFileName();
                try
                {
                    var writer = new SpecificDatumWriter <T>(datum.Schema);

                    using (var dfw = DataFileWriter <T> .OpenWriter(writer, tempFile))
                    {
                        dfw.Append(datum);
                    }

                    using (var dfr = DataFileReader <T> .OpenReader(tempFile, datum.Schema))
                    {
                        while (dfr.HasNext())
                        {
                            var readDatum = dfr.Next();
                        }
                    }
                }
                finally
                {
                    // Fix: the original leaked one temp file per invocation.
                    File.Delete(tempFile);
                }

                var prevColor = Console.ForegroundColor;
                Console.ForegroundColor = ConsoleColor.Green;
                Console.SetCursorPosition(0, Console.CursorTop);
                Console.WriteLine($"✓ {typeof(T).Name}");
                Console.ForegroundColor = prevColor;
            }
            catch (Exception ex)
            {
                var prevColor = Console.ForegroundColor;
                Console.ForegroundColor = ConsoleColor.Red;
                Console.SetCursorPosition(0, Console.CursorTop);
                Console.WriteLine($"X {typeof(T).Name}");
                Console.ForegroundColor = prevColor;

                // Only "Unable to find type" failures are expected in this harness.
                if (!ex.Message.Contains("Unable to find type "))
                {
                    Console.WriteLine($"Unexpected Exception: {ex.Message}");
                }
            }
        }
Example #25
0
        static void Main(string[] args)
        {
            // Derive the Avro schema for Blog from the Microsoft serializer's writer schema.
            var schema = Schema.Parse(AvroSerializer.Create <Blog>().WriterSchema.ToString());

            var blogs = new List <Blog>
            {
                new Blog {
                    BlogId = 101, Name = "Tanaka", Author = "One"
                },
                new Blog {
                    BlogId = 201, Name = "Sato", Author = "Two"
                },
                new Blog {
                    BlogId = 301, Name = "Suzuki", Author = "Three"
                }
            };

            // Serialize every blog into an Avro container file on disk.
            var datumWriter = new SpecificDatumWriter <Blog>(schema);

            using (var fileWriter = DataFileWriter <Blog> .OpenWriter(datumWriter, "./blog.avro"))
            {
                blogs.ForEach(fileWriter.Append);
            }

            // Read the container back into POCOs.
            var roundTripped = new List <Blog>();

            using (var fileReader = DataFileReader <Blog> .OpenReader("./blog.avro"))
            {
                while (fileReader.HasNext())
                {
                    roundTripped.Add(fileReader.Next());
                }
            }

            // Print each deserialized record.
            foreach (var blog in roundTripped)
            {
                Console.WriteLine("----- Avro → POCO 変換後 -----");
                Console.WriteLine($"{blog.BlogId} {blog.Name} {blog.Author}");
            }
        }
        /// <summary>
        ///     Writes ClassOfInt records with the Microsoft Avro container writer and
        ///     verifies the Apache Avro reader decodes the same values back.
        /// </summary>
        public void Container_MicrosoftWriterApacheReader()
        {
            var expected = new List <ClassOfInt>();

            for (var i = 0; i < 7; i++)
            {
                expected.Add(ClassOfInt.Create(true));
            }

            using (var memoryStream = new MemoryStream())
            {
                // Dispose the writer via 'using' so it is released even when a block
                // write throws; the original called Dispose() manually, which leaked
                // the writer on any exception inside the loop.
                using (var writer = AvroContainer.CreateWriter <ClassOfInt>(memoryStream, Codec.Deflate))
                {
                    // Write the records two per container block.
                    var i = 0;
                    while (i < expected.Count)
                    {
                        var block = writer.CreateBlockAsync().Result;
                        for (var j = 0; j < 2 && i < expected.Count; j++)
                        {
                            block.Write(expected[i]);
                            i++;
                        }
                        writer.WriteBlockAsync(block).Wait();
                    }
                }

                memoryStream.Seek(0, SeekOrigin.Begin);
                var reader = DataFileReader <GenericRecord> .OpenReader(memoryStream);

                var actual = new List <GenericRecord>(reader);

                // Guard against silent truncation before comparing element-wise
                // (matches the sibling SequentialWriter round-trip tests).
                Assert.Equal(expected.Count, actual.Count);

                for (var k = 0; k < expected.Count; ++k)
                {
                    Assert.Equal(expected[k].PrimitiveInt, actual[k]["PrimitiveInt"]);
                }
            }
        }
Example #27
0
            /// <summary>
            ///     Opens the Avro container file described by <paramref name="status"/> and
            ///     prepares a JSON encoder that renders each datum separated by the
            ///     platform line separator.
            /// </summary>
            /// <exception cref="System.IO.IOException"/>
            public AvroFileInputStream(FileStatus status)
            {
                pos    = 0;
                buffer = new byte[0];

                // Open the file with a generic datum reader; the schema is taken
                // from the container file itself.
                var datumReader = new GenericDatumReader <object>();
                var fileContext = FileContext.GetFileContext(new Configuration());

                fileReader = DataFileReader.OpenReader(new AvroFSInput(fileContext, status.GetPath()), datumReader);

                var fileSchema = fileReader.GetSchema();

                writer = new GenericDatumWriter <object>(fileSchema);
                output = new ByteArrayOutputStream();

                // Emit compact JSON with one root value per platform line separator.
                var jsonGenerator = new JsonFactory().CreateJsonGenerator(output, JsonEncoding.Utf8);
                var separatorPrinter = new MinimalPrettyPrinter();

                separatorPrinter.SetRootValueSeparator(Runtime.GetProperty("line.separator"));
                jsonGenerator.SetPrettyPrinter(separatorPrinter);

                encoder = EncoderFactory.Get().JsonEncoder(fileSchema, jsonGenerator);
            }
Example #28
0
        /// <summary>
        ///     Verifies schema resolution on read: the reader schema drops writer field
        ///     f2 and adds f3 with a default value.
        /// </summary>
        public void TestDifferentReaderSchema()
        {
            RecordSchema writerSchema = Schema.Parse("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"string\"},"
                                                     + "{\"name\":\"f2\", \"type\":\"string\"}]}") as RecordSchema;
            Schema readerSchema = Schema.Parse("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"string\"},"
                                               + "{\"name\":\"f3\", \"type\":\"string\", \"default\":\"test\"}]}");

            var outputStream = new MemoryStream();

            WriteGeneric(outputStream, writerSchema, mkRecord(new [] { "f1", "f1val", "f2", "f2val" }, writerSchema), Codec.Type.Null);

            // Re-open the serialized bytes for reading with the different schema.
            var inputStream = new MemoryStream(outputStream.ToArray());

            using (IFileReader <GenericRecord> reader = DataFileReader <GenericRecord> .OpenReader(inputStream, readerSchema))
            {
                GenericRecord record = reader.Next();

                // f2 must be dropped, f1 preserved, and f3 filled from its default.
                object discarded;
                Assert.IsFalse(record.TryGetValue("f2", out discarded));
                Assert.AreEqual("f1val", record["f1"]);
                Assert.AreEqual("test", record["f3"]);
            }
        }
Example #29
0
        /// <summary>
        ///     Dumps the data from the Avro blob to the data warehouse (DW).
        ///     Before running this, ensure that the DW has the required <see cref="TableName" /> table created.
        /// </summary>
        /// <param name="fileUri">URI of the Avro blob to read.</param>
        /// <returns>Lazily-yielded measurements decoded from each record's Body field.</returns>
        private static IEnumerable <DeviceMeasurement> Dump(Uri fileUri)
        {
            // Resolve the blob behind the URI.
            var account = CloudStorageAccount.Parse(StorageConnectionString);
            var client  = account.CreateCloudBlobClient();
            var blob    = client.GetBlobReferenceFromServer(fileUri);

            // Stream the Avro container and decode each record's Body payload.
            using (var avroReader = DataFileReader <GenericRecord> .OpenReader(blob.OpenRead()))
            {
                while (avroReader.HasNext())
                {
                    var record    = avroReader.Next();
                    var bodyBytes = (byte[])record["Body"];

                    // Body carries an ASCII JSON document describing one measurement.
                    var json = Encoding.ASCII.GetString(bodyBytes);
                    yield return(JsonConvert.DeserializeObject <DeviceMeasurement>(json));
                }
            }
        }
Example #30
0
        /// <summary>
        ///     Writes NestedClass records with the Microsoft sequential writer and verifies
        ///     the Apache reader reproduces both the primitive field and the nested record.
        /// </summary>
        public void SequentialWriter_MicrosoftWriterApacherReaderOfNestedType()
        {
            var expected = new List <NestedClass>();

            for (var i = 0; i < 7; i++)
            {
                expected.Add(NestedClass.Create(true));
            }

            var w = AvroContainer.CreateWriter <NestedClass>(this.resultStream, new AvroSerializerSettings {
                Resolver = new AvroDataContractResolver(true)
            }, Codec.Deflate);

            using (var writer = new SequentialWriter <NestedClass>(w, 2))
            {
                expected.ForEach(writer.Write);
            }

            this.resultStream.Seek(0, SeekOrigin.Begin);

            var reader = DataFileReader <GenericRecord> .OpenReader(this.resultStream);

            var actual = new List <GenericRecord>(reader);

            // Fail fast with a clear count mismatch instead of an opaque index error
            // inside the loop (matches the sibling dictionary round-trip test).
            Assert.AreEqual(expected.Count, actual.Count);

            for (var i = 0; i < expected.Count; ++i)
            {
                Assert.AreEqual(expected[i].PrimitiveInt, actual[i]["PrimitiveInt"]);

                var expectedNested = expected[i].ClassOfIntReference;
                if (expectedNested == null)
                {
                    Assert.IsNull(actual[i]["ClassOfIntReference"]);
                }
                else
                {
                    // Cast once instead of repeating 'as GenericRecord' per assertion.
                    var actualNested = actual[i]["ClassOfIntReference"] as GenericRecord;
                    Assert.IsNotNull(actualNested);
                    Assert.AreEqual(expectedNested.PrimitiveInt, actualNested["PrimitiveInt"]);
                }
            }
        }