public void SequentialWriter_MicrosoftWriterApacherReaderOfDictionary()
{
    var expected = new List<ContainingDictionaryClass<string, string>>();
    for (var i = 0; i < 7; i++)
    {
        expected.Add(ContainingDictionaryClass<string, string>.Create(
            new Dictionary<string, string> { { "testkey" + i, "testvalue" + i } }));
    }

    var w = AvroContainer.CreateWriter<ContainingDictionaryClass<string, string>>(this.resultStream, Codec.Deflate);
    using (var writer = new SequentialWriter<ContainingDictionaryClass<string, string>>(w, 2))
    {
        expected.ForEach(writer.Write);
    }

    this.resultStream.Seek(0, SeekOrigin.Begin);

    var reader = DataFileReader<GenericRecord>.OpenReader(this.resultStream);
    var actual = new List<GenericRecord>(reader);

    Assert.AreEqual(expected.Count, actual.Count);
    for (var i = 0; i < expected.Count; ++i)
    {
        var actualValue = actual[i]["Property"] as Dictionary<string, object>;
        Assert.IsNotNull(actualValue);
        Assert.AreEqual(actualValue["testkey" + i] as string, expected[i].Property["testkey" + i]);
    }
}
public void SequentialWriter_MicrosoftWriterApacheReader()
{
    var expected = new List<ClassOfInt>();
    for (var i = 0; i < 7; i++)
    {
        expected.Add(ClassOfInt.Create(true));
    }

    var w = AvroContainer.CreateWriter<ClassOfInt>(this.resultStream, Codec.Deflate);
    using (var writer = new SequentialWriter<ClassOfInt>(w, 2))
    {
        expected.ForEach(writer.Write);
    }

    this.resultStream.Seek(0, SeekOrigin.Begin);

    var reader = DataFileReader<GenericRecord>.OpenReader(this.resultStream);
    var actual = new List<GenericRecord>(reader);

    for (var i = 0; i < expected.Count; ++i)
    {
        Assert.AreEqual(expected[i].PrimitiveInt, actual[i]["PrimitiveInt"]);
    }
}
public void Dump()
{
    // Get the blob reference (this declaration was commented out in the original,
    // leaving `blob` undefined; it is restored here so the snippet compiles)
    var storageAccount = CloudStorageAccount.Parse(StorageConnectionString);
    var blobClient = storageAccount.CreateCloudBlobClient();
    var blob = blobClient.GetBlobReferenceFromServer(new Uri(EventHubsCaptureAvroBlobUri));

    using (var dataTable = GetWindTurbineMetricsTable())
    {
        // Parse the Avro file
        using (var avroReader = DataFileReader<GenericRecord>.OpenReader(blob.OpenRead()))
        {
            while (avroReader.HasNext())
            {
                GenericRecord r = avroReader.Next();
                byte[] body = (byte[])r["Body"];
                var windTurbineMeasure = DeserializeToWindTurbineMeasure(body);

                // Add the row to the in-memory table
                AddWindTurbineMetricToTable(dataTable, windTurbineMeasure);
            }
        }

        if (dataTable.Rows.Count > 0)
        {
            BatchInsert(dataTable);
        }
    }
}
public void Dump()
{
    // Get the blob reference
    BlobClient blob = new BlobClient(new Uri(EventHubsCaptureAvroBlobUri));

    using (var dataTable = GetWindTurbineMetricsTable())
    {
        // Parse the Avro file
        using (var avroReader = DataFileReader<GenericRecord>.OpenReader(blob.OpenRead()))
        {
            while (avroReader.HasNext())
            {
                GenericRecord r = avroReader.Next();
                byte[] body = (byte[])r["Body"];
                var windTurbineMeasure = DeserializeToWindTurbineMeasure(body);

                // Add the row to the in-memory table
                AddWindTurbineMetricToTable(dataTable, windTurbineMeasure);
            }
        }

        if (dataTable.Rows.Count > 0)
        {
            BatchInsert(dataTable);
        }
    }
}
public void TestInterop(string inputDir)
{
    // Resolve inputDir relative to the TestDirectory
    inputDir = Path.Combine(TestContext.CurrentContext.TestDirectory, inputDir);
    Assert.True(Directory.Exists(inputDir), "Input directory does not exist. Run `build.sh interop-data-generate` first.");

    foreach (var avroFile in Directory.EnumerateFiles(inputDir, "*.avro"))
    {
        // File names follow the pattern <name>_<codec>.avro; skip unsupported codecs
        var codec = Path.GetFileNameWithoutExtension(avroFile).Split('_');
        if (1 < codec.Length && !InteropDataConstants.SupportedCodecNames.Contains(codec[1]))
        {
            continue;
        }

        using (var reader = DataFileReader<GenericRecord>.OpenReader(avroFile))
        {
            int i = 0;
            foreach (var record in reader.NextEntries)
            {
                i++;
                Assert.IsNotNull(record);
            }

            Assert.AreNotEqual(0, i);
        }
    }
}
public void Dump()
{
    // Get the blob reference
    BlobContainerClient blobContainer = new BlobContainerClient(StorageConnectionString, EventHubsCaptureAvroBlobContainer);
    BlobClient blob = blobContainer.GetBlobClient(EventHubsCaptureAvroBlobName);

    // Download the content to a memory stream and rewind it before reading
    using (Stream blobStream = new MemoryStream())
    {
        // The original called DownloadToAsync without awaiting it; the synchronous
        // overload is used here so the download completes before the stream is read
        blob.DownloadTo(blobStream);
        blobStream.Position = 0;

        using (var dataTable = GetWindTurbineMetricsTable())
        {
            // Parse the Avro file
            using (var avroReader = DataFileReader<GenericRecord>.OpenReader(blobStream))
            {
                while (avroReader.HasNext())
                {
                    GenericRecord r = avroReader.Next();
                    byte[] body = (byte[])r["Body"];
                    var windTurbineMeasure = DeserializeToWindTurbineMeasure(body);

                    // Add the row to the in-memory table
                    AddWindTurbineMetricToTable(dataTable, windTurbineMeasure);
                }
            }

            if (dataTable.Rows.Count > 0)
            {
                BatchInsert(dataTable);
            }
        }
    }
}
internal void Build(AvroFileAccess<V> fileAccess)
{
    Preconditions.CheckNotNullArgument(fileAccess);
    Preconditions.CheckArgument(fileAccess.Stream.Name == FileName);
    Preconditions.CheckArgument(fileAccess.Stream.CanRead);

    lock (Index)
    {
        Index = new Dictionary<string, object>();
        long oldPosition = fileAccess.Stream.Position;
        try
        {
            fileAccess.Stream.Position = 0;
            IFileReader<GenericRecord> reader = DataFileReader<GenericRecord>.OpenReader(fileAccess.Stream, fileAccess.ValueDef.Schema);
            while (reader.HasNext())
            {
                // Remember the sync point preceding each record so it can be seeked to later
                long position = reader.PreviousSync();
                GenericRecord record = reader.Next();
                V value = fileAccess.ValueDef.GetValue(record);
                Index.Add(fileAccess.ValueDef.GetValueKey(value), position);
                IsIndexWritePending = true;
            }
        }
        finally
        {
            fileAccess.Stream.Position = oldPosition;
        }
    }
}
public void TestMetaData(string key, object value, Codec.Type codecType, bool useTypeGetter)
{
    // create and write out
    object[] obj = new object[] { new object[] { "John", 23 } };
    IList<Foo> records = MakeRecords(obj);

    MemoryStream dataFileOutputStream = new MemoryStream();
    Schema schema = Schema.Parse(specificSchema);
    DatumWriter<Foo> writer = new SpecificWriter<Foo>(schema);
    using (IFileWriter<Foo> dataFileWriter = DataFileWriter<Foo>.OpenWriter(writer, dataFileOutputStream, Codec.CreateCodec(codecType)))
    {
        SetMetaData(dataFileWriter, key, value);
        foreach (Foo rec in records)
        {
            dataFileWriter.Append(rec);
        }
    }

    MemoryStream dataFileInputStream = new MemoryStream(dataFileOutputStream.ToArray());

    // read back
    using (IFileReader<Foo> reader = DataFileReader<Foo>.OpenReader(dataFileInputStream))
    {
        Assert.IsTrue(ValidateMetaData(reader, key, value, useTypeGetter),
            string.Format("Error validating header meta data for key: {0}, expected value: {1}", key, value));
    }
}
/// <summary>
/// Dumps the data from the Avro blob to the data warehouse (DW).
/// Before running this, ensure that the DW has the required <see cref="TableName"/> table created.
/// </summary>
private static async Task Dump(Uri fileUri) // async Task instead of async void so exceptions propagate to the caller
{
    // Get the blob reference
    var storageAccount = CloudStorageAccount.Parse(StorageConnectionString);
    var blobClient = storageAccount.CreateCloudBlobClient();
    var blob = await blobClient.GetBlobReferenceFromServerAsync(fileUri);

    using (var dataTable = GetWindTurbineMetricsTable())
    {
        // Parse the Avro file
        Stream blobStream = await blob.OpenReadAsync(null, null, null);
        using (var avroReader = DataFileReader<GenericRecord>.OpenReader(blobStream))
        {
            while (avroReader.HasNext())
            {
                GenericRecord r = avroReader.Next();
                byte[] body = (byte[])r["Body"];
                var windTurbineMeasure = DeserializeToWindTurbineMeasure(body);

                // Add the row to the in-memory table
                AddWindTurbineMetricToTable(dataTable, windTurbineMeasure);
            }
        }

        if (dataTable.Rows.Count > 0)
        {
            BatchInsert(dataTable);
        }
    }
}
public override IEnumerable<T> Deserialize(Stream stream)
{
    IFileReader<GenericRecord> reader = null;
    try
    {
        reader = DataFileReader<GenericRecord>.OpenReader(stream);
    }
    catch (Exception e)
    {
        this.diagnostics.WriteError(
            briefMessage: "Unable to open stream as avro. Please check if the stream is from eventhub capture. https://docs.microsoft.com/en-us/azure/event-hubs/event-hubs-capture-overview ",
            detailedMessage: e.Message);
        throw;
    }

    // Dispose the reader even if the caller abandons the enumeration early
    // (the original called reader.Dispose() only after a complete iteration)
    using (reader)
    {
        foreach (GenericRecord genericRecord in reader.NextEntries)
        {
            EventDataFromCapture eventData = this.ConvertToEventDataFromCapture(genericRecord);

            // Deserialize records from the event data body
            foreach (T record in this.DeserializeEventData(eventData))
            {
                yield return record;
            }
        }
    }
}
/// <summary>
/// Dumps the data from the Avro blob to the data warehouse (DW).
/// Before running this, ensure that the DW has the required <see cref="TableName"/> table created.
/// </summary>
private static async Task Dump(Uri fileUri) // async Task instead of async void so exceptions propagate to the caller
{
    // Get the blob reference
    BlobClient blob = new BlobClient(fileUri);

    using (var dataTable = GetWindTurbineMetricsTable())
    {
        // Parse the Avro file
        Stream blobStream = await blob.OpenReadAsync(null);
        using (var avroReader = DataFileReader<GenericRecord>.OpenReader(blobStream))
        {
            while (avroReader.HasNext())
            {
                GenericRecord r = avroReader.Next();
                byte[] body = (byte[])r["Body"];
                var windTurbineMeasure = DeserializeToWindTurbineMeasure(body);

                // Add the row to the in-memory table
                AddWindTurbineMetricToTable(dataTable, windTurbineMeasure);
            }
        }

        if (dataTable.Rows.Count > 0)
        {
            BatchInsert(dataTable);
        }
    }
}
static void Main(string[] args)
{
    string schema = new StreamReader("user.avsc").ReadToEnd();
    Avro.Schema avschema = Avro.Schema.Parse(schema);

    // Read as the generated specific type
    using (Stream inStr = new FileStream("users.avro", FileMode.Open))
    {
        IFileReader<User> dataFileReader = DataFileReader<User>.OpenReader(inStr, avschema);
        while (dataFileReader.HasNext())
        {
            User record = dataFileReader.Next();
            Console.WriteLine("Specific Obj Read ==>" + record.name + ":" + record.favorite_color + ":" + record.favorite_number);
        }
    }

    // Read the same file in generic mode (the second stream was never closed
    // in the original; both are now wrapped in using blocks)
    using (Stream inStr = new FileStream("users.avro", FileMode.Open))
    {
        IFileReader<GenericRecord> gdataFileReader = DataFileReader<GenericRecord>.OpenReader(inStr, avschema);
        while (gdataFileReader.HasNext())
        {
            GenericRecord grecord = gdataFileReader.Next();
            Console.WriteLine("Generic mode of read==>" + grecord["name"] + ":" + grecord["favorite_color"] + ":" + grecord["favorite_number"]);
        }
    }

    Console.Write("Hit ENTER to Close:");
    Console.ReadLine();
}
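The sample above assumes users.avro already exists. A minimal sketch of producing it with the same schema; the WriteUsers helper name and the sample field values are hypothetical, and it assumes the User class generated from user.avsc exposes settable name, favorite_color, and favorite_number members as the read loop implies:

static void WriteUsers()
{
    Avro.Schema avschema = Avro.Schema.Parse(new StreamReader("user.avsc").ReadToEnd());

    var writer = new Avro.Specific.SpecificDatumWriter<User>(avschema);
    using (var dataFileWriter = DataFileWriter<User>.OpenWriter(writer, "users.avro"))
    {
        // Hypothetical sample record; field names follow the read loop above
        dataFileWriter.Append(new User { name = "Alyssa", favorite_color = "blue", favorite_number = 7 });
    }
}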
public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
{
    var avschema = Avro.Schema.Parse(avroSchema);

    using (var ms = new MemoryStream())
    {
        // The Avro reader needs a seekable stream, so buffer the input first
        CreateSeekableStream(input, ms);
        ms.Position = 0;

        using (var fileReader = DataFileReader<GenericRecord>.OpenReader(ms, avschema))
        {
            while (fileReader.HasNext())
            {
                var avroRecord = fileReader.Next();
                foreach (var column in output.Schema)
                {
                    if (avroRecord[column.Name] != null)
                    {
                        output.Set(column.Name, avroRecord[column.Name]);
                    }
                    else
                    {
                        output.Set<object>(column.Name, null);
                    }
                }

                // Yield once per record, after all columns are set; the original
                // yielded inside the column loop, emitting duplicate rows
                yield return output.AsReadOnly();
            }
        }
    }
}
/// <summary>
/// Dumps the data from the Avro blob to the data warehouse (DW).
/// Before running this, ensure that the DW has the required <see cref="TableName"/> table created.
/// </summary>
private static void Dump(Uri fileUri)
{
    // Get the blob reference
    var storageAccount = CloudStorageAccount.Parse(StorageConnectionString);
    var blobClient = storageAccount.CreateCloudBlobClient();
    var blob = blobClient.GetBlobReferenceFromServer(fileUri);

    using (var dataTable = GetStoveTempMetrics())
    {
        // Parse the Avro file
        using (var avroReader = DataFileReader<GenericRecord>.OpenReader(blob.OpenRead()))
        {
            while (avroReader.HasNext())
            {
                GenericRecord r = avroReader.Next();
                byte[] body = (byte[])r["Body"];
                var stoveTempMeasure = DeserializeToStoveTempMeasure(body);

                // Add the row to the in-memory table
                AddStoveMetricsToTable(dataTable, stoveTempMeasure);
            }
        }

        if (dataTable.Rows.Count > 0)
        {
            BatchInsert(dataTable);
        }
    }
}
internal static IEnumerable<EventData> ReadAvroStreamToEventHubData(this Stream stream, string partitionKey)
{
    using var reader = DataFileReader<GenericRecord>.OpenReader(stream);
    while (reader.HasNext())
    {
        GenericRecord genericAvroRecord = reader.Next();

        var body = genericAvroRecord.GetValue<byte[]>(nameof(EventData.Body));
        var sequenceNumber = genericAvroRecord.GetValue<long>(nameof(EventData.SystemProperties.SequenceNumber));
        var enqueuedTimeUtc = genericAvroRecord.GetValue<string>(nameof(EventData.SystemProperties.EnqueuedTimeUtc)).ParseTime();
        var offset = genericAvroRecord.GetValue<string>(nameof(EventData.SystemProperties.Offset));

        var systemPropertiesCollection = new EventData.SystemPropertiesCollection(
            sequenceNumber: sequenceNumber,
            enqueuedTimeUtc: enqueuedTimeUtc,
            offset: offset,
            partitionKey: partitionKey);

        genericAvroRecord
            .GetValue<Dictionary<string, object>>(nameof(EventData.SystemProperties))
            .Foreach(x => systemPropertiesCollection.Add(x.Key, x.Value));

        // Copy any remaining schema fields into the system properties,
        // skipping the ones handled explicitly above
        IEnumerator<Field> avroSchemaField = genericAvroRecord.Schema.GetEnumerator();
        while (avroSchemaField.MoveNext())
        {
            var currentFieldName = avroSchemaField.Current.Name;
            if (currentFieldName == nameof(EventData.Body) ||
                currentFieldName == nameof(EventData.Properties) ||
                currentFieldName == nameof(EventData.SystemProperties))
            {
                continue;
            }

            if (genericAvroRecord.TryGetValue(currentFieldName, out object prop))
            {
                systemPropertiesCollection[currentFieldName] = prop;
            }
        }

        EventData eventData = new(body) { SystemProperties = systemPropertiesCollection };

        genericAvroRecord
            .GetValue<Dictionary<string, object>>(nameof(EventData.Properties))
            .Foreach(eventData.Properties.Add);

        yield return eventData;
    }
}
// Disabled due to long runtime [TestCase(specificSchema, Codec.Type.Deflate, 1000, 588, 998)]
public void TestSyncAndSeekPositions(string schemaStr, Codec.Type codecType, int iterations, int firstSyncPosition, int secondSyncPosition)
{
    // create and write out
    IList<Foo> records = MakeRecords(GetTestFooObject());
    MemoryStream dataFileOutputStream = new MemoryStream();
    Schema schema = Schema.Parse(schemaStr);
    DatumWriter<Foo> writer = new SpecificWriter<Foo>(schema);
    using (IFileWriter<Foo> dataFileWriter = DataFileWriter<Foo>.OpenWriter(writer, dataFileOutputStream, Codec.CreateCodec(codecType)))
    {
        for (int i = 0; i < iterations; ++i)
        {
            foreach (Foo foo in records)
            {
                dataFileWriter.Append(foo);
            }

            // write out block
            if (i == firstSyncPosition || i == secondSyncPosition)
            {
                dataFileWriter.Sync();
            }
        }
    }

    MemoryStream dataFileInputStream = new MemoryStream(dataFileOutputStream.ToArray());

    // read syncs
    IList<long> syncs = new List<long>();
    using (IFileReader<Foo> reader = DataFileReader<Foo>.OpenReader(dataFileInputStream))
    {
        long previousSync = -1;
        foreach (Foo foo in reader.NextEntries)
        {
            if (reader.PreviousSync() != previousSync
                && reader.Tell() != reader.PreviousSync()) // EOF
            {
                previousSync = reader.PreviousSync();
                syncs.Add(previousSync);
            }
        }

        // verify syncs with seeks
        reader.Sync(0); // first sync
        Assert.AreEqual(reader.PreviousSync(), syncs[0],
            string.Format("Error syncing reader to position: {0}", syncs[0]));

        foreach (long sync in syncs) // the rest
        {
            reader.Seek(sync);
            Foo foo = reader.Next();
            Assert.IsNotNull(foo, string.Format("Error seeking to sync position: {0}", sync));
        }
    }
}
static void Main(string[] args)
{
    using (var reader = DataFileReader<GenericRecord>.OpenReader(@"weather.avro"))
    {
        WriteHeader(reader);

        foreach (var entry in reader.NextEntries)
        {
            Print(entry);
        }
    }
}
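The container header that WriteHeader presumably prints can also be inspected directly. A minimal sketch, assuming weather.avro sits next to the executable and that IFileReader exposes GetSchema alongside the GetMetaKeys/GetMetaString accessors used in the OpenDataSource example below; the DumpHeader name is hypothetical:

static void DumpHeader(string path)
{
    using (var reader = DataFileReader<GenericRecord>.OpenReader(path))
    {
        // The writer schema stored in the container header
        Console.WriteLine(reader.GetSchema());

        // Header metadata, e.g. the standard "avro.codec" entry
        foreach (string key in reader.GetMetaKeys())
        {
            Console.WriteLine($"{key} = {reader.GetMetaString(key)}");
        }
    }
}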
private bool ReadGeneric<T>(Stream input, T value)
{
    // Dispose the reader when done (the original leaked it); the list is never
    // null, so the redundant null check is dropped
    using (IFileReader<T> reader = DataFileReader<T>.OpenReader(input))
    {
        IList<T> readFoos = new List<T>();
        foreach (T foo in reader.NextEntries)
        {
            readFoos.Add(foo);
        }

        return readFoos.Count > 0;
    }
}
public void Container_MicrosoftWriterApacherReaderOfNestedType()
{
    var expected = new List<NestedClass>();
    for (var i = 0; i < 7; i++)
    {
        expected.Add(NestedClass.Create(true));
    }

    using (var memoryStream = new MemoryStream())
    {
        var writer = AvroContainer.CreateWriter<NestedClass>(
            memoryStream,
            new AvroSerializerSettings { Resolver = new AvroDataContractResolver(true) },
            Codec.Deflate);

        var i = 0;
        while (i < expected.Count)
        {
            var block = writer.CreateBlockAsync().Result;
            for (var j = 0; j < 2; j++)
            {
                if (i >= expected.Count)
                {
                    break;
                }

                block.Write(expected[i]);
                i++;
            }

            writer.WriteBlockAsync(block).Wait();
        }

        writer.Dispose();
        memoryStream.Seek(0, SeekOrigin.Begin);

        var reader = DataFileReader<GenericRecord>.OpenReader(memoryStream);
        var actual = new List<GenericRecord>(reader);

        for (var k = 0; k < expected.Count; ++k)
        {
            Assert.Equal(expected[k].PrimitiveInt, actual[k]["PrimitiveInt"]);
            if (expected[k].ClassOfIntReference == null)
            {
                Assert.Null(actual[k]["ClassOfIntReference"]);
            }
            else
            {
                Assert.Equal(
                    expected[k].ClassOfIntReference.PrimitiveInt,
                    (actual[k]["ClassOfIntReference"] as GenericRecord)["PrimitiveInt"]);
            }
        }
    }
}
public void Container_MicrosoftWriterApacherReaderOfDictionary()
{
    var expected = new List<ContainingDictionaryClass<string, string>>();
    for (var i = 0; i < 7; i++)
    {
        expected.Add(ContainingDictionaryClass<string, string>.Create(
            new Dictionary<string, string> { { "testkey" + i, "testvalue" + i } }));
    }

    using (var memoryStream = new MemoryStream())
    {
        var writer = AvroContainer.CreateWriter<ContainingDictionaryClass<string, string>>(memoryStream, Codec.Deflate);

        var i = 0;
        while (i < expected.Count)
        {
            var block = writer.CreateBlockAsync().Result;
            for (var j = 0; j < 2; j++)
            {
                if (i >= expected.Count)
                {
                    break;
                }

                block.Write(expected[i]);
                i++;
            }

            writer.WriteBlockAsync(block).Wait();
        }

        writer.Dispose();
        memoryStream.Seek(0, SeekOrigin.Begin);

        var reader = DataFileReader<GenericRecord>.OpenReader(memoryStream);
        var actual = new List<GenericRecord>(reader);

        Assert.Equal(expected.Count, actual.Count);
        for (var k = 0; k < expected.Count; ++k)
        {
            var actualValue = actual[k]["Property"] as Dictionary<string, object>;
            Assert.Equal(actualValue["testkey" + k] as string, expected[k].Property["testkey" + k]);
        }
    }
}
public override void OpenDataSource(string sourceAddress, int rowsPerPage)
{
    base.OpenDataSource(sourceAddress, rowsPerPage);

    dataFileReader = DataFileReader<GenericRecord>.OpenReader(fileStream);

    // Surface the container header metadata as data source properties
    var metaKeys = dataFileReader.GetMetaKeys();
    foreach (string key in metaKeys)
    {
        string prop = dataFileReader.GetMetaString(key);
        SetProperty(key, prop);
    }
}
[TestCase(specificSchema, Codec.Type.Null, 0, 330)]
public void TestPartialRead(string schemaStr, Codec.Type codecType, int position, int expectedRecords)
{
    // create and write out
    IList<Foo> records = MakeRecords(GetTestFooObject());
    MemoryStream dataFileOutputStream = new MemoryStream();
    Schema schema = Schema.Parse(schemaStr);
    DatumWriter<Foo> writer = new SpecificWriter<Foo>(schema);
    using (IFileWriter<Foo> dataFileWriter = DataFileWriter<Foo>.OpenWriter(writer, dataFileOutputStream, Codec.CreateCodec(codecType)))
    {
        for (int i = 0; i < 10; ++i)
        {
            foreach (Foo foo in records)
            {
                dataFileWriter.Append(foo);
            }

            // write out block
            if (i == 1 || i == 4)
            {
                dataFileWriter.Sync();
            }
        }
    }

    MemoryStream dataFileInputStream = new MemoryStream(dataFileOutputStream.ToArray());

    // read back
    IList<Foo> readRecords = new List<Foo>();
    using (IFileReader<Foo> reader = DataFileReader<Foo>.OpenReader(dataFileInputStream))
    {
        // move to the next block from position
        reader.Sync(position);

        // read records from the synced position
        foreach (Foo rec in reader.NextEntries)
        {
            readRecords.Add(rec);
        }
    }

    Assert.IsTrue(readRecords != null && readRecords.Count == expectedRecords,
        string.Format("Error performing partial read after position: {0}", position));
}
private static IEnumerable<ReaderWriterPair<T>> GenericOptions<T>()
{
    yield return new ReaderWriterPair<T>
    {
        CreateReader = (stream, schema) => DataFileReader<T>.OpenReader(stream, schema),
        CreateWriter = (stream, schema, codec) => DataFileWriter<T>.OpenWriter(new GenericWriter<T>(schema), stream, codec)
    };

    yield return new ReaderWriterPair<T>
    {
        CreateReader = (stream, schema) => DataFileReader<T>.OpenReader(stream, schema, (ws, rs) => new GenericDatumReader<T>(ws, rs)),
        CreateWriter = (stream, schema, codec) => DataFileWriter<T>.OpenWriter(new GenericDatumWriter<T>(schema), stream, codec)
    };
}
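A sketch of how such a pair might drive a round-trip test, assuming CreateWriter and CreateReader have the delegate shapes implied above, i.e. (stream, schema, codec) and (stream, schema), and return the disposable IFileWriter/IFileReader types; the RoundTrip name and the single-field schema literal are illustrative:

private static void RoundTrip(ReaderWriterPair<GenericRecord> pair)
{
    // Illustrative single-field record schema
    var schema = Schema.Parse("{\"type\":\"record\",\"name\":\"n\",\"fields\":[{\"name\":\"f1\",\"type\":\"string\"}]}");

    var ms = new MemoryStream();
    using (var writer = pair.CreateWriter(ms, schema, Codec.CreateCodec(Codec.Type.Null)))
    {
        var record = new GenericRecord((RecordSchema)schema);
        record.Add("f1", "value");
        writer.Append(record);
    }

    var input = new MemoryStream(ms.ToArray());
    using (var reader = pair.CreateReader(input, schema))
    {
        while (reader.HasNext())
        {
            Console.WriteLine(reader.Next()["f1"]);
        }
    }
}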
private static void WriteAndRead<T>(T datum) where T : ISpecificRecord
{
    Console.Write($"{typeof(T).Name}");
    try
    {
        var tempFile = Path.GetTempFileName();
        var writer = new SpecificDatumWriter<T>(datum.Schema);
        using (var dfw = DataFileWriter<T>.OpenWriter(writer, tempFile))
        {
            dfw.Append(datum);
        }

        using (var dfr = DataFileReader<T>.OpenReader(tempFile, datum.Schema))
        {
            while (dfr.HasNext())
            {
                var readDatum = dfr.Next();
            }
        }

        var prevColor = Console.ForegroundColor;
        Console.ForegroundColor = ConsoleColor.Green;
        Console.SetCursorPosition(0, Console.CursorTop);
        Console.WriteLine($"✓ {typeof(T).Name}");
        Console.ForegroundColor = prevColor;
    }
    catch (Exception ex)
    {
        var prevColor = Console.ForegroundColor;
        Console.ForegroundColor = ConsoleColor.Red;
        Console.SetCursorPosition(0, Console.CursorTop);
        Console.WriteLine($"X {typeof(T).Name}");
        Console.ForegroundColor = prevColor;

        if (!ex.Message.Contains("Unable to find type "))
        {
            Console.WriteLine($"Unexpected Exception: {ex.Message}");
        }
    }
}
static void Main(string[] args)
{
    var schema = Schema.Parse(AvroSerializer.Create<Blog>().WriterSchema.ToString());

    var inputs = new List<Blog>
    {
        new Blog { BlogId = 101, Name = "Tanaka", Author = "One" },
        new Blog { BlogId = 201, Name = "Sato", Author = "Two" },
        new Blog { BlogId = 301, Name = "Suzuki", Author = "Three" }
    };

    var writer = new SpecificDatumWriter<Blog>(schema);
    using (var fw = DataFileWriter<Blog>.OpenWriter(writer, "./blog.avro"))
    {
        foreach (var blog in inputs)
        {
            fw.Append(blog);
        }
    }

    var outputs = new List<Blog>();
    using (var fr = DataFileReader<Blog>.OpenReader("./blog.avro"))
    {
        while (fr.HasNext())
        {
            outputs.Add(fr.Next());
        }
    }

    foreach (var b in outputs)
    {
        Console.WriteLine("----- After Avro → POCO conversion -----");
        Console.WriteLine($"{b.BlogId} {b.Name} {b.Author}");
    }
}
public void Container_MicrosoftWriterApacheReader()
{
    var expected = new List<ClassOfInt>();
    for (var i = 0; i < 7; i++)
    {
        expected.Add(ClassOfInt.Create(true));
    }

    using (var memoryStream = new MemoryStream())
    {
        var writer = AvroContainer.CreateWriter<ClassOfInt>(memoryStream, Codec.Deflate);

        var i = 0;
        while (i < expected.Count)
        {
            var block = writer.CreateBlockAsync().Result;
            for (var j = 0; j < 2; j++)
            {
                if (i >= expected.Count)
                {
                    break;
                }

                block.Write(expected[i]);
                i++;
            }

            writer.WriteBlockAsync(block).Wait();
        }

        writer.Dispose();
        memoryStream.Seek(0, SeekOrigin.Begin);

        var reader = DataFileReader<GenericRecord>.OpenReader(memoryStream);
        var actual = new List<GenericRecord>(reader);

        for (var k = 0; k < expected.Count; ++k)
        {
            Assert.Equal(expected[k].PrimitiveInt, actual[k]["PrimitiveInt"]);
        }
    }
}
/// <exception cref="System.IO.IOException"/>
public AvroFileInputStream(FileStatus status)
{
    pos = 0;
    buffer = new byte[0];

    GenericDatumReader<object> reader = new GenericDatumReader<object>();
    FileContext fc = FileContext.GetFileContext(new Configuration());
    fileReader = DataFileReader.OpenReader(new AvroFSInput(fc, status.GetPath()), reader);

    Schema schema = fileReader.GetSchema();
    writer = new GenericDatumWriter<object>(schema);
    output = new ByteArrayOutputStream();

    JsonGenerator generator = new JsonFactory().CreateJsonGenerator(output, JsonEncoding.Utf8);
    MinimalPrettyPrinter prettyPrinter = new MinimalPrettyPrinter();
    prettyPrinter.SetRootValueSeparator(Runtime.GetProperty("line.separator"));
    generator.SetPrettyPrinter(prettyPrinter);
    encoder = EncoderFactory.Get().JsonEncoder(schema, generator);
}
public void TestDifferentReaderSchema()
{
    RecordSchema writerSchema = Schema.Parse(
        "{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"string\"}," +
        "{\"name\":\"f2\", \"type\":\"string\"}]}") as RecordSchema;
    Schema readerSchema = Schema.Parse(
        "{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"string\"}," +
        "{\"name\":\"f3\", \"type\":\"string\", \"default\":\"test\"}]}");

    MemoryStream dataFileOutputStream = new MemoryStream();
    WriteGeneric(dataFileOutputStream, writerSchema,
        mkRecord(new[] { "f1", "f1val", "f2", "f2val" }, writerSchema), Codec.Type.Null);

    MemoryStream dataFileInputStream = new MemoryStream(dataFileOutputStream.ToArray());
    using (IFileReader<GenericRecord> reader = DataFileReader<GenericRecord>.OpenReader(dataFileInputStream, readerSchema))
    {
        GenericRecord result = reader.Next();

        // Schema resolution drops f2 (absent from the reader schema) and fills f3 with its default
        object ignore;
        Assert.IsFalse(result.TryGetValue("f2", out ignore));
        Assert.AreEqual("f1val", result["f1"]);
        Assert.AreEqual("test", result["f3"]);
    }
}
/// <summary>
/// Dumps the data from the Avro blob to the data warehouse (DW).
/// Before running this, ensure that the DW has the required <see cref="TableName" /> table created.
/// </summary>
private static IEnumerable<DeviceMeasurement> Dump(Uri fileUri)
{
    // Get the blob reference
    var storageAccount = CloudStorageAccount.Parse(StorageConnectionString);
    var blobClient = storageAccount.CreateCloudBlobClient();
    var blob = blobClient.GetBlobReferenceFromServer(fileUri);

    // Parse the Avro file
    using (var avroReader = DataFileReader<GenericRecord>.OpenReader(blob.OpenRead()))
    {
        while (avroReader.HasNext())
        {
            var r = avroReader.Next();
            var body = (byte[])r["Body"];
            var payload = Encoding.ASCII.GetString(body);
            var measurement = JsonConvert.DeserializeObject<DeviceMeasurement>(payload);
            yield return measurement;
        }
    }
}
public void SequentialWriter_MicrosoftWriterApacherReaderOfNestedType()
{
    var expected = new List<NestedClass>();
    for (var i = 0; i < 7; i++)
    {
        expected.Add(NestedClass.Create(true));
    }

    var w = AvroContainer.CreateWriter<NestedClass>(
        this.resultStream,
        new AvroSerializerSettings { Resolver = new AvroDataContractResolver(true) },
        Codec.Deflate);
    using (var writer = new SequentialWriter<NestedClass>(w, 2))
    {
        expected.ForEach(writer.Write);
    }

    this.resultStream.Seek(0, SeekOrigin.Begin);

    var reader = DataFileReader<GenericRecord>.OpenReader(this.resultStream);
    var actual = new List<GenericRecord>(reader);

    for (var i = 0; i < expected.Count; ++i)
    {
        Assert.AreEqual(expected[i].PrimitiveInt, actual[i]["PrimitiveInt"]);
        if (expected[i].ClassOfIntReference == null)
        {
            Assert.IsNull(actual[i]["ClassOfIntReference"]);
        }
        else
        {
            Assert.IsNotNull(actual[i]["ClassOfIntReference"] as GenericRecord);
            Assert.AreEqual(
                expected[i].ClassOfIntReference.PrimitiveInt,
                (actual[i]["ClassOfIntReference"] as GenericRecord)["PrimitiveInt"]);
        }
    }
}