/// <summary>
/// Writes two record batches of different lengths to an in-memory Arrow file and
/// checks that each one can be fetched by index, including fetching index 0 a
/// second time after index 1 has already been read.
/// </summary>
public async Task TestReadMultipleRecordBatchAsync()
{
    RecordBatch firstExpected = TestData.CreateSampleRecordBatch(length: 100);
    RecordBatch secondExpected = TestData.CreateSampleRecordBatch(length: 50);

    using (var buffer = new MemoryStream())
    {
        // Both batches share the schema taken from the first one.
        var fileWriter = new ArrowFileWriter(buffer, firstExpected.Schema);
        await fileWriter.WriteRecordBatchAsync(firstExpected);
        await fileWriter.WriteRecordBatchAsync(secondExpected);
        await fileWriter.WriteEndAsync();

        // Rewind so the reader sees the stream from its start.
        buffer.Position = 0;
        var fileReader = new ArrowFileReader(buffer);

        RecordBatch firstActual = await fileReader.ReadRecordBatchAsync(0);
        ArrowReaderVerifier.CompareBatches(firstExpected, firstActual);

        RecordBatch secondActual = await fileReader.ReadRecordBatchAsync(1);
        ArrowReaderVerifier.CompareBatches(secondExpected, secondActual);

        // Random access: going back to index 0 must still yield the first batch.
        RecordBatch firstAgain = await fileReader.ReadRecordBatchAsync(0);
        ArrowReaderVerifier.CompareBatches(firstExpected, firstAgain);
    }
}
/// <summary>
/// Writes two record batches to an in-memory Arrow file (finished with
/// <c>WriteFooterAsync</c>) and checks reading them back by index.
/// The assertions expect index 0 to return the batch written last and
/// index 1 to return the batch written first.
/// </summary>
public async Task TestReadMultipleRecordBatchAsync()
{
    RecordBatch writtenFirst = TestData.CreateSampleRecordBatch(length: 100);
    RecordBatch writtenSecond = TestData.CreateSampleRecordBatch(length: 50);

    using (var buffer = new MemoryStream())
    {
        var fileWriter = new ArrowFileWriter(buffer, writtenFirst.Schema);
        await fileWriter.WriteRecordBatchAsync(writtenFirst);
        await fileWriter.WriteRecordBatchAsync(writtenSecond);
        await fileWriter.WriteFooterAsync();

        // Rewind so the reader sees the stream from its start.
        buffer.Position = 0;

        // Indexed reads come back in reverse write order (back to front).
        // TODO: is this a bug??
        var fileReader = new ArrowFileReader(buffer);

        RecordBatch atIndexZero = await fileReader.ReadRecordBatchAsync(0);
        ArrowReaderVerifier.CompareBatches(writtenSecond, atIndexZero);

        RecordBatch atIndexOne = await fileReader.ReadRecordBatchAsync(1);
        ArrowReaderVerifier.CompareBatches(writtenFirst, atIndexOne);

        // Random access: re-reading index 0 must give the same batch as before.
        RecordBatch atIndexZeroAgain = await fileReader.ReadRecordBatchAsync(0);
        ArrowReaderVerifier.CompareBatches(writtenSecond, atIndexZeroAgain);
    }
}
/// <summary>
/// Constructs an <c>ArrowFileReader</c> with an explicit memory allocator, reads one
/// batch, and asserts that the allocator recorded exactly one allocation with a
/// non-zero byte count. After disposing the reader it also checks whether the
/// underlying stream is still usable, according to <paramref name="shouldLeaveOpen"/>.
/// </summary>
/// <param name="shouldLeaveOpen">
/// Value passed as the reader's <c>leaveOpen</c> argument; when <c>true</c> the stream
/// is expected to remain accessible after the reader is disposed.
/// </param>
public async Task Ctor_MemoryPool_AllocatesFromPool(bool shouldLeaveOpen)
{
    RecordBatch sample = TestData.CreateSampleRecordBatch(length: 100);

    using (var buffer = new MemoryStream())
    {
        var fileWriter = new ArrowFileWriter(buffer, sample.Schema);
        await fileWriter.WriteRecordBatchAsync(sample);
        await fileWriter.WriteEndAsync();

        // Rewind so the reader sees the stream from its start.
        buffer.Position = 0;

        var allocator = new TestMemoryAllocator();
        var fileReader = new ArrowFileReader(buffer, allocator, leaveOpen: shouldLeaveOpen);
        fileReader.ReadNextRecordBatch();

        Assert.Equal(1, allocator.Statistics.Allocations);
        Assert.True(allocator.Statistics.BytesAllocated > 0);

        fileReader.Dispose();

        if (!shouldLeaveOpen)
        {
            // The stream is expected to have been disposed together with the reader.
            Assert.Throws<ObjectDisposedException>(() => buffer.Position);
        }
        else
        {
            // The stream must still be usable and positioned past the start.
            Assert.True(buffer.Position > 0);
        }
    }
}
/// <summary>
/// Sample entry point: builds a four-column, ten-row record batch with the fluent
/// builder, prints the allocator's statistics, writes the batch to
/// <c>test.arrow</c>, and then waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
public static async Task Main(string[] args)
{
    // Use a specific memory pool from which arrays will be allocated (optional)
    var memoryAllocator = new NativeMemoryAllocator(alignment: 64);

    // Build a record batch using the Fluent API
    var recordBatch = new RecordBatch.Builder(memoryAllocator)
        .Append("Column A", false, col => col.Int32(array => array.AppendRange(Enumerable.Range(0, 10))))
        .Append("Column B", false, col => col.Float(array => array.AppendRange(Enumerable.Range(0, 10).Select(x => Convert.ToSingle(x * 2)))))
        .Append("Column C", false, col => col.String(array => array.AppendRange(Enumerable.Range(0, 10).Select(x => $"Item {x+1}"))))
        .Append("Column D", false, col => col.Boolean(array => array.AppendRange(Enumerable.Range(0, 10).Select(x => x % 2 == 0))))
        .Build();

    // Print memory allocation statistics
    Console.WriteLine("Allocations: {0}", memoryAllocator.Statistics.Allocations);
    Console.WriteLine("Allocated: {0} byte(s)", memoryAllocator.Statistics.BytesAllocated);

    // Write record batch to a file.
    // File.Create truncates an existing file. File.OpenWrite does not, so rerunning
    // over a previously longer "test.arrow" would leave stale bytes after the new
    // footer and produce a file that readers cannot parse from its end.
    using (var stream = File.Create("test.arrow"))
    using (var writer = new ArrowFileWriter(stream, recordBatch.Schema))
    {
        await writer.WriteRecordBatchAsync(recordBatch);
        await writer.WriteFooterAsync();
    }

    Console.WriteLine("Done");
    Console.ReadKey();
}
/// <summary>
/// Writes a single record batch using the legacy IPC format, then reads the file
/// back and checks that it reports exactly one batch which matches the original.
/// </summary>
public async Task WritesFooterAlignedMulitpleOf8()
{
    RecordBatch expected = TestData.CreateSampleRecordBatch(length: 100);

    // use WriteLegacyIpcFormat, which only uses a 4-byte length prefix
    // which causes the length prefix to not be 8-byte aligned by default
    var legacyOptions = new IpcOptions { WriteLegacyIpcFormat = true };

    var buffer = new MemoryStream();
    var fileWriter = new ArrowFileWriter(buffer, expected.Schema, leaveOpen: true, legacyOptions);

    await fileWriter.WriteRecordBatchAsync(expected);
    await fileWriter.WriteEndAsync();

    // Rewind so the reader sees the stream from its start.
    buffer.Position = 0;

    var fileReader = new ArrowFileReader(buffer);

    int batchCount = await fileReader.RecordBatchCountAsync();
    Assert.Equal(1, batchCount);

    RecordBatch actual = await fileReader.ReadRecordBatchAsync(0);
    ArrowReaderVerifier.CompareBatches(expected, actual);
}
/// <summary>
/// Shared test scaffold: writes one sample record batch to an in-memory Arrow file,
/// rewinds the stream, opens a reader over it, and hands the reader plus the
/// original batch to the supplied verification callback.
/// </summary>
/// <param name="verificationFunc">
/// Callback invoked with the reader and the batch that was written; it is awaited
/// before the stream is disposed.
/// </param>
private static async Task TestReadRecordBatchHelper(
    Func<ArrowFileReader, RecordBatch, Task> verificationFunc)
{
    RecordBatch expected = TestData.CreateSampleRecordBatch(length: 100);

    using (var buffer = new MemoryStream())
    {
        var fileWriter = new ArrowFileWriter(buffer, expected.Schema);
        await fileWriter.WriteRecordBatchAsync(expected);
        await fileWriter.WriteFooterAsync();

        // Rewind so the reader sees the stream from its start.
        buffer.Position = 0;

        var fileReader = new ArrowFileReader(buffer);
        await verificationFunc(fileReader, expected);
    }
}
/// <summary>
/// Converts the parsed JSON file into an Arrow file: builds the schema, creates
/// the output file via <c>ArrowFileInfo.Create()</c>, writes every batch from the
/// JSON input, finishes the file, and flushes the stream.
/// </summary>
/// <returns>Always <c>0</c>.</returns>
private async Task<int> JsonToArrow()
{
    JsonFile parsed = await ParseJsonFile();
    Schema arrowSchema = CreateSchema(parsed.Schema);

    using (FileStream output = ArrowFileInfo.Create())
    {
        var fileWriter = new ArrowFileWriter(output, arrowSchema);
        await fileWriter.WriteStartAsync();

        foreach (var jsonBatch in parsed.Batches)
        {
            await fileWriter.WriteRecordBatchAsync(CreateRecordBatch(arrowSchema, jsonBatch));
        }

        await fileWriter.WriteEndAsync();
        await output.FlushAsync();
    }

    return 0;
}
/// <summary>
/// Writes a single record batch using the legacy IPC format and then passes the
/// rewound stream and the original batch to <c>ValidateRecordBatchFile</c>.
/// </summary>
public async Task WritesFooterAlignedMulitpleOf8Async()
{
    RecordBatch expected = TestData.CreateSampleRecordBatch(length: 100);

    // use WriteLegacyIpcFormat, which only uses a 4-byte length prefix
    // which causes the length prefix to not be 8-byte aligned by default
    var legacyOptions = new IpcOptions { WriteLegacyIpcFormat = true };

    var buffer = new MemoryStream();
    var fileWriter = new ArrowFileWriter(buffer, expected.Schema, leaveOpen: true, legacyOptions);

    await fileWriter.WriteRecordBatchAsync(expected);
    await fileWriter.WriteEndAsync();

    // Rewind so validation reads the stream from its start.
    buffer.Position = 0;

    await ValidateRecordBatchFile(buffer, expected);
}