/// <summary>
/// Reads one record batch through an <see cref="ArrowFileReader"/> constructed with a
/// <c>TestMemoryAllocator</c> and asserts that the allocator recorded exactly one allocation
/// with a non-zero byte count. Afterwards the reader is disposed and the state of the
/// underlying stream is checked against <paramref name="shouldLeaveOpen"/>.
/// </summary>
/// <param name="shouldLeaveOpen">Value forwarded as the reader's <c>leaveOpen</c> argument.</param>
public async Task Ctor_MemoryPool_AllocatesFromPool(bool shouldLeaveOpen)
{
    RecordBatch sourceBatch = TestData.CreateSampleRecordBatch(length: 100);

    using (var buffer = new MemoryStream())
    {
        // Serialize a single batch into the in-memory file, then rewind for reading.
        var fileWriter = new ArrowFileWriter(buffer, sourceBatch.Schema);
        await fileWriter.WriteRecordBatchAsync(sourceBatch);
        await fileWriter.WriteEndAsync();
        buffer.Position = 0;

        var allocator = new TestMemoryAllocator();
        var fileReader = new ArrowFileReader(buffer, allocator, leaveOpen: shouldLeaveOpen);
        fileReader.ReadNextRecordBatch();

        Assert.Equal(1, allocator.Statistics.Allocations);
        Assert.True(allocator.Statistics.BytesAllocated > 0);

        // Disposal is done explicitly because its effect on the stream is what is checked next.
        fileReader.Dispose();

        if (!shouldLeaveOpen)
        {
            // The stream is expected to have been disposed together with the reader.
            Assert.Throws<ObjectDisposedException>(() => buffer.Position);
        }
        else
        {
            // The stream is still usable and has been advanced by the read.
            Assert.True(buffer.Position > 0);
        }
    }
}
/// <summary>
/// Writes two record batches (100 and 50 rows) to an in-memory Arrow file, then reads them
/// back by index, finishing with a second read of index 0 to exercise random access.
/// </summary>
public async Task TestReadMultipleRecordBatchAsync()
{
    RecordBatch firstWritten = TestData.CreateSampleRecordBatch(length: 100);
    RecordBatch secondWritten = TestData.CreateSampleRecordBatch(length: 50);

    using (var buffer = new MemoryStream())
    {
        var fileWriter = new ArrowFileWriter(buffer, firstWritten.Schema);
        await fileWriter.WriteRecordBatchAsync(firstWritten);
        await fileWriter.WriteRecordBatchAsync(secondWritten);
        await fileWriter.WriteFooterAsync();
        buffer.Position = 0;

        // Indexed reads are expected in reverse write order here: index 0 yields the
        // batch written last, index 1 the batch written first.
        // TODO: is this a bug??
        var fileReader = new ArrowFileReader(buffer);

        RecordBatch atIndexZero = await fileReader.ReadRecordBatchAsync(0);
        ArrowReaderVerifier.CompareBatches(secondWritten, atIndexZero);

        RecordBatch atIndexOne = await fileReader.ReadRecordBatchAsync(1);
        ArrowReaderVerifier.CompareBatches(firstWritten, atIndexOne);

        // Going back to index 0 checks that batches can be fetched out of sequence.
        RecordBatch atIndexZeroAgain = await fileReader.ReadRecordBatchAsync(0);
        ArrowReaderVerifier.CompareBatches(secondWritten, atIndexZeroAgain);
    }
}
/// <summary>
/// Writes two record batches (100 and 50 rows) to an in-memory Arrow file and verifies that
/// reading by index returns them in write order, including a repeated read of index 0.
/// </summary>
public async Task TestReadMultipleRecordBatchAsync()
{
    RecordBatch firstWritten = TestData.CreateSampleRecordBatch(length: 100);
    RecordBatch secondWritten = TestData.CreateSampleRecordBatch(length: 50);

    using (var buffer = new MemoryStream())
    {
        var fileWriter = new ArrowFileWriter(buffer, firstWritten.Schema);
        await fileWriter.WriteRecordBatchAsync(firstWritten);
        await fileWriter.WriteRecordBatchAsync(secondWritten);
        await fileWriter.WriteEndAsync();
        buffer.Position = 0;

        var fileReader = new ArrowFileReader(buffer);

        RecordBatch atIndexZero = await fileReader.ReadRecordBatchAsync(0);
        ArrowReaderVerifier.CompareBatches(firstWritten, atIndexZero);

        RecordBatch atIndexOne = await fileReader.ReadRecordBatchAsync(1);
        ArrowReaderVerifier.CompareBatches(secondWritten, atIndexOne);

        // Going back to index 0 checks that batches can be fetched out of sequence.
        RecordBatch atIndexZeroAgain = await fileReader.ReadRecordBatchAsync(0);
        ArrowReaderVerifier.CompareBatches(firstWritten, atIndexZeroAgain);
    }
}
/// <summary>
/// Writes a single record batch with <c>WriteLegacyIpcFormat</c> enabled and verifies that the
/// resulting file reports one batch and that the batch read back matches the original.
/// </summary>
/// <remarks>
/// The stream and the reader are wrapped in <c>using</c> statements so they are released
/// even when an assertion throws.
/// </remarks>
public async Task WritesFooterAlignedMulitpleOf8()
{
    RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 100);

    using (var stream = new MemoryStream())
    {
        var writer = new ArrowFileWriter(
            stream,
            originalBatch.Schema,
            leaveOpen: true,
            // use WriteLegacyIpcFormat, which only uses a 4-byte length prefix
            // which causes the length prefix to not be 8-byte aligned by default
            new IpcOptions() { WriteLegacyIpcFormat = true });

        await writer.WriteRecordBatchAsync(originalBatch);
        await writer.WriteEndAsync();

        // Rewind so the reader starts at the beginning of the file that was just written.
        stream.Position = 0;

        using (var reader = new ArrowFileReader(stream))
        {
            int count = await reader.RecordBatchCountAsync();
            Assert.Equal(1, count);

            RecordBatch readBatch = await reader.ReadRecordBatchAsync(0);
            ArrowReaderVerifier.CompareBatches(originalBatch, readBatch);
        }
    }
}
/// <summary>
/// Opens <paramref name="stream"/> as an Arrow file, asserts that it contains exactly one
/// record batch, and compares that batch against <paramref name="recordBatch"/>.
/// </summary>
/// <param name="stream">Stream holding the Arrow file to validate.</param>
/// <param name="recordBatch">The batch the file is expected to contain.</param>
private async Task ValidateRecordBatchFile(Stream stream, RecordBatch recordBatch)
{
    // NOTE(review): the reader is not disposed here; disposing it may also close the
    // caller's stream - confirm before changing.
    ArrowFileReader fileReader = new ArrowFileReader(stream);

    int batchCount = await fileReader.RecordBatchCountAsync();
    Assert.Equal(1, batchCount);

    RecordBatch onlyBatch = await fileReader.ReadRecordBatchAsync(0);
    ArrowReaderVerifier.CompareBatches(recordBatch, onlyBatch);
}
/// <summary>
/// Shared arrangement for read tests: writes one 100-row sample batch to an in-memory Arrow
/// file, rewinds the stream, opens a reader over it and hands the reader plus the original
/// batch to <paramref name="verificationFunc"/>.
/// </summary>
/// <param name="verificationFunc">
/// Callback that receives the reader and the batch that was written, and performs the
/// test-specific checks.
/// </param>
private static async Task TestReadRecordBatchHelper(
    Func<ArrowFileReader, RecordBatch, Task> verificationFunc)
{
    RecordBatch writtenBatch = TestData.CreateSampleRecordBatch(length: 100);

    using (var buffer = new MemoryStream())
    {
        var fileWriter = new ArrowFileWriter(buffer, writtenBatch.Schema);
        await fileWriter.WriteRecordBatchAsync(writtenBatch);
        await fileWriter.WriteFooterAsync();

        // Rewind so the reader sees the file from its first byte.
        buffer.Position = 0;

        var fileReader = new ArrowFileReader(buffer);
        await verificationFunc(fileReader, writtenBatch);
    }
}
/// <summary>
/// Writes an Arrow file using only <c>WriteStart</c> and <c>WriteEnd</c> (no record batches)
/// and verifies that the reader reports zero batches, returns <c>null</c> from
/// <c>ReadNextRecordBatch</c>, and exposes the schema the writer was created with.
/// </summary>
/// <remarks>
/// The stream and the reader are wrapped in <c>using</c> statements so they are released
/// even when an assertion throws.
/// </remarks>
public async Task WritesEmptyFile()
{
    // The sample batch is only used as a source for the schema; it is never written.
    RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 1);

    using (var stream = new MemoryStream())
    {
        var writer = new ArrowFileWriter(stream, originalBatch.Schema);

        writer.WriteStart();
        writer.WriteEnd();

        // Rewind so the reader starts at the beginning of the file that was just written.
        stream.Position = 0;

        using (var reader = new ArrowFileReader(stream))
        {
            int count = await reader.RecordBatchCountAsync();
            Assert.Equal(0, count);

            RecordBatch readBatch = reader.ReadNextRecordBatch();
            Assert.Null(readBatch);

            SchemaComparer.Compare(originalBatch.Schema, reader.Schema);
        }
    }
}
/// <summary>
/// Compares the Arrow file referenced by <c>ArrowFileInfo</c> against the parsed JSON file:
/// batch count, schema, and each record batch in order.
/// </summary>
/// <returns>
/// <c>0</c> when the batch counts match and no extra batch follows the expected ones;
/// <c>-1</c> (after printing a message to the console) when the batch count differs or the
/// Arrow file yields an additional batch.
/// </returns>
private async Task<int> Validate()
{
    JsonFile expectedFile = await ParseJsonFile();

    using FileStream input = ArrowFileInfo.OpenRead();
    using ArrowFileReader arrowReader = new ArrowFileReader(input);

    int arrowBatchCount = await arrowReader.RecordBatchCountAsync();
    bool countsMatch = arrowBatchCount == expectedFile.Batches.Count;
    if (!countsMatch)
    {
        Console.WriteLine($"Incorrect batch count. JsonFile: {expectedFile.Batches.Count}, ArrowFile: {arrowBatchCount}");
        return -1;
    }

    Schema expectedSchema = CreateSchema(expectedFile.Schema);
    Schema actualSchema = arrowReader.Schema;
    SchemaComparer.Compare(expectedSchema, actualSchema);

    // Walk both sources in lock-step: the next batch from the Arrow file is compared
    // against the JSON batch at the same position.
    for (int batchIndex = 0; batchIndex < arrowBatchCount; batchIndex++)
    {
        RecordBatch actualBatch = arrowReader.ReadNextRecordBatch();
        RecordBatch expectedBatch = CreateRecordBatch(expectedSchema, expectedFile.Batches[batchIndex]);

        ArrowReaderVerifier.CompareBatches(expectedBatch, actualBatch, strictCompare: false);
    }

    // ensure there are no more batches in the file
    RecordBatch unexpectedExtra = arrowReader.ReadNextRecordBatch();
    if (unexpectedExtra != null)
    {
        Console.WriteLine($"The ArrowFile has more RecordBatches than it should.");
        return -1;
    }

    return 0;
}