/// <summary>
/// Writes a sample record batch over a loopback TCP connection and verifies that
/// the batch read from the accepting side of the connection matches the original.
/// </summary>
public async Task CanWriteToNetworkStreamAsync()
{
    RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 100);

    // Port 0 lets the OS pick a free port, so the test cannot collide with another
    // process (or a parallel test run) that already holds a fixed port number.
    TcpListener listener = new TcpListener(IPAddress.Loopback, 0);
    listener.Start();

    try
    {
        int port = ((IPEndPoint)listener.LocalEndpoint).Port;

        using (TcpClient sender = new TcpClient())
        {
            sender.Connect(IPAddress.Loopback, port);
            NetworkStream stream = sender.GetStream();

            using (var writer = new ArrowStreamWriter(stream, originalBatch.Schema))
            {
                await writer.WriteRecordBatchAsync(originalBatch);
                await writer.WriteEndAsync();
                stream.Flush();
            }
        }

        // The connection is accepted only after the sender has finished writing
        // and has been disposed.
        using (TcpClient receiver = listener.AcceptTcpClient())
        {
            NetworkStream stream = receiver.GetStream();

            using (var reader = new ArrowStreamReader(stream))
            {
                RecordBatch newBatch = reader.ReadNextRecordBatch();
                ArrowReaderVerifier.CompareBatches(originalBatch, newBatch);
            }
        }
    }
    finally
    {
        // Release the listening socket even when a write, read or assertion fails.
        listener.Stop();
    }
}
/// <summary>
/// Writes every batch in <paramref name="originalBatches"/> to an in-memory stream,
/// then reads the stream back and compares each batch read with its original, in order.
/// </summary>
/// <param name="originalBatches">
/// Batches to round-trip. The schema of the first batch is used to create the writer.
/// </param>
/// <param name="options">Options handed to the writer; defaults to <c>null</c>.</param>
private static async Task TestRoundTripRecordBatchesAsync(List<RecordBatch> originalBatches, IpcOptions options = null)
{
    using (var buffer = new MemoryStream())
    {
        var schema = originalBatches[0].Schema;

        // leaveOpen keeps the buffer usable for the read phase after the writer is disposed.
        using (var writer = new ArrowStreamWriter(buffer, schema, leaveOpen: true, options))
        {
            for (int i = 0; i < originalBatches.Count; i++)
            {
                await writer.WriteRecordBatchAsync(originalBatches[i]);
            }

            await writer.WriteEndAsync();
        }

        // Rewind so the reader starts from the first written byte.
        buffer.Position = 0;

        using (var reader = new ArrowStreamReader(buffer))
        {
            for (int i = 0; i < originalBatches.Count; i++)
            {
                RecordBatch expected = originalBatches[i];
                RecordBatch actual = reader.ReadNextRecordBatch();
                ArrowReaderVerifier.CompareBatches(expected, actual);
            }
        }
    }
}
/// <summary>
/// Reads a written batch through a reader constructed with a test memory allocator and
/// checks the allocator's statistics, then checks whether disposing the reader closed
/// the underlying stream according to <paramref name="shouldLeaveOpen"/>.
/// </summary>
/// <param name="shouldLeaveOpen">Passed to the reader; controls the expected state of the stream after the reader is disposed.</param>
/// <param name="createDictionaryArray">Passed to the sample batch factory.</param>
/// <param name="expectedAllocations">Allocation count expected from the allocator after one batch is read.</param>
public async Task Ctor_MemoryPool_AllocatesFromPool(bool shouldLeaveOpen, bool createDictionaryArray, int expectedAllocations)
{
    RecordBatch sampleBatch = TestData.CreateSampleRecordBatch(length: 100, createDictionaryArray: createDictionaryArray);

    using (var buffer = new MemoryStream())
    {
        var writer = new ArrowStreamWriter(buffer, sampleBatch.Schema);
        await writer.WriteRecordBatchAsync(sampleBatch);
        await writer.WriteEndAsync();

        buffer.Position = 0;

        var allocator = new TestMemoryAllocator();
        var reader = new ArrowStreamReader(buffer, allocator, shouldLeaveOpen);
        reader.ReadNextRecordBatch();

        Assert.Equal(expectedAllocations, allocator.Statistics.Allocations);
        Assert.True(allocator.Statistics.BytesAllocated > 0);

        reader.Dispose();

        if (!shouldLeaveOpen)
        {
            // The reader owned the stream, so touching it now must fail.
            Assert.Throws<ObjectDisposedException>(() => buffer.Position);
        }
        else
        {
            // The stream is still usable and has been advanced by the read.
            Assert.True(buffer.Position > 0);
        }
    }
}
/// <summary>
/// Writes a sample batch into a <c>PartialReadStream</c> and hands a reader over that
/// stream to <paramref name="verificationFunc"/>, so the reader is exercised against a
/// stream that only returns a subset of the data from each Read.
/// </summary>
/// <param name="verificationFunc">Callback that receives the reader and the originally written batch.</param>
/// <param name="createDictionaryArray">Passed to the sample batch factory.</param>
private static async Task TestReaderFromPartialReadStream(Func<ArrowStreamReader, RecordBatch, Task> verificationFunc, bool createDictionaryArray)
{
    RecordBatch sampleBatch = TestData.CreateSampleRecordBatch(length: 100, createDictionaryArray: createDictionaryArray);

    using (var partialStream = new PartialReadStream())
    {
        var writer = new ArrowStreamWriter(partialStream, sampleBatch.Schema);
        await writer.WriteRecordBatchAsync(sampleBatch);
        await writer.WriteEndAsync();

        // Rewind so the reader starts from the first written byte.
        partialStream.Position = 0;

        var reader = new ArrowStreamReader(partialStream);
        await verificationFunc(reader, sampleBatch);
    }
}
/// <summary>
/// Writes a stream that contains only the start and end of the stream (no record
/// batches) and verifies that the reader returns <c>null</c> for the first batch while
/// still exposing a schema equal to the one the writer was created with.
/// </summary>
public async Task WritesEmptyFileAsync()
{
    // The batch is only used as a source for the schema; it is never written.
    RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 1);

    using (MemoryStream stream = new MemoryStream())
    {
        // leaveOpen keeps the stream usable for the reader after the writer is disposed.
        using (var writer = new ArrowStreamWriter(stream, originalBatch.Schema, leaveOpen: true))
        {
            await writer.WriteStartAsync();
            await writer.WriteEndAsync();
        }

        stream.Position = 0;

        using (var reader = new ArrowStreamReader(stream))
        {
            RecordBatch readBatch = reader.ReadNextRecordBatch();
            Assert.Null(readBatch);
            SchemaComparer.Compare(originalBatch.Schema, reader.Schema);
        }
    }
}
/// <summary>
/// Writes a sample batch with <c>IpcOptions.WriteLegacyIpcFormat</c> set to
/// <paramref name="writeLegacyIpcFormat"/> and inspects the first four bytes and the
/// last eight bytes of the output for the continuation token (-1).
/// </summary>
/// <param name="writeLegacyIpcFormat">Whether the writer is asked to emit the legacy IPC format.</param>
public async Task WriteLegacyIpcFormatAsync(bool writeLegacyIpcFormat)
{
    RecordBatch sampleBatch = TestData.CreateSampleRecordBatch(length: 100);
    var ipcOptions = new IpcOptions { WriteLegacyIpcFormat = writeLegacyIpcFormat };

    using (var memory = new MemoryStream())
    {
        using (var writer = new ArrowStreamWriter(memory, sampleBatch.Schema, leaveOpen: true, ipcOptions))
        {
            await writer.WriteRecordBatchAsync(sampleBatch);
            await writer.WriteEndAsync();
        }

        memory.Position = 0;

        // Ensure the continuation is written correctly.
        byte[] written = memory.ToArray();
        int leadingWord = BinaryPrimitives.ReadInt32LittleEndian(written);
        int secondToLastWord = BinaryPrimitives.ReadInt32LittleEndian(written.AsSpan(written.Length - 8));
        int lastWord = BinaryPrimitives.ReadInt32LittleEndian(written.AsSpan(written.Length - 4));

        if (!writeLegacyIpcFormat)
        {
            // The latest IPC format has a continuation token at the start.
            Assert.Equal(-1, leadingWord);
            Assert.Equal(-1, secondToLastWord);
        }
        else
        {
            // The legacy IPC format doesn't have a continuation token at the start.
            Assert.NotEqual(-1, leadingWord);
            Assert.NotEqual(-1, secondToLastWord);
        }

        // In both formats the final four bytes are zero.
        Assert.Equal(0, lastWord);
    }
}
/// <summary>
/// Writes a sample batch to an in-memory stream, optionally followed by the end of the
/// stream, and hands a reader over that stream to <paramref name="verificationFunc"/>.
/// </summary>
/// <param name="verificationFunc">Callback that receives the reader and the originally written batch.</param>
/// <param name="writeEnd">When <c>true</c>, <c>WriteEndAsync</c> is called after the batch is written.</param>
private static async Task TestReaderFromStream(
    Func<ArrowStreamReader, RecordBatch, Task> verificationFunc,
    bool writeEnd)
{
    RecordBatch sampleBatch = TestData.CreateSampleRecordBatch(length: 100);

    using (var memory = new MemoryStream())
    {
        var writer = new ArrowStreamWriter(memory, sampleBatch.Schema);
        await writer.WriteRecordBatchAsync(sampleBatch);

        if (writeEnd)
        {
            await writer.WriteEndAsync();
        }

        // Rewind so the reader starts from the first written byte.
        memory.Position = 0;

        var reader = new ArrowStreamReader(memory);
        await verificationFunc(reader, sampleBatch);
    }
}
/// <summary>
/// Round-trips a batch of two single-value Int32 columns, then writes it again and
/// inspects the trailing bytes of the output: each column's data buffer must occupy
/// eight bytes (value bytes followed by zero padding), and the output must end with
/// four 0xFF bytes followed by four 0x00 bytes.
/// </summary>
public async Task WriteBatchWithCorrectPaddingAsync()
{
    byte firstMarker = 0x04;
    byte secondMarker = 0x14;

    var schema = new Schema.Builder()
        .Field(f => f.Name("age").DataType(Int32Type.Default))
        .Field(f => f.Name("characterCount").DataType(Int32Type.Default))
        .Build();

    var columns = new IArrowArray[]
    {
        new Int32Array(
            new ArrowBuffer(new byte[] { firstMarker, firstMarker, 0x00, 0x00 }),
            ArrowBuffer.Empty,
            length: 1,
            nullCount: 0,
            offset: 0),
        new Int32Array(
            new ArrowBuffer(new byte[] { secondMarker, secondMarker, 0x00, 0x00 }),
            ArrowBuffer.Empty,
            length: 1,
            nullCount: 0,
            offset: 0)
    };

    var batch = new RecordBatch(schema, columns, length: 1);

    await TestRoundTripRecordBatchAsync(batch);

    using (var memory = new MemoryStream())
    {
        using (var writer = new ArrowStreamWriter(memory, batch.Schema, leaveOpen: true))
        {
            await writer.WriteRecordBatchAsync(batch);
            await writer.WriteEndAsync();
        }

        byte[] output = memory.ToArray();
        int end = output.Length;

        // Ensure that the data buffers at the end are 8-byte aligned.
        // First column: two marker bytes, then zeros up to the 8-byte boundary.
        Assert.Equal(firstMarker, output[end - 24]);
        Assert.Equal(firstMarker, output[end - 23]);
        for (int back = 22; back >= 17; back--)
        {
            Assert.Equal(0, output[end - back]);
        }

        // Second column: same layout, immediately after the first.
        Assert.Equal(secondMarker, output[end - 16]);
        Assert.Equal(secondMarker, output[end - 15]);
        for (int back = 14; back >= 9; back--)
        {
            Assert.Equal(0, output[end - back]);
        }

        // Verify the EOS is written correctly: four 0xFF bytes, then four 0x00 bytes.
        for (int back = 8; back >= 5; back--)
        {
            Assert.Equal(0xFF, output[end - back]);
        }

        for (int back = 4; back >= 1; back--)
        {
            Assert.Equal(0x00, output[end - back]);
        }
    }
}