Beispiel #1
0
        // Sends a 100-row sample record batch over a loopback TCP connection using
        // the asynchronous ArrowStreamWriter API, then reads it back on the accepting
        // side with ArrowStreamReader and compares it with the original batch.
        public async Task CanWriteToNetworkStreamAsync()
        {
            RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 100);

            const int   port     = 32154;
            TcpListener listener = new TcpListener(IPAddress.Loopback, port);

            listener.Start();

            try
            {
                // The sender connects, writes everything and is disposed before the
                // listener accepts the connection further below.
                using (TcpClient sender = new TcpClient())
                {
                    sender.Connect(IPAddress.Loopback, port);
                    NetworkStream stream = sender.GetStream();

                    using (var writer = new ArrowStreamWriter(stream, originalBatch.Schema))
                    {
                        await writer.WriteRecordBatchAsync(originalBatch);

                        await writer.WriteEndAsync();

                        stream.Flush();
                    }
                }

                using (TcpClient receiver = listener.AcceptTcpClient())
                {
                    NetworkStream stream = receiver.GetStream();
                    using (var reader = new ArrowStreamReader(stream))
                    {
                        RecordBatch newBatch = reader.ReadNextRecordBatch();
                        ArrowReaderVerifier.CompareBatches(originalBatch, newBatch);
                    }
                }
            }
            finally
            {
                // Stop listening even when writing, reading or the comparison throws,
                // so the listening socket is not left open after the test.
                listener.Stop();
            }
        }
        // Concatenates a list holding a single Int32Array (values 1 and 2) and
        // compares the result with that same input array.
        public void TestSingleElement()
        {
            var builder = new Int32Array.Builder().Append(1).Append(2);

            Int32Array   source = builder.Build();
            Int32Array[] inputs = new[] { source };

            IArrowArray concatenated = ArrowArrayConcatenatorReflector.InvokeConcatenate(inputs);

            ArrowReaderVerifier.CompareArrays(source, concatenated);
        }
Beispiel #3
0
        // Writes two sample batches (100 and 50 rows) with ArrowFileWriter, finishes
        // with WriteFooterAsync, then reads batches back by index with ArrowFileReader,
        // including a repeated read of index 0.
        public async Task TestReadMultipleRecordBatchAsync()
        {
            RecordBatch writtenFirst  = TestData.CreateSampleRecordBatch(length: 100);
            RecordBatch writtenSecond = TestData.CreateSampleRecordBatch(length: 50);

            using (var buffer = new MemoryStream())
            {
                var fileWriter = new ArrowFileWriter(buffer, writtenFirst.Schema);

                await fileWriter.WriteRecordBatchAsync(writtenFirst);
                await fileWriter.WriteRecordBatchAsync(writtenSecond);
                await fileWriter.WriteFooterAsync();

                // Rewind the stream before handing it to the reader.
                buffer.Position = 0;

                var fileReader = new ArrowFileReader(buffer);

                // This test expects the batches by index in reverse write order:
                // index 0 is compared with the batch written last, index 1 with
                // the batch written first.
                // TODO: is this a bug??
                RecordBatch atIndexZero = await fileReader.ReadRecordBatchAsync(0);
                ArrowReaderVerifier.CompareBatches(writtenSecond, atIndexZero);

                RecordBatch atIndexOne = await fileReader.ReadRecordBatchAsync(1);
                ArrowReaderVerifier.CompareBatches(writtenFirst, atIndexOne);

                // Going back to index 0 checks random access.
                RecordBatch atIndexZeroAgain = await fileReader.ReadRecordBatchAsync(0);
                ArrowReaderVerifier.CompareBatches(writtenSecond, atIndexZeroAgain);
            }
        }
Beispiel #4
0
        // Writes one 100-row sample batch with ArrowFileWriter using the legacy IPC
        // format, then reads the file back and checks that it holds exactly one batch
        // that compares equal to the original.
        public async Task WritesFooterAlignedMulitpleOf8()
        {
            RecordBatch expectedBatch = TestData.CreateSampleRecordBatch(length: 100);

            var buffer = new MemoryStream();

            // use WriteLegacyIpcFormat, which only uses a 4-byte length prefix
            // which causes the length prefix to not be 8-byte aligned by default
            var legacyOptions = new IpcOptions()
            {
                WriteLegacyIpcFormat = true
            };

            var fileWriter = new ArrowFileWriter(buffer, expectedBatch.Schema, leaveOpen: true, legacyOptions);

            await fileWriter.WriteRecordBatchAsync(expectedBatch);
            await fileWriter.WriteEndAsync();

            // Rewind the stream before handing it to the reader.
            buffer.Position = 0;

            var fileReader = new ArrowFileReader(buffer);
            int batchCount = await fileReader.RecordBatchCountAsync();

            Assert.Equal(1, batchCount);

            RecordBatch actualBatch = await fileReader.ReadRecordBatchAsync(0);
            ArrowReaderVerifier.CompareBatches(expectedBatch, actualBatch);
        }
        // Writes every batch in originalBatches to an in-memory stream with
        // ArrowStreamWriter (schema taken from the first batch, optional IpcOptions),
        // then reads them back one by one and compares each with its original.
        private static async Task TestRoundTripRecordBatchesAsync(List <RecordBatch> originalBatches, IpcOptions options = null)
        {
            using (var buffer = new MemoryStream())
            {
                // leaveOpen: true is passed so the stream can still be used for reading
                // after the writer has been disposed.
                using (var streamWriter = new ArrowStreamWriter(buffer, originalBatches[0].Schema, leaveOpen: true, options))
                {
                    foreach (RecordBatch batchToWrite in originalBatches)
                    {
                        await streamWriter.WriteRecordBatchAsync(batchToWrite);
                    }

                    await streamWriter.WriteEndAsync();
                }

                buffer.Position = 0;

                using (var streamReader = new ArrowStreamReader(buffer))
                {
                    // Batches are read in the same order in which they were written.
                    for (int index = 0; index < originalBatches.Count; index++)
                    {
                        RecordBatch readBack = streamReader.ReadNextRecordBatch();
                        ArrowReaderVerifier.CompareBatches(originalBatches[index], readBack);
                    }
                }
            }
        }
        // Writes two sample batches (100 and 50 rows) with ArrowFileWriter, finishes
        // with WriteEndAsync, then reads them back by index in write order with
        // ArrowFileReader, including a repeated read of index 0.
        public async Task TestReadMultipleRecordBatchAsync()
        {
            RecordBatch writtenFirst  = TestData.CreateSampleRecordBatch(length: 100);
            RecordBatch writtenSecond = TestData.CreateSampleRecordBatch(length: 50);

            using (var buffer = new MemoryStream())
            {
                var fileWriter = new ArrowFileWriter(buffer, writtenFirst.Schema);

                await fileWriter.WriteRecordBatchAsync(writtenFirst);
                await fileWriter.WriteRecordBatchAsync(writtenSecond);
                await fileWriter.WriteEndAsync();

                // Rewind the stream before handing it to the reader.
                buffer.Position = 0;

                var fileReader = new ArrowFileReader(buffer);

                // Index 0 is compared with the batch written first, index 1 with the second.
                RecordBatch atIndexZero = await fileReader.ReadRecordBatchAsync(0);
                ArrowReaderVerifier.CompareBatches(writtenFirst, atIndexZero);

                RecordBatch atIndexOne = await fileReader.ReadRecordBatchAsync(1);
                ArrowReaderVerifier.CompareBatches(writtenSecond, atIndexOne);

                // Going back to index 0 checks random access.
                RecordBatch atIndexZeroAgain = await fileReader.ReadRecordBatchAsync(0);
                ArrowReaderVerifier.CompareBatches(writtenFirst, atIndexZeroAgain);
            }
        }
        // Sends a 100-row sample record batch over a loopback TCP connection on the
        // given port using the synchronous ArrowStreamWriter API, then reads it back
        // on the accepting side with ArrowStreamReader and compares it with the original.
        //
        // createDictionaryArray: forwarded to TestData.CreateSampleRecordBatch.
        // port: loopback port the listener binds to and the sender connects to.
        public void CanWriteToNetworkStream(bool createDictionaryArray, int port)
        {
            RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 100, createDictionaryArray: createDictionaryArray);

            TcpListener listener = new TcpListener(IPAddress.Loopback, port);

            listener.Start();

            try
            {
                // The sender connects, writes everything and is disposed before the
                // listener accepts the connection further below.
                using (TcpClient sender = new TcpClient())
                {
                    sender.Connect(IPAddress.Loopback, port);
                    NetworkStream stream = sender.GetStream();

                    using (var writer = new ArrowStreamWriter(stream, originalBatch.Schema))
                    {
                        writer.WriteRecordBatch(originalBatch);
                        writer.WriteEnd();

                        stream.Flush();
                    }
                }

                using (TcpClient receiver = listener.AcceptTcpClient())
                {
                    NetworkStream stream = receiver.GetStream();
                    using (var reader = new ArrowStreamReader(stream))
                    {
                        RecordBatch newBatch = reader.ReadNextRecordBatch();
                        ArrowReaderVerifier.CompareBatches(originalBatch, newBatch);
                    }
                }
            }
            finally
            {
                // Stop listening even when writing, reading or the comparison throws,
                // so the listening socket is not left open after the test.
                listener.Stop();
            }
        }
 // Runs the concatenator over every case produced by GenerateTestData and
 // compares each result with that case's expected array.
 public void TestStandardCases()
 {
     foreach (var testCase in GenerateTestData())
     {
         (List <IArrowArray> inputArrays, IArrowArray expected) = testCase;

         IArrowArray concatenated = ArrowArrayConcatenatorReflector.InvokeConcatenate(inputArrays);
         ArrowReaderVerifier.CompareArrays(expected, concatenated);
     }
 }
Beispiel #9
0
 // Runs TestReaderFromStream with a callback that checks the reader against
 // the original batch via ArrowReaderVerifier.VerifyReader.
 public async Task ReadRecordBatch_Stream()
 {
     await TestReaderFromStream((arrowReader, expectedBatch) =>
     {
         // VerifyReader is called without awaiting; the callback then returns
         // an already-completed task.
         ArrowReaderVerifier.VerifyReader(arrowReader, expectedBatch);

         return Task.CompletedTask;
     });
 }
Beispiel #10
0
 // Runs TestReadRecordBatchHelper with a callback that checks the reader
 // against the original batch via ArrowReaderVerifier.VerifyReader.
 public async Task TestReadNextRecordBatch()
 {
     await TestReadRecordBatchHelper((arrowReader, expectedBatch) =>
     {
         // VerifyReader is called without awaiting; the callback then returns
         // an already-completed task.
         ArrowReaderVerifier.VerifyReader(arrowReader, expectedBatch);

         return Task.CompletedTask;
     });
 }
 // Runs TestReaderFromMemory with a callback that checks the reader against
 // the original batch via ArrowReaderVerifier.VerifyReader.
 //
 // writeEnd: forwarded unchanged to TestReaderFromMemory.
 public async Task ReadRecordBatch_Memory(bool writeEnd)
 {
     await TestReaderFromMemory(
         (arrowReader, expectedBatch) =>
         {
             // VerifyReader is called without awaiting; the callback then returns
             // an already-completed task.
             ArrowReaderVerifier.VerifyReader(arrowReader, expectedBatch);

             return Task.CompletedTask;
         },
         writeEnd);
 }
 // Runs TestReaderFromPartialReadStream with a callback that checks the reader
 // against the original batch via ArrowReaderVerifier.VerifyReader.
 //
 // createDictionaryArray: forwarded unchanged to TestReaderFromPartialReadStream.
 public async Task ReadRecordBatch_PartialReadStream(bool createDictionaryArray)
 {
     await TestReaderFromPartialReadStream(
         (arrowReader, expectedBatch) =>
         {
             // VerifyReader is called without awaiting; the callback then returns
             // an already-completed task.
             ArrowReaderVerifier.VerifyReader(arrowReader, expectedBatch);

             return Task.CompletedTask;
         },
         createDictionaryArray);
 }
        // Opens the stream with ArrowFileReader, asserts that it reports exactly one
        // record batch, and compares the batch at index 0 with the given recordBatch.
        private async Task ValidateRecordBatchFile(Stream stream, RecordBatch recordBatch)
        {
            ArrowFileReader fileReader = new ArrowFileReader(stream);

            int batchCount = await fileReader.RecordBatchCountAsync();
            Assert.Equal(1, batchCount);

            RecordBatch actualBatch = await fileReader.ReadRecordBatchAsync(0);
            ArrowReaderVerifier.CompareBatches(recordBatch, actualBatch);
        }
Beispiel #14
0
        // Runs TestReadRecordBatchHelper with a callback that reads the batch at
        // index 0 twice and compares both reads with the original batch.
        public async Task TestReadRecordBatchAsync()
        {
            await TestReadRecordBatchHelper(async (fileReader, expectedBatch) =>
            {
                RecordBatch firstRead = await fileReader.ReadRecordBatchAsync(0);
                ArrowReaderVerifier.CompareBatches(expectedBatch, firstRead);

                // Reading the same index a second time must yield an equal batch as well.
                RecordBatch repeatedRead = await fileReader.ReadRecordBatchAsync(0);
                ArrowReaderVerifier.CompareBatches(expectedBatch, repeatedRead);
            });
        }
Beispiel #15
0
        // Checks basic RecordBatch behaviour on a 1-row sample batch: constructing a
        // batch with length -1 must throw ArgumentOutOfRangeException, and the column
        // fetched by index 0 must compare equal to the column fetched by name "list0".
        public void TestRecordBatchBasics()
        {
            RecordBatch recordBatch = TestData.CreateSampleRecordBatch(length: 1);

            try
            {
                Assert.Throws <ArgumentOutOfRangeException>(() => new RecordBatch(recordBatch.Schema, recordBatch.Arrays, -1));

                var col1 = recordBatch.Column(0);
                var col2 = recordBatch.Column("list0");

                ArrowReaderVerifier.CompareArrays(col1, col2);
            }
            finally
            {
                // Dispose in a finally block so the batch is released even when one
                // of the assertions above throws.
                recordBatch.Dispose();
            }
        }
        // Writes originalBatch to an in-memory stream with ArrowStreamWriter (async
        // API, no explicit end marker written), reads the next batch back with
        // ArrowStreamReader and compares it with the original.
        private static async Task TestRoundTripRecordBatch(RecordBatch originalBatch)
        {
            using (var buffer = new MemoryStream())
            {
                // leaveOpen: true is passed so the stream can still be used for reading
                // after the writer has been disposed.
                using (var streamWriter = new ArrowStreamWriter(buffer, originalBatch.Schema, leaveOpen: true))
                {
                    await streamWriter.WriteRecordBatchAsync(originalBatch);
                }

                buffer.Position = 0;

                using (var streamReader = new ArrowStreamReader(buffer))
                {
                    RecordBatch readBack = streamReader.ReadNextRecordBatch();

                    ArrowReaderVerifier.CompareBatches(originalBatch, readBack);
                }
            }
        }
Beispiel #17
0
        // Writes originalBatch followed by an end marker to an in-memory stream with
        // ArrowStreamWriter (sync API, optional IpcOptions), reads the next batch back
        // with ArrowStreamReader and compares it with the original.
        private static void TestRoundTripRecordBatch(RecordBatch originalBatch, IpcOptions options = null)
        {
            using (var buffer = new MemoryStream())
            {
                // leaveOpen: true is passed so the stream can still be used for reading
                // after the writer has been disposed.
                using (var streamWriter = new ArrowStreamWriter(buffer, originalBatch.Schema, leaveOpen: true, options))
                {
                    streamWriter.WriteRecordBatch(originalBatch);
                    streamWriter.WriteEnd();
                }

                buffer.Position = 0;

                using (var streamReader = new ArrowStreamReader(buffer))
                {
                    RecordBatch readBack = streamReader.ReadNextRecordBatch();

                    ArrowReaderVerifier.CompareBatches(originalBatch, readBack);
                }
            }
        }
        // Round-trips a zero-row sample batch through an in-memory stream:
        // written with ArrowStreamWriter, read back with ArrowStreamReader and
        // compared with the original.
        public async Task WriteEmptyBatch()
        {
            RecordBatch emptyBatch = TestData.CreateSampleRecordBatch(length: 0);

            using (var buffer = new MemoryStream())
            {
                // leaveOpen: true is passed so the stream can still be used for reading
                // after the writer has been disposed.
                using (var streamWriter = new ArrowStreamWriter(buffer, emptyBatch.Schema, leaveOpen: true))
                {
                    await streamWriter.WriteRecordBatchAsync(emptyBatch);
                }

                buffer.Position = 0;

                using (var streamReader = new ArrowStreamReader(buffer))
                {
                    RecordBatch readBack = streamReader.ReadNextRecordBatch();

                    ArrowReaderVerifier.CompareBatches(emptyBatch, readBack);
                }
            }
        }