Exemplo n.º 1
0
        /// <summary>
        /// Verifies that a record batch written with an <see cref="ArrowStreamWriter"/> over a
        /// loopback TCP connection can be read back with an <see cref="ArrowStreamReader"/>
        /// and compares equal to the original batch.
        /// </summary>
        public async Task CanWriteToNetworkStreamAsync()
        {
            RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 100);

            // Binding to port 0 lets the operating system pick a free port, so the test
            // cannot collide with another process (or a parallel test run) on a fixed port.
            TcpListener listener = new TcpListener(IPAddress.Loopback, 0);
            listener.Start();

            try
            {
                // The port actually assigned is only known after Start().
                int port = ((IPEndPoint)listener.LocalEndpoint).Port;

                using (TcpClient sender = new TcpClient())
                {
                    sender.Connect(IPAddress.Loopback, port);
                    NetworkStream stream = sender.GetStream();

                    using (var writer = new ArrowStreamWriter(stream, originalBatch.Schema))
                    {
                        await writer.WriteRecordBatchAsync(originalBatch);

                        await writer.WriteEndAsync();

                        await stream.FlushAsync();
                    }
                }

                // The connection was queued by the listener while the sender was writing;
                // accept it now and read back what was sent.
                using (TcpClient receiver = listener.AcceptTcpClient())
                {
                    NetworkStream stream = receiver.GetStream();
                    using (var reader = new ArrowStreamReader(stream))
                    {
                        RecordBatch newBatch = reader.ReadNextRecordBatch();
                        ArrowReaderVerifier.CompareBatches(originalBatch, newBatch);
                    }
                }
            }
            finally
            {
                // Always release the listening socket, even when an assertion fails.
                listener.Stop();
            }
        }
        /// <summary>
        /// Writes every batch in <paramref name="originalBatches"/> to an in-memory Arrow stream,
        /// then reads the stream back and compares each batch read with the one written.
        /// </summary>
        /// <param name="originalBatches">Batches to round-trip; the schema of the first one is used for the writer.</param>
        /// <param name="options">Optional IPC options passed through to the writer.</param>
        private static async Task TestRoundTripRecordBatchesAsync(List<RecordBatch> originalBatches, IpcOptions options = null)
        {
            using (var buffer = new MemoryStream())
            {
                // leaveOpen keeps the buffer usable after the writer is disposed.
                using (var batchWriter = new ArrowStreamWriter(buffer, originalBatches[0].Schema, leaveOpen: true, options))
                {
                    for (int i = 0; i < originalBatches.Count; i++)
                    {
                        await batchWriter.WriteRecordBatchAsync(originalBatches[i]);
                    }

                    await batchWriter.WriteEndAsync();
                }

                // Rewind so the reader starts at the beginning of what was written.
                buffer.Position = 0;

                using (var batchReader = new ArrowStreamReader(buffer))
                {
                    for (int i = 0; i < originalBatches.Count; i++)
                    {
                        RecordBatch roundTripped = batchReader.ReadNextRecordBatch();
                        ArrowReaderVerifier.CompareBatches(originalBatches[i], roundTripped);
                    }
                }
            }
        }
        /// <summary>
        /// Verifies that an <see cref="ArrowStreamReader"/> constructed with a memory allocator
        /// performs the expected number of allocations through it, and that disposing the reader
        /// leaves the underlying stream open or closed according to <paramref name="shouldLeaveOpen"/>.
        /// </summary>
        /// <param name="shouldLeaveOpen">Passed to the reader; controls which post-dispose assertion runs.</param>
        /// <param name="createDictionaryArray">Forwarded to the sample batch factory.</param>
        /// <param name="expectedAllocations">Allocation count the test allocator must report after one read.</param>
        public async Task Ctor_MemoryPool_AllocatesFromPool(bool shouldLeaveOpen, bool createDictionaryArray, int expectedAllocations)
        {
            RecordBatch sourceBatch = TestData.CreateSampleRecordBatch(length: 100, createDictionaryArray: createDictionaryArray);

            using (var buffer = new MemoryStream())
            {
                var batchWriter = new ArrowStreamWriter(buffer, sourceBatch.Schema);
                await batchWriter.WriteRecordBatchAsync(sourceBatch);
                await batchWriter.WriteEndAsync();

                // Rewind so the reader sees the data from the start.
                buffer.Position = 0;

                var allocator = new TestMemoryAllocator();
                var batchReader = new ArrowStreamReader(buffer, allocator, shouldLeaveOpen);
                batchReader.ReadNextRecordBatch();

                Assert.Equal(expectedAllocations, allocator.Statistics.Allocations);
                Assert.True(allocator.Statistics.BytesAllocated > 0);

                batchReader.Dispose();

                if (!shouldLeaveOpen)
                {
                    // The reader owned the stream, so touching it now must throw.
                    Assert.Throws<ObjectDisposedException>(() => buffer.Position);
                    return;
                }

                // The stream is still usable and has been advanced by the read.
                Assert.True(buffer.Position > 0);
            }
        }
        /// <summary>
        /// Verifies that the stream reader keeps reading when the underlying stream
        /// hands back only part of the requested data on each Read call.
        /// </summary>
        /// <param name="verificationFunc">Callback that receives the reader and the original batch and performs the checks.</param>
        /// <param name="createDictionaryArray">Forwarded to the sample batch factory.</param>
        private static async Task TestReaderFromPartialReadStream(Func<ArrowStreamReader, RecordBatch, Task> verificationFunc, bool createDictionaryArray)
        {
            RecordBatch sourceBatch = TestData.CreateSampleRecordBatch(length: 100, createDictionaryArray: createDictionaryArray);

            using (var partialStream = new PartialReadStream())
            {
                var batchWriter = new ArrowStreamWriter(partialStream, sourceBatch.Schema);
                await batchWriter.WriteRecordBatchAsync(sourceBatch);
                await batchWriter.WriteEndAsync();

                // Rewind so the reader consumes everything that was just written.
                partialStream.Position = 0;

                var batchReader = new ArrowStreamReader(partialStream);
                await verificationFunc(batchReader, sourceBatch);
            }
        }
        /// <summary>
        /// Verifies that a stream containing only the start and end markers (no record batches)
        /// yields a null batch on read while still exposing the original schema.
        /// </summary>
        public async Task WritesEmptyFileAsync()
        {
            // The sample batch is only used as a source for the schema; it is never written.
            RecordBatch schemaSource = TestData.CreateSampleRecordBatch(length: 1);

            MemoryStream buffer = new MemoryStream();
            ArrowStreamWriter batchWriter = new ArrowStreamWriter(buffer, schemaSource.Schema);

            await batchWriter.WriteStartAsync();
            await batchWriter.WriteEndAsync();

            buffer.Position = 0;

            ArrowStreamReader batchReader = new ArrowStreamReader(buffer);

            Assert.Null(batchReader.ReadNextRecordBatch());
            SchemaComparer.Compare(schemaSource.Schema, batchReader.Schema);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Verifies that the continuation marker (-1) is present at the start of the stream and
        /// before the final zero length only when the legacy IPC format is NOT requested.
        /// </summary>
        /// <param name="writeLegacyIpcFormat">Value assigned to <c>IpcOptions.WriteLegacyIpcFormat</c>.</param>
        public async Task WriteLegacyIpcFormatAsync(bool writeLegacyIpcFormat)
        {
            RecordBatch sourceBatch = TestData.CreateSampleRecordBatch(length: 100);
            var ipcOptions = new IpcOptions { WriteLegacyIpcFormat = writeLegacyIpcFormat };

            using (var output = new MemoryStream())
            {
                using (var batchWriter = new ArrowStreamWriter(output, sourceBatch.Schema, leaveOpen: true, ipcOptions))
                {
                    await batchWriter.WriteRecordBatchAsync(sourceBatch);
                    await batchWriter.WriteEndAsync();
                }

                output.Position = 0;

                // Inspect the first 4 bytes and the last two 4-byte words of the output.
                byte[] written = output.ToArray();
                int leadingWord = BinaryPrimitives.ReadInt32LittleEndian(written);
                int secondToLastWord = BinaryPrimitives.ReadInt32LittleEndian(written.AsSpan(written.Length - 8));
                int lastWord = BinaryPrimitives.ReadInt32LittleEndian(written.AsSpan(written.Length - 4));

                if (!writeLegacyIpcFormat)
                {
                    // the latest IPC format has a continuation token at the start
                    Assert.Equal(-1, leadingWord);
                    Assert.Equal(-1, secondToLastWord);
                }
                else
                {
                    // the legacy IPC format doesn't have a continuation token at the start
                    Assert.NotEqual(-1, leadingWord);
                    Assert.NotEqual(-1, secondToLastWord);
                }

                // Both formats end with a zero word.
                Assert.Equal(0, lastWord);
            }
        }
        /// <summary>
        /// Writes a sample record batch to an in-memory stream and hands a reader over that
        /// stream, together with the original batch, to <paramref name="verificationFunc"/>.
        /// </summary>
        /// <param name="verificationFunc">Callback that performs the actual read and comparison.</param>
        /// <param name="writeEnd">When true, <c>WriteEndAsync</c> is called on the writer before the stream is rewound.</param>
        private static async Task TestReaderFromStream(
            Func<ArrowStreamReader, RecordBatch, Task> verificationFunc,
            bool writeEnd)
        {
            RecordBatch sourceBatch = TestData.CreateSampleRecordBatch(length: 100);

            using (var buffer = new MemoryStream())
            {
                var batchWriter = new ArrowStreamWriter(buffer, sourceBatch.Schema);
                await batchWriter.WriteRecordBatchAsync(sourceBatch);

                if (writeEnd)
                {
                    await batchWriter.WriteEndAsync();
                }

                // Rewind so the reader starts from the first byte written.
                buffer.Position = 0;

                var batchReader = new ArrowStreamReader(buffer);
                await verificationFunc(batchReader, sourceBatch);
            }
        }
Exemplo n.º 8
0
        /// <summary>
        /// Writes a batch of two single-element Int32 columns backed by 4-byte buffers and
        /// checks the tail of the output: each data buffer is followed by zero padding up to
        /// 8 bytes, and the stream ends with the end-of-stream marker.
        /// </summary>
        public async Task WriteBatchWithCorrectPaddingAsync()
        {
            byte ageByte = 0x04;
            byte countByte = 0x14;

            var schema = new Schema.Builder()
                .Field(f => f.Name("age").DataType(Int32Type.Default))
                .Field(f => f.Name("characterCount").DataType(Int32Type.Default))
                .Build();

            IArrowArray[] columns = new IArrowArray[]
            {
                new Int32Array(
                    new ArrowBuffer(new byte[] { ageByte, ageByte, 0x00, 0x00 }),
                    ArrowBuffer.Empty,
                    length: 1,
                    nullCount: 0,
                    offset: 0),
                new Int32Array(
                    new ArrowBuffer(new byte[] { countByte, countByte, 0x00, 0x00 }),
                    ArrowBuffer.Empty,
                    length: 1,
                    nullCount: 0,
                    offset: 0)
            };

            var batch = new RecordBatch(schema, columns, length: 1);

            await TestRoundTripRecordBatchAsync(batch);

            using (var output = new MemoryStream())
            {
                using (var batchWriter = new ArrowStreamWriter(output, batch.Schema, leaveOpen: true))
                {
                    await batchWriter.WriteRecordBatchAsync(batch);
                    await batchWriter.WriteEndAsync();
                }

                byte[] written = output.ToArray();
                int end = written.Length;

                // ensure that the data buffers at the end are 8-byte aligned:
                // first column occupies bytes [end-24, end-16), two value bytes then zeros
                Assert.Equal(ageByte, written[end - 24]);
                Assert.Equal(ageByte, written[end - 23]);
                for (int back = 22; back >= 17; back--)
                {
                    Assert.Equal(0, written[end - back]);
                }

                // second column occupies bytes [end-16, end-8), same layout
                Assert.Equal(countByte, written[end - 16]);
                Assert.Equal(countByte, written[end - 15]);
                for (int back = 14; back >= 9; back--)
                {
                    Assert.Equal(0, written[end - back]);
                }

                // verify the EOS is written correctly: four 0xFF bytes followed by four 0x00 bytes
                for (int back = 8; back >= 5; back--)
                {
                    Assert.Equal(0xFF, written[end - back]);
                }

                for (int back = 4; back >= 1; back--)
                {
                    Assert.Equal(0x00, written[end - back]);
                }
            }
        }