Exemplo n.º 1
0
        /// <summary>
        /// Writes one 100-row sample batch to an in-memory Arrow file, reads it back through
        /// an <see cref="ArrowFileReader"/> built with a <see cref="TestMemoryAllocator"/>,
        /// and asserts on the allocator statistics and on the stream state after the reader
        /// is disposed.
        /// </summary>
        /// <param name="shouldLeaveOpen">
        /// Forwarded as the reader's <c>leaveOpen</c> argument. When <c>true</c> the stream
        /// must still report a position after the reader is disposed; when <c>false</c>
        /// reading <c>Position</c> must throw <see cref="ObjectDisposedException"/>.
        /// </param>
        public async Task Ctor_MemoryPool_AllocatesFromPool(bool shouldLeaveOpen)
        {
            RecordBatch sourceBatch = TestData.CreateSampleRecordBatch(length: 100);

            using (var buffer = new MemoryStream())
            {
                // Produce the file content that the reader under test will consume.
                var fileWriter = new ArrowFileWriter(buffer, sourceBatch.Schema);
                await fileWriter.WriteRecordBatchAsync(sourceBatch);
                await fileWriter.WriteEndAsync();

                // Rewind so the reader starts from the beginning of what was written.
                buffer.Position = 0;

                var allocator = new TestMemoryAllocator();
                var fileReader = new ArrowFileReader(buffer, allocator, leaveOpen: shouldLeaveOpen);
                fileReader.ReadNextRecordBatch();

                // Reading one batch is expected to register exactly one allocation.
                Assert.Equal(1, allocator.Statistics.Allocations);
                Assert.True(allocator.Statistics.BytesAllocated > 0);

                fileReader.Dispose();

                if (!shouldLeaveOpen)
                {
                    Assert.Throws<ObjectDisposedException>(() => buffer.Position);
                    return;
                }

                Assert.True(buffer.Position > 0);
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Writes two sample batches (100 rows, then 50 rows) to an in-memory Arrow file and
        /// reads them back by index, including a repeated read of index 0 to exercise
        /// random access.
        /// </summary>
        public async Task TestReadMultipleRecordBatchAsync()
        {
            RecordBatch firstWritten = TestData.CreateSampleRecordBatch(length: 100);
            RecordBatch secondWritten = TestData.CreateSampleRecordBatch(length: 50);

            using (var buffer = new MemoryStream())
            {
                var fileWriter = new ArrowFileWriter(buffer, firstWritten.Schema);
                await fileWriter.WriteRecordBatchAsync(firstWritten);
                await fileWriter.WriteRecordBatchAsync(secondWritten);
                await fileWriter.WriteFooterAsync();

                // Rewind so the reader sees the file from the start.
                buffer.Position = 0;

                var fileReader = new ArrowFileReader(buffer);

                // The record batches come back by index in reverse order (back to front):
                // index 0 yields the batch written last.
                // TODO: is this a bug??
                RecordBatch atIndexZero = await fileReader.ReadRecordBatchAsync(0);
                ArrowReaderVerifier.CompareBatches(secondWritten, atIndexZero);

                RecordBatch atIndexOne = await fileReader.ReadRecordBatchAsync(1);
                ArrowReaderVerifier.CompareBatches(firstWritten, atIndexOne);

                // Read index 0 a second time to confirm random access works after
                // a later index has already been read.
                RecordBatch atIndexZeroAgain = await fileReader.ReadRecordBatchAsync(0);
                ArrowReaderVerifier.CompareBatches(secondWritten, atIndexZeroAgain);
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Writes two sample batches (100 rows, then 50 rows) to an in-memory Arrow file and
        /// checks that reading by index returns them in the order they were written,
        /// including a repeated read of index 0 to exercise random access.
        /// </summary>
        public async Task TestReadMultipleRecordBatchAsync()
        {
            RecordBatch firstWritten = TestData.CreateSampleRecordBatch(length: 100);
            RecordBatch secondWritten = TestData.CreateSampleRecordBatch(length: 50);

            using (var buffer = new MemoryStream())
            {
                var fileWriter = new ArrowFileWriter(buffer, firstWritten.Schema);
                await fileWriter.WriteRecordBatchAsync(firstWritten);
                await fileWriter.WriteRecordBatchAsync(secondWritten);
                await fileWriter.WriteEndAsync();

                // Rewind so the reader sees the file from the start.
                buffer.Position = 0;

                var fileReader = new ArrowFileReader(buffer);

                RecordBatch atIndexZero = await fileReader.ReadRecordBatchAsync(0);
                ArrowReaderVerifier.CompareBatches(firstWritten, atIndexZero);

                RecordBatch atIndexOne = await fileReader.ReadRecordBatchAsync(1);
                ArrowReaderVerifier.CompareBatches(secondWritten, atIndexOne);

                // Read index 0 a second time to confirm random access works after
                // a later index has already been read.
                RecordBatch atIndexZeroAgain = await fileReader.ReadRecordBatchAsync(0);
                ArrowReaderVerifier.CompareBatches(firstWritten, atIndexZeroAgain);
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Sample entry point: builds a four-column, ten-row record batch with the fluent
        /// builder, prints the allocator statistics, and writes the batch to
        /// <c>test.arrow</c> in the current directory.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        public static async Task Main(string[] args)
        {
            // Use a specific memory pool from which arrays will be allocated (optional)

            var memoryAllocator = new NativeMemoryAllocator(alignment: 64);

            // Build a record batch using the Fluent API

            var recordBatch = new RecordBatch.Builder(memoryAllocator)
                              .Append("Column A", false, col => col.Int32(array => array.AppendRange(Enumerable.Range(0, 10))))
                              .Append("Column B", false, col => col.Float(array => array.AppendRange(Enumerable.Range(0, 10).Select(x => Convert.ToSingle(x * 2)))))
                              .Append("Column C", false, col => col.String(array => array.AppendRange(Enumerable.Range(0, 10).Select(x => $"Item {x+1}"))))
                              .Append("Column D", false, col => col.Boolean(array => array.AppendRange(Enumerable.Range(0, 10).Select(x => x % 2 == 0))))
                              .Build();

            // Print memory allocation statistics

            Console.WriteLine("Allocations: {0}", memoryAllocator.Statistics.Allocations);
            Console.WriteLine("Allocated: {0} byte(s)", memoryAllocator.Statistics.BytesAllocated);

            // Write record batch to a file.
            // FileMode.Create truncates an existing file. File.OpenWrite would not, so a
            // longer "test.arrow" left over from an earlier run would keep its old tail
            // bytes after the newly written footer and the result would be unreadable.

            using (var stream = new FileStream("test.arrow", FileMode.Create, FileAccess.Write, FileShare.None))
            using (var writer = new ArrowFileWriter(stream, recordBatch.Schema))
            {
                await writer.WriteRecordBatchAsync(recordBatch);

                await writer.WriteFooterAsync();
            }

            Console.WriteLine("Done");
            Console.ReadKey();
        }
Exemplo n.º 5
0
        /// <summary>
        /// Writes one 100-row sample batch with the legacy IPC format enabled, then reads the
        /// file back and checks that it contains exactly one batch equal to the original.
        /// </summary>
        public async Task WritesFooterAlignedMulitpleOf8()
        {
            RecordBatch sourceBatch = TestData.CreateSampleRecordBatch(length: 100);

            var buffer = new MemoryStream();

            // use WriteLegacyIpcFormat, which only uses a 4-byte length prefix
            // which causes the length prefix to not be 8-byte aligned by default
            var legacyOptions = new IpcOptions { WriteLegacyIpcFormat = true };

            var fileWriter = new ArrowFileWriter(buffer, sourceBatch.Schema, leaveOpen: true, legacyOptions);
            await fileWriter.WriteRecordBatchAsync(sourceBatch);
            await fileWriter.WriteEndAsync();

            // Rewind so the reader sees the file from the start.
            buffer.Position = 0;

            var fileReader = new ArrowFileReader(buffer);

            int batchCount = await fileReader.RecordBatchCountAsync();
            Assert.Equal(1, batchCount);

            RecordBatch roundTripped = await fileReader.ReadRecordBatchAsync(0);
            ArrowReaderVerifier.CompareBatches(sourceBatch, roundTripped);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Shared arrangement for read tests: writes one 100-row sample batch to an in-memory
        /// Arrow file, opens an <see cref="ArrowFileReader"/> over it, and hands the reader
        /// and the original batch to the supplied verification callback.
        /// </summary>
        /// <param name="verificationFunc">
        /// Callback that receives the reader and the batch that was written, and performs
        /// the test-specific assertions. It is awaited before the stream is disposed.
        /// </param>
        private static async Task TestReadRecordBatchHelper(
            Func<ArrowFileReader, RecordBatch, Task> verificationFunc)
        {
            RecordBatch sourceBatch = TestData.CreateSampleRecordBatch(length: 100);

            using (var buffer = new MemoryStream())
            {
                var fileWriter = new ArrowFileWriter(buffer, sourceBatch.Schema);
                await fileWriter.WriteRecordBatchAsync(sourceBatch);
                await fileWriter.WriteFooterAsync();

                // Rewind so the reader sees the file from the start.
                buffer.Position = 0;

                await verificationFunc(new ArrowFileReader(buffer), sourceBatch);
            }
        }
Exemplo n.º 7
0
        /// <summary>
        /// Serializes this object's <c>fields</c> as a single-batch Arrow file and returns
        /// the resulting bytes as a <see cref="ByteString"/>.
        /// </summary>
        /// <returns>The Arrow file content read back from the in-memory stream.</returns>
        public ByteString ToGprcArrowFrame()
        {
            // One column per field, each appended with the same `true` flag.
            var batchBuilder = new RecordBatch.Builder();
            foreach (Field column in fields)
            {
                batchBuilder.Append(column.Name, true, column.ToArrowArray());
            }

            var batch = batchBuilder.Build();

            var buffer = new MemoryStream();
            var arrowWriter = new ArrowFileWriter(buffer, batch.Schema);
            arrowWriter.WriteRecordBatch(batch);
            arrowWriter.WriteEnd();

            // Rewind so FromStream reads everything that was just written.
            buffer.Position = 0;

            return ByteString.FromStream(buffer);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Writes an Arrow file that contains only the start and end sections (no record
        /// batches) and checks that a reader reports zero batches, returns <c>null</c> from
        /// <c>ReadNextRecordBatch</c>, and exposes the schema the writer was given.
        /// </summary>
        public async Task WritesEmptyFile()
        {
            // The sample batch is only used as a source for the schema; it is never written.
            RecordBatch schemaSource = TestData.CreateSampleRecordBatch(length: 1);

            var buffer = new MemoryStream();
            var fileWriter = new ArrowFileWriter(buffer, schemaSource.Schema);
            fileWriter.WriteStart();
            fileWriter.WriteEnd();

            // Rewind so the reader sees the file from the start.
            buffer.Position = 0;

            var fileReader = new ArrowFileReader(buffer);

            int batchCount = await fileReader.RecordBatchCountAsync();
            Assert.Equal(0, batchCount);

            RecordBatch nextBatch = fileReader.ReadNextRecordBatch();
            Assert.Null(nextBatch);

            SchemaComparer.Compare(schemaSource.Schema, fileReader.Schema);
        }
Exemplo n.º 9
0
        /// <summary>
        /// Writes one 100-row sample batch synchronously with the legacy IPC format enabled,
        /// then passes the stream and the original batch to <c>ValidateRecordBatchFile</c>.
        /// </summary>
        public async Task WritesFooterAlignedMulitpleOf8()
        {
            RecordBatch sourceBatch = TestData.CreateSampleRecordBatch(length: 100);

            var buffer = new MemoryStream();

            // use WriteLegacyIpcFormat, which only uses a 4-byte length prefix
            // which causes the length prefix to not be 8-byte aligned by default
            var legacyOptions = new IpcOptions { WriteLegacyIpcFormat = true };

            var fileWriter = new ArrowFileWriter(buffer, sourceBatch.Schema, leaveOpen: true, legacyOptions);
            fileWriter.WriteRecordBatch(sourceBatch);
            fileWriter.WriteEnd();

            // Rewind so validation reads the file from the start.
            buffer.Position = 0;

            await ValidateRecordBatchFile(buffer, sourceBatch);
        }
Exemplo n.º 10
0
        /// <summary>
        /// Parses the JSON input, converts its schema and each of its batches, and writes
        /// them as an Arrow file to the stream created from <c>ArrowFileInfo</c>.
        /// </summary>
        /// <returns>Always <c>0</c> once the file has been written and flushed.</returns>
        private async Task<int> JsonToArrow()
        {
            JsonFile parsedJson = await ParseJsonFile();
            Schema arrowSchema = CreateSchema(parsedJson.Schema);

            using (FileStream output = ArrowFileInfo.Create())
            {
                var arrowWriter = new ArrowFileWriter(output, arrowSchema);
                await arrowWriter.WriteStartAsync();

                // Convert and write the batches one at a time, in the order the JSON lists them.
                foreach (var jsonBatch in parsedJson.Batches)
                {
                    RecordBatch converted = CreateRecordBatch(arrowSchema, jsonBatch);
                    await arrowWriter.WriteRecordBatchAsync(converted);
                }

                await arrowWriter.WriteEndAsync();

                // Flush explicitly before the stream is disposed.
                await output.FlushAsync();
            }

            return 0;
        }