Example #1
0
        /// <summary>
        /// Writes a 100-row sample batch through an <see cref="ArrowFileWriter"/> configured with
        /// <c>WriteLegacyIpcFormat = true</c>, then reads the file back and checks that it holds
        /// exactly one batch that compares equal to the original.
        /// </summary>
        /// <remarks>
        /// NOTE(review): going by the test name, the intent is that the footer ends up 8-byte
        /// aligned; the alignment itself is not asserted here, only that the file reads back
        /// correctly — confirm against the writer implementation.
        /// </remarks>
        public async Task WritesFooterAlignedMulitpleOf8()
        {
            RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 100);

            // Disposed at the end of the method, after the reader and writer declared below.
            using var stream = new MemoryStream();

            // leaveOpen: true is passed to the writer; the same stream is rewound and read below.
            using var writer = new ArrowFileWriter(
                stream,
                originalBatch.Schema,
                leaveOpen: true,
                // use WriteLegacyIpcFormat, which only uses a 4-byte length prefix
                // which causes the length prefix to not be 8-byte aligned by default
                new IpcOptions()
            {
                WriteLegacyIpcFormat = true
            });

            await writer.WriteRecordBatchAsync(originalBatch);

            await writer.WriteEndAsync();

            // Rewind so the reader sees the file from its first byte.
            stream.Position = 0;

            using var reader = new ArrowFileReader(stream);
            int count = await reader.RecordBatchCountAsync();

            Assert.Equal(1, count);
            RecordBatch readBatch = await reader.ReadRecordBatchAsync(0);

            ArrowReaderVerifier.CompareBatches(originalBatch, readBatch);
        }
        /// <summary>
        /// Opens <paramref name="stream"/> with an <see cref="ArrowFileReader"/> and asserts that the
        /// file contains exactly one record batch which compares equal to
        /// <paramref name="recordBatch"/>.
        /// </summary>
        /// <param name="stream">Stream holding the Arrow file to validate.</param>
        /// <param name="recordBatch">The batch the file's single batch is compared against.</param>
        private async Task ValidateRecordBatchFile(Stream stream, RecordBatch recordBatch)
        {
            var fileReader = new ArrowFileReader(stream);

            // Exactly one batch is expected in the file.
            int batchCount = await fileReader.RecordBatchCountAsync();
            Assert.Equal(1, batchCount);

            // Fetch that batch by index and compare it with the expected one.
            RecordBatch actualBatch = await fileReader.ReadRecordBatchAsync(0);
            ArrowReaderVerifier.CompareBatches(recordBatch, actualBatch);
        }
        /// <summary>
        /// Calls <c>WriteStart</c> and <c>WriteEnd</c> on an <see cref="ArrowFileWriter"/> without
        /// writing any record batch, then reads the file back and checks that the batch count is
        /// zero, that the next batch is <c>null</c>, and that the schema matches the one given to
        /// the writer.
        /// </summary>
        public async Task WritesEmptyFile()
        {
            // This batch is only used as the source of the schema; it is never written.
            RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 1);

            // Stream, writer and reader are released when the method exits.
            using var stream = new MemoryStream();
            using var writer = new ArrowFileWriter(stream, originalBatch.Schema);

            writer.WriteStart();
            writer.WriteEnd();

            // Rewind so the reader sees the file from its first byte.
            stream.Position = 0;

            using var reader = new ArrowFileReader(stream);
            int count = await reader.RecordBatchCountAsync();

            Assert.Equal(0, count);
            RecordBatch readBatch = reader.ReadNextRecordBatch();

            Assert.Null(readBatch);
            SchemaComparer.Compare(originalBatch.Schema, reader.Schema);
        }
        /// <summary>
        /// Checks the Arrow file against the parsed JSON file: first the number of record batches,
        /// then the schema, then each batch in order, and finally that the Arrow file has no
        /// batches left over.
        /// </summary>
        /// <returns>
        /// <c>0</c> when every check completes; <c>-1</c> (after writing a message to the console)
        /// when the batch counts differ or the Arrow file yields an extra batch. How schema or
        /// batch mismatches are reported depends on the comparer helpers, which are not visible
        /// here.
        /// </returns>
        private async Task<int> Validate()
        {
            JsonFile expectedFile = await ParseJsonFile();

            using FileStream inputStream = ArrowFileInfo.OpenRead();
            using ArrowFileReader fileReader = new ArrowFileReader(inputStream);

            // Bail out early if the two files disagree on how many batches there are.
            int actualBatchCount = await fileReader.RecordBatchCountAsync();
            if (expectedFile.Batches.Count != actualBatchCount)
            {
                Console.WriteLine($"Incorrect batch count. JsonFile: {expectedFile.Batches.Count}, ArrowFile: {actualBatchCount}");
                return -1;
            }

            Schema expectedSchema = CreateSchema(expectedFile.Schema);
            Schema actualSchema = fileReader.Schema;
            SchemaComparer.Compare(expectedSchema, actualSchema);

            // Walk both files in lock-step: read the next Arrow batch, then build the JSON batch
            // at the same index, and compare the pair non-strictly.
            for (int batchIndex = 0; batchIndex < actualBatchCount; batchIndex++)
            {
                RecordBatch actualBatch = fileReader.ReadNextRecordBatch();
                RecordBatch expectedBatch = CreateRecordBatch(expectedSchema, expectedFile.Batches[batchIndex]);

                ArrowReaderVerifier.CompareBatches(expectedBatch, actualBatch, strictCompare: false);
            }

            // ensure there are no more batches in the file
            RecordBatch leftoverBatch = fileReader.ReadNextRecordBatch();
            if (leftoverBatch != null)
            {
                Console.WriteLine("The ArrowFile has more RecordBatches than it should.");
                return -1;
            }

            return 0;
        }