/// <summary>
/// Casts the "Values" column of <paramref name="batch"/> to a <see cref="FloatArray"/> and passes
/// its values to <c>FindMinMax</c>. Nothing is appended to <paramref name="batchBuilder"/>.
/// </summary>
/// <param name="batch">Record batch that must contain a "Values" column.</param>
/// <param name="batchBuilder">Not used by this method.</param>
public void Execute(RecordBatch batch, RecordBatch.Builder batchBuilder)
        {
            var valuesColumn = (FloatArray)batch.Column("Values");

            // Whatever FindMinMax produces is not used here.
            FindMinMax(valuesColumn.Values);
        }
Пример #2
0
        /// <summary>
        /// Builds a four-column record batch with the fluent builder API, prints the allocator's
        /// statistics and writes the batch to the file <c>test.arrow</c>.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        public static async Task Main(string[] args)
        {
            // Use a specific memory pool from which arrays will be allocated (optional)

            var memoryAllocator = new NativeMemoryAllocator(alignment: 64);

            // Build a record batch using the Fluent API

            var recordBatch = new RecordBatch.Builder(memoryAllocator)
                              .Append("Column A", false, col => col.Int32(array => array.AppendRange(Enumerable.Range(0, 10))))
                              .Append("Column B", false, col => col.Float(array => array.AppendRange(Enumerable.Range(0, 10).Select(x => Convert.ToSingle(x * 2)))))
                              .Append("Column C", false, col => col.String(array => array.AppendRange(Enumerable.Range(0, 10).Select(x => $"Item {x+1}"))))
                              .Append("Column D", false, col => col.Boolean(array => array.AppendRange(Enumerable.Range(0, 10).Select(x => x % 2 == 0))))
                              .Build();

            // Print memory allocation statistics

            Console.WriteLine("Allocations: {0}", memoryAllocator.Statistics.Allocations);
            Console.WriteLine("Allocated: {0} byte(s)", memoryAllocator.Statistics.BytesAllocated);

            // Write record batch to a file.
            // File.Create truncates an existing file. File.OpenWrite would keep the tail of a
            // longer, pre-existing "test.arrow" after the newly written content.

            using (var stream = File.Create("test.arrow"))
            using (var writer = new ArrowFileWriter(stream, recordBatch.Schema))
            {
                await writer.WriteRecordBatchAsync(recordBatch);

                await writer.WriteFooterAsync();
            }

            Console.WriteLine("Done");
            Console.ReadKey();
        }
Пример #3
0
        /// <summary>
        /// Builds a record batch whose nullable columns carry hand-written validity bitmaps and
        /// passes it to <c>TestRoundTripRecordBatchAsync</c>.
        /// </summary>
        /// <remarks>
        /// Each declared null count equals the number of cleared bits among the first ten bits of
        /// the corresponding bitmap (a cleared bit marks a null slot).
        /// </remarks>
        public async Task WriteBatchWithNullsAsync()
        {
            RecordBatch originalBatch = new RecordBatch.Builder()
                                        .Append("Column1", false, col => col.Int32(array => array.AppendRange(Enumerable.Range(0, 10))))
                                        .Append("Column2", true, new Int32Array(
                                                    valueBuffer: new ArrowBuffer.Builder <int>().AppendRange(Enumerable.Range(0, 10)).Build(),
                                                    // 0xfd = 1111_1101: only bit 1 is cleared; 0xff keeps bits 8 and 9 set.
                                                    nullBitmapBuffer: new ArrowBuffer.Builder <byte>().Append(0xfd).Append(0xff).Build(),
                                                    length: 10,
                                                    nullCount: 1,
                                                    offset: 0))
                                        .Append("Column3", true, new Int32Array(
                                                    valueBuffer: new ArrowBuffer.Builder <int>().AppendRange(Enumerable.Range(0, 10)).Build(),
                                                    // Every bit is cleared, so all ten slots are null.
                                                    nullBitmapBuffer: new ArrowBuffer.Builder <byte>().Append(0x00).Append(0x00).Build(),
                                                    length: 10,
                                                    nullCount: 10,
                                                    offset: 0))
                                        .Append("NullableBooleanColumn", true, new BooleanArray(
                                                    valueBuffer: new ArrowBuffer.Builder <byte>().Append(0xfd).Append(0xff).Build(),
                                                    // 0xed = 1110_1101: bits 1 and 4 are cleared.
                                                    nullBitmapBuffer: new ArrowBuffer.Builder <byte>().Append(0xed).Append(0xff).Build(),
                                                    length: 10,
                                                    nullCount: 2,
                                                    offset: 0))
                                        .Build();

            await TestRoundTripRecordBatchAsync(originalBatch);
        }
        /// <summary>
        /// Appends a non-nullable float column named "Force" to <paramref name="batchBuilder"/>.
        /// The values come from <c>Generator.Float</c>, which is asked for as many values as the
        /// first array of <paramref name="batch"/> has elements.
        /// </summary>
        /// <param name="batch">Batch whose first array determines the number of values.</param>
        /// <param name="batchBuilder">Builder that receives the new column.</param>
        public void Execute(RecordBatch batch, RecordBatch.Builder batchBuilder)
        {
            var rowCount  = batch.Arrays.First().Length;
            var generated = Generator.Float(rowCount);

            batchBuilder.Append(
                "Force",
                false,
                columnBuilder => columnBuilder.Float(floatBuilder => floatBuilder.AppendRange(generated)));
        }
Пример #5
0
        /// <summary>
        /// Casts the "Values" and "Values2" columns of <paramref name="batch"/> to
        /// <see cref="FloatArray"/> and passes their values to <c>FindSum</c>.
        /// Nothing is appended to <paramref name="batchBuilder"/>.
        /// </summary>
        /// <param name="batch">Record batch that must contain "Values" and "Values2" columns.</param>
        /// <param name="batchBuilder">Not used by this method.</param>
        public void Execute(RecordBatch batch, RecordBatch.Builder batchBuilder)
        {
            var first  = (FloatArray)batch.Column("Values");
            var second = (FloatArray)batch.Column("Values2");

            // Whatever FindSum produces is not used here.
            FindSum(first.Values, second.Values);
        }
        /// <summary>
        /// Checks that a value written into a column of a DataFrame created from an Arrow record
        /// batch can be read back, and that the column's null count is zero afterwards.
        /// </summary>
        public void TestMutationOnArrowColumn()
        {
            var sourceBatch = new RecordBatch.Builder()
                              .Append("Column1", false, col => col.Int32(array => array.AppendRange(Enumerable.Range(0, 10))))
                              .Build();

            var frame  = DataFrame.FromArrowRecordBatch(sourceBatch);
            var column = frame.Columns["Column1"];

            // Row 1 starts out as 1 (the column holds 0..9).
            Assert.Equal(1, column[1]);

            column[1] = 100;

            Assert.Equal(100, column[1]);
            Assert.Equal(0, column.NullCount);
        }
Пример #7
0
        /// <summary>
        /// Creates a record batch with a single nullable Int32 column named "test" that holds
        /// <paramref name="length"/> consecutive integers starting at <paramref name="startValue"/>.
        /// </summary>
        /// <param name="startValue">Value of the first element.</param>
        /// <param name="length">Number of elements to append; nothing is appended when it is not positive.</param>
        /// <returns>The built record batch.</returns>
        private RecordBatch CreateTestBatch(int startValue, int length)
        {
            var valueBuilder = new Int32Array.Builder();

            int offset = 0;
            while (offset < length)
            {
                valueBuilder.Append(startValue + offset);
                offset++;
            }

            var batchBuilder = new RecordBatch.Builder();
            batchBuilder.Append("test", true, valueBuilder.Build());

            return batchBuilder.Build();
        }
        /// <summary>
        /// Checks that converting a record batch to a DataFrame throws an
        /// <see cref="ArgumentException"/> (or a derived type) when a column declares 520 elements
        /// but supplies a null bitmap built from a single byte.
        /// </summary>
        public void TestInconsistentNullBitMapLength()
        {
            // Arrow allocates buffers of length 64 by default. 64 * 8 = 512 bits in the NullBitMapBuffer.
            // Anything less than 512 will not trigger a throw.
            const int ElementCount = 520;

            Int32Array source = new Int32Array.Builder().AppendRange(Enumerable.Range(0, ElementCount)).Build();

            var inconsistentColumn = new Int32Array(
                valueBuffer: source.ValueBuffer,
                nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0x00).Build(),
                length: ElementCount,
                nullCount: ElementCount,
                offset: 0);

            RecordBatch batch = new RecordBatch.Builder()
                                .Append("EmptyDataColumn", true, inconsistentColumn)
                                .Build();

            Assert.ThrowsAny<ArgumentException>(() => DataFrame.FromArrowRecordBatch(batch));
        }
Пример #9
0
        /// <summary>
        /// Checks that the read-only data buffers of a primitive column hold the same values as
        /// the column indexer, and that every bit of the read-only null bitmap buffers up to the
        /// column length is set (the column is built without nulls).
        /// </summary>
        public void TestPrimitiveColumnGetReadOnlyBuffers()
        {
            RecordBatch recordBatch = new RecordBatch.Builder()
                                      .Append("Column1", false, col => col.Int32(array => array.AppendRange(Enumerable.Range(0, 10)))).Build();
            DataFrame df = DataFrame.FromArrowRecordBatch(recordBatch);

            // Direct cast: a column of the wrong type fails here with an InvalidCastException
            // instead of surfacing later as a NullReferenceException.
            PrimitiveDataFrameColumn<int> column = (PrimitiveDataFrameColumn<int>)df["Column1"];

            IEnumerable<ReadOnlyMemory<int>>  buffers     = column.GetReadOnlyDataBuffers();
            IEnumerable<ReadOnlyMemory<byte>> nullBitMaps = column.GetReadOnlyNullBitMapBuffers();

            // Returns whether bit (index mod 8) of the given bitmap byte is set.
            bool GetBit(byte curBitMap, int index)
            {
                return ((curBitMap >> (index & 7)) & 1) != 0;
            }

            long i = 0;

            // Both enumerators are disposed when the block exits, including on assertion failure.
            using (IEnumerator<ReadOnlyMemory<int>> bufferEnumerator = buffers.GetEnumerator())
            using (IEnumerator<ReadOnlyMemory<byte>> nullBitMapsEnumerator = nullBitMaps.GetEnumerator())
            {
                while (bufferEnumerator.MoveNext() && nullBitMapsEnumerator.MoveNext())
                {
                    ReadOnlyMemory<int>  dataBuffer = bufferEnumerator.Current;
                    ReadOnlyMemory<byte> nullBitMap = nullBitMapsEnumerator.Current;

                    ReadOnlySpan<int> span = dataBuffer.Span;
                    for (int j = 0; j < span.Length; j++)
                    {
                        // Each buffer has a max length of int.MaxValue
                        Assert.Equal(span[j], column[j + i * int.MaxValue]);
                    }

                    ReadOnlySpan<byte> bitMapSpan = nullBitMap.Span;
                    // No nulls in this column, so each bit must be set
                    for (int j = 0; j < bitMapSpan.Length; j++)
                    {
                        for (int k = 0; k < 8; k++)
                        {
                            // Stop checking this byte once the bit index reaches the column length.
                            if (j * 8 + k == column.Length)
                            {
                                break;
                            }
                            Assert.True(GetBit(bitMapSpan[j], k));
                        }
                    }
                    i++;
                }
            }
        }
        /// <summary>
        /// Computes <c>velocity + force / mass</c> element by element from the "Velocity",
        /// "Force" and "Mass" float columns of <paramref name="batch"/> and appends the result to
        /// <paramref name="batchBuilder"/> as a non-nullable float column named "Velocity".
        /// </summary>
        /// <param name="batch">Batch that must contain "Velocity", "Force" and "Mass" columns.</param>
        /// <param name="batchBuilder">Builder that receives the computed column.</param>
        public void Execute(RecordBatch batch, RecordBatch.Builder batchBuilder)
        {
            var velocity = (FloatArray)batch.Column("Velocity");
            var force    = (FloatArray)batch.Column("Force");
            var mass     = (FloatArray)batch.Column("Mass");

            // Read each column's Values once, outside the loop, instead of re-reading the
            // property three times per element.
            var velocities = velocity.Values;
            var forces     = force.Values;
            var masses     = mass.Values;

            // The "Velocity" column determines how many elements are processed; "Force" and
            // "Mass" are indexed with the same positions.
            var length  = velocity.Length;
            var results = new float[length];

            for (var i = 0; i < length; i++)
            {
                results[i] = velocities[i] + forces[i] / masses[i];
            }

            batchBuilder.Append("Velocity", false,
                                arrayBuilder => arrayBuilder.Float(builder => builder.AppendRange(results)));
        }
Пример #11
0
        /// <summary>
        /// Builds a record batch covering several primitive column types plus a string column,
        /// converts it to a DataFrame, verifies the column types, and compares every record batch
        /// produced from the DataFrame against the original batch.
        /// </summary>
        public void TestArrowIntegration()
        {
            // Nullable columns are assembled from explicit value and validity-bitmap buffers.
            var partiallyNullInts = new Int32Array(
                valueBuffer: new ArrowBuffer.Builder<int>().AppendRange(Enumerable.Range(0, 10)).Build(),
                nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0xfd).Append(0xff).Build(),
                length: 10,
                nullCount: 1,
                offset: 0);

            var allNullInts = new Int32Array(
                valueBuffer: new ArrowBuffer.Builder<int>().AppendRange(Enumerable.Range(0, 10)).Build(),
                nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0x00).Append(0x00).Build(),
                length: 10,
                nullCount: 10,
                offset: 0);

            var nullableBooleans = new BooleanArray(
                valueBuffer: new ArrowBuffer.Builder<byte>().Append(0xfd).Append(0xff).Build(),
                nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0xed).Append(0xff).Build(),
                length: 10,
                nullCount: 2,
                offset: 0);

            RecordBatch originalBatch = new RecordBatch.Builder()
                .Append("Column1", false, col => col.Int32(array => array.AppendRange(Enumerable.Range(0, 10))))
                .Append("Column2", true, partiallyNullInts)
                .Append("Column3", true, allNullInts)
                .Append("NullableBooleanColumn", true, nullableBooleans)
                .Append("StringDataFrameColumn", false, new StringArray.Builder().AppendRange(Enumerable.Range(0, 10).Select(x => x.ToString())).Build())
                .Append("DoubleColumn", false, new DoubleArray.Builder().AppendRange(Enumerable.Repeat(1.0, 10)).Build())
                .Append("FloatColumn", false, new FloatArray.Builder().AppendRange(Enumerable.Repeat(1.0f, 10)).Build())
                .Append("ShortColumn", false, new Int16Array.Builder().AppendRange(Enumerable.Repeat((short)1, 10)).Build())
                .Append("LongColumn", false, new Int64Array.Builder().AppendRange(Enumerable.Repeat((long)1, 10)).Build())
                .Append("UIntColumn", false, new UInt32Array.Builder().AppendRange(Enumerable.Repeat((uint)1, 10)).Build())
                .Append("UShortColumn", false, new UInt16Array.Builder().AppendRange(Enumerable.Repeat((ushort)1, 10)).Build())
                .Append("ULongColumn", false, new UInt64Array.Builder().AppendRange(Enumerable.Repeat((ulong)1, 10)).Build())
                .Append("ByteColumn", false, new Int8Array.Builder().AppendRange(Enumerable.Repeat((sbyte)1, 10)).Build())
                .Append("UByteColumn", false, new UInt8Array.Builder().AppendRange(Enumerable.Repeat((byte)1, 10)).Build())
                .Build();

            DataFrame frame = DataFrame.FromArrowRecordBatch(originalBatch);

            DataFrameIOTests.VerifyColumnTypes(frame, testArrowStringColumn: true);

            // Every batch coming back out of the DataFrame must match the batch that went in.
            foreach (RecordBatch roundTripped in frame.ToArrowRecordBatches())
            {
                RecordBatchComparer.CompareBatches(originalBatch, roundTripped);
            }
        }
Пример #12
0
    /// <summary>
    /// Builds an Arrow record batch from <paramref name="rows"/>: one column is appended per
    /// field of <c>outputArrowSchema</c>, filled with each row's value at that field's index
    /// converted according to the field's Arrow type id.
    /// </summary>
    /// <param name="rows">Rows to convert; each row is indexed by schema field position.</param>
    /// <returns>The built record batch.</returns>
    /// <exception cref="NotSupportedException">
    /// A schema field has a type id other than Int64, Double, String, Timestamp, Binary or Boolean.
    /// </exception>
    private RecordBatch createOutputRecordBatch(List<IRecord> rows)
    {
        var recordBatchBuilder = new RecordBatch.Builder(memoryAllocator);

        for (int i = 0; i < this.outputArrowSchema.Fields.Count; i++)
        {
            // Per-iteration copy of the index: the lambdas below capture this local, so they
            // can never observe a later value of the shared loop variable.
            int columnIndex = i;

            var field = this.outputArrowSchema.GetFieldByIndex(columnIndex);
            switch (field.DataType.TypeId)
            {
            case ArrowTypeId.Int64:
                recordBatchBuilder.Append(field.Name, field.IsNullable, col => col.Int64(
                                              array => array.AppendRange(rows.Select(row => Convert.ToInt64(row[columnIndex])))));
                break;

            case ArrowTypeId.Double:
                recordBatchBuilder.Append(field.Name, field.IsNullable, col => col.Double(
                                              array => array.AppendRange(rows.Select(row => Convert.ToDouble(row[columnIndex])))));
                break;

            case ArrowTypeId.String:
                recordBatchBuilder.Append(field.Name, field.IsNullable, col => col.String(
                                              array => array.AppendRange(rows.Select(row => Convert.ToString(row[columnIndex])))));
                break;

            case ArrowTypeId.Timestamp:
                // Stored as Int64: tick distance from `epoch`, divided by MicrosecToMillisecRatio.
                // NOTE(review): the resulting unit depends on that constant, which is defined
                // outside this method — confirm it matches the schema's timestamp unit.
                recordBatchBuilder.Append(field.Name, field.IsNullable, col => col.Int64(
                                              array => array.AppendRange(rows.Select(row => (((DateTime)row[columnIndex]).Ticks - epoch.Ticks) / MicrosecToMillisecRatio))));
                break;

            case ArrowTypeId.Binary:
                recordBatchBuilder.Append(field.Name, field.IsNullable, col => col.Binary(
                                              array => array.AppendRange(rows.Select(row => (byte[])(row[columnIndex])))));
                break;

            case ArrowTypeId.Boolean:
                recordBatchBuilder.Append(field.Name, field.IsNullable, col => col.Boolean(
                                              array => array.AppendRange(rows.Select(row => Convert.ToBoolean(row[columnIndex])))));
                break;

            // NotSupportedException derives from Exception, so existing catch blocks still apply.
            default: throw new NotSupportedException("Unsupported Arrow type of output arrow schema: " + field.DataType.TypeId);
            }
        }

        return recordBatchBuilder.Build();
    }
Пример #13
0
        /// <summary>
        /// Serializes <c>fields</c> into the Arrow file format and returns the bytes as a
        /// <see cref="ByteString"/>. Every field becomes one nullable column named after the field.
        /// </summary>
        /// <returns>A <see cref="ByteString"/> created from the in-memory stream the Arrow file was written to.</returns>
        public ByteString ToGprcArrowFrame()
        {
            var recordBatchBuilder = new RecordBatch.Builder();

            foreach (Field field in fields)
            {
                recordBatchBuilder.Append(field.Name, true, field.ToArrowArray());
            }

            var recordBatch = recordBatchBuilder.Build();

            // Dispose the stream and the writer deterministically, also when writing throws.
            using (var stream = new MemoryStream())
            using (var writer = new ArrowFileWriter(stream, recordBatch.Schema))
            {
                writer.WriteRecordBatch(recordBatch);
                writer.WriteEnd();

                // Rewind so the ByteString is built from the start of the written data.
                stream.Position = 0;

                // The ByteString is created before the using blocks dispose the stream.
                return ByteString.FromStream(stream);
            }
        }
        /// <summary>
        /// Checks that DataFrame columns created from Arrow arrays with missing data and/or
        /// missing null bitmaps report consistent lengths and null counts, before and after
        /// individual cells are modified.
        /// </summary>
        public void TestEmptyArrowColumns()
        {
            // Tests to ensure that we don't crash and the internal NullCounts stay consistent on encountering:
            // 1. Data + Empty null bitmaps
            // 2. Empty Data + Null bitmaps
            // 3. Empty Data + Empty null bitmaps
            var emptyDataArray = new Int32Array(
                valueBuffer: ArrowBuffer.Empty,
                nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0x00).Append(0x00).Build(),
                length: 10,
                nullCount: 10,
                offset: 0);

            RecordBatch originalBatch = new RecordBatch.Builder()
                .Append("EmptyNullBitMapColumn", false, col => col.Int32(array => array.AppendRange(Enumerable.Range(0, 10))))
                .Append("EmptyDataColumn", true, emptyDataArray)
                .Build();

            DataFrame frame = DataFrame.FromArrowRecordBatch(originalBatch);

            var noBitmapColumn = frame.Columns["EmptyNullBitMapColumn"];
            var noDataColumn   = frame.Columns["EmptyDataColumn"];

            // Column with data but no null bitmap: nulling one cell raises the null count to 1.
            Assert.Equal(0, noBitmapColumn.NullCount);
            Assert.Equal(10, noBitmapColumn.Length);
            noBitmapColumn[9] = null;
            Assert.Equal(1, noBitmapColumn.NullCount);

            // Column with a bitmap but no data: assigning one cell lowers the null count to 9.
            Assert.Equal(10, noDataColumn.NullCount);
            Assert.Equal(10, noDataColumn.Length);
            noDataColumn[9] = 9;
            Assert.Equal(9, noDataColumn.NullCount);
            Assert.Equal(10, noDataColumn.Length);

            // Rows 0..8 were not modified above.
            for (int row = 0; row < 9; row++)
            {
                Assert.Equal(row, (int)noBitmapColumn[row]);
                Assert.Null(noDataColumn[row]);
            }

            RecordBatch emptyBatch = new RecordBatch.Builder()
                .Append("EmptyDataAndNullColumns", false, col => col.Int32(array => array.Clear()))
                .Build();
            DataFrame emptyDataFrame = DataFrame.FromArrowRecordBatch(emptyBatch);

            var emptyColumn = emptyDataFrame.Columns["EmptyDataAndNullColumns"];

            Assert.Equal(0, emptyDataFrame.Rows.Count);
            Assert.Equal(0, emptyColumn.Length);
            Assert.Equal(0, emptyColumn.NullCount);
        }
Пример #15
0
        /// <summary>
        /// Runs every action against the current batch <paramref name="iterations"/> times.
        /// After each pass the builder is built into the batch used by the next pass and replaced
        /// with a fresh builder.
        /// </summary>
        /// <param name="allocator">Allocator handed to each new <see cref="RecordBatch.Builder"/>.</param>
        /// <param name="batch">Batch passed to the actions on the first pass.</param>
        /// <param name="actions">Actions executed in order on every pass.</param>
        /// <param name="iterations">Number of passes; no pass runs when it is not positive.</param>
        private static void ExecuteActions(MemoryAllocator allocator, RecordBatch batch, IReadOnlyList<IAction> actions,
                                           int iterations)
        {
            var pendingBuilder = new RecordBatch.Builder(allocator);

            for (var remaining = iterations; remaining > 0; remaining--)
            {
                foreach (var step in actions)
                {
                    step.Execute(batch, pendingBuilder);
                }

                try
                {
                    batch          = pendingBuilder.Build();
                    pendingBuilder = new RecordBatch.Builder(allocator);
                }
                catch (InvalidOperationException)
                {
                    // Intentionally ignored: the previous batch and the same builder are reused
                    // on the next pass.
                    // NOTE(review): presumably Build() throws when no action appended a column —
                    // confirm against the builder implementation.
                }
            }
        }
Пример #16
0
        /// <summary>
        /// Wraps all columns of a reference batch into a single struct column named "Struct",
        /// converts that batch to a DataFrame, verifies the column types, and compares the record
        /// batches produced from the DataFrame with a reference batch whose column names carry
        /// the "Struct_" prefix.
        /// </summary>
        public void TestRecordBatchWithStructArrays()
        {
            // Builds the reference batch; every column name is prefixed with the given string.
            RecordBatch CreateRecordBatch(string prependColumnNamesWith = "")
            {
                return new RecordBatch.Builder()
                       .Append(prependColumnNamesWith + "Column1", false, col => col.Int32(array => array.AppendRange(Enumerable.Range(0, 10))))
                       .Append(prependColumnNamesWith + "Column2", true, new Int32Array(
                                   valueBuffer: new ArrowBuffer.Builder<int>().AppendRange(Enumerable.Range(0, 10)).Build(),
                                   nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0xfd).Append(0xff).Build(),
                                   length: 10,
                                   nullCount: 1,
                                   offset: 0))
                       .Append(prependColumnNamesWith + "Column3", true, new Int32Array(
                                   valueBuffer: new ArrowBuffer.Builder<int>().AppendRange(Enumerable.Range(0, 10)).Build(),
                                   nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0x00).Append(0x00).Build(),
                                   length: 10,
                                   nullCount: 10,
                                   offset: 0))
                       .Append(prependColumnNamesWith + "NullableBooleanColumn", true, new BooleanArray(
                                   valueBuffer: new ArrowBuffer.Builder<byte>().Append(0xfd).Append(0xff).Build(),
                                   nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0xed).Append(0xff).Build(),
                                   length: 10,
                                   nullCount: 2,
                                   offset: 0))
                       .Append(prependColumnNamesWith + "StringDataFrameColumn", false, new StringArray.Builder().AppendRange(Enumerable.Range(0, 10).Select(x => x.ToString())).Build())
                       .Append(prependColumnNamesWith + "DoubleColumn", false, new DoubleArray.Builder().AppendRange(Enumerable.Repeat(1.0, 10)).Build())
                       .Append(prependColumnNamesWith + "FloatColumn", false, new FloatArray.Builder().AppendRange(Enumerable.Repeat(1.0f, 10)).Build())
                       .Append(prependColumnNamesWith + "ShortColumn", false, new Int16Array.Builder().AppendRange(Enumerable.Repeat((short)1, 10)).Build())
                       .Append(prependColumnNamesWith + "LongColumn", false, new Int64Array.Builder().AppendRange(Enumerable.Repeat((long)1, 10)).Build())
                       .Append(prependColumnNamesWith + "UIntColumn", false, new UInt32Array.Builder().AppendRange(Enumerable.Repeat((uint)1, 10)).Build())
                       .Append(prependColumnNamesWith + "UShortColumn", false, new UInt16Array.Builder().AppendRange(Enumerable.Repeat((ushort)1, 10)).Build())
                       .Append(prependColumnNamesWith + "ULongColumn", false, new UInt64Array.Builder().AppendRange(Enumerable.Repeat((ulong)1, 10)).Build())
                       .Append(prependColumnNamesWith + "ByteColumn", false, new Int8Array.Builder().AppendRange(Enumerable.Repeat((sbyte)1, 10)).Build())
                       .Append(prependColumnNamesWith + "UByteColumn", false, new UInt8Array.Builder().AppendRange(Enumerable.Repeat((byte)1, 10)).Build())
                       .Build();
            }

            RecordBatch flatBatch = CreateRecordBatch();
            int rowCount = flatBatch.Length;

            // Validity bitmap for the struct column: one set bit per row.
            var validityBuilder = new ArrowBuffer.BitmapBuilder();
            for (int row = 0; row < rowCount; row++)
            {
                validityBuilder.Append(true);
            }
            ArrowBuffer allValid = validityBuilder.Build();

            // The struct's child fields and child arrays are taken from the flat batch.
            var childFields = flatBatch.Schema.Fields.Select((KeyValuePair<string, Field> pair) => pair.Value).ToList();
            var childArrays = flatBatch.Arrays.Cast<Apache.Arrow.Array>();

            StructType  structType   = new StructType(childFields);
            StructArray structArray  = new StructArray(structType, rowCount, childArrays, allValid);
            Schema      structSchema = new Schema.Builder().Field(new Field("Struct", structType, false)).Build();
            RecordBatch structBatch  = new RecordBatch(structSchema, new[] { structArray }, rowCount);

            DataFrame frame = DataFrame.FromArrowRecordBatch(structBatch);

            DataFrameIOTests.VerifyColumnTypes(frame, testArrowStringColumn: true);

            RecordBatch expected = CreateRecordBatch("Struct_");

            foreach (RecordBatch actual in frame.ToArrowRecordBatches())
            {
                RecordBatchComparer.CompareBatches(expected, actual);
            }
        }
Пример #17
0
 /// <summary>
 /// Appends the "Mass" column of <paramref name="batch"/> to <paramref name="batchBuilder"/>
 /// as a non-nullable column with the same name.
 /// </summary>
 /// <param name="batch">Batch that must contain a "Mass" column.</param>
 /// <param name="batchBuilder">Builder that receives the column.</param>
 public void Execute(RecordBatch batch, RecordBatch.Builder batchBuilder)
 {
     var massColumn = batch.Column("Mass");

     batchBuilder.Append("Mass", false, massColumn);
 }