/// <summary>
/// Reads the float column named "Values" from <paramref name="batch"/> and hands its values to FindMinMax.
/// </summary>
/// <param name="batch">Batch that must expose a "Values" column castable to FloatArray.</param>
/// <param name="batchBuilder">Not used by this action.</param>
public void Execute(RecordBatch batch, RecordBatch.Builder batchBuilder)
{
    var valuesColumn = (FloatArray)batch.Column("Values");
    FindMinMax(valuesColumn.Values);
}
/// <summary>
/// Sample entry point: builds a four-column record batch through the fluent builder, prints the
/// allocator statistics, and writes the batch to "test.arrow" in the current directory.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
public static async Task Main(string[] args)
{
    // Use a specific memory pool from which arrays will be allocated (optional)
    var memoryAllocator = new NativeMemoryAllocator(alignment: 64);

    // Build a record batch using the Fluent API
    var recordBatch = new RecordBatch.Builder(memoryAllocator)
        .Append("Column A", false, col => col.Int32(array => array.AppendRange(Enumerable.Range(0, 10))))
        .Append("Column B", false, col => col.Float(array => array.AppendRange(Enumerable.Range(0, 10).Select(x => Convert.ToSingle(x * 2)))))
        .Append("Column C", false, col => col.String(array => array.AppendRange(Enumerable.Range(0, 10).Select(x => $"Item {x+1}"))))
        .Append("Column D", false, col => col.Boolean(array => array.AppendRange(Enumerable.Range(0, 10).Select(x => x % 2 == 0))))
        .Build();

    // Print memory allocation statistics
    Console.WriteLine("Allocations: {0}", memoryAllocator.Statistics.Allocations);
    Console.WriteLine("Allocated: {0} byte(s)", memoryAllocator.Statistics.BytesAllocated);

    // Write record batch to a file.
    // File.Create truncates an existing file. File.OpenWrite would keep the old contents and only
    // overwrite from the start, leaving stale bytes after the footer if the old file was larger.
    using (var stream = File.Create("test.arrow"))
    using (var writer = new ArrowFileWriter(stream, recordBatch.Schema))
    {
        await writer.WriteRecordBatchAsync(recordBatch);
        await writer.WriteFooterAsync();
    }

    Console.WriteLine("Done");
    Console.ReadKey();
}
/// <summary>
/// Builds a batch whose nullable columns carry hand-written validity bitmaps and passes it to
/// TestRoundTripRecordBatchAsync.
/// </summary>
public async Task WriteBatchWithNullsAsync()
{
    RecordBatch originalBatch = new RecordBatch.Builder()
        .Append("Column1", false, col => col.Int32(array => array.AppendRange(Enumerable.Range(0, 10))))
        // Bitmap 0xfd 0xff: within the first 10 bits only bit 1 is cleared, so exactly one null.
        .Append("Column2", true, new Int32Array(
            valueBuffer: new ArrowBuffer.Builder<int>().AppendRange(Enumerable.Range(0, 10)).Build(),
            nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0xfd).Append(0xff).Build(),
            length: 10,
            nullCount: 1,
            offset: 0))
        // Bitmap 0x00 0x00: every bit cleared, so all 10 entries are null.
        .Append("Column3", true, new Int32Array(
            valueBuffer: new ArrowBuffer.Builder<int>().AppendRange(Enumerable.Range(0, 10)).Build(),
            nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0x00).Append(0x00).Build(),
            length: 10,
            nullCount: 10,
            offset: 0))
        // Bitmap 0xed 0xff: bits 1 and 4 are cleared within the first 10 bits, so two nulls.
        .Append("NullableBooleanColumn", true, new BooleanArray(
            valueBuffer: new ArrowBuffer.Builder<byte>().Append(0xfd).Append(0xff).Build(),
            nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0xed).Append(0xff).Build(),
            length: 10,
            nullCount: 2,
            offset: 0))
        .Build();

    await TestRoundTripRecordBatchAsync(originalBatch);
}
/// <summary>
/// Appends a non-nullable float column named "Force" to <paramref name="batchBuilder"/>, filled with
/// values from Generator.Float and sized to the length of the first array in <paramref name="batch"/>.
/// </summary>
/// <param name="batch">Source batch; only the length of its first array is read.</param>
/// <param name="batchBuilder">Builder that receives the new column.</param>
public void Execute(RecordBatch batch, RecordBatch.Builder batchBuilder)
{
    var rowCount = batch.Arrays.First().Length;
    var generated = Generator.Float(rowCount);

    batchBuilder.Append(
        "Force",
        false,
        column => column.Float(floats => floats.AppendRange(generated)));
}
/// <summary>
/// Reads the float columns "Values" and "Values2" from <paramref name="batch"/> and passes both
/// value sequences to FindSum.
/// </summary>
/// <param name="batch">Batch that must expose both columns as FloatArray.</param>
/// <param name="batchBuilder">Not used by this action.</param>
public void Execute(RecordBatch batch, RecordBatch.Builder batchBuilder)
{
    var first = (FloatArray)batch.Column("Values");
    var second = (FloatArray)batch.Column("Values2");

    FindSum(first.Values, second.Values);
}
/// <summary>
/// Checks that a value in a DataFrame column created from an Arrow record batch can be overwritten,
/// and that the column's null count stays at zero afterwards.
/// </summary>
public void TestMutationOnArrowColumn()
{
    var batchBuilder = new RecordBatch.Builder();
    batchBuilder.Append("Column1", false, col => col.Int32(array => array.AppendRange(Enumerable.Range(0, 10))));
    RecordBatch originalBatch = batchBuilder.Build();

    DataFrame df = DataFrame.FromArrowRecordBatch(originalBatch);

    // Row 1 starts out holding the value 1 (the column is 0..9).
    Assert.Equal(1, df.Columns["Column1"][1]);

    df.Columns["Column1"][1] = 100;

    Assert.Equal(100, df.Columns["Column1"][1]);
    Assert.Equal(0, df.Columns["Column1"].NullCount);
}
/// <summary>
/// Creates a record batch with a single nullable Int32 column named "test" holding
/// <paramref name="length"/> consecutive integers starting at <paramref name="startValue"/>.
/// </summary>
/// <param name="startValue">First value stored in the column.</param>
/// <param name="length">Number of values to append; nothing is appended when it is not positive.</param>
/// <returns>The built record batch.</returns>
private RecordBatch CreateTestBatch(int startValue, int length)
{
    var column = new Int32Array.Builder();

    int offset = 0;
    while (offset < length)
    {
        column.Append(startValue + offset);
        offset++;
    }

    var batch = new RecordBatch.Builder();
    batch.Append("test", true, column.Build());
    return batch.Build();
}
/// <summary>
/// Expects DataFrame.FromArrowRecordBatch to throw an ArgumentException (or a subtype) when a
/// column declares 520 rows but supplies a null bitmap built from a single appended byte.
/// </summary>
public void TestInconsistentNullBitMapLength()
{
    // Arrow allocates buffers of length 64 by default. 64 * 8 = 512 bits in the NullBitMapBuffer.
    // Anything less than 512 will not trigger a throw, hence 520 rows.
    Int32Array int32 = new Int32Array.Builder().AppendRange(Enumerable.Range(0, 520)).Build();

    var shortBitmap = new ArrowBuffer.Builder<byte>().Append(0x00).Build();
    var mismatchedColumn = new Int32Array(
        valueBuffer: int32.ValueBuffer,
        nullBitmapBuffer: shortBitmap,
        length: 520,
        nullCount: 520,
        offset: 0);

    RecordBatch originalBatch = new RecordBatch.Builder()
        .Append("EmptyDataColumn", true, mismatchedColumn)
        .Build();

    Assert.ThrowsAny<ArgumentException>(() => DataFrame.FromArrowRecordBatch(originalBatch));
}
/// <summary>
/// Walks the read-only data buffers and null bitmap buffers of an Int32 column created from an
/// Arrow record batch, checking that buffer contents match the column indexer and that every
/// validity bit covering a row is set (the column contains no nulls).
/// </summary>
public void TestPrimitiveColumnGetReadOnlyBuffers()
{
    RecordBatch recordBatch = new RecordBatch.Builder()
        .Append("Column1", false, col => col.Int32(array => array.AppendRange(Enumerable.Range(0, 10))))
        .Build();
    DataFrame df = DataFrame.FromArrowRecordBatch(recordBatch);

    PrimitiveDataFrameColumn<int> column = df["Column1"] as PrimitiveDataFrameColumn<int>;
    // Fail with a clear assertion instead of a NullReferenceException if the column type is unexpected.
    Assert.NotNull(column);

    IEnumerable<ReadOnlyMemory<int>> buffers = column.GetReadOnlyDataBuffers();
    IEnumerable<ReadOnlyMemory<byte>> nullBitMaps = column.GetReadOnlyNullBitMapBuffers();

    // Returns true when bit (index mod 8) of curBitMap is set.
    bool GetBit(byte curBitMap, int index)
    {
        return ((curBitMap >> (index & 7)) & 1) != 0;
    }

    long i = 0;
    // IEnumerator<T> is IDisposable; dispose both enumerators even when an assertion throws.
    using (IEnumerator<ReadOnlyMemory<int>> bufferEnumerator = buffers.GetEnumerator())
    using (IEnumerator<ReadOnlyMemory<byte>> nullBitMapsEnumerator = nullBitMaps.GetEnumerator())
    {
        while (bufferEnumerator.MoveNext() && nullBitMapsEnumerator.MoveNext())
        {
            ReadOnlyMemory<int> dataBuffer = bufferEnumerator.Current;
            ReadOnlyMemory<byte> nullBitMap = nullBitMapsEnumerator.Current;

            ReadOnlySpan<int> span = dataBuffer.Span;
            for (int j = 0; j < span.Length; j++)
            {
                // Each buffer has a max length of int.MaxValue
                Assert.Equal(span[j], column[j + i * int.MaxValue]);
            }

            ReadOnlySpan<byte> bitMapSpan = nullBitMap.Span;
            // No nulls in this column, so each bit must be set
            for (int j = 0; j < bitMapSpan.Length; j++)
            {
                for (int k = 0; k < 8; k++)
                {
                    // Use >= rather than ==: bits past the last row are padding and must not be
                    // asserted on, including those in any bytes after the boundary byte.
                    if (j * 8 + k >= column.Length)
                    {
                        break;
                    }

                    Assert.True(GetBit(bitMapSpan[j], k));
                }
            }

            i++;
        }
    }
}
/// <summary>
/// Computes <c>velocity + force / mass</c> element-wise from the "Velocity", "Force" and "Mass"
/// float columns of <paramref name="batch"/> and appends the result to
/// <paramref name="batchBuilder"/> as a non-nullable float column named "Velocity".
/// </summary>
/// <param name="batch">Batch that must expose the three columns as FloatArray.</param>
/// <param name="batchBuilder">Builder that receives the updated "Velocity" column.</param>
public void Execute(RecordBatch batch, RecordBatch.Builder batchBuilder)
{
    var velocity = (FloatArray)batch.Column("Velocity");
    var force = (FloatArray)batch.Column("Force");
    var mass = (FloatArray)batch.Column("Mass");

    // The result has one entry per velocity row; force and mass are indexed over the same range.
    var length = velocity.Length;

    // Read each Values accessor once instead of three times per loop iteration.
    var velocityValues = velocity.Values;
    var forceValues = force.Values;
    var massValues = mass.Values;

    var results = new float[length];
    for (var i = 0; i < length; i++)
    {
        results[i] = velocityValues[i] + forceValues[i] / massValues[i];
    }

    batchBuilder.Append("Velocity", false, arrayBuilder => arrayBuilder.Float(builder => builder.AppendRange(results)));
}
/// <summary>
/// Builds a record batch covering the supported primitive, boolean and string column kinds,
/// converts it to a DataFrame, verifies the resulting column types, and checks that converting
/// back to Arrow yields batches that compare equal to the original.
/// </summary>
public void TestArrowIntegration()
{
    var builder = new RecordBatch.Builder();

    // Int32 column built through the fluent array builder.
    builder.Append("Column1", false, col => col.Int32(array => array.AppendRange(Enumerable.Range(0, 10))));

    // Nullable columns with explicit validity bitmaps.
    builder.Append("Column2", true, new Int32Array(
        valueBuffer: new ArrowBuffer.Builder<int>().AppendRange(Enumerable.Range(0, 10)).Build(),
        nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0xfd).Append(0xff).Build(),
        length: 10,
        nullCount: 1,
        offset: 0));
    builder.Append("Column3", true, new Int32Array(
        valueBuffer: new ArrowBuffer.Builder<int>().AppendRange(Enumerable.Range(0, 10)).Build(),
        nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0x00).Append(0x00).Build(),
        length: 10,
        nullCount: 10,
        offset: 0));
    builder.Append("NullableBooleanColumn", true, new BooleanArray(
        valueBuffer: new ArrowBuffer.Builder<byte>().Append(0xfd).Append(0xff).Build(),
        nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0xed).Append(0xff).Build(),
        length: 10,
        nullCount: 2,
        offset: 0));

    // Non-nullable columns, one per remaining array type.
    builder.Append("StringDataFrameColumn", false, new StringArray.Builder().AppendRange(Enumerable.Range(0, 10).Select(x => x.ToString())).Build());
    builder.Append("DoubleColumn", false, new DoubleArray.Builder().AppendRange(Enumerable.Repeat(1.0, 10)).Build());
    builder.Append("FloatColumn", false, new FloatArray.Builder().AppendRange(Enumerable.Repeat(1.0f, 10)).Build());
    builder.Append("ShortColumn", false, new Int16Array.Builder().AppendRange(Enumerable.Repeat((short)1, 10)).Build());
    builder.Append("LongColumn", false, new Int64Array.Builder().AppendRange(Enumerable.Repeat((long)1, 10)).Build());
    builder.Append("UIntColumn", false, new UInt32Array.Builder().AppendRange(Enumerable.Repeat((uint)1, 10)).Build());
    builder.Append("UShortColumn", false, new UInt16Array.Builder().AppendRange(Enumerable.Repeat((ushort)1, 10)).Build());
    builder.Append("ULongColumn", false, new UInt64Array.Builder().AppendRange(Enumerable.Repeat((ulong)1, 10)).Build());
    builder.Append("ByteColumn", false, new Int8Array.Builder().AppendRange(Enumerable.Repeat((sbyte)1, 10)).Build());
    builder.Append("UByteColumn", false, new UInt8Array.Builder().AppendRange(Enumerable.Repeat((byte)1, 10)).Build());

    RecordBatch originalBatch = builder.Build();

    DataFrame df = DataFrame.FromArrowRecordBatch(originalBatch);
    DataFrameIOTests.VerifyColumnTypes(df, testArrowStringColumn: true);

    foreach (RecordBatch roundTripped in df.ToArrowRecordBatches())
    {
        RecordBatchComparer.CompareBatches(originalBatch, roundTripped);
    }
}
/// <summary>
/// Converts <paramref name="rows"/> into a record batch with one column per field of the output
/// Arrow schema, reading cell <c>row[i]</c> for the field at index <c>i</c>.
/// </summary>
/// <param name="rows">Rows to convert; each row is indexed by output field position.</param>
/// <returns>The built record batch.</returns>
/// <exception cref="NotSupportedException">
/// A field of the output schema has an Arrow type other than Int64, Double, String, Timestamp,
/// Binary or Boolean.
/// </exception>
private RecordBatch createOutputRecordBatch(List<IRecord> rows)
{
    var recordBatchBuilder = new RecordBatch.Builder(memoryAllocator);

    for (int i = 0; i < this.outputArrowSchema.Fields.Count; i++)
    {
        // Copy the loop counter: the lambdas below capture a variable, not a value, and the `for`
        // counter is shared across iterations. A per-iteration local keeps them correct even if
        // a projection were ever evaluated after the counter advances.
        int columnIndex = i;
        var field = this.outputArrowSchema.GetFieldByIndex(columnIndex);

        switch (field.DataType.TypeId)
        {
            case ArrowTypeId.Int64:
                recordBatchBuilder.Append(field.Name, field.IsNullable, col => col.Int64(
                    array => array.AppendRange(rows.Select(row => Convert.ToInt64(row[columnIndex])))));
                break;

            case ArrowTypeId.Double:
                recordBatchBuilder.Append(field.Name, field.IsNullable, col => col.Double(
                    array => array.AppendRange(rows.Select(row => Convert.ToDouble(row[columnIndex])))));
                break;

            case ArrowTypeId.String:
                recordBatchBuilder.Append(field.Name, field.IsNullable, col => col.String(
                    array => array.AppendRange(rows.Select(row => Convert.ToString(row[columnIndex])))));
                break;

            case ArrowTypeId.Timestamp:
                // Timestamps are appended through the Int64 builder as a tick offset from `epoch`
                // divided by MicrosecToMillisecRatio.
                // NOTE(review): DateTime.Ticks are 100 ns units; confirm the divisor yields the
                // unit the timestamp field expects.
                recordBatchBuilder.Append(field.Name, field.IsNullable, col => col.Int64(
                    array => array.AppendRange(rows.Select(row => (((DateTime)row[columnIndex]).Ticks - epoch.Ticks) / MicrosecToMillisecRatio))));
                break;

            case ArrowTypeId.Binary:
                recordBatchBuilder.Append(field.Name, field.IsNullable, col => col.Binary(
                    array => array.AppendRange(rows.Select(row => (byte[])(row[columnIndex])))));
                break;

            case ArrowTypeId.Boolean:
                recordBatchBuilder.Append(field.Name, field.IsNullable, col => col.Boolean(
                    array => array.AppendRange(rows.Select(row => Convert.ToBoolean(row[columnIndex])))));
                break;

            default:
                // NotSupportedException derives from Exception, so existing catch blocks still apply.
                throw new NotSupportedException("Unsupported Arrow type of output arrow schema: " + field.DataType.TypeId);
        }
    }

    return recordBatchBuilder.Build();
}
/// <summary>
/// Serializes the fields of this frame into an Arrow file held in memory and returns the bytes
/// as a ByteString. Every field is appended as a nullable column.
/// </summary>
/// <returns>The Arrow file contents read back from the start of the in-memory stream.</returns>
public ByteString ToGprcArrowFrame()
{
    var recordBatchBuilder = new RecordBatch.Builder();
    foreach (Field field in fields)
    {
        recordBatchBuilder.Append(field.Name, true, field.ToArrowArray());
    }

    var recordBatch = recordBatchBuilder.Build();

    // Dispose the stream and the writer once the bytes have been copied out; the ByteString is
    // created inside the using scope, before either is disposed.
    using (var stream = new MemoryStream())
    using (var writer = new ArrowFileWriter(stream, recordBatch.Schema))
    {
        writer.WriteRecordBatch(recordBatch);
        writer.WriteEnd();

        // Rewind so the whole file is read back from the beginning.
        stream.Position = 0;
        return ByteString.FromStream(stream);
    }
}
/// <summary>
/// Ensures DataFrame creation does not crash and that null counts stay consistent for:
/// 1. data with an empty null bitmap, 2. empty data with a null bitmap, and
/// 3. empty data with an empty null bitmap.
/// </summary>
public void TestEmptyArrowColumns()
{
    const string NoBitmapColumn = "EmptyNullBitMapColumn";
    const string NoDataColumn = "EmptyDataColumn";
    const string NothingColumn = "EmptyDataAndNullColumns";

    // Cases 1 and 2 share one batch.
    var firstBuilder = new RecordBatch.Builder();
    firstBuilder.Append(NoBitmapColumn, false, col => col.Int32(array => array.AppendRange(Enumerable.Range(0, 10))));
    firstBuilder.Append(NoDataColumn, true, new Int32Array(
        valueBuffer: ArrowBuffer.Empty,
        nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0x00).Append(0x00).Build(),
        length: 10,
        nullCount: 10,
        offset: 0));
    RecordBatch originalBatch = firstBuilder.Build();

    DataFrame df = DataFrame.FromArrowRecordBatch(originalBatch);

    // Case 1: no nulls at first; nulling the last row raises the count to one.
    Assert.Equal(0, df.Columns[NoBitmapColumn].NullCount);
    Assert.Equal(10, df.Columns[NoBitmapColumn].Length);
    df.Columns[NoBitmapColumn][9] = null;
    Assert.Equal(1, df.Columns[NoBitmapColumn].NullCount);

    // Case 2: all rows null at first; writing the last row lowers the count to nine.
    Assert.Equal(10, df.Columns[NoDataColumn].NullCount);
    Assert.Equal(10, df.Columns[NoDataColumn].Length);
    df.Columns[NoDataColumn][9] = 9;
    Assert.Equal(9, df.Columns[NoDataColumn].NullCount);
    Assert.Equal(10, df.Columns[NoDataColumn].Length);

    // Rows 0..8 were not touched by the mutations above.
    for (int row = 0; row < 9; row++)
    {
        Assert.Equal(row, (int)df.Columns[NoBitmapColumn][row]);
        Assert.Null(df.Columns[NoDataColumn][row]);
    }

    // Case 3: a column whose builder was cleared.
    var secondBuilder = new RecordBatch.Builder();
    secondBuilder.Append(NothingColumn, false, col => col.Int32(array => array.Clear()));
    RecordBatch batch1 = secondBuilder.Build();

    DataFrame emptyDataFrame = DataFrame.FromArrowRecordBatch(batch1);
    Assert.Equal(0, emptyDataFrame.Rows.Count);
    Assert.Equal(0, emptyDataFrame.Columns[NothingColumn].Length);
    Assert.Equal(0, emptyDataFrame.Columns[NothingColumn].NullCount);
}
/// <summary>
/// Runs every action against the current batch <paramref name="iterations"/> times. After each
/// pass the builder's output becomes the batch for the next pass and a fresh builder is created.
/// </summary>
/// <param name="allocator">Allocator handed to every RecordBatch.Builder created here.</param>
/// <param name="batch">Initial batch passed to the actions on the first pass.</param>
/// <param name="actions">Actions executed in order on each pass.</param>
/// <param name="iterations">Number of passes; no work is done when it is not positive.</param>
private static void ExecuteActions(MemoryAllocator allocator, RecordBatch batch, IReadOnlyList<IAction> actions, int iterations)
{
    var pending = new RecordBatch.Builder(allocator);

    var remaining = iterations;
    while (remaining > 0)
    {
        foreach (var step in actions)
        {
            step.Execute(batch, pending);
        }

        try
        {
            batch = pending.Build();
            pending = new RecordBatch.Builder(allocator);
        }
        catch (InvalidOperationException)
        {
            // Deliberately ignored: the current batch and builder are kept for the next pass.
            // NOTE(review): presumably Build throws when no column was appended - confirm.
        }

        remaining--;
    }
}
/// <summary>
/// Wraps all columns of a test batch into one struct column named "Struct", converts that batch
/// to a DataFrame, verifies the column types, and checks that converting back to Arrow yields
/// the original columns with their names prefixed by "Struct_".
/// </summary>
public void TestRecordBatchWithStructArrays()
{
    // Builds the reference batch; every column name gets the given prefix.
    RecordBatch CreateRecordBatch(string prependColumnNamesWith = "")
    {
        string Named(string columnName) => prependColumnNamesWith + columnName;

        var builder = new RecordBatch.Builder();
        builder.Append(Named("Column1"), false, col => col.Int32(array => array.AppendRange(Enumerable.Range(0, 10))));
        builder.Append(Named("Column2"), true, new Int32Array(
            valueBuffer: new ArrowBuffer.Builder<int>().AppendRange(Enumerable.Range(0, 10)).Build(),
            nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0xfd).Append(0xff).Build(),
            length: 10,
            nullCount: 1,
            offset: 0));
        builder.Append(Named("Column3"), true, new Int32Array(
            valueBuffer: new ArrowBuffer.Builder<int>().AppendRange(Enumerable.Range(0, 10)).Build(),
            nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0x00).Append(0x00).Build(),
            length: 10,
            nullCount: 10,
            offset: 0));
        builder.Append(Named("NullableBooleanColumn"), true, new BooleanArray(
            valueBuffer: new ArrowBuffer.Builder<byte>().Append(0xfd).Append(0xff).Build(),
            nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0xed).Append(0xff).Build(),
            length: 10,
            nullCount: 2,
            offset: 0));
        builder.Append(Named("StringDataFrameColumn"), false, new StringArray.Builder().AppendRange(Enumerable.Range(0, 10).Select(x => x.ToString())).Build());
        builder.Append(Named("DoubleColumn"), false, new DoubleArray.Builder().AppendRange(Enumerable.Repeat(1.0, 10)).Build());
        builder.Append(Named("FloatColumn"), false, new FloatArray.Builder().AppendRange(Enumerable.Repeat(1.0f, 10)).Build());
        builder.Append(Named("ShortColumn"), false, new Int16Array.Builder().AppendRange(Enumerable.Repeat((short)1, 10)).Build());
        builder.Append(Named("LongColumn"), false, new Int64Array.Builder().AppendRange(Enumerable.Repeat((long)1, 10)).Build());
        builder.Append(Named("UIntColumn"), false, new UInt32Array.Builder().AppendRange(Enumerable.Repeat((uint)1, 10)).Build());
        builder.Append(Named("UShortColumn"), false, new UInt16Array.Builder().AppendRange(Enumerable.Repeat((ushort)1, 10)).Build());
        builder.Append(Named("ULongColumn"), false, new UInt64Array.Builder().AppendRange(Enumerable.Repeat((ulong)1, 10)).Build());
        builder.Append(Named("ByteColumn"), false, new Int8Array.Builder().AppendRange(Enumerable.Repeat((sbyte)1, 10)).Build());
        builder.Append(Named("UByteColumn"), false, new UInt8Array.Builder().AppendRange(Enumerable.Repeat((byte)1, 10)).Build());
        return builder.Build();
    }

    RecordBatch originalBatch = CreateRecordBatch();
    int rowCount = originalBatch.Length;

    // Validity bitmap for the struct column: one set bit per row.
    var validityBitmapBuilder = new ArrowBuffer.BitmapBuilder();
    for (int row = 0; row < rowCount; row++)
    {
        validityBitmapBuilder.Append(true);
    }

    ArrowBuffer validityBitmap = validityBitmapBuilder.Build();

    // The struct type reuses the original batch's fields; its children are the original arrays.
    var structFields = originalBatch.Schema.Fields.Select((KeyValuePair<string, Field> pair) => pair.Value).ToList();
    var structType = new StructType(structFields);
    var structArray = new StructArray(structType, rowCount, originalBatch.Arrays.Cast<Apache.Arrow.Array>(), validityBitmap);

    Schema schema = new Schema.Builder().Field(new Field("Struct", structType, false)).Build();
    var recordBatch = new RecordBatch(schema, new[] { structArray }, rowCount);

    DataFrame df = DataFrame.FromArrowRecordBatch(recordBatch);
    DataFrameIOTests.VerifyColumnTypes(df, testArrowStringColumn: true);

    RecordBatch expected = CreateRecordBatch("Struct_");
    foreach (RecordBatch roundTripped in df.ToArrowRecordBatches())
    {
        RecordBatchComparer.CompareBatches(expected, roundTripped);
    }
}
/// <summary>
/// Copies the "Mass" column of <paramref name="batch"/> into <paramref name="batchBuilder"/>
/// unchanged, as a non-nullable column with the same name.
/// </summary>
/// <param name="batch">Batch providing the "Mass" column.</param>
/// <param name="batchBuilder">Builder that receives the column.</param>
public void Execute(RecordBatch batch, RecordBatch.Builder batchBuilder) =>
    batchBuilder.Append("Mass", false, batch.Column("Mass"));