/// <summary>
/// Handles a <see cref="DoubleType"/> visit by storing a new <see cref="DoubleArray"/>
/// built from the held array data.
/// </summary>
/// <param name="type">The visited type; not read by this method.</param>
public void Visit(DoubleType type)
{
    _array = new DoubleArray(_data);
}
/// <summary>
/// Handles a <see cref="StructType"/> visit by storing a new <see cref="StructArray"/>
/// built from the held array data.
/// </summary>
/// <param name="type">The visited type; not read by this method.</param>
public void Visit(StructType type)
{
    _array = new StructArray(_data);
}
/// <summary>
/// Creates a list array over <paramref name="data"/> and records its child values.
/// </summary>
/// <param name="data">Array data handed to the base constructor and then validated.</param>
/// <param name="values">The array assigned to <c>Values</c>.</param>
private ListArray(ArrayData data, IArrowArray values)
    : base(data)
{
    // Validation runs after the base constructor: first the buffer count, then the data type.
    data.EnsureBufferCount(2);
    data.EnsureDataType(ArrowTypeId.List);

    Values = values;
}
/// <summary>
/// Compares <paramref name="actualArray"/> against <paramref name="expectedArray"/> by letting
/// the actual array accept an <c>ArrayComparer</c> built around the expected one.
/// </summary>
/// <param name="expectedArray">The array holding the expected contents.</param>
/// <param name="actualArray">The array that is visited by the comparer.</param>
/// <param name="strictCompare">Forwarded unchanged to the comparer; defaults to <c>true</c>.</param>
public static void CompareArrays(IArrowArray expectedArray, IArrowArray actualArray, bool strictCompare = true)
{
    var comparer = new ArrayComparer(expectedArray, strictCompare);
    actualArray.Accept(comparer);
}
/// <summary>
/// Handles a <see cref="StringType"/> visit by storing a new <see cref="StringArray"/>
/// built from the held array data.
/// </summary>
/// <param name="type">The visited type; not read by this method.</param>
public void Visit(StringType type)
{
    _array = new StringArray(_data);
}
/// <summary>
/// Converts one Arrow array into a <see cref="DataFrameColumn"/> and appends it to
/// <paramref name="ret"/>. Struct arrays are flattened: every child field is appended
/// recursively as its own column.
/// </summary>
/// <param name="field">Arrow field describing <paramref name="arrowArray"/>; supplies the column name and type.</param>
/// <param name="arrowArray">The Arrow array whose buffers back the new column.</param>
/// <param name="ret">The data frame that receives the new column(s).</param>
/// <param name="fieldNamePrefix">Text prepended to <c>field.Name</c> to form the column name.</param>
/// <exception cref="NotImplementedException">
/// The field's Arrow type has no conversion here; the message is the Arrow type's name.
/// </exception>
private static void AppendDataFrameColumnFromArrowArray(Field field, IArrowArray arrowArray, DataFrame ret, string fieldNamePrefix = "")
{
    IArrowType fieldType = field.DataType;
    DataFrameColumn dataFrameColumn = null;
    string fieldName = fieldNamePrefix + field.Name;

    switch (fieldType.TypeId)
    {
        case ArrowTypeId.Boolean:
            BooleanArray arrowBooleanArray = (BooleanArray)arrowArray;
            ReadOnlyMemory<byte> valueBuffer = arrowBooleanArray.ValueBuffer.Memory;
            ReadOnlyMemory<byte> nullBitMapBuffer = arrowBooleanArray.NullBitmapBuffer.Memory;
            dataFrameColumn = new BooleanDataFrameColumn(fieldName, valueBuffer, nullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
            break;

        case ArrowTypeId.Double:
            PrimitiveArray<double> arrowDoubleArray = (PrimitiveArray<double>)arrowArray;
            ReadOnlyMemory<byte> doubleValueBuffer = arrowDoubleArray.ValueBuffer.Memory;
            ReadOnlyMemory<byte> doubleNullBitMapBuffer = arrowDoubleArray.NullBitmapBuffer.Memory;
            dataFrameColumn = new DoubleDataFrameColumn(fieldName, doubleValueBuffer, doubleNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
            break;

        case ArrowTypeId.Float:
            PrimitiveArray<float> arrowFloatArray = (PrimitiveArray<float>)arrowArray;
            ReadOnlyMemory<byte> floatValueBuffer = arrowFloatArray.ValueBuffer.Memory;
            ReadOnlyMemory<byte> floatNullBitMapBuffer = arrowFloatArray.NullBitmapBuffer.Memory;
            dataFrameColumn = new SingleDataFrameColumn(fieldName, floatValueBuffer, floatNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
            break;

        case ArrowTypeId.Int8:
            PrimitiveArray<sbyte> arrowsbyteArray = (PrimitiveArray<sbyte>)arrowArray;
            ReadOnlyMemory<byte> sbyteValueBuffer = arrowsbyteArray.ValueBuffer.Memory;
            ReadOnlyMemory<byte> sbyteNullBitMapBuffer = arrowsbyteArray.NullBitmapBuffer.Memory;
            dataFrameColumn = new SByteDataFrameColumn(fieldName, sbyteValueBuffer, sbyteNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
            break;

        case ArrowTypeId.Int16:
            PrimitiveArray<short> arrowshortArray = (PrimitiveArray<short>)arrowArray;
            ReadOnlyMemory<byte> shortValueBuffer = arrowshortArray.ValueBuffer.Memory;
            ReadOnlyMemory<byte> shortNullBitMapBuffer = arrowshortArray.NullBitmapBuffer.Memory;
            dataFrameColumn = new Int16DataFrameColumn(fieldName, shortValueBuffer, shortNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
            break;

        case ArrowTypeId.Int32:
            PrimitiveArray<int> arrowIntArray = (PrimitiveArray<int>)arrowArray;
            ReadOnlyMemory<byte> intValueBuffer = arrowIntArray.ValueBuffer.Memory;
            ReadOnlyMemory<byte> intNullBitMapBuffer = arrowIntArray.NullBitmapBuffer.Memory;
            dataFrameColumn = new Int32DataFrameColumn(fieldName, intValueBuffer, intNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
            break;

        case ArrowTypeId.Int64:
            PrimitiveArray<long> arrowLongArray = (PrimitiveArray<long>)arrowArray;
            ReadOnlyMemory<byte> longValueBuffer = arrowLongArray.ValueBuffer.Memory;
            ReadOnlyMemory<byte> longNullBitMapBuffer = arrowLongArray.NullBitmapBuffer.Memory;
            dataFrameColumn = new Int64DataFrameColumn(fieldName, longValueBuffer, longNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
            break;

        case ArrowTypeId.String:
            StringArray stringArray = (StringArray)arrowArray;
            ReadOnlyMemory<byte> dataMemory = stringArray.ValueBuffer.Memory;
            ReadOnlyMemory<byte> offsetsMemory = stringArray.ValueOffsetsBuffer.Memory;
            ReadOnlyMemory<byte> nullMemory = stringArray.NullBitmapBuffer.Memory;
            dataFrameColumn = new ArrowStringDataFrameColumn(fieldName, dataMemory, offsetsMemory, nullMemory, stringArray.Length, stringArray.NullCount);
            break;

        case ArrowTypeId.UInt8:
            PrimitiveArray<byte> arrowbyteArray = (PrimitiveArray<byte>)arrowArray;
            ReadOnlyMemory<byte> byteValueBuffer = arrowbyteArray.ValueBuffer.Memory;
            ReadOnlyMemory<byte> byteNullBitMapBuffer = arrowbyteArray.NullBitmapBuffer.Memory;
            dataFrameColumn = new ByteDataFrameColumn(fieldName, byteValueBuffer, byteNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
            break;

        case ArrowTypeId.UInt16:
            PrimitiveArray<ushort> arrowUshortArray = (PrimitiveArray<ushort>)arrowArray;
            ReadOnlyMemory<byte> ushortValueBuffer = arrowUshortArray.ValueBuffer.Memory;
            ReadOnlyMemory<byte> ushortNullBitMapBuffer = arrowUshortArray.NullBitmapBuffer.Memory;
            dataFrameColumn = new UInt16DataFrameColumn(fieldName, ushortValueBuffer, ushortNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
            break;

        case ArrowTypeId.UInt32:
            PrimitiveArray<uint> arrowUintArray = (PrimitiveArray<uint>)arrowArray;
            ReadOnlyMemory<byte> uintValueBuffer = arrowUintArray.ValueBuffer.Memory;
            ReadOnlyMemory<byte> uintNullBitMapBuffer = arrowUintArray.NullBitmapBuffer.Memory;
            dataFrameColumn = new UInt32DataFrameColumn(fieldName, uintValueBuffer, uintNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
            break;

        case ArrowTypeId.UInt64:
            PrimitiveArray<ulong> arrowUlongArray = (PrimitiveArray<ulong>)arrowArray;
            ReadOnlyMemory<byte> ulongValueBuffer = arrowUlongArray.ValueBuffer.Memory;
            ReadOnlyMemory<byte> ulongNullBitMapBuffer = arrowUlongArray.NullBitmapBuffer.Memory;
            dataFrameColumn = new UInt64DataFrameColumn(fieldName, ulongValueBuffer, ulongNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
            break;

        case ArrowTypeId.Struct:
            StructArray structArray = (StructArray)arrowArray;
            StructType structType = (StructType)field.DataType;

            // Walk the struct's field descriptors and child arrays in lock-step; stop at the shorter one.
            // The enumerators are disposed once the walk finishes.
            using (IEnumerator<Field> fieldsEnumerator = structType.Fields.GetEnumerator())
            using (IEnumerator<IArrowArray> structArrayEnumerator = structArray.Fields.GetEnumerator())
            {
                while (fieldsEnumerator.MoveNext() && structArrayEnumerator.MoveNext())
                {
                    // NOTE(review): the prefix handed down is field.Name + "_", not fieldName + "_",
                    // so an enclosing prefix is not carried into nested structs — confirm this is intended.
                    AppendDataFrameColumnFromArrowArray(fieldsEnumerator.Current, structArrayEnumerator.Current, ret, field.Name + "_");
                }
            }

            break;

        case ArrowTypeId.Decimal:
        case ArrowTypeId.Binary:
        case ArrowTypeId.Date32:
        case ArrowTypeId.Date64:
        case ArrowTypeId.Dictionary:
        case ArrowTypeId.FixedSizedBinary:
        case ArrowTypeId.HalfFloat:
        case ArrowTypeId.Interval:
        case ArrowTypeId.List:
        case ArrowTypeId.Map:
        case ArrowTypeId.Null:
        case ArrowTypeId.Time32:
        case ArrowTypeId.Time64:
        default:
            // Report the actual Arrow type name; nameof(fieldType.Name) would always produce "Name".
            throw new NotImplementedException(fieldType.Name);
    }

    // Struct fields append their children directly and leave dataFrameColumn null.
    if (dataFrameColumn != null)
    {
        ret.Columns.Insert(ret.Columns.Count, dataFrameColumn);
    }
}
/// <summary>
/// Handles an <see cref="Int8Type"/> visit by storing a new <see cref="Int8Array"/>
/// built from the held array data.
/// </summary>
/// <param name="type">The visited type; not read by this method.</param>
public void Visit(Int8Type type)
{
    _array = new Int8Array(_data);
}
/// <summary>
/// Handles a <see cref="TimestampType"/> visit by storing a new <see cref="TimestampArray"/>
/// built from the held array data.
/// </summary>
/// <param name="type">The visited type; not read by this method.</param>
public void Visit(TimestampType type)
{
    _array = new TimestampArray(_data);
}
/// <summary>
/// Handles a <see cref="Decimal256Type"/> visit: obtains the array data from
/// <c>GetDecimalArrayData</c> and assigns a new <see cref="Decimal256Array"/> over it to <c>Array</c>.
/// </summary>
/// <param name="type">The decimal type forwarded to <c>GetDecimalArrayData</c>.</param>
public void Visit(Decimal256Type type)
{
    var decimalData = GetDecimalArrayData(type);
    Array = new Decimal256Array(decimalData);
}
/// <summary>
/// Builds a delegate that reads the element at a given index of <paramref name="array"/>
/// as a <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">
/// The requested element type. Handled here: the primitive numeric types, <c>bool</c>,
/// <c>DateTime</c>, <c>DateTimeOffset</c> and <c>TimeSpan</c> (each also as its nullable form),
/// plus <c>byte[]</c> and <c>string</c>.
/// </typeparam>
/// <param name="array">The Arrow array to read from; its data is re-wrapped in a typed array.</param>
/// <returns>
/// <c>null</c> when <paramref name="array"/> is <c>null</c>; otherwise the getter. For the
/// non-nullable value types a missing value is read through <c>GetValueOrDefault()</c>.
/// </returns>
/// <exception cref="NotSupportedException">
/// <typeparamref name="T"/> is not one of the handled types, or it is a date type and the
/// array is neither Date32 nor Date64.
/// </exception>
public static Func<int, T> GetGetter<T>(IArrowArray array)
{
    if (array is null)
    {
        return null;
    }

    // The typed delegate is routed through object so it can be returned as Func<int, T>;
    // every call site below is guarded by a typeof(T) check that makes the cast valid.
    Func<int, T> AsGetter<TValue>(Func<int, TValue> typedGetter) =>
        (Func<int, T>)(object)typedGetter;

    // TODO: determine fastest way to read out a value from the array.

    if (typeof(T) == typeof(bool))
    {
        var booleans = new BooleanArray(array.Data);
        return AsGetter<bool>(i => booleans.GetBoolean(i).GetValueOrDefault());
    }

    if (typeof(T) == typeof(bool?))
    {
        var booleans = new BooleanArray(array.Data);
        return AsGetter<bool?>(booleans.GetBoolean);
    }

    if (typeof(T) == typeof(sbyte))
    {
        var sbytes = new Int8Array(array.Data);
        return AsGetter<sbyte>(i => sbytes.GetValue(i).GetValueOrDefault());
    }

    if (typeof(T) == typeof(sbyte?))
    {
        var sbytes = new Int8Array(array.Data);
        return AsGetter<sbyte?>(sbytes.GetValue);
    }

    if (typeof(T) == typeof(byte))
    {
        var bytes = new UInt8Array(array.Data);
        return AsGetter<byte>(i => bytes.GetValue(i).GetValueOrDefault());
    }

    if (typeof(T) == typeof(byte?))
    {
        var bytes = new UInt8Array(array.Data);
        return AsGetter<byte?>(bytes.GetValue);
    }

    if (typeof(T) == typeof(short))
    {
        var shorts = new Int16Array(array.Data);
        return AsGetter<short>(i => shorts.GetValue(i).GetValueOrDefault());
    }

    if (typeof(T) == typeof(short?))
    {
        var shorts = new Int16Array(array.Data);
        return AsGetter<short?>(shorts.GetValue);
    }

    if (typeof(T) == typeof(ushort))
    {
        var ushorts = new UInt16Array(array.Data);
        return AsGetter<ushort>(i => ushorts.GetValue(i).GetValueOrDefault());
    }

    if (typeof(T) == typeof(ushort?))
    {
        var ushorts = new UInt16Array(array.Data);
        return AsGetter<ushort?>(ushorts.GetValue);
    }

    if (typeof(T) == typeof(int))
    {
        var ints = new Int32Array(array.Data);
        return AsGetter<int>(i => ints.GetValue(i).GetValueOrDefault());
    }

    if (typeof(T) == typeof(int?))
    {
        var ints = new Int32Array(array.Data);
        return AsGetter<int?>(ints.GetValue);
    }

    if (typeof(T) == typeof(uint))
    {
        var uints = new UInt32Array(array.Data);
        return AsGetter<uint>(i => uints.GetValue(i).GetValueOrDefault());
    }

    if (typeof(T) == typeof(uint?))
    {
        var uints = new UInt32Array(array.Data);
        return AsGetter<uint?>(uints.GetValue);
    }

    if (typeof(T) == typeof(long))
    {
        var longs = new Int64Array(array.Data);
        return AsGetter<long>(i => longs.GetValue(i).GetValueOrDefault());
    }

    if (typeof(T) == typeof(long?))
    {
        var longs = new Int64Array(array.Data);
        return AsGetter<long?>(longs.GetValue);
    }

    if (typeof(T) == typeof(ulong))
    {
        var ulongs = new UInt64Array(array.Data);
        return AsGetter<ulong>(i => ulongs.GetValue(i).GetValueOrDefault());
    }

    if (typeof(T) == typeof(ulong?))
    {
        var ulongs = new UInt64Array(array.Data);
        return AsGetter<ulong?>(ulongs.GetValue);
    }

    if (typeof(T) == typeof(double))
    {
        var doubles = new DoubleArray(array.Data);
        return AsGetter<double>(i => doubles.GetValue(i).GetValueOrDefault());
    }

    if (typeof(T) == typeof(double?))
    {
        var doubles = new DoubleArray(array.Data);
        return AsGetter<double?>(doubles.GetValue);
    }

    if (typeof(T) == typeof(float))
    {
        var floats = new FloatArray(array.Data);
        return AsGetter<float>(i => floats.GetValue(i).GetValueOrDefault());
    }

    if (typeof(T) == typeof(float?))
    {
        var floats = new FloatArray(array.Data);
        return AsGetter<float?>(floats.GetValue);
    }

    // Date-like targets are only served from Date32/Date64 arrays; any other array type
    // drops through to the NotSupportedException at the end.
    if (typeof(T) == typeof(DateTime))
    {
        switch (array.Data.DataType.TypeId)
        {
            case ArrowTypeId.Date32:
            {
                var dates = new Date32Array(array.Data);
                return AsGetter<DateTime>(i => dates.GetDate(i).GetValueOrDefault().DateTime);
            }

            case ArrowTypeId.Date64:
            {
                var dates = new Date64Array(array.Data);
                return AsGetter<DateTime>(i => dates.GetDate(i).GetValueOrDefault().DateTime);
            }
        }
    }

    if (typeof(T) == typeof(DateTime?))
    {
        switch (array.Data.DataType.TypeId)
        {
            case ArrowTypeId.Date32:
            {
                var dates = new Date32Array(array.Data);
                return AsGetter<DateTime?>(i => dates.GetDate(i)?.DateTime);
            }

            case ArrowTypeId.Date64:
            {
                var dates = new Date64Array(array.Data);
                return AsGetter<DateTime?>(i => dates.GetDate(i)?.DateTime);
            }
        }
    }

    if (typeof(T) == typeof(DateTimeOffset))
    {
        switch (array.Data.DataType.TypeId)
        {
            case ArrowTypeId.Date32:
            {
                var dates = new Date32Array(array.Data);
                return AsGetter<DateTimeOffset>(i => dates.GetDate(i).GetValueOrDefault());
            }

            case ArrowTypeId.Date64:
            {
                var dates = new Date64Array(array.Data);
                return AsGetter<DateTimeOffset>(i => dates.GetDate(i).GetValueOrDefault());
            }
        }
    }

    if (typeof(T) == typeof(DateTimeOffset?))
    {
        switch (array.Data.DataType.TypeId)
        {
            case ArrowTypeId.Date32:
            {
                var dates = new Date32Array(array.Data);
                return AsGetter<DateTimeOffset?>(dates.GetDate);
            }

            case ArrowTypeId.Date64:
            {
                var dates = new Date64Array(array.Data);
                return AsGetter<DateTimeOffset?>(dates.GetDate);
            }
        }
    }

    if (typeof(T) == typeof(TimeSpan))
    {
        var timestamps = new TimestampArray(array.Data);
        return AsGetter<TimeSpan>(i => timestamps.GetTimestamp(i).GetValueOrDefault().TimeOfDay);
    }

    if (typeof(T) == typeof(TimeSpan?))
    {
        var timestamps = new TimestampArray(array.Data);
        return AsGetter<TimeSpan?>(i => timestamps.GetTimestamp(i)?.TimeOfDay);
    }

    if (typeof(T) == typeof(byte[]))
    {
        var binaries = new BinaryArray(array.Data);

        // TODO: how to avoid this allocation/copy?
        return AsGetter<byte[]>(i => binaries.GetBytes(i).ToArray());
    }

    if (typeof(T) == typeof(string))
    {
        var strings = new StringArray(array.Data);
        return AsGetter<string>(i => strings.GetString(i));
    }

    // Nothing above matched, so the request is unsupported whatever the Arrow type id is
    // (Decimal, Dictionary, FixedSizedBinary, HalfFloat, Interval, List, Map, Null, Struct,
    // Time32, Time64, Union, or anything else).
    // TODO: support additional types?
    throw new NotSupportedException(
        $"Not supported array type: {array.Data.DataType.TypeId}");
}
/// <summary>
/// Stores <paramref name="dictionary"/> under <paramref name="id"/>, overwriting any entry
/// already held for that id.
/// </summary>
/// <param name="id">The key used in the id-to-dictionary map.</param>
/// <param name="dictionary">The array to associate with <paramref name="id"/>.</param>
public void AddOrReplaceDictionary(long id, IArrowArray dictionary) =>
    _idToDictionary[id] = dictionary;
/// <summary>
/// Creates a comparer around <paramref name="expectedArray"/> and a type comparer for its data type.
/// </summary>
/// <param name="expectedArray">The array that visited arrays are compared against.</param>
public ArrayComparer(IArrowArray expectedArray)
{
    _expectedArray = expectedArray;

    var expectedType = expectedArray.Data.DataType;
    _arrayTypeComparer = new ArrayTypeComparer(expectedType);
}
/// <summary>
/// Creates a comparer around <paramref name="expectedArray"/>, a type comparer for its data
/// type, and records the requested strictness.
/// </summary>
/// <param name="expectedArray">The array that visited arrays are compared against.</param>
/// <param name="strictCompare">Stored as-is in <c>_strictCompare</c>.</param>
public ArrayComparer(IArrowArray expectedArray, bool strictCompare)
{
    _expectedArray = expectedArray;

    var expectedType = expectedArray.Data.DataType;
    _arrayTypeComparer = new ArrayTypeComparer(expectedType);

    _strictCompare = strictCompare;
}
/// <summary>
/// Handles a <see cref="UnionType"/> visit by storing a new <see cref="UnionArray"/>
/// built from the held array data.
/// </summary>
/// <param name="type">The visited type; not read by this method.</param>
public void Visit(UnionType type)
{
    _array = new UnionArray(_data);
}
/// <summary>
/// Handles a <see cref="UInt16Type"/> visit by storing a new <see cref="UInt16Array"/>
/// built from the held array data.
/// </summary>
/// <param name="type">The visited type; not read by this method.</param>
public void Visit(UInt16Type type)
{
    _array = new UInt16Array(_data);
}
/// <summary>
/// Handles a <see cref="ListType"/> visit by storing a new <see cref="ListArray"/>
/// built from the held array data.
/// </summary>
/// <param name="type">The visited type; not read by this method.</param>
public void Visit(ListType type)
{
    _array = new ListArray(_data);
}
/// <summary>
/// Handles a <see cref="UInt32Type"/> visit by storing a new <see cref="UInt32Array"/>
/// built from the held array data.
/// </summary>
/// <param name="type">The visited type; not read by this method.</param>
public void Visit(UInt32Type type)
{
    _array = new UInt32Array(_data);
}
/// <summary>
/// Handles a <see cref="BinaryType"/> visit by storing a new <see cref="BinaryArray"/>
/// built from the held array data.
/// </summary>
/// <param name="type">The visited type; not read by this method.</param>
public void Visit(BinaryType type)
{
    _array = new BinaryArray(_data);
}
/// <summary>
/// Handles a <see cref="UInt64Type"/> visit by storing a new <see cref="UInt64Array"/>
/// built from the held array data.
/// </summary>
/// <param name="type">The visited type; not read by this method.</param>
public void Visit(UInt64Type type)
{
    _array = new UInt64Array(_data);
}
/// <summary>
/// Creates a comparer that keeps <paramref name="expectedArray"/> as the expected side.
/// </summary>
/// <param name="expectedArray">The array stored in <c>_expectedArray</c>.</param>
public ArrayComparer(IArrowArray expectedArray) =>
    _expectedArray = expectedArray;
/// <summary>
/// Handles a <see cref="BooleanType"/> visit by storing a new <see cref="BooleanArray"/>
/// built from the held array data.
/// </summary>
/// <param name="type">The visited type; not read by this method.</param>
public void Visit(BooleanType type)
{
    _array = new BooleanArray(_data);
}
/// <summary>
/// Writes one record batch: the schema first if it has not been written yet, then the
/// record batch message (field nodes and buffer descriptors), then the buffer bodies, each
/// padded to a multiple of 8 bytes.
/// </summary>
/// <param name="recordBatch">The batch whose columns are serialized.</param>
/// <param name="cancellationToken">
/// Forwarded to the schema, message and buffer writes; the padding writes take no token.
/// </param>
/// <returns>A task that completes once the batch and its padding have been written.</returns>
private protected async Task WriteRecordBatchInternalAsync(
    RecordBatch recordBatch,
    CancellationToken cancellationToken = default)
{
    // TODO: Truncate buffers with extraneous padding / unused capacity

    if (!HasWrittenSchema)
    {
        await WriteSchemaAsync(Schema, cancellationToken).ConfigureAwait(false);
        HasWrittenSchema = true;
    }

    Builder.Clear();

    // --- Field nodes ---
    int columnCount = Schema.Fields.Count;

    Flatbuf.RecordBatch.StartNodesVector(Builder, CountAllNodes());

    // Flatbuffer struct vectors have to be created in reverse order.
    for (int column = columnCount - 1; column >= 0; column--)
    {
        CreateSelfAndChildrenFieldNodes(recordBatch.Column(column).Data);
    }

    VectorOffset nodesVector = Builder.EndVector();

    // --- Buffer descriptors ---
    var bufferCollector = new ArrowRecordBatchFlatBufferBuilder();

    for (int column = 0; column < columnCount; column++)
    {
        recordBatch.Column(column).Accept(bufferCollector);
    }

    IReadOnlyList<ArrowRecordBatchFlatBufferBuilder.Buffer> collected = bufferCollector.Buffers;

    Flatbuf.RecordBatch.StartBuffersVector(Builder, collected.Count);

    // Reverse order again, for the same flatbuffer reason as above.
    for (int b = collected.Count - 1; b >= 0; b--)
    {
        Flatbuf.Buffer.CreateBuffer(Builder, collected[b].Offset, collected[b].DataBuffer.Length);
    }

    VectorOffset buffersVector = Builder.EndVector();

    // --- Record batch message ---
    StartingWritingRecordBatch();

    Offset<Flatbuf.RecordBatch> batchOffset = Flatbuf.RecordBatch.CreateRecordBatch(
        Builder,
        recordBatch.Length,
        nodesVector,
        buffersVector);

    long metadataLength = await WriteMessageAsync(
        Flatbuf.MessageHeader.RecordBatch,
        batchOffset,
        bufferCollector.TotalLength,
        cancellationToken).ConfigureAwait(false);

    // --- Buffer bodies ---
    long bodyLength = 0;

    foreach (ArrowRecordBatchFlatBufferBuilder.Buffer entry in collected)
    {
        ArrowBuffer body = entry.DataBuffer;

        // Empty buffers contribute neither bytes nor padding.
        if (!body.IsEmpty)
        {
            await WriteBufferAsync(body, cancellationToken).ConfigureAwait(false);

            int paddedLength = checked((int)BitUtility.RoundUpToMultipleOf8(body.Length));
            int padding = paddedLength - body.Length;

            if (padding > 0)
            {
                await WritePaddingAsync(padding).ConfigureAwait(false);
            }

            bodyLength += paddedLength;
        }
    }

    // Write padding so the record batch message body length is a multiple of 8 bytes.
    int bodyPaddingLength = CalculatePadding(bodyLength);
    await WritePaddingAsync(bodyPaddingLength).ConfigureAwait(false);

    FinishedWritingRecordBatch(bodyLength + bodyPaddingLength, metadataLength);
}
/// <summary>
/// Handles a <see cref="FloatType"/> visit by storing a new <see cref="FloatArray"/>
/// built from the held array data.
/// </summary>
/// <param name="type">The visited type; not read by this method.</param>
public void Visit(FloatType type)
{
    _array = new FloatArray(_data);
}
/// <summary>
/// Fallback visit for an arbitrary <see cref="IArrowArray"/>; always throws.
/// </summary>
/// <param name="array">The visited array; not read by this method.</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public void Visit(IArrowArray array) => throw new NotImplementedException();
/// <summary>
/// Fallback visit for an arbitrary <see cref="IArrowArray"/>. This implementation does
/// nothing; being virtual, it can be overridden.
/// </summary>
/// <param name="array">The visited array; not read by this method.</param>
public virtual void Visit(IArrowArray array)
{
    // Intentionally empty.
}