public void Visit(DoubleType type) => _array    = new DoubleArray(_data);
 public void Visit(StructType type) => _array    = new StructArray(_data);
Esempio n. 3
0
 private ListArray(ArrayData data, IArrowArray values) : base(data)
 {
     data.EnsureBufferCount(2);
     data.EnsureDataType(ArrowTypeId.List);
     Values = values;
 }
Esempio n. 4
0
 public static void CompareArrays(IArrowArray expectedArray, IArrowArray actualArray, bool strictCompare = true)
 {
     actualArray.Accept(new ArrayComparer(expectedArray, strictCompare));
 }
 public void Visit(StringType type) => _array    = new StringArray(_data);
Esempio n. 6
0
        private static void AppendDataFrameColumnFromArrowArray(Field field, IArrowArray arrowArray, DataFrame ret, string fieldNamePrefix = "")
        {
            IArrowType      fieldType       = field.DataType;
            DataFrameColumn dataFrameColumn = null;
            string          fieldName       = fieldNamePrefix + field.Name;

            switch (fieldType.TypeId)
            {
            case ArrowTypeId.Boolean:
                BooleanArray          arrowBooleanArray = (BooleanArray)arrowArray;
                ReadOnlyMemory <byte> valueBuffer       = arrowBooleanArray.ValueBuffer.Memory;
                ReadOnlyMemory <byte> nullBitMapBuffer  = arrowBooleanArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new BooleanDataFrameColumn(fieldName, valueBuffer, nullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
                break;

            case ArrowTypeId.Double:
                PrimitiveArray <double> arrowDoubleArray       = (PrimitiveArray <double>)arrowArray;
                ReadOnlyMemory <byte>   doubleValueBuffer      = arrowDoubleArray.ValueBuffer.Memory;
                ReadOnlyMemory <byte>   doubleNullBitMapBuffer = arrowDoubleArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new DoubleDataFrameColumn(fieldName, doubleValueBuffer, doubleNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
                break;

            case ArrowTypeId.Float:
                PrimitiveArray <float> arrowFloatArray       = (PrimitiveArray <float>)arrowArray;
                ReadOnlyMemory <byte>  floatValueBuffer      = arrowFloatArray.ValueBuffer.Memory;
                ReadOnlyMemory <byte>  floatNullBitMapBuffer = arrowFloatArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new SingleDataFrameColumn(fieldName, floatValueBuffer, floatNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
                break;

            case ArrowTypeId.Int8:
                PrimitiveArray <sbyte> arrowsbyteArray       = (PrimitiveArray <sbyte>)arrowArray;
                ReadOnlyMemory <byte>  sbyteValueBuffer      = arrowsbyteArray.ValueBuffer.Memory;
                ReadOnlyMemory <byte>  sbyteNullBitMapBuffer = arrowsbyteArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new SByteDataFrameColumn(fieldName, sbyteValueBuffer, sbyteNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
                break;

            case ArrowTypeId.Int16:
                PrimitiveArray <short> arrowshortArray       = (PrimitiveArray <short>)arrowArray;
                ReadOnlyMemory <byte>  shortValueBuffer      = arrowshortArray.ValueBuffer.Memory;
                ReadOnlyMemory <byte>  shortNullBitMapBuffer = arrowshortArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new Int16DataFrameColumn(fieldName, shortValueBuffer, shortNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
                break;

            case ArrowTypeId.Int32:
                PrimitiveArray <int>  arrowIntArray       = (PrimitiveArray <int>)arrowArray;
                ReadOnlyMemory <byte> intValueBuffer      = arrowIntArray.ValueBuffer.Memory;
                ReadOnlyMemory <byte> intNullBitMapBuffer = arrowIntArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new Int32DataFrameColumn(fieldName, intValueBuffer, intNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
                break;

            case ArrowTypeId.Int64:
                PrimitiveArray <long> arrowLongArray       = (PrimitiveArray <long>)arrowArray;
                ReadOnlyMemory <byte> longValueBuffer      = arrowLongArray.ValueBuffer.Memory;
                ReadOnlyMemory <byte> longNullBitMapBuffer = arrowLongArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new Int64DataFrameColumn(fieldName, longValueBuffer, longNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
                break;

            case ArrowTypeId.String:
                StringArray           stringArray   = (StringArray)arrowArray;
                ReadOnlyMemory <byte> dataMemory    = stringArray.ValueBuffer.Memory;
                ReadOnlyMemory <byte> offsetsMemory = stringArray.ValueOffsetsBuffer.Memory;
                ReadOnlyMemory <byte> nullMemory    = stringArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new ArrowStringDataFrameColumn(fieldName, dataMemory, offsetsMemory, nullMemory, stringArray.Length, stringArray.NullCount);
                break;

            case ArrowTypeId.UInt8:
                PrimitiveArray <byte> arrowbyteArray       = (PrimitiveArray <byte>)arrowArray;
                ReadOnlyMemory <byte> byteValueBuffer      = arrowbyteArray.ValueBuffer.Memory;
                ReadOnlyMemory <byte> byteNullBitMapBuffer = arrowbyteArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new ByteDataFrameColumn(fieldName, byteValueBuffer, byteNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
                break;

            case ArrowTypeId.UInt16:
                PrimitiveArray <ushort> arrowUshortArray       = (PrimitiveArray <ushort>)arrowArray;
                ReadOnlyMemory <byte>   ushortValueBuffer      = arrowUshortArray.ValueBuffer.Memory;
                ReadOnlyMemory <byte>   ushortNullBitMapBuffer = arrowUshortArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new UInt16DataFrameColumn(fieldName, ushortValueBuffer, ushortNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
                break;

            case ArrowTypeId.UInt32:
                PrimitiveArray <uint> arrowUintArray       = (PrimitiveArray <uint>)arrowArray;
                ReadOnlyMemory <byte> uintValueBuffer      = arrowUintArray.ValueBuffer.Memory;
                ReadOnlyMemory <byte> uintNullBitMapBuffer = arrowUintArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new UInt32DataFrameColumn(fieldName, uintValueBuffer, uintNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
                break;

            case ArrowTypeId.UInt64:
                PrimitiveArray <ulong> arrowUlongArray       = (PrimitiveArray <ulong>)arrowArray;
                ReadOnlyMemory <byte>  ulongValueBuffer      = arrowUlongArray.ValueBuffer.Memory;
                ReadOnlyMemory <byte>  ulongNullBitMapBuffer = arrowUlongArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new UInt64DataFrameColumn(fieldName, ulongValueBuffer, ulongNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
                break;

            case ArrowTypeId.Struct:
                StructArray               structArray           = (StructArray)arrowArray;
                StructType                structType            = (StructType)field.DataType;
                IEnumerator <Field>       fieldsEnumerator      = structType.Fields.GetEnumerator();
                IEnumerator <IArrowArray> structArrayEnumerator = structArray.Fields.GetEnumerator();
                while (fieldsEnumerator.MoveNext() && structArrayEnumerator.MoveNext())
                {
                    AppendDataFrameColumnFromArrowArray(fieldsEnumerator.Current, structArrayEnumerator.Current, ret, field.Name + "_");
                }
                break;

            case ArrowTypeId.Decimal:
            case ArrowTypeId.Binary:
            case ArrowTypeId.Date32:
            case ArrowTypeId.Date64:
            case ArrowTypeId.Dictionary:
            case ArrowTypeId.FixedSizedBinary:
            case ArrowTypeId.HalfFloat:
            case ArrowTypeId.Interval:
            case ArrowTypeId.List:
            case ArrowTypeId.Map:
            case ArrowTypeId.Null:
            case ArrowTypeId.Time32:
            case ArrowTypeId.Time64:
            default:
                throw new NotImplementedException(nameof(fieldType.Name));
            }

            if (dataFrameColumn != null)
            {
                ret.Columns.Insert(ret.Columns.Count, dataFrameColumn);
            }
        }
 public void Visit(Int8Type type) => _array      = new Int8Array(_data);
 public void Visit(TimestampType type) => _array = new TimestampArray(_data);
Esempio n. 9
0
 public void Visit(Decimal256Type type)
 {
     Array = new Decimal256Array(GetDecimalArrayData(type));
 }
Esempio n. 10
0
        public static Func <int, T> GetGetter <T>(IArrowArray array)
        {
            if (array is null)
            {
                return(null);
            }

            // TODO: determine fastest way to read out a value from the array.

            if (typeof(T) == typeof(bool))
            {
                var booleanArray = new BooleanArray(array.Data);
                return((Func <int, T>)(object) new Func <int, bool>(
                           index => booleanArray.GetBoolean(index).GetValueOrDefault()));
            }
            if (typeof(T) == typeof(bool?))
            {
                var booleanArray = new BooleanArray(array.Data);
                return((Func <int, T>)(object) new Func <int, bool?>(booleanArray.GetBoolean));
            }

            if (typeof(T) == typeof(sbyte))
            {
                var int8Array = new Int8Array(array.Data);
                return((Func <int, T>)(object) new Func <int, sbyte>(
                           index => int8Array.GetValue(index).GetValueOrDefault()));
            }
            if (typeof(T) == typeof(sbyte?))
            {
                var int8Array = new Int8Array(array.Data);
                return((Func <int, T>)(object) new Func <int, sbyte?>(int8Array.GetValue));
            }

            if (typeof(T) == typeof(byte))
            {
                var uint8Array = new UInt8Array(array.Data);
                return((Func <int, T>)(object) new Func <int, byte>(
                           index => uint8Array.GetValue(index).GetValueOrDefault()));
            }
            if (typeof(T) == typeof(byte?))
            {
                var uint8Array = new UInt8Array(array.Data);
                return((Func <int, T>)(object) new Func <int, byte?>(uint8Array.GetValue));
            }

            if (typeof(T) == typeof(short))
            {
                var int16Array = new Int16Array(array.Data);
                return((Func <int, T>)(object) new Func <int, short>(
                           index => int16Array.GetValue(index).GetValueOrDefault()));
            }
            if (typeof(T) == typeof(short?))
            {
                var int16Array = new Int16Array(array.Data);
                return((Func <int, T>)(object) new Func <int, short?>(int16Array.GetValue));
            }

            if (typeof(T) == typeof(ushort))
            {
                var uint16Array = new UInt16Array(array.Data);
                return((Func <int, T>)(object) new Func <int, ushort>(
                           index => uint16Array.GetValue(index).GetValueOrDefault()));
            }
            if (typeof(T) == typeof(ushort?))
            {
                var uint16Array = new UInt16Array(array.Data);
                return((Func <int, T>)(object) new Func <int, ushort?>(uint16Array.GetValue));
            }

            if (typeof(T) == typeof(int))
            {
                var int32Array = new Int32Array(array.Data);
                return((Func <int, T>)(object) new Func <int, int>(
                           index => int32Array.GetValue(index).GetValueOrDefault()));
            }
            if (typeof(T) == typeof(int?))
            {
                var int32Array = new Int32Array(array.Data);
                return((Func <int, T>)(object) new Func <int, int?>(int32Array.GetValue));
            }

            if (typeof(T) == typeof(uint))
            {
                var uint32Array = new UInt32Array(array.Data);
                return((Func <int, T>)(object) new Func <int, uint>(
                           index => uint32Array.GetValue(index).GetValueOrDefault()));
            }
            if (typeof(T) == typeof(uint?))
            {
                var uint32Array = new UInt32Array(array.Data);
                return((Func <int, T>)(object) new Func <int, uint?>(uint32Array.GetValue));
            }

            if (typeof(T) == typeof(long))
            {
                var int64Array = new Int64Array(array.Data);
                return((Func <int, T>)(object) new Func <int, long>(
                           index => int64Array.GetValue(index).GetValueOrDefault()));
            }
            if (typeof(T) == typeof(long?))
            {
                var int64Array = new Int64Array(array.Data);
                return((Func <int, T>)(object) new Func <int, long?>(int64Array.GetValue));
            }

            if (typeof(T) == typeof(ulong))
            {
                var uint64Array = new UInt64Array(array.Data);
                return((Func <int, T>)(object) new Func <int, ulong>(
                           index => uint64Array.GetValue(index).GetValueOrDefault()));
            }
            if (typeof(T) == typeof(ulong?))
            {
                var uint64Array = new UInt64Array(array.Data);
                return((Func <int, T>)(object) new Func <int, ulong?>(uint64Array.GetValue));
            }

            if (typeof(T) == typeof(double))
            {
                var doubleArray = new DoubleArray(array.Data);
                return((Func <int, T>)(object) new Func <int, double>(
                           index => doubleArray.GetValue(index).GetValueOrDefault()));
            }
            if (typeof(T) == typeof(double?))
            {
                var doubleArray = new DoubleArray(array.Data);
                return((Func <int, T>)(object) new Func <int, double?>(doubleArray.GetValue));
            }

            if (typeof(T) == typeof(float))
            {
                var floatArray = new FloatArray(array.Data);
                return((Func <int, T>)(object) new Func <int, float>(
                           index => floatArray.GetValue(index).GetValueOrDefault()));
            }
            if (typeof(T) == typeof(float?))
            {
                var floatArray = new FloatArray(array.Data);
                return((Func <int, T>)(object) new Func <int, float?>(floatArray.GetValue));
            }

            if (typeof(T) == typeof(DateTime))
            {
                if (array.Data.DataType.TypeId == ArrowTypeId.Date32)
                {
                    var date32Array = new Date32Array(array.Data);
                    return((Func <int, T>)(object) new Func <int, DateTime>(
                               index => date32Array.GetDate(index).GetValueOrDefault().DateTime));
                }
                else if (array.Data.DataType.TypeId == ArrowTypeId.Date64)
                {
                    var date64Array = new Date64Array(array.Data);
                    return((Func <int, T>)(object) new Func <int, DateTime>(
                               index => date64Array.GetDate(index).GetValueOrDefault().DateTime));
                }
            }
            if (typeof(T) == typeof(DateTime?))
            {
                if (array.Data.DataType.TypeId == ArrowTypeId.Date32)
                {
                    var date32Array = new Date32Array(array.Data);
                    return((Func <int, T>)(object) new Func <int, DateTime?>(
                               index => date32Array.GetDate(index)?.DateTime));
                }
                else if (array.Data.DataType.TypeId == ArrowTypeId.Date64)
                {
                    var date64Array = new Date64Array(array.Data);
                    return((Func <int, T>)(object) new Func <int, DateTime?>(
                               index => date64Array.GetDate(index)?.DateTime));
                }
            }

            if (typeof(T) == typeof(DateTimeOffset))
            {
                if (array.Data.DataType.TypeId == ArrowTypeId.Date32)
                {
                    var date32Array = new Date32Array(array.Data);
                    return((Func <int, T>)(object) new Func <int, DateTimeOffset>(
                               index => date32Array.GetDate(index).GetValueOrDefault()));
                }
                else if (array.Data.DataType.TypeId == ArrowTypeId.Date64)
                {
                    var date64Array = new Date64Array(array.Data);
                    return((Func <int, T>)(object) new Func <int, DateTimeOffset>(
                               index => date64Array.GetDate(index).GetValueOrDefault()));
                }
            }
            if (typeof(T) == typeof(DateTimeOffset?))
            {
                if (array.Data.DataType.TypeId == ArrowTypeId.Date32)
                {
                    var date32Array = new Date32Array(array.Data);
                    return((Func <int, T>)(object) new Func <int, DateTimeOffset?>(
                               date32Array.GetDate));
                }
                else if (array.Data.DataType.TypeId == ArrowTypeId.Date64)
                {
                    var date64Array = new Date64Array(array.Data);
                    return((Func <int, T>)(object) new Func <int, DateTimeOffset?>(
                               date64Array.GetDate));
                }
            }

            if (typeof(T) == typeof(TimeSpan))
            {
                var timestampArray = new TimestampArray(array.Data);
                return((Func <int, T>)(object) new Func <int, TimeSpan>(
                           index => timestampArray.GetTimestamp(index).GetValueOrDefault().TimeOfDay));
            }
            if (typeof(T) == typeof(TimeSpan?))
            {
                var timestampArray = new TimestampArray(array.Data);
                return((Func <int, T>)(object) new Func <int, TimeSpan?>(
                           index => timestampArray.GetTimestamp(index)?.TimeOfDay));
            }

            if (typeof(T) == typeof(byte[]))
            {
                var binaryArray = new BinaryArray(array.Data);
                return((Func <int, T>)(object) new Func <int, byte[]>(
                           // TODO: how to avoid this allocation/copy?
                           index => binaryArray.GetBytes(index).ToArray()));
            }

            if (typeof(T) == typeof(string))
            {
                var stringArray = new StringArray(array.Data);
                return((Func <int, T>)(object) new Func <int, string>(
                           index => stringArray.GetString(index)));
            }

            // It's something else we don't yet support.
            switch (array.Data.DataType.TypeId)
            {
            case ArrowTypeId.Decimal:
            case ArrowTypeId.Dictionary:
            case ArrowTypeId.FixedSizedBinary:
            case ArrowTypeId.HalfFloat:
            case ArrowTypeId.Interval:
            case ArrowTypeId.List:
            case ArrowTypeId.Map:
            case ArrowTypeId.Null:
            case ArrowTypeId.Struct:
            case ArrowTypeId.Time32:
            case ArrowTypeId.Time64:
            case ArrowTypeId.Union:
            default:
                // TODO: support additional types?
                throw new NotSupportedException(
                          $"Not supported array type: {array.Data.DataType.TypeId}");
            }
        }
Esempio n. 11
0
 public void AddOrReplaceDictionary(long id, IArrowArray dictionary)
 {
     _idToDictionary[id] = dictionary;
 }
Esempio n. 12
0
 public ArrayComparer(IArrowArray expectedArray)
 {
     _expectedArray     = expectedArray;
     _arrayTypeComparer = new ArrayTypeComparer(expectedArray.Data.DataType);
 }
Esempio n. 13
0
 public ArrayComparer(IArrowArray expectedArray, bool strictCompare)
 {
     _expectedArray     = expectedArray;
     _arrayTypeComparer = new ArrayTypeComparer(expectedArray.Data.DataType);
     _strictCompare     = strictCompare;
 }
Esempio n. 14
0
 public void Visit(UnionType type) => _array     = new UnionArray(_data);
Esempio n. 15
0
 public void Visit(UInt16Type type) => _array    = new UInt16Array(_data);
Esempio n. 16
0
 public void Visit(ListType type) => _array      = new ListArray(_data);
Esempio n. 17
0
 public void Visit(UInt32Type type) => _array    = new UInt32Array(_data);
Esempio n. 18
0
 public void Visit(BinaryType type) => _array    = new BinaryArray(_data);
Esempio n. 19
0
 public void Visit(UInt64Type type) => _array    = new UInt64Array(_data);
Esempio n. 20
0
 public ArrayComparer(IArrowArray expectedArray)
 {
     _expectedArray = expectedArray;
 }
Esempio n. 21
0
 public void Visit(BooleanType type) => _array   = new BooleanArray(_data);
Esempio n. 22
0
        private protected async Task WriteRecordBatchInternalAsync(RecordBatch recordBatch,
                                                                   CancellationToken cancellationToken = default)
        {
            // TODO: Truncate buffers with extraneous padding / unused capacity

            if (!HasWrittenSchema)
            {
                await WriteSchemaAsync(Schema, cancellationToken).ConfigureAwait(false);

                HasWrittenSchema = true;
            }

            Builder.Clear();

            // Serialize field nodes

            int fieldCount = Schema.Fields.Count;

            Flatbuf.RecordBatch.StartNodesVector(Builder, CountAllNodes());

            // flatbuffer struct vectors have to be created in reverse order
            for (int i = fieldCount - 1; i >= 0; i--)
            {
                CreateSelfAndChildrenFieldNodes(recordBatch.Column(i).Data);
            }

            VectorOffset fieldNodesVectorOffset = Builder.EndVector();

            // Serialize buffers

            var recordBatchBuilder = new ArrowRecordBatchFlatBufferBuilder();

            for (int i = 0; i < fieldCount; i++)
            {
                IArrowArray fieldArray = recordBatch.Column(i);
                fieldArray.Accept(recordBatchBuilder);
            }

            IReadOnlyList <ArrowRecordBatchFlatBufferBuilder.Buffer> buffers = recordBatchBuilder.Buffers;

            Flatbuf.RecordBatch.StartBuffersVector(Builder, buffers.Count);

            // flatbuffer struct vectors have to be created in reverse order
            for (int i = buffers.Count - 1; i >= 0; i--)
            {
                Flatbuf.Buffer.CreateBuffer(Builder,
                                            buffers[i].Offset, buffers[i].DataBuffer.Length);
            }

            VectorOffset buffersVectorOffset = Builder.EndVector();

            // Serialize record batch

            StartingWritingRecordBatch();

            Offset <Flatbuf.RecordBatch> recordBatchOffset = Flatbuf.RecordBatch.CreateRecordBatch(Builder, recordBatch.Length,
                                                                                                   fieldNodesVectorOffset,
                                                                                                   buffersVectorOffset);

            long metadataLength = await WriteMessageAsync(Flatbuf.MessageHeader.RecordBatch,
                                                          recordBatchOffset, recordBatchBuilder.TotalLength,
                                                          cancellationToken).ConfigureAwait(false);

            // Write buffer data

            long bodyLength = 0;

            for (int i = 0; i < buffers.Count; i++)
            {
                ArrowBuffer buffer = buffers[i].DataBuffer;
                if (buffer.IsEmpty)
                {
                    continue;
                }

                await WriteBufferAsync(buffer, cancellationToken).ConfigureAwait(false);

                int paddedLength = checked ((int)BitUtility.RoundUpToMultipleOf8(buffer.Length));
                int padding      = paddedLength - buffer.Length;
                if (padding > 0)
                {
                    await WritePaddingAsync(padding).ConfigureAwait(false);
                }

                bodyLength += paddedLength;
            }

            // Write padding so the record batch message body length is a multiple of 8 bytes

            int bodyPaddingLength = CalculatePadding(bodyLength);

            await WritePaddingAsync(bodyPaddingLength).ConfigureAwait(false);

            FinishedWritingRecordBatch(bodyLength + bodyPaddingLength, metadataLength);
        }
Esempio n. 23
0
 public void Visit(FloatType type) => _array     = new FloatArray(_data);
Esempio n. 24
0
 public void Visit(IArrowArray array)
 {
     throw new NotImplementedException();
 }
Esempio n. 25
0
 public virtual void Visit(IArrowArray array)
 {
 }