Exemplo n.º 1
0
        /// <summary>
        /// Decodes the values carried by a dictionary batch and stores them in
        /// <c>DictionaryMemo</c> under the batch's dictionary id.
        /// </summary>
        /// <param name="dictionaryBatch">Flatbuffer dictionary batch header to decode.</param>
        /// <param name="bodyByteBuffer">Body buffer handed to <c>BuildArrays</c>.</param>
        /// <param name="memoryOwner">Not referenced inside this method.</param>
        /// <exception cref="InvalidDataException">
        /// The batch has no record batch, or decoding it did not yield exactly one array.
        /// </exception>
        /// <exception cref="NotImplementedException">The batch is flagged as a delta.</exception>
        private void ReadDictionaryBatch(Flatbuf.DictionaryBatch dictionaryBatch, ByteBuffer bodyByteBuffer, IMemoryOwner<byte> memoryOwner)
        {
            long dictionaryId = dictionaryBatch.Id;
            IArrowType dictionaryValueType = DictionaryMemo.GetDictionaryType(dictionaryId);

            Flatbuf.RecordBatch? batchData = dictionaryBatch.Data;
            if (batchData == null)
            {
                throw new InvalidDataException("Dictionary must contain RecordBatch");
            }

            // Wrap the value type in a throwaway single-field schema so the regular
            // record batch decoding path (BuildArrays) can be reused for the dictionary.
            var singleFieldSchema = new Schema(new[] { new Field("dummy", dictionaryValueType, true) }, default);
            IList<IArrowArray> decodedArrays = BuildArrays(singleFieldSchema, bodyByteBuffer, batchData.Value);

            if (decodedArrays.Count != 1)
            {
                throw new InvalidDataException("Dictionary record batch must contain only one field");
            }

            if (dictionaryBatch.IsDelta)
            {
                throw new NotImplementedException("Dictionary delta is not supported yet");
            }

            DictionaryMemo.AddOrReplaceDictionary(dictionaryId, decodedArrays[0]);
        }
Exemplo n.º 2
0
 /// <summary>
 /// Stores <paramref name="dataType"/> and creates fresh builders for the
 /// validity, value and value-offset buffers.
 /// </summary>
 /// <param name="dataType">Arrow type assigned to <c>DataType</c>.</param>
 protected BuilderBase(IArrowType dataType)
 {
     ValidityBuffer = new BooleanArray.Builder();
     ValueBuffer = new ArrowBuffer.Builder<byte>();
     ValueOffsets = new ArrowBuffer.Builder<int>();
     DataType = dataType;
 }
Exemplo n.º 3
0
    /// <summary>
    /// Maps an Arrow type to the corresponding primitive schema, based on its <c>TypeId</c>.
    /// </summary>
    /// <param name="arrowType">Arrow type to translate.</param>
    /// <returns>
    /// The bigint schema for Int32/Int64, the double schema for Float/Double, and the
    /// string, date-time, binary and bit schemas for String, Timestamp, Binary and Boolean.
    /// </returns>
    /// <exception cref="NotSupportedException">The type id has no mapping here.</exception>
    private static ITypeSchema ArrowTypeToASAType(IArrowType arrowType)
    {
        switch (arrowType.TypeId)
        {
        // Both integer widths share the bigint schema.
        case ArrowTypeId.Int32:
        case ArrowTypeId.Int64:
            return PrimitiveSchema.BigintSchema;

        // Both floating-point widths share the double schema.
        case ArrowTypeId.Float:
        case ArrowTypeId.Double:
            return PrimitiveSchema.DoubleSchema;

        case ArrowTypeId.String:
            return PrimitiveSchema.StringSchema;

        case ArrowTypeId.Timestamp:
            return PrimitiveSchema.DateTimeSchema;

        case ArrowTypeId.Binary:
            return PrimitiveSchema.BinarySchema;

        case ArrowTypeId.Boolean:
            return PrimitiveSchema.BitSchema;

        default:
            // A specific exception type instead of bare System.Exception; callers that
            // catch Exception still catch this, and the message text is unchanged.
            throw new NotSupportedException($"Unsupported Arrow type: {arrowType.TypeId}");
        }
    }
Exemplo n.º 4
0
        /// <summary>
        /// Registers a dictionary-typed field under the given dictionary id.
        /// </summary>
        /// <remarks>
        /// Several fields may share one id as long as their dictionary value types are the
        /// same instance; the id-to-value-type entry is created only the first time the id
        /// is seen.
        /// </remarks>
        /// <param name="id">Dictionary id to associate with <paramref name="field"/>.</param>
        /// <param name="field">Field whose data type must be a <c>DictionaryType</c>.</param>
        /// <exception cref="ArgumentException">
        /// The field is already registered, its type is not a dictionary type, or its value
        /// type differs from the one already recorded for <paramref name="id"/>.
        /// </exception>
        public void AddField(long id, Field field)
        {
            if (_fieldToId.ContainsKey(field))
            {
                throw new ArgumentException($"Field {field.Name} is already in Memo");
            }

            if (field.DataType.TypeId != ArrowTypeId.Dictionary)
            {
                throw new ArgumentException($"Field type is not DictionaryType: Name={field.Name}, {field.DataType.Name}");
            }

            IArrowType valueType = ((DictionaryType)field.DataType).ValueType;

            if (_idToValueType.TryGetValue(id, out IArrowType valueTypeInDic))
            {
                if (valueType != valueTypeInDic)
                {
                    throw new ArgumentException($"Field type {field.DataType.Name} does not match the existing type {valueTypeInDic}");
                }
            }
            else
            {
                // Only add the id when it is new: adding an existing key would throw and
                // previously left _fieldToId updated while _idToValueType was not.
                _idToValueType.Add(id, valueType);
            }

            _fieldToId.Add(field, id);
        }
Exemplo n.º 5
0
        /// <summary>Dictionary data; only used for dictionary type.</summary>
        public readonly ArrayData Dictionary;

        /// <summary>
        /// Overload without a dictionary argument, kept for compatibility with lower version
        /// binaries built before the dictionary type was supported. Forwards to the
        /// seven-argument constructor with <c>null</c> as the last argument.
        /// </summary>
        public ArrayData(
            IArrowType dataType,
            int length,
            int nullCount,
            int offset,
            IEnumerable<ArrowBuffer> buffers,
            IEnumerable<ArrayData> children)
            : this(dataType, length, nullCount, offset, buffers, children, null)
        {
        }
Exemplo n.º 6
0
 /// <summary>
 /// Array-based overload without a dictionary argument, kept for compatibility with
 /// lower version binaries built before the dictionary type was supported. Forwards to
 /// the seven-argument constructor with <c>null</c> as the last argument.
 /// </summary>
 public ArrayData(
     IArrowType dataType,
     int length,
     int nullCount,
     int offset,
     ArrowBuffer[] buffers,
     ArrayData[] children)
     : this(dataType, length, nullCount, offset, buffers, children, null)
 {
 }
Exemplo n.º 7
0
 /// <summary>
 /// Creates a list builder: the value builder is obtained from
 /// <c>ArrowArrayBuilderFactory</c> for the list's value data type, and fresh
 /// builders are created for the validity and value-offset buffers.
 /// </summary>
 /// <param name="dataType">List type assigned to <c>DataType</c>.</param>
 internal Builder(ListType dataType)
 {
     // The factory call stays first so a failure here happens before any other member is set.
     ValueBuilder = ArrowArrayBuilderFactory.Build(dataType.ValueDataType);
     DataType = dataType;
     ValidityBufferBuilder = new BooleanArray.Builder();
     ValueOffsetsBufferBuilder = new ArrowBuffer.Builder<int>();
 }
Exemplo n.º 8
0
 /// <summary>
 /// Stores the data type and byte width, and creates fresh builders for the
 /// validity bitmap and the value bytes.
 /// </summary>
 /// <param name="dataType">Arrow type assigned to <c>DataType</c>.</param>
 /// <param name="byteWidth">Value assigned to <c>ByteWidth</c>.</param>
 protected BuilderBase(IArrowType dataType, int byteWidth)
 {
     ValidityBuffer = new ArrowBuffer.BitmapBuilder();
     ValueBuffer = new ArrowBuffer.Builder<byte>();
     ByteWidth = byteWidth;
     DataType = dataType;
 }
Exemplo n.º 9
0
 /// <summary>
 /// Records a match when the visited type has the same <c>TypeId</c> as the expected
 /// type. The match flag is left untouched when the ids differ.
 /// </summary>
 /// <param name="actualType">Type being visited.</param>
 public void Visit(IArrowType actualType)
 {
     if (_expectedType.TypeId != actualType.TypeId)
     {
         return;
     }

     _dataTypeMatch = true;
 }
Exemplo n.º 10
0
        /// <summary>
        /// Lazily yields every record batch read from <paramref name="inputStream"/>.
        /// If the stream produced no batch at all, yields a single zero-length batch
        /// built from the reader's schema instead.
        /// </summary>
        /// <param name="inputStream">Stream to read from; the reader is created with <c>leaveOpen: true</c>.</param>
        private IEnumerable<RecordBatch> GetInputIterator(Stream inputStream)
        {
            using (var reader = new ArrowStreamReader(inputStream, leaveOpen: true))
            {
                bool sawAnyBatch = false;

                for (RecordBatch current = reader.ReadNextRecordBatch();
                     current != null;
                     current = reader.ReadNextRecordBatch())
                {
                    yield return current;

                    sawAnyBatch = true;
                }

                if (sawAnyBatch)
                {
                    yield break;
                }

                // When no input batches were received, return an empty RecordBatch
                // in order to create and write back the result schema.
                var schema = reader.Schema;
                int fieldCount = schema.Fields.Count;
                var emptyColumns = new IArrowArray[fieldCount];

                for (int column = 0; column < fieldCount; ++column)
                {
                    emptyColumns[column] =
                        ArrowArrayHelpers.CreateEmptyArray(schema.GetFieldByIndex(column).DataType);
                }

                yield return new RecordBatch(schema, emptyColumns, 0);
            }
        }
            private void GenerateTestData <T, TArray, TArrayBuilder>(IArrowType type, Func <int, T> generator)
                where TArrayBuilder : IArrowArrayBuilder <T, TArray, TArrayBuilder>
                where TArray : IArrowArray
            {
                var resultBuilder = (IArrowArrayBuilder <T, TArray, TArrayBuilder>)ArrayArrayBuilderFactoryReflector.InvokeBuild(type);

                resultBuilder.Reserve(_baseDataTotalElementCount);

                for (int i = 0; i < _baseDataListCount; i++)
                {
                    List <int?> dataList = _baseData[i];
                    var         builder  = (IArrowArrayBuilder <T, TArray, TArrayBuilder>)ArrayArrayBuilderFactoryReflector.InvokeBuild(type);
                    builder.Reserve(dataList.Count);

                    foreach (int?value in dataList)
                    {
                        if (value.HasValue)
                        {
                            builder.Append(generator(value.Value));
                            resultBuilder.Append(generator(value.Value));
                        }
                        else
                        {
                            builder.AppendNull();
                            resultBuilder.AppendNull();
                        }
                    }
                    TestTargetArrayList.Add(builder.Build(default));
Exemplo n.º 12
0
 /// <summary>
 /// Starts a builder with defaults: empty name, <c>NullType.Default</c> as the type,
 /// nullable set to <c>true</c>, and an empty metadata dictionary.
 /// </summary>
 public Builder()
 {
     _name = string.Empty;
     _type = NullType.Default;
     _nullable = true;
     _metadata = new Dictionary<string, string>();
 }
Exemplo n.º 13
0
 /// <summary>
 /// Creates a list array from its individual buffers by packing them into an
 /// <c>ArrayData</c> and forwarding to the <c>(ArrayData, IArrowArray)</c> constructor.
 /// </summary>
 /// <remarks>
 /// The buffers are passed null bitmap first, then value offsets; the data of
 /// <paramref name="values"/> is the only child.
 /// </remarks>
 public ListArray(
     IArrowType dataType,
     int length,
     ArrowBuffer valueOffsetsBuffer,
     IArrowArray values,
     ArrowBuffer nullBitmapBuffer,
     int nullCount = 0,
     int offset = 0)
     : this(
         new ArrayData(
             dataType, length, nullCount, offset,
             new[] { nullBitmapBuffer, valueOffsetsBuffer },
             new[] { values.Data }),
         values)
 {
 }
Exemplo n.º 14
0
        /// <summary>
        /// Internal constructor that stores the supplied metadata dictionary as-is,
        /// without copying it.
        /// </summary>
        /// <param name="name">Forwarded to the three-argument constructor.</param>
        /// <param name="dataType">Forwarded to the three-argument constructor.</param>
        /// <param name="nullable">Forwarded to the three-argument constructor.</param>
        /// <param name="metadata">Assigned directly to <c>Metadata</c>.</param>
        /// <param name="copyCollections">Expected to be <c>false</c>; checked with a debug assertion only.</param>
        internal Field(
            string name,
            IArrowType dataType,
            bool nullable,
            IReadOnlyDictionary<string, string> metadata,
            bool copyCollections)
            : this(name, dataType, nullable)
        {
            Debug.Assert(!copyCollections, "This internal constructor is to not copy the collections.");

            Metadata = metadata;
        }
Exemplo n.º 15
0
 /// <summary>
 /// Runs the data-type check and the buffer-count check on every entry of
 /// <c>_arrayDataList</c>, in that order for each entry.
 /// </summary>
 /// <param name="type">Type whose <c>TypeId</c> is passed to <c>EnsureDataType</c>.</param>
 /// <param name="expectedBufferCount">Count passed to <c>EnsureBufferCount</c>.</param>
 private void CheckData(IArrowType type, int expectedBufferCount)
 {
     foreach (ArrayData candidate in _arrayDataList)
     {
         // The type id is read per entry, so nothing is dereferenced for an empty list.
         candidate.EnsureDataType(type.TypeId);
         candidate.EnsureBufferCount(expectedBufferCount);
     }
 }
Exemplo n.º 16
0
 /// <summary>
 /// Creates a struct array from child arrays and a null bitmap by packing them into
 /// an <c>ArrayData</c> and forwarding to the <c>ArrayData</c>-based constructor.
 /// </summary>
 /// <remarks>
 /// The null bitmap is the only buffer; each child contributes its <c>Data</c>.
 /// </remarks>
 public StructArray(
     IArrowType dataType,
     int length,
     IEnumerable<Array> children,
     ArrowBuffer nullBitmapBuffer,
     int nullCount = 0,
     int offset = 0)
     : this(
         new ArrayData(
             dataType,
             length,
             nullCount,
             offset,
             new[] { nullBitmapBuffer },
             children.Select(childArray => childArray.Data)))
 {
 }
Exemplo n.º 17
0
            /// <summary>
            /// Validates the inputs against a three-buffer layout, concatenates the
            /// validity, offset and value buffers, and stores the combined
            /// <c>ArrayData</c> in <c>Result</c>.
            /// </summary>
            /// <param name="type">Type used for validation and for the resulting data.</param>
            private void ConcatenateVariableBinaryArrayData(IArrowType type)
            {
                CheckData(type, 3);

                // Array initializer elements are evaluated left to right, which keeps
                // the validity -> offset -> value call order.
                ArrowBuffer[] combinedBuffers =
                {
                    ConcatenateValidityBuffer(),
                    ConcatenateOffsetBuffer(),
                    ConcatenateVariableBinaryValueBuffer(),
                };

                Result = new ArrayData(type, _totalLength, _totalNullCount, 0, combinedBuffers);
            }
Exemplo n.º 18
0
 /// <summary>
 /// Creates the array from individual buffers by packing them into an
 /// <c>ArrayData</c> and forwarding to the <c>(ArrayData, int)</c> constructor.
 /// </summary>
 /// <remarks>
 /// The buffers are passed in the order null bitmap, indices, data.
 /// </remarks>
 protected PrimitiveDictionaryArray(
     IArrowType dataType,
     int length,
     int uniqueValuesCount,
     ArrowBuffer nullBitmapBuffer,
     ArrowBuffer indices,
     ArrowBuffer dataBuffer,
     int nullCount = 0,
     int offset = 0)
     : this(
         new ArrayData(
             dataType, length, nullCount, offset,
             new[] { nullBitmapBuffer, indices, dataBuffer }),
         uniqueValuesCount)
 {
 }
Exemplo n.º 19
0
        private VectorOffset GetChildrenFieldOffset(Field field)
        {
            IArrowType targetDataType = field.DataType is DictionaryType dictionaryType ?
                                        dictionaryType.ValueType :
                                        field.DataType;

            if (!(targetDataType is NestedType type))
            {
                return(default);
Exemplo n.º 20
0
 /// <summary>
 /// Creates a binary array from individual buffers by packing them into an
 /// <c>ArrayData</c> and forwarding to the <c>ArrayData</c>-based constructor.
 /// </summary>
 /// <remarks>
 /// The buffers are passed in the order null bitmap, value offsets, data.
 /// </remarks>
 public BinaryArray(
     IArrowType dataType,
     int length,
     ArrowBuffer valueOffsetsBuffer,
     ArrowBuffer dataBuffer,
     ArrowBuffer nullBitmapBuffer,
     int nullCount = 0,
     int offset = 0)
     : this(
         new ArrayData(
             dataType, length, nullCount, offset,
             new[] { nullBitmapBuffer, valueOffsetsBuffer, dataBuffer }))
 {
 }
Exemplo n.º 21
0
        /// <summary>
        /// Shared initialization: validates the name and stores name, type and nullability.
        /// </summary>
        /// <param name="name">Field name; must not be null, empty or whitespace.</param>
        /// <param name="dataType">Field type; <c>NullType.Default</c> is used when null.</param>
        /// <param name="nullable">Value assigned to <c>IsNullable</c>.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="name"/> is null, empty or whitespace.
        /// </exception>
        private Field(string name, IArrowType dataType, bool nullable)
        {
            bool nameIsBlank = string.IsNullOrWhiteSpace(name);
            if (nameIsBlank)
            {
                throw new ArgumentNullException(nameof(name));
            }

            IsNullable = nullable;
            DataType = dataType ?? NullType.Default;
            Name = name;
        }
Exemplo n.º 22
0
            /// <summary>
            /// Fills <c>Buffer</c> with the values 0 .. <c>_length</c> - 1, each converted
            /// to <typeparamref name="T"/> via <see cref="Convert.ChangeType(object, Type)"/>.
            /// </summary>
            /// <typeparam name="T">Value type of the buffer elements.</typeparam>
            /// <param name="type">Not referenced inside this method.</param>
            private void CreateNumberArray<T>(IArrowType type)
                where T : struct
            {
                var values = new ArrowBuffer.Builder<T>(_length);

                int index = 0;
                while (index < _length)
                {
                    object converted = Convert.ChangeType(index, typeof(T));
                    values.Append((T)converted);
                    index++;
                }

                Buffer = values.Build();
            }
Exemplo n.º 23
0
        /// <summary>
        /// Creates a dictionary type from an index type, a value type and an ordering flag.
        /// </summary>
        /// <param name="indexType">Type of the indices; must be an <c>IntegerType</c>.</param>
        /// <param name="valueType">Value assigned to <c>ValueType</c>.</param>
        /// <param name="ordered">Value assigned to <c>Ordered</c>.</param>
        /// <exception cref="ArgumentException">
        /// <paramref name="indexType"/> is not an <c>IntegerType</c> (this includes null).
        /// </exception>
        public DictionaryType(IArrowType indexType, IArrowType valueType, bool ordered)
        {
            bool indexIsInteger = indexType is IntegerType;
            if (!indexIsInteger)
            {
                throw new ArgumentException($"{nameof(indexType)} must be integer");
            }

            Ordered = ordered;
            ValueType = valueType;
            IndexType = indexType;
        }
Exemplo n.º 24
0
 /// <summary>
 /// Adds to <paramref name="count"/> one for <paramref name="type"/> itself plus one
 /// for every type reachable through the fields of nested types, recursively.
 /// </summary>
 /// <param name="type">Root type to count.</param>
 /// <param name="count">Running total, incremented in place.</param>
 private static void CountSelfAndChildrenNodes(IArrowType type, ref int count)
 {
     var nested = type as NestedType;
     if (nested != null)
     {
         foreach (Field child in nested.Fields)
         {
             CountSelfAndChildrenNodes(child.DataType, ref count);
         }
     }

     // Count the node itself after its children.
     count += 1;
 }
Exemplo n.º 25
0
            /// <summary>
            /// Builds a one-field schema through the builder API and checks that the field
            /// looked up by name reports the same name, type name and nullability.
            /// </summary>
            public void FieldsHaveExpectedValues(string name, IArrowType type, bool nullable)
            {
                var schemaBuilder = new Schema.Builder()
                                    .Field(fieldBuilder => fieldBuilder.Name(name).DataType(type).Nullable(nullable));
                var builtSchema = schemaBuilder.Build();

                var actual = builtSchema.Fields[name];

                Assert.Equal(name, actual.Name);
                Assert.Equal(type.Name, actual.DataType.Name);
                Assert.Equal(nullable, actual.IsNullable);
            }
Exemplo n.º 26
0
 /// <summary>
 /// Creates array data from array-typed buffers and children, which are stored as given.
 /// </summary>
 /// <param name="dataType">Data type; <c>NullType.Default</c> is used when null.</param>
 /// <param name="length">Value assigned to <c>Length</c>.</param>
 /// <param name="nullCount">Value assigned to <c>NullCount</c>.</param>
 /// <param name="offset">Value assigned to <c>Offset</c>.</param>
 /// <param name="buffers">Assigned to <c>Buffers</c> without copying; may be null.</param>
 /// <param name="children">Assigned to <c>Children</c> without copying; may be null.</param>
 public ArrayData(
     IArrowType dataType,
     int length,
     int nullCount = 0,
     int offset = 0,
     ArrowBuffer[] buffers = null,
     ArrayData[] children = null)
 {
     Children = children;
     Buffers = buffers;
     Offset = offset;
     NullCount = nullCount;
     Length = length;
     DataType = dataType ?? NullType.Default;
 }
Exemplo n.º 27
0
 /// <summary>
 /// Creates array data from enumerable buffers and children, each of which is copied
 /// into a new array when not null.
 /// </summary>
 /// <param name="dataType">Data type; <c>NullType.Default</c> is used when null.</param>
 /// <param name="length">Value assigned to <c>Length</c>.</param>
 /// <param name="nullCount">Value assigned to <c>NullCount</c>.</param>
 /// <param name="offset">Value assigned to <c>Offset</c>.</param>
 /// <param name="buffers">Materialized with <c>ToArray()</c>; null stays null.</param>
 /// <param name="children">Materialized with <c>ToArray()</c>; null stays null.</param>
 public ArrayData(
     IArrowType dataType,
     int length,
     int nullCount = 0,
     int offset = 0,
     IEnumerable<ArrowBuffer> buffers = null,
     IEnumerable<ArrayData> children = null)
 {
     Offset = offset;
     NullCount = nullCount;
     Length = length;
     DataType = dataType ?? NullType.Default;

     // Buffers are enumerated before children, as in the original ordering.
     Buffers = buffers?.ToArray();
     Children = children?.ToArray();
 }
Exemplo n.º 28
0
        /// <summary>
        /// Sets up the encoder for a list column: captures the column's getter and
        /// nullability, creates the offset and null-bitmap builders, and obtains an
        /// encoder for the column's first child.
        /// </summary>
        /// <param name="column">Column to encode; a debug assertion expects exactly one child.</param>
        public ListEncoder(Column column)
        {
            Debug.Assert(column.Children.Count == 1);

            _getFunc = column.GetFunction;
            _nullable = column.IsNullable;

            var elementColumn = column.Children.First();

            nullBitmap = new ArrowBuffer.BitmapBuilder();
            offsetBuilder = new ArrowBuffer.Builder<int>();

            // The child encoder is resolved before the column type is converted,
            // keeping the order of the two helper calls.
            _childEncoder = EncoderHelper.GetEncoder(elementColumn);
            _valueType = TypeConverter.Convert(column);
        }
Exemplo n.º 29
0
        /// <summary>
        /// Creates a field, copying any supplied metadata into a new dictionary.
        /// </summary>
        /// <param name="name">Field name; must not be null, empty or whitespace.</param>
        /// <param name="dataType">Field type; <c>NullType.Default</c> is used when null.</param>
        /// <param name="nullable">Value assigned to <c>IsNullable</c>.</param>
        /// <param name="metadata">Optional key/value pairs; when null, <c>Metadata</c> is set to null.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="name"/> is null, empty or whitespace.
        /// </exception>
        public Field(
            string name,
            IArrowType dataType,
            bool nullable,
            IEnumerable<KeyValuePair<string, string>> metadata = default)
        {
            bool nameIsBlank = string.IsNullOrWhiteSpace(name);
            if (nameIsBlank)
            {
                throw new ArgumentNullException(nameof(name));
            }

            IsNullable = nullable;
            DataType = dataType ?? NullType.Default;
            Name = name;
            Metadata = metadata?.ToDictionary(pair => pair.Key, pair => pair.Value);
        }
Exemplo n.º 30
0
        /// <summary>
        /// Builds zero-length <c>ArrayData</c> for the given type, with two or three
        /// empty buffers depending on which type-id set contains the type.
        /// </summary>
        /// <param name="arrowType">Type to build empty data for.</param>
        /// <exception cref="NotSupportedException">
        /// The type id is in neither the two-buffer nor the three-buffer set.
        /// </exception>
        private static ArrayData BuildEmptyArrayDataFromArrowType(IArrowType arrowType)
        {
            var typeId = arrowType.TypeId;
            ArrowBuffer[] emptyBuffers;

            if (s_twoBufferArrowTypes.Contains(typeId))
            {
                emptyBuffers = new[] { ArrowBuffer.Empty, ArrowBuffer.Empty };
            }
            else if (s_threeBufferArrowTypes.Contains(typeId))
            {
                emptyBuffers = new[] { ArrowBuffer.Empty, ArrowBuffer.Empty, ArrowBuffer.Empty };
            }
            else
            {
                throw new NotSupportedException($"Unsupported type: {typeId}");
            }

            return new ArrayData(arrowType, 0, buffers: emptyBuffers);
        }