// Decodes one dictionary batch message and stores its single value array in DictionaryMemo
// under the batch id.
// Throws InvalidDataException when the batch has no record batch or does not decode to exactly
// one array, and NotImplementedException when the batch is flagged as a delta.
// NOTE(review): memoryOwner is not referenced in this body — confirm whether that is intentional.
private void ReadDictionaryBatch(Flatbuf.DictionaryBatch dictionaryBatch, ByteBuffer bodyByteBuffer, IMemoryOwner<byte> memoryOwner)
{
    long dictionaryId = dictionaryBatch.Id;
    IArrowType dictionaryValueType = DictionaryMemo.GetDictionaryType(dictionaryId);

    Flatbuf.RecordBatch? payload = dictionaryBatch.Data;
    if (!payload.HasValue)
    {
        throw new InvalidDataException("Dictionary must contain RecordBatch");
    }

    // Wrap the value type in a one-field schema so BuildArrays can decode the body;
    // "dummy" is only a placeholder field name.
    var placeholderField = new Field("dummy", dictionaryValueType, true);
    var singleFieldSchema = new Schema(new[] { placeholderField }, default);

    IList<IArrowArray> decoded = BuildArrays(singleFieldSchema, bodyByteBuffer, payload.Value);
    if (decoded.Count != 1)
    {
        throw new InvalidDataException("Dictionary record batch must contain only one field");
    }

    if (dictionaryBatch.IsDelta)
    {
        throw new NotImplementedException("Dictionary delta is not supported yet");
    }

    DictionaryMemo.AddOrReplaceDictionary(dictionaryId, decoded[0]);
}
// Initializes the builder for the given data type with fresh, empty builders for
// the value offsets (int), the value bytes and the validity flags.
protected BuilderBase(IArrowType dataType)
{
    DataType = dataType;

    ValidityBuffer = new BooleanArray.Builder();
    ValueBuffer = new ArrowBuffer.Builder<byte>();
    ValueOffsets = new ArrowBuffer.Builder<int>();
}
// Maps an Arrow type to the corresponding primitive schema based on its TypeId.
// Int32/Int64 map to bigint, Float/Double to double; String, Timestamp, Binary and Boolean
// map to the string, datetime, binary and bit schemas respectively.
// Throws Exception for any other type id.
private static ITypeSchema ArrowTypeToASAType(IArrowType arrowType)
{
    ArrowTypeId id = arrowType.TypeId;

    if (id == ArrowTypeId.Int32 || id == ArrowTypeId.Int64)
    {
        return PrimitiveSchema.BigintSchema;
    }

    if (id == ArrowTypeId.Float || id == ArrowTypeId.Double)
    {
        return PrimitiveSchema.DoubleSchema;
    }

    if (id == ArrowTypeId.String)
    {
        return PrimitiveSchema.StringSchema;
    }

    if (id == ArrowTypeId.Timestamp)
    {
        return PrimitiveSchema.DateTimeSchema;
    }

    if (id == ArrowTypeId.Binary)
    {
        return PrimitiveSchema.BinarySchema;
    }

    if (id == ArrowTypeId.Boolean)
    {
        return PrimitiveSchema.BitSchema;
    }

    throw new Exception("Unsupported Arrow type: " + arrowType.TypeId);
}
// Registers a dictionary-encoded field under the given dictionary id.
// Throws ArgumentException when the field is already registered, when the field's type is not a
// dictionary type, or when the id is already bound to a different value type.
// A field may reuse an id that is already registered as long as the value type matches.
public void AddField(long id, Field field)
{
    if (_fieldToId.ContainsKey(field))
    {
        throw new ArgumentException($"Field {field.Name} is already in Memo");
    }

    if (field.DataType.TypeId != ArrowTypeId.Dictionary)
    {
        throw new ArgumentException($"Field type is not DictionaryType: Name={field.Name}, {field.DataType.Name}");
    }

    IArrowType valueType = ((DictionaryType)field.DataType).ValueType;

    bool idAlreadyRegistered = _idToValueType.TryGetValue(id, out IArrowType valueTypeInDic);
    if (idAlreadyRegistered && valueType != valueTypeInDic)
    {
        throw new ArgumentException($"Field type {field.DataType.Name} does not match the existing type {valueTypeInDic})");
    }

    _fieldToId.Add(field, id);

    // Record the value type only the first time an id is seen. Calling Add again for an
    // existing key throws on a dictionary-style map, which previously happened after
    // _fieldToId had already been updated, leaving the two maps inconsistent.
    if (!idAlreadyRegistered)
    {
        _idToValueType.Add(id, valueType);
    }
}
// Only used for dictionary type
public readonly ArrayData Dictionary;

// This overload is left for compatibility with lower version binaries
// before the dictionary type was supported.
// It forwards every argument to the overload that takes one additional trailing
// argument and passes null for it.
public ArrayData(
    IArrowType dataType,
    int length,
    int nullCount,
    int offset,
    IEnumerable<ArrowBuffer> buffers,
    IEnumerable<ArrayData> children)
    : this(dataType, length, nullCount, offset, buffers, children, null)
{
}
// This overload is left for compatibility with lower version binaries
// before the dictionary type was supported.
// It forwards every argument to the overload that takes one additional trailing
// argument and passes null for it.
public ArrayData(
    IArrowType dataType,
    int length,
    int nullCount,
    int offset,
    ArrowBuffer[] buffers,
    ArrayData[] children)
    : this(dataType, length, nullCount, offset, buffers, children, null)
{
}
// Creates a list builder for the given list type: the element builder comes from
// ArrowArrayBuilderFactory for the list's value data type, and the offsets and validity
// builders start out empty.
internal Builder(ListType dataType)
{
    DataType = dataType;

    ValueBuilder = ArrowArrayBuilderFactory.Build(dataType.ValueDataType);
    ValidityBufferBuilder = new BooleanArray.Builder();
    ValueOffsetsBufferBuilder = new ArrowBuffer.Builder<int>();
}
// Initializes the builder with the given data type and byte width, plus fresh, empty
// builders for the value bytes and the validity bitmap.
protected BuilderBase(IArrowType dataType, int byteWidth)
{
    ByteWidth = byteWidth;
    DataType = dataType;

    ValidityBuffer = new ArrowBuffer.BitmapBuilder();
    ValueBuffer = new ArrowBuffer.Builder<byte>();
}
// Flags a match when the visited type has the same TypeId as the expected type.
// A non-matching visit leaves _dataTypeMatch untouched.
public void Visit(IArrowType actualType)
{
    bool sameTypeId = _expectedType.TypeId == actualType.TypeId;
    if (!sameTypeId)
    {
        return;
    }

    _dataTypeMatch = true;
}
// Lazily yields every record batch read from the input stream. The stream is left open
// when the reader is disposed (leaveOpen: true).
// If the stream produced no batches at all, a single zero-length batch with one empty array
// per schema field is yielded instead.
private IEnumerable<RecordBatch> GetInputIterator(Stream inputStream)
{
    using (var reader = new ArrowStreamReader(inputStream, leaveOpen: true))
    {
        bool yieldedAny = false;
        for (RecordBatch next = reader.ReadNextRecordBatch();
             next != null;
             next = reader.ReadNextRecordBatch())
        {
            yield return next;
            yieldedAny = true;
        }

        if (yieldedAny)
        {
            yield break;
        }

        // When no input batches were received, return an empty RecordBatch
        // in order to create and write back the result schema.
        var schema = reader.Schema;
        int fieldCount = schema.Fields.Count;
        var emptyColumns = new IArrowArray[fieldCount];
        for (int index = 0; index < fieldCount; ++index)
        {
            emptyColumns[index] = ArrowArrayHelpers.CreateEmptyArray(schema.GetFieldByIndex(index).DataType);
        }

        yield return new RecordBatch(schema, emptyColumns, 0);
    }
}
private void GenerateTestData <T, TArray, TArrayBuilder>(IArrowType type, Func <int, T> generator) where TArrayBuilder : IArrowArrayBuilder <T, TArray, TArrayBuilder> where TArray : IArrowArray { var resultBuilder = (IArrowArrayBuilder <T, TArray, TArrayBuilder>)ArrayArrayBuilderFactoryReflector.InvokeBuild(type); resultBuilder.Reserve(_baseDataTotalElementCount); for (int i = 0; i < _baseDataListCount; i++) { List <int?> dataList = _baseData[i]; var builder = (IArrowArrayBuilder <T, TArray, TArrayBuilder>)ArrayArrayBuilderFactoryReflector.InvokeBuild(type); builder.Reserve(dataList.Count); foreach (int?value in dataList) { if (value.HasValue) { builder.Append(generator(value.Value)); resultBuilder.Append(generator(value.Value)); } else { builder.AppendNull(); resultBuilder.AppendNull(); } } TestTargetArrayList.Add(builder.Build(default));
// Starts the builder with its defaults: empty name, NullType, nullable,
// and an empty metadata dictionary.
public Builder()
{
    _name = string.Empty;
    _type = NullType.Default;
    _nullable = true;
    _metadata = new Dictionary<string, string>();
}
// Builds a list array from its parts by wrapping them in an ArrayData and delegating to
// the (ArrayData, values) constructor.
// Buffer order is: null bitmap first, then value offsets. The values array's Data becomes
// the single child.
public ListArray(
    IArrowType dataType,
    int length,
    ArrowBuffer valueOffsetsBuffer,
    IArrowArray values,
    ArrowBuffer nullBitmapBuffer,
    int nullCount = 0,
    int offset = 0)
    : this(
        new ArrayData(
            dataType,
            length,
            nullCount,
            offset,
            new ArrowBuffer[] { nullBitmapBuffer, valueOffsetsBuffer },
            new[] { values.Data }),
        values)
{
}
// Internal constructor that stores the supplied metadata instance directly instead of
// copying it. Name, data type and nullability are handled by the three-argument constructor.
// copyCollections is expected to be false; this is checked with Debug.Assert only.
internal Field(
    string name,
    IArrowType dataType,
    bool nullable,
    IReadOnlyDictionary<string, string> metadata,
    bool copyCollections)
    : this(name, dataType, nullable)
{
    Debug.Assert(!copyCollections, "This internal constructor is to not copy the collections.");

    Metadata = metadata;
}
// Runs EnsureDataType (against the given type's TypeId) and EnsureBufferCount (against the
// expected buffer count) on every entry of _arrayDataList.
// NOTE(review): presumably those Ensure* helpers throw on mismatch — confirm in ArrayData.
private void CheckData(IArrowType type, int expectedBufferCount)
{
    foreach (ArrayData candidate in _arrayDataList)
    {
        candidate.EnsureDataType(type.TypeId);
        candidate.EnsureBufferCount(expectedBufferCount);
    }
}
// Builds a struct array from its parts by wrapping them in an ArrayData and delegating to
// the ArrayData-based constructor.
// The null bitmap is the only buffer; each child array contributes its Data as a child.
public StructArray(
    IArrowType dataType,
    int length,
    IEnumerable<Array> children,
    ArrowBuffer nullBitmapBuffer,
    int nullCount = 0,
    int offset = 0)
    : this(
        new ArrayData(
            dataType,
            length,
            nullCount,
            offset,
            new ArrowBuffer[] { nullBitmapBuffer },
            children.Select(childArray => childArray.Data)))
{
}
// Concatenates variable-length binary array data into Result.
// The inputs are first checked to have the given type and three buffers; the result's
// buffers are, in order: validity, offsets, values.
private void ConcatenateVariableBinaryArrayData(IArrowType type)
{
    CheckData(type, 3);

    // Keep the three concatenation calls in this order.
    ArrowBuffer validity = ConcatenateValidityBuffer();
    ArrowBuffer offsets = ConcatenateOffsetBuffer();
    ArrowBuffer values = ConcatenateVariableBinaryValueBuffer();

    var combinedBuffers = new[] { validity, offsets, values };
    Result = new ArrayData(type, _totalLength, _totalNullCount, 0, combinedBuffers);
}
// Builds the array from its parts by wrapping them in an ArrayData and delegating to the
// (ArrayData, uniqueValuesCount) constructor.
// Buffer order is: null bitmap, indices, data.
protected PrimitiveDictionaryArray(
    IArrowType dataType,
    int length,
    int uniqueValuesCount,
    ArrowBuffer nullBitmapBuffer,
    ArrowBuffer indices,
    ArrowBuffer dataBuffer,
    int nullCount = 0,
    int offset = 0)
    : this(
        new ArrayData(
            dataType,
            length,
            nullCount,
            offset,
            new ArrowBuffer[] { nullBitmapBuffer, indices, dataBuffer }),
        uniqueValuesCount)
{
}
private VectorOffset GetChildrenFieldOffset(Field field) { IArrowType targetDataType = field.DataType is DictionaryType dictionaryType ? dictionaryType.ValueType : field.DataType; if (!(targetDataType is NestedType type)) { return(default);
// Builds a binary array from its parts by wrapping them in an ArrayData and delegating to
// the ArrayData-based constructor.
// Buffer order is: null bitmap, value offsets, data.
public BinaryArray(
    IArrowType dataType,
    int length,
    ArrowBuffer valueOffsetsBuffer,
    ArrowBuffer dataBuffer,
    ArrowBuffer nullBitmapBuffer,
    int nullCount = 0,
    int offset = 0)
    : this(
        new ArrayData(
            dataType,
            length,
            nullCount,
            offset,
            new ArrowBuffer[] { nullBitmapBuffer, valueOffsetsBuffer, dataBuffer }))
{
}
// Core constructor: validates the name and stores name, data type and nullability.
// Throws ArgumentNullException when name is null, empty or whitespace-only.
// A null dataType is replaced by NullType.Default.
private Field(string name, IArrowType dataType, bool nullable)
{
    bool nameIsBlank = string.IsNullOrWhiteSpace(name);
    if (nameIsBlank)
    {
        throw new ArgumentNullException(nameof(name));
    }

    Name = name;
    if (dataType == null)
    {
        DataType = NullType.Default;
    }
    else
    {
        DataType = dataType;
    }
    IsNullable = nullable;
}
// Fills Buffer with the values 0, 1, ..., _length - 1, each converted to T via
// Convert.ChangeType.
// NOTE(review): the type parameter argument is not used in this body — confirm it is only
// there to match a caller's signature.
private void CreateNumberArray<T>(IArrowType type) where T : struct
{
    Type elementType = typeof(T);
    var values = new ArrowBuffer.Builder<T>(_length);

    int next = 0;
    while (next < _length)
    {
        values.Append((T)Convert.ChangeType(next, elementType));
        next++;
    }

    Buffer = values.Build();
}
// Creates a dictionary type from an index type, a value type and an ordered flag.
// Throws ArgumentException when indexType is not an IntegerType.
public DictionaryType(IArrowType indexType, IArrowType valueType, bool ordered)
{
    bool indexIsInteger = indexType is IntegerType;
    if (!indexIsInteger)
    {
        throw new ArgumentException($"{nameof(indexType)} must be integer");
    }

    Ordered = ordered;
    ValueType = valueType;
    IndexType = indexType;
}
// Adds to count one for the given type plus one for every type reachable through the
// fields of nested types, recursively. Children are counted before the type itself.
private static void CountSelfAndChildrenNodes(IArrowType type, ref int count)
{
    var nested = type as NestedType;
    if (nested != null)
    {
        foreach (Field child in nested.Fields)
        {
            CountSelfAndChildrenNodes(child.DataType, ref count);
        }
    }

    count += 1;
}
// Builds a one-field schema through Schema.Builder and checks that the field looked up by
// name reports the same name, data type name and nullability that were supplied.
public void FieldsHaveExpectedValues(string name, IArrowType type, bool nullable)
{
    var schemaBuilder = new Schema.Builder()
        .Field(fieldBuilder => fieldBuilder.Name(name).DataType(type).Nullable(nullable));
    var builtSchema = schemaBuilder.Build();

    var actual = builtSchema.Fields[name];

    Assert.Equal(name, actual.Name);
    Assert.Equal(type.Name, actual.DataType.Name);
    Assert.Equal(nullable, actual.IsNullable);
}
// Creates array data from the given parts. The buffer and child arrays are stored as passed
// (no copy). A null dataType is replaced by NullType.Default.
public ArrayData(
    IArrowType dataType,
    int length,
    int nullCount = 0,
    int offset = 0,
    ArrowBuffer[] buffers = null,
    ArrayData[] children = null)
{
    if (dataType == null)
    {
        DataType = NullType.Default;
    }
    else
    {
        DataType = dataType;
    }

    Length = length;
    NullCount = nullCount;
    Offset = offset;

    Buffers = buffers;
    Children = children;
}
// Creates array data from the given parts. Non-null buffer and child sequences are
// materialized with ToArray(); null sequences stay null.
// A null dataType is replaced by NullType.Default.
public ArrayData(
    IArrowType dataType,
    int length,
    int nullCount = 0,
    int offset = 0,
    IEnumerable<ArrowBuffer> buffers = null,
    IEnumerable<ArrayData> children = null)
{
    if (dataType == null)
    {
        DataType = NullType.Default;
    }
    else
    {
        DataType = dataType;
    }

    Length = length;
    NullCount = nullCount;
    Offset = offset;

    Buffers = buffers == null ? null : buffers.ToArray();
    Children = children == null ? null : children.ToArray();
}
// Sets up an encoder for a list column. The column is expected to have exactly one child
// (checked with Debug.Assert only); that child's encoder comes from EncoderHelper, and the
// value type from TypeConverter.Convert applied to the column itself.
public ListEncoder(Column column)
{
    Debug.Assert(column.Children.Count == 1);

    _getFunc = column.GetFunction;
    _nullable = column.IsNullable;

    var onlyChild = column.Children.First();
    _childEncoder = EncoderHelper.GetEncoder(onlyChild);
    _valueType = TypeConverter.Convert(column);

    // Fresh, empty builders for the offsets and the null bitmap.
    offsetBuilder = new ArrowBuffer.Builder<int>();
    nullBitmap = new ArrowBuffer.BitmapBuilder();
}
// Creates a field with the given name, data type, nullability and optional metadata.
// Throws ArgumentNullException when name is null, empty or whitespace-only.
// A null dataType is replaced by NullType.Default. Non-null metadata is copied into a new
// dictionary; null metadata stays null.
public Field(
    string name,
    IArrowType dataType,
    bool nullable,
    IEnumerable<KeyValuePair<string, string>> metadata = default)
{
    bool nameIsBlank = string.IsNullOrWhiteSpace(name);
    if (nameIsBlank)
    {
        throw new ArgumentNullException(nameof(name));
    }

    Name = name;
    if (dataType == null)
    {
        DataType = NullType.Default;
    }
    else
    {
        DataType = dataType;
    }
    IsNullable = nullable;

    Metadata = metadata == null
        ? null
        : metadata.ToDictionary(entry => entry.Key, entry => entry.Value);
}
// Builds zero-length array data for the given type, with two or three empty buffers
// depending on which of the s_twoBufferArrowTypes / s_threeBufferArrowTypes sets contains
// the type id (the two-buffer set is checked first).
// Throws NotSupportedException when the type id is in neither set.
private static ArrayData BuildEmptyArrayDataFromArrowType(IArrowType arrowType)
{
    ArrowBuffer[] emptyBuffers;

    if (s_twoBufferArrowTypes.Contains(arrowType.TypeId))
    {
        emptyBuffers = new[] { ArrowBuffer.Empty, ArrowBuffer.Empty };
    }
    else if (s_threeBufferArrowTypes.Contains(arrowType.TypeId))
    {
        emptyBuffers = new[] { ArrowBuffer.Empty, ArrowBuffer.Empty, ArrowBuffer.Empty };
    }
    else
    {
        throw new NotSupportedException($"Unsupported type: {arrowType.TypeId}");
    }

    return new ArrayData(arrowType, 0, buffers: emptyBuffers);
}