/// <summary>
/// Initializes the builder with its Arrow data type and per-value byte width,
/// and creates fresh, empty builders for the value bytes and the validity bitmap.
/// </summary>
/// <param name="dataType">Arrow type stored in <c>DataType</c>.</param>
/// <param name="byteWidth">Width stored in <c>ByteWidth</c>.</param>
protected BuilderBase(IArrowType dataType, int byteWidth)
{
    DataType = dataType;
    ByteWidth = byteWidth;

    // Both builders start out empty; values are raw bytes, validity is a bitmap.
    var values = new ArrowBuffer.Builder<byte>();
    var validity = new ArrowBuffer.BitmapBuilder();

    ValueBuffer = values;
    ValidityBuffer = validity;
}
/// <summary>
/// Prepares struct-array fixtures: adds three identically built
/// <see cref="StructArray"/> inputs to <c>TestTargetArrayList</c> and assigns
/// <c>ExpectedArray</c>, whose children accumulate the values of all three inputs.
/// </summary>
/// <param name="type">Struct type passed to every <see cref="StructArray"/> created here.</param>
public void Visit(StructType type)
{
    // TODO: Make data from type fields.
    // The following can be improved with a Builder class for StructArray.

    // NOTE(review): every struct below is declared with length 3 and a 3-bit
    // validity bitmap, while each input's children hold 4 values and the
    // expected children hold 12 — confirm this mismatch is intended.
    ArrowBuffer nullBitmapBuffer = new ArrowBuffer.BitmapBuilder()
        .Append(true)
        .Append(true)
        .Append(false)
        .Build();

    var expectedStrings = new StringArray.Builder();
    var expectedInts = new Int32Array.Builder();

    for (int round = 0; round < 3; round++)
    {
        // Accumulate the same values into the expected children...
        expectedStrings.Append("joe").AppendNull().AppendNull().Append("mark");
        expectedInts.Append(1).Append(2).AppendNull().Append(4);

        // ...and build a standalone input struct from fresh child arrays.
        StringArray inputStrings = new StringArray.Builder()
            .Append("joe").AppendNull().AppendNull().Append("mark")
            .Build();
        Int32Array inputInts = new Int32Array.Builder()
            .Append(1).Append(2).AppendNull().Append(4)
            .Build();

        var inputChildren = new List<Array> { inputStrings, inputInts };
        TestTargetArrayList.Add(new StructArray(type, 3, inputChildren, nullBitmapBuffer, 1));
    }

    var expectedChildren = new List<Array>
    {
        expectedStrings.Build(),
        expectedInts.Build(),
    };
    ExpectedArray = new StructArray(type, 3, expectedChildren, nullBitmapBuffer, 1);
}
/// <summary>
/// Builds a <see cref="StructArray"/> of <c>Length</c> rows with two children:
/// a string array holding each row index as text and an Int32 array holding
/// the row index itself. Every row is marked valid.
/// </summary>
/// <param name="type">Struct type passed to the resulting array.</param>
public void Visit(StructType type)
{
    var textBuilder = new StringArray.Builder();
    var numberBuilder = new Int32Array.Builder();
    var validity = new ArrowBuffer.BitmapBuilder();

    // One pass fills both children and the all-valid bitmap.
    for (int row = 0; row < Length; row++)
    {
        textBuilder.Append(row.ToString());
        numberBuilder.Append(row);
        validity.Append(true);
    }

    StringArray textColumn = textBuilder.Build();
    Int32Array numberColumn = numberBuilder.Build();
    var children = new List<Array> { textColumn, numberColumn };

    Array = new StructArray(type, Length, children, validity.Build());
}
/// <summary>
/// Creates an encoder for a list column. The column is expected to have exactly
/// one child (asserted in debug builds); an encoder for that child is obtained
/// from <c>EncoderHelper</c>, and empty offset and null-bitmap builders are created.
/// </summary>
/// <param name="column">Column supplying the getter, nullability, child and type.</param>
public ListEncoder(Column column)
{
    Debug.Assert(column.Children.Count == 1);

    // Accessors copied from the column description.
    _getFunc = column.GetFunction;
    _nullable = column.IsNullable;

    // Encoder for the single element column, then the converted type of the column itself.
    _childEncoder = EncoderHelper.GetEncoder(column.Children.First());
    _valueType = TypeConverter.Convert(column);

    // Fresh builders for list offsets and validity.
    offsetBuilder = new ArrowBuffer.Builder<int>();
    nullBitmap = new ArrowBuffer.BitmapBuilder();
}
/// <summary>
/// Appending one bit to a builder that has been filled to its reported capacity
/// must return the same builder, increase the length by one, and leave the
/// capacity at least as large as the new length.
/// </summary>
public void IncreasesCapacityWhenRequired()
{
    // Arrange
    var builder = new ArrowBuffer.BitmapBuilder();
    int capacityBefore = builder.Capacity;
    builder.AppendRange(Enumerable.Repeat(true, capacityBefore)); // Fill to capacity.

    // Act
    var returned = builder.Append(true);

    // Assert
    int lengthAfter = capacityBefore + 1;
    Assert.Equal(builder, returned);
    Assert.Equal(lengthAfter, builder.Length);
    Assert.True(builder.Capacity >= lengthAfter);
}
/// <summary>
/// Builds a <see cref="StructArray"/> of <c>Length</c> rows with one child per
/// field of <paramref name="type"/>, each produced by <c>CreateArray</c>.
/// Every row is marked valid.
/// </summary>
/// <param name="type">Struct type whose fields define the children.</param>
public void Visit(StructType type)
{
    var children = new IArrowArray[type.Fields.Count];
    for (int fieldIndex = 0; fieldIndex < children.Length; fieldIndex++)
    {
        children[fieldIndex] = CreateArray(type.Fields[fieldIndex], Length);
    }

    // All-valid bitmap: one set bit per row.
    var validity = new ArrowBuffer.BitmapBuilder();
    for (int row = 0; row < Length; row++)
    {
        validity.Append(true);
    }

    ArrowBuffer validityBuffer = validity.Build();
    Array = new StructArray(type, Length, children, validityBuffer);
}
/// <summary>
/// Creates an encoder for an object column: stores the column's getter,
/// nullability and converted type, obtains one child encoder per child column
/// from <c>EncoderHelper</c>, and creates an empty null-bitmap builder.
/// </summary>
/// <param name="column">Column supplying the getter, nullability, children and type.</param>
public ObjectEncoder(Column column)
{
    _getFunc = column.GetFunction;
    _nullable = column.IsNullable;
    _type = TypeConverter.Convert(column);

    // One encoder per child column, in child order.
    int childCount = column.Children.Count;
    _childEncoders = new IArrowEncoder[childCount];
    for (int index = 0; index < _childEncoders.Length; index++)
    {
        var child = column.Children[index];
        _childEncoders[index] = EncoderHelper.GetEncoder(child);
    }

    nullBitmap = new ArrowBuffer.BitmapBuilder();
}
/// <summary>
/// After a builder has been filled and then cleared, appending a single bit
/// must return the same builder and leave it with length 1 and set/unset
/// counts that reflect only the newly appended bit.
/// </summary>
/// <param name="initialContentsToClear">Bits appended and then cleared before the act step.</param>
/// <param name="valueToAppend">The single bit appended after clearing.</param>
public void AfterClearIncreasesLength(bool[] initialContentsToClear, bool valueToAppend)
{
    // Arrange
    var builder = new ArrowBuffer.BitmapBuilder();
    builder.AppendRange(initialContentsToClear);
    builder.Clear();

    // Act
    var returned = builder.Append(valueToAppend);

    // Assert
    int expectedSet;
    int expectedUnset;
    if (valueToAppend)
    {
        expectedSet = 1;
        expectedUnset = 0;
    }
    else
    {
        expectedSet = 0;
        expectedUnset = 1;
    }

    Assert.Equal(builder, returned);
    Assert.Equal(1, builder.Length);
    Assert.True(builder.Capacity >= 1);
    Assert.Equal(expectedSet, builder.SetBitCount);
    Assert.Equal(expectedUnset, builder.UnsetBitCount);
}
/// <summary>
/// Builds a <see cref="BooleanArray"/> from the current JSON field data: the raw
/// JSON of <c>JsonFieldData.Data</c> is deserialized as a <c>bool[]</c>, packed
/// into a bitmap value buffer, and combined with the validity buffer and null
/// count returned by <c>GetValidityBuffer</c>.
/// </summary>
/// <param name="type">Visited boolean type (not otherwise used here).</param>
public void Visit(BooleanType type)
{
    ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount);

    // NOTE(review): the builder is constructed with validityBuffer.Length;
    // presumably an initial-capacity hint — confirm the expected units.
    var bitBuilder = new ArrowBuffer.BitmapBuilder(validityBuffer.Length);

    string rawJson = JsonFieldData.Data.GetRawText();
    bool[] flags = JsonSerializer.Deserialize<bool[]>(rawJson);

    for (int index = 0; index < flags.Length; index++)
    {
        bitBuilder.Append(flags[index]);
    }

    ArrowBuffer valueBuffer = bitBuilder.Build();
    int length = JsonFieldData.Count;

    Array = new BooleanArray(valueBuffer, validityBuffer, length, nullCount, 0);
}
/// <summary>
/// Appending one bit to a builder pre-filled with <paramref name="initialContents"/>
/// must return the same builder and produce the expected length and
/// set/unset bit counts.
/// </summary>
/// <param name="initialContents">Bits appended before the act step.</param>
/// <param name="valueToAppend">The bit appended in the act step.</param>
/// <param name="expectedLength">Expected <c>Length</c> after the append.</param>
/// <param name="expectedSetBitCount">Expected <c>SetBitCount</c> after the append.</param>
/// <param name="expectedUnsetBitCount">Expected <c>UnsetBitCount</c> after the append.</param>
public void IncreasesLength(
    bool[] initialContents,
    bool valueToAppend,
    int expectedLength,
    int expectedSetBitCount,
    int expectedUnsetBitCount)
{
    // Arrange
    var bitmap = new ArrowBuffer.BitmapBuilder();
    bitmap.AppendRange(initialContents);

    // Act
    var returned = bitmap.Append(valueToAppend);

    // Assert
    Assert.Equal(bitmap, returned);
    Assert.Equal(expectedLength, bitmap.Length);
    Assert.True(bitmap.Capacity >= expectedLength);
    Assert.Equal(expectedSetBitCount, bitmap.SetBitCount);
    Assert.Equal(expectedUnsetBitCount, bitmap.UnsetBitCount);
}
/// <summary>
/// Creates test data for a list whose values are dictionary-encoded strings:
/// a <see cref="ListArray"/> with one single-element list per dictionary entry,
/// paired with the <see cref="Field"/> describing it.
/// </summary>
/// <param name="dictionary">String values used as the dictionary; indices 0..Length-1 reference them in order.</param>
/// <returns>A tuple of the list field (named after the list's value type) and the list array.</returns>
private Tuple<Field, ListArray> CreateDictionaryTypeListArrayTestData(StringArray dictionary)
{
    Int32Array indiceArray = new Int32Array.Builder().AppendRange(Enumerable.Range(0, dictionary.Length)).Build();

    // DictionaryArray has no Builder for now, so creating ListArray directly.
    var dictionaryType = new DictionaryType(Int32Type.Default, StringType.Default, false);
    var dictionaryArray = new DictionaryArray(dictionaryType, indiceArray, dictionary);

    var valueOffsetsBufferBuilder = new ArrowBuffer.Builder<int>();
    var validityBufferBuilder = new ArrowBuffer.BitmapBuilder();

    // Offsets 0..Length describe Length lists of one element each; a validity
    // bit is appended alongside every offset, as in the original loop.
    foreach (int i in Enumerable.Range(0, dictionary.Length + 1))
    {
        valueOffsetsBufferBuilder.Append(i);
        validityBufferBuilder.Append(true);
    }

    var dictionaryField = new Field("dictionaryField_list", dictionaryType, false);
    var listType = new ListType(dictionaryField);

    // There is one more offset than there are lists.
    int listLength = valueOffsetsBufferBuilder.Length - 1;
    ArrowBuffer valueOffsetsBuffer = valueOffsetsBufferBuilder.Build();
    ArrowBuffer validityBuffer = validityBufferBuilder.Build();

    // Fix: pass the validity bitmap as the null-bitmap argument. Previously the
    // offsets buffer was passed a second time and the validity builder was unused.
    var listArray = new ListArray(listType, listLength, valueOffsetsBuffer, dictionaryArray, validityBuffer);

    return Tuple.Create(new Field($"listField_{listType.ValueDataType.Name}", listType, false), listArray);
}
/// <summary>
/// Round-trips a record batch with a single column that is a list of structs
/// (name: string, age: int64). Six struct rows are grouped into three lists by
/// the offsets 0, 2, 5, 6.
/// </summary>
public void TestListOfStructArray()
{
    // Schema: listOfStructs: list<struct<name: string, age: int64>>
    var nameField = new Field("name", StringType.Default, nullable: false);
    var ageField = new Field("age", Int64Type.Default, nullable: false);
    Field structField = new Field(
        "struct",
        new StructType(new[] { nameField, ageField }),
        nullable: false);
    Field listField = new Field("listOfStructs", new ListType(structField), nullable: false);
    Schema schema = new Schema.Builder().Field(listField).Build();

    // Struct children: six rows each.
    StringArray names = new StringArray.Builder()
        .Append("joe")
        .AppendNull()
        .AppendNull()
        .Append("mark")
        .Append("abe")
        .Append("phil")
        .Build();
    Int64Array ages = new Int64Array.Builder()
        .Append(1)
        .Append(2)
        .AppendNull()
        .Append(4)
        .Append(10)
        .Append(55)
        .Build();

    // Third struct row is null.
    ArrowBuffer structValidity = new ArrowBuffer.BitmapBuilder()
        .Append(true)
        .Append(true)
        .Append(false)
        .Append(true)
        .Append(true)
        .Append(true)
        .Build();
    IArrowArray[] structChildren = { names, ages };
    StructArray structs = new StructArray(structField.DataType, 6, structChildren, structValidity, nullCount: 1);

    // Three lists covering struct rows [0,2), [2,5) and [5,6).
    ArrowBuffer listOffsets = new ArrowBuffer.Builder<int>()
        .Append(0)
        .Append(2)
        .Append(5)
        .Append(6)
        .Build();
    ListArray listArray = new ListArray(listField.DataType, 3, listOffsets, structs, ArrowBuffer.Empty);

    RecordBatch batch = new RecordBatch(schema, new[] { listArray }, 3);
    TestRoundTripRecordBatch(batch);
}
/// <summary>
/// Wraps the columns of a reference record batch in a single struct column,
/// loads that into a <see cref="DataFrame"/>, and checks that converting back
/// to Arrow yields the same columns with their names prefixed by "Struct_".
/// </summary>
public void TestRecordBatchWithStructArrays()
{
    // Builds the 10-row reference batch; every column name is prefixed with the given string.
    RecordBatch CreateRecordBatch(string prependColumnNamesWith = "")
    {
        string Named(string column) => prependColumnNamesWith + column;

        // Int32 column with a hand-written validity bitmap (one null).
        var column2 = new Int32Array(
            valueBuffer: new ArrowBuffer.Builder<int>().AppendRange(Enumerable.Range(0, 10)).Build(),
            nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0xfd).Append(0xff).Build(),
            length: 10,
            nullCount: 1,
            offset: 0);

        // Int32 column where every entry is null.
        var column3 = new Int32Array(
            valueBuffer: new ArrowBuffer.Builder<int>().AppendRange(Enumerable.Range(0, 10)).Build(),
            nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0x00).Append(0x00).Build(),
            length: 10,
            nullCount: 10,
            offset: 0);

        // Boolean column with two nulls.
        var nullableBooleans = new BooleanArray(
            valueBuffer: new ArrowBuffer.Builder<byte>().Append(0xfd).Append(0xff).Build(),
            nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0xed).Append(0xff).Build(),
            length: 10,
            nullCount: 2,
            offset: 0);

        var strings = new StringArray.Builder()
            .AppendRange(Enumerable.Range(0, 10).Select(x => x.ToString()))
            .Build();
        var doubles = new DoubleArray.Builder().AppendRange(Enumerable.Repeat(1.0, 10)).Build();
        var floats = new FloatArray.Builder().AppendRange(Enumerable.Repeat(1.0f, 10)).Build();
        var shorts = new Int16Array.Builder().AppendRange(Enumerable.Repeat((short)1, 10)).Build();
        var longs = new Int64Array.Builder().AppendRange(Enumerable.Repeat((long)1, 10)).Build();
        var uints = new UInt32Array.Builder().AppendRange(Enumerable.Repeat((uint)1, 10)).Build();
        var ushorts = new UInt16Array.Builder().AppendRange(Enumerable.Repeat((ushort)1, 10)).Build();
        var ulongs = new UInt64Array.Builder().AppendRange(Enumerable.Repeat((ulong)1, 10)).Build();
        var sbytes = new Int8Array.Builder().AppendRange(Enumerable.Repeat((sbyte)1, 10)).Build();
        var bytes = new UInt8Array.Builder().AppendRange(Enumerable.Repeat((byte)1, 10)).Build();

        // Column order matters for the comparison below; keep it as listed.
        RecordBatch ret = new RecordBatch.Builder()
            .Append(Named("Column1"), false, col => col.Int32(array => array.AppendRange(Enumerable.Range(0, 10))))
            .Append(Named("Column2"), true, column2)
            .Append(Named("Column3"), true, column3)
            .Append(Named("NullableBooleanColumn"), true, nullableBooleans)
            .Append(Named("StringDataFrameColumn"), false, strings)
            .Append(Named("DoubleColumn"), false, doubles)
            .Append(Named("FloatColumn"), false, floats)
            .Append(Named("ShortColumn"), false, shorts)
            .Append(Named("LongColumn"), false, longs)
            .Append(Named("UIntColumn"), false, uints)
            .Append(Named("UShortColumn"), false, ushorts)
            .Append(Named("ULongColumn"), false, ulongs)
            .Append(Named("ByteColumn"), false, sbytes)
            .Append(Named("UByteColumn"), false, bytes)
            .Build();

        return ret;
    }

    RecordBatch originalBatch = CreateRecordBatch();

    // All struct rows are valid.
    var validityBuilder = new ArrowBuffer.BitmapBuilder();
    for (int row = 0; row < originalBatch.Length; row++)
    {
        validityBuilder.Append(true);
    }

    ArrowBuffer validityBitmap = validityBuilder.Build();

    // Wrap every original column as a child of one struct column.
    List<Field> structFields = originalBatch.Schema.Fields
        .Select((KeyValuePair<string, Field> pair) => pair.Value)
        .ToList();
    StructType structType = new StructType(structFields);
    StructArray structArray = new StructArray(
        structType,
        originalBatch.Length,
        originalBatch.Arrays.Cast<Apache.Arrow.Array>(),
        validityBitmap);

    Schema schema = new Schema.Builder().Field(new Field("Struct", structType, false)).Build();
    RecordBatch recordBatch = new RecordBatch(schema, new[] { structArray }, originalBatch.Length);

    DataFrame df = DataFrame.FromArrowRecordBatch(recordBatch);
    DataFrameIOTests.VerifyColumnTypes(df, testArrowStringColumn: true);

    IEnumerable<RecordBatch> recordBatches = df.ToArrowRecordBatches();
    RecordBatch expected = CreateRecordBatch("Struct_");
    foreach (RecordBatch actual in recordBatches)
    {
        RecordBatchComparer.CompareBatches(expected, actual);
    }
}
/// <summary>
/// Builds a four-row <see cref="StructArray"/> with a string child and an Int32
/// child, then verifies its length, null count, and that each child's
/// <see cref="ArrayData"/> can be re-wrapped as the original array type with
/// identical contents.
/// </summary>
public void TestStructArray()
{
    // The following can be improved with a Builder class for StructArray.
    var fields = new List<Field>
    {
        new Field.Builder().Name("Strings").DataType(StringType.Default).Nullable(true).Build(),
        new Field.Builder().Name("Ints").DataType(Int32Type.Default).Nullable(true).Build(),
    };
    StructType structType = new StructType(fields);

    StringArray stringArray = new StringArray.Builder()
        .Append("joe").AppendNull().AppendNull().Append("mark")
        .Build();
    Int32Array intArray = new Int32Array.Builder()
        .Append(1).Append(2).AppendNull().Append(4)
        .Build();
    var arrays = new List<Array> { stringArray, intArray };

    // Third struct row is null.
    ArrowBuffer nullBitmapBuffer = new ArrowBuffer.BitmapBuilder()
        .Append(true).Append(true).Append(false).Append(true)
        .Build();
    StructArray structs = new StructArray(structType, 4, arrays, nullBitmapBuffer, 1);

    Assert.Equal(4, structs.Length);
    Assert.Equal(1, structs.NullCount);

    ArrayData[] childArrays = structs.Data.Children; // Data for StringArray and Int32Array
    Assert.Equal(2, childArrays.Length);

    for (int childIndex = 0; childIndex < childArrays.Length; childIndex++)
    {
        ArrayData childData = childArrays[childIndex];
        Assert.Null(childData.Children);

        switch (childIndex)
        {
            case 0:
            {
                Assert.Equal(ArrowTypeId.String, childData.DataType.TypeId);
                Array rewrapped = new StringArray(childData);
                StringArray structStringArray = rewrapped as StringArray;
                Assert.NotNull(structStringArray);
                Assert.Equal(structs.Length, structStringArray.Length);
                Assert.Equal(stringArray.Length, structStringArray.Length);
                Assert.Equal(stringArray.NullCount, structStringArray.NullCount);
                for (int row = 0; row < stringArray.Length; row++)
                {
                    Assert.Equal(stringArray.GetString(row), structStringArray.GetString(row));
                }

                break;
            }

            case 1:
            {
                Assert.Equal(ArrowTypeId.Int32, childData.DataType.TypeId);
                Array rewrapped = new Int32Array(childData);
                Int32Array structIntArray = rewrapped as Int32Array;
                Assert.NotNull(structIntArray);
                Assert.Equal(structs.Length, structIntArray.Length);
                Assert.Equal(intArray.Length, structIntArray.Length);
                Assert.Equal(intArray.NullCount, structIntArray.NullCount);
                for (int row = 0; row < intArray.Length; row++)
                {
                    Assert.Equal(intArray.GetValue(row), structIntArray.GetValue(row));
                }

                break;
            }
        }
    }
}