Ejemplo n.º 1
0
        /// <summary>
        /// Round-trips a hand-built Arrow <see cref="RecordBatch"/> through a <see cref="DataFrame"/>:
        /// the batch is loaded with <c>DataFrame.FromArrowRecordBatch</c>, its column types are checked
        /// with <c>DataFrameIOTests.VerifyColumnTypes</c>, and every batch yielded by
        /// <c>ToArrowRecordBatches</c> is compared against the original batch.
        /// </summary>
        public void TestArrowIntegration()
        {
            // Every column in the batch holds this many rows.
            const int rowCount = 10;

            // Bitmap 0xfd, 0xff: exactly one of the first ten bits is cleared, agreeing with nullCount: 1.
            Int32Array oneNullInts = new Int32Array(
                valueBuffer: new ArrowBuffer.Builder<int>().AppendRange(Enumerable.Range(0, rowCount)).Build(),
                nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0xfd).Append(0xff).Build(),
                length: rowCount,
                nullCount: 1,
                offset: 0);

            // Bitmap 0x00, 0x00: every bit is cleared, agreeing with nullCount: 10.
            Int32Array allNullInts = new Int32Array(
                valueBuffer: new ArrowBuffer.Builder<int>().AppendRange(Enumerable.Range(0, rowCount)).Build(),
                nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0x00).Append(0x00).Build(),
                length: rowCount,
                nullCount: rowCount,
                offset: 0);

            // Bitmap 0xed, 0xff: two of the first ten bits are cleared, agreeing with nullCount: 2.
            BooleanArray twoNullBools = new BooleanArray(
                valueBuffer: new ArrowBuffer.Builder<byte>().Append(0xfd).Append(0xff).Build(),
                nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0xed).Append(0xff).Build(),
                length: rowCount,
                nullCount: 2,
                offset: 0);

            RecordBatch sourceBatch = new RecordBatch.Builder()
                .Append("Column1", false, factory => factory.Int32(ints => ints.AppendRange(Enumerable.Range(0, rowCount))))
                .Append("Column2", true, oneNullInts)
                .Append("Column3", true, allNullInts)
                .Append("NullableBooleanColumn", true, twoNullBools)
                .Append("StringDataFrameColumn", false, new StringArray.Builder().AppendRange(Enumerable.Range(0, rowCount).Select(n => n.ToString())).Build())
                .Append("DoubleColumn", false, new DoubleArray.Builder().AppendRange(Enumerable.Repeat(1.0, rowCount)).Build())
                .Append("FloatColumn", false, new FloatArray.Builder().AppendRange(Enumerable.Repeat(1.0f, rowCount)).Build())
                .Append("ShortColumn", false, new Int16Array.Builder().AppendRange(Enumerable.Repeat((short)1, rowCount)).Build())
                .Append("LongColumn", false, new Int64Array.Builder().AppendRange(Enumerable.Repeat((long)1, rowCount)).Build())
                .Append("UIntColumn", false, new UInt32Array.Builder().AppendRange(Enumerable.Repeat((uint)1, rowCount)).Build())
                .Append("UShortColumn", false, new UInt16Array.Builder().AppendRange(Enumerable.Repeat((ushort)1, rowCount)).Build())
                .Append("ULongColumn", false, new UInt64Array.Builder().AppendRange(Enumerable.Repeat((ulong)1, rowCount)).Build())
                .Append("ByteColumn", false, new Int8Array.Builder().AppendRange(Enumerable.Repeat((sbyte)1, rowCount)).Build())
                .Append("UByteColumn", false, new UInt8Array.Builder().AppendRange(Enumerable.Repeat((byte)1, rowCount)).Build())
                .Build();

            // Arrow -> DataFrame.
            DataFrame frame = DataFrame.FromArrowRecordBatch(sourceBatch);
            DataFrameIOTests.VerifyColumnTypes(frame, testArrowStringColumn: true);

            // DataFrame -> Arrow: each batch coming back is compared with the batch we started from.
            foreach (RecordBatch roundTripped in frame.ToArrowRecordBatches())
            {
                RecordBatchComparer.CompareBatches(sourceBatch, roundTripped);
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Wraps all columns of a flat Arrow batch in a single non-nullable struct column named
        /// "Struct", loads that batch into a <see cref="DataFrame"/>, and checks that every batch
        /// yielded by <c>ToArrowRecordBatches</c> matches the flat batch rebuilt with each column
        /// name prefixed by "Struct_".
        /// </summary>
        public void TestRecordBatchWithStructArrays()
        {
            // Every column produced by BuildFlatBatch holds this many rows.
            const int valuesPerColumn = 10;

            // Builds the flat reference batch; namePrefix is prepended to every column name.
            RecordBatch BuildFlatBatch(string namePrefix = "")
            {
                // Bitmap 0xfd, 0xff: exactly one of the first ten bits is cleared, agreeing with nullCount: 1.
                Int32Array oneNullInts = new Int32Array(
                    valueBuffer: new ArrowBuffer.Builder<int>().AppendRange(Enumerable.Range(0, valuesPerColumn)).Build(),
                    nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0xfd).Append(0xff).Build(),
                    length: valuesPerColumn,
                    nullCount: 1,
                    offset: 0);

                // Bitmap 0x00, 0x00: every bit is cleared, agreeing with nullCount: 10.
                Int32Array allNullInts = new Int32Array(
                    valueBuffer: new ArrowBuffer.Builder<int>().AppendRange(Enumerable.Range(0, valuesPerColumn)).Build(),
                    nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0x00).Append(0x00).Build(),
                    length: valuesPerColumn,
                    nullCount: valuesPerColumn,
                    offset: 0);

                // Bitmap 0xed, 0xff: two of the first ten bits are cleared, agreeing with nullCount: 2.
                BooleanArray twoNullBools = new BooleanArray(
                    valueBuffer: new ArrowBuffer.Builder<byte>().Append(0xfd).Append(0xff).Build(),
                    nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0xed).Append(0xff).Build(),
                    length: valuesPerColumn,
                    nullCount: 2,
                    offset: 0);

                return new RecordBatch.Builder()
                    .Append(namePrefix + "Column1", false, factory => factory.Int32(ints => ints.AppendRange(Enumerable.Range(0, valuesPerColumn))))
                    .Append(namePrefix + "Column2", true, oneNullInts)
                    .Append(namePrefix + "Column3", true, allNullInts)
                    .Append(namePrefix + "NullableBooleanColumn", true, twoNullBools)
                    .Append(namePrefix + "StringDataFrameColumn", false, new StringArray.Builder().AppendRange(Enumerable.Range(0, valuesPerColumn).Select(n => n.ToString())).Build())
                    .Append(namePrefix + "DoubleColumn", false, new DoubleArray.Builder().AppendRange(Enumerable.Repeat(1.0, valuesPerColumn)).Build())
                    .Append(namePrefix + "FloatColumn", false, new FloatArray.Builder().AppendRange(Enumerable.Repeat(1.0f, valuesPerColumn)).Build())
                    .Append(namePrefix + "ShortColumn", false, new Int16Array.Builder().AppendRange(Enumerable.Repeat((short)1, valuesPerColumn)).Build())
                    .Append(namePrefix + "LongColumn", false, new Int64Array.Builder().AppendRange(Enumerable.Repeat((long)1, valuesPerColumn)).Build())
                    .Append(namePrefix + "UIntColumn", false, new UInt32Array.Builder().AppendRange(Enumerable.Repeat((uint)1, valuesPerColumn)).Build())
                    .Append(namePrefix + "UShortColumn", false, new UInt16Array.Builder().AppendRange(Enumerable.Repeat((ushort)1, valuesPerColumn)).Build())
                    .Append(namePrefix + "ULongColumn", false, new UInt64Array.Builder().AppendRange(Enumerable.Repeat((ulong)1, valuesPerColumn)).Build())
                    .Append(namePrefix + "ByteColumn", false, new Int8Array.Builder().AppendRange(Enumerable.Repeat((sbyte)1, valuesPerColumn)).Build())
                    .Append(namePrefix + "UByteColumn", false, new UInt8Array.Builder().AppendRange(Enumerable.Repeat((byte)1, valuesPerColumn)).Build())
                    .Build();
            }

            RecordBatch flatBatch = BuildFlatBatch();
            int structLength = flatBatch.Length;

            // One "true" bit per row: the struct column itself is marked valid everywhere.
            ArrowBuffer.BitmapBuilder validityBuilder = new ArrowBuffer.BitmapBuilder();
            for (int remaining = structLength; remaining > 0; remaining--)
            {
                validityBuilder.Append(true);
            }
            ArrowBuffer structValidity = validityBuilder.Build();

            // The struct's child fields and child arrays are taken straight from the flat batch.
            List<Field> childFields = flatBatch.Schema.Fields
                .Select((KeyValuePair<string, Field> entry) => entry.Value)
                .ToList();
            StructType structType = new StructType(childFields);
            StructArray structColumn = new StructArray(
                structType,
                structLength,
                flatBatch.Arrays.Cast<Apache.Arrow.Array>(),
                structValidity);

            Schema structSchema = new Schema.Builder()
                .Field(new Field("Struct", structType, false))
                .Build();
            RecordBatch nestedBatch = new RecordBatch(structSchema, new[] { structColumn }, structLength);

            DataFrame frame = DataFrame.FromArrowRecordBatch(nestedBatch);
            DataFrameIOTests.VerifyColumnTypes(frame, testArrowStringColumn: true);

            // Batches coming back are expected to equal the flat batch with "Struct_"-prefixed names.
            RecordBatch expectedFlat = BuildFlatBatch("Struct_");
            foreach (RecordBatch actual in frame.ToArrowRecordBatches())
            {
                RecordBatchComparer.CompareBatches(expectedFlat, actual);
            }
        }