示例#1
0
        /// <summary>
        /// Verifies that the schema read through the <c>IDataView</c> view of a <c>DataFrame</c>
        /// follows the frame when columns are removed, inserted and replaced.
        /// </summary>
        public void TestIDataViewSchemaInvalidate()
        {
            DataFrame frame = MakeDataFrameWithAllMutableColumnTypes(10, withNulls: false);
            IDataView view = frame;

            // Baseline snapshot, taken before any mutation of the frame.
            DataViewSchema initialSchema = view.Schema;
            Assert.Equal(14, initialSchema.Count);

            // Dropping a column must show up the next time the schema is read.
            frame.RemoveColumn("Bool");
            DataViewSchema schemaAfterRemove = view.Schema;
            Assert.Equal(13, schemaAfterRemove.Count);

            // Put a fresh "Bool" column (true on odd rows) at the front.
            int rowCount = (int)frame.RowCount;
            BaseColumn oddRows = new PrimitiveColumn<bool>("Bool", Enumerable.Range(0, rowCount).Select(row => row % 2 == 1));
            frame.InsertColumn(0, oddRows);
            DataViewSchema schemaAfterInsert = view.Schema;
            Assert.Equal(14, schemaAfterInsert.Count);
            Assert.Equal("Bool", schemaAfterInsert[0].Name);

            // Overwrite column 1 with a renamed clone and check the new name is visible.
            BaseColumn renamedClone = oddRows.Clone();
            renamedClone.Name = "BoolClone";
            frame.SetColumn(1, renamedClone);
            DataViewSchema schemaAfterSet = view.Schema;
            Assert.Equal("BoolClone", schemaAfterSet[1].Name);
        }
        /// <summary>
        /// Verifies that a <c>DataFrame</c> whose columns hold no rows still produces at least one
        /// Arrow record batch, and that any batch read back after writing it to a stream compares
        /// equal to the batch that was written.
        /// </summary>
        public void TestEmptyDataFrameRecordBatch()
        {
            PrimitiveColumn<int> ageColumn = new PrimitiveColumn<int>("Age");
            PrimitiveColumn<int> lengthColumn = new PrimitiveColumn<int>("CharCount");
            DataFrame df = new DataFrame(new List<BaseColumn>()
            {
                ageColumn, lengthColumn
            });

            IEnumerable<RecordBatch> recordBatches = df.AsArrowRecordBatches();
            bool foundARecordBatch = false;

            foreach (RecordBatch recordBatch in recordBatches)
            {
                foundARecordBatch = true;

                // The stream, writer and reader are all disposable; release them deterministically
                // once the round trip for this batch is done.
                using (MemoryStream stream = new MemoryStream())
                using (ArrowStreamWriter writer = new ArrowStreamWriter(stream, recordBatch.Schema))
                {
                    writer.WriteRecordBatchAsync(recordBatch).GetAwaiter().GetResult();

                    // Rewind so the reader starts at the beginning of what was just written.
                    stream.Position = 0;
                    using (ArrowStreamReader reader = new ArrowStreamReader(stream))
                    {
                        RecordBatch readRecordBatch = reader.ReadNextRecordBatch();
                        while (readRecordBatch != null)
                        {
                            RecordBatchComparer.CompareBatches(recordBatch, readRecordBatch);
                            readRecordBatch = reader.ReadNextRecordBatch();
                        }
                    }
                }
            }
            Assert.True(foundARecordBatch);
        }
示例#3
0
        /// <summary>
        /// Verifies the result column type when a scalar of a different numeric type is added to
        /// int columns, and that unsupported scalar/column type combinations throw
        /// <see cref="NotSupportedException"/>.
        /// </summary>
        public void TestBinaryOperationsWithConversions()
        {
            DataFrame df = DataFrameTests.MakeDataFrameWithTwoColumns(10);

            // Add a float literal to an int column; the result column is expected to be double.
            // NOTE(review): the literal is 5.0f (float), not a double - confirm the promotion is intended.
            DataFrame dfd = df.Add(5.0f);
            Type dtype = dfd.Column(0).DataType;

            // Assert.Equal reports the actual type on failure, unlike Assert.True(a == b).
            Assert.Equal(typeof(double), dtype);

            // Add a decimal to an int column
            DataFrame dfm = df.Add(5.0m);

            dtype = dfm.Column(0).DataType;
            Assert.Equal(typeof(decimal), dtype);

            // int + bool should throw
            Assert.Throws<NotSupportedException>(() => df.Add(true));

            var dataFrameColumn1 = new PrimitiveColumn<double>("Double1", Enumerable.Range(0, 10).Select(x => (double)x));

            df.SetColumn(0, dataFrameColumn1);
            // A logical And between a double column and a bool should throw
            Assert.Throws<NotSupportedException>(() => df.And(true));
        }
示例#4
0
        /// <summary>
        /// Builds a <c>DataFrame</c> with one column per numeric primitive type (plus char), each
        /// holding the values derived from 0..<paramref name="length"/>-1.
        /// </summary>
        /// <param name="length">Number of rows in every column.</param>
        /// <param name="withNulls">When true, row <c>length / 2</c> of every column is set to null.</param>
        /// <returns>The populated frame.</returns>
        public static DataFrame MakeDataFrameWithNumericColumns(int length, bool withNulls = true)
        {
            // Columns are created in the same order in which they appear in the frame.
            DataFrame frame = new DataFrame(new List<BaseColumn>
            {
                new PrimitiveColumn<byte>("Byte", Enumerable.Range(0, length).Select(i => (byte)i)),
                new PrimitiveColumn<char>("Char", Enumerable.Range(0, length).Select(i => (char)(i + 65))),
                new PrimitiveColumn<decimal>("Decimal", Enumerable.Range(0, length).Select(i => (decimal)i)),
                new PrimitiveColumn<double>("Double", Enumerable.Range(0, length).Select(i => (double)i)),
                new PrimitiveColumn<float>("Float", Enumerable.Range(0, length).Select(i => (float)i)),
                new PrimitiveColumn<int>("Int", Enumerable.Range(0, length).Select(i => i)),
                new PrimitiveColumn<long>("Long", Enumerable.Range(0, length).Select(i => (long)i)),
                new PrimitiveColumn<sbyte>("Sbyte", Enumerable.Range(0, length).Select(i => (sbyte)i)),
                new PrimitiveColumn<short>("Short", Enumerable.Range(0, length).Select(i => (short)i)),
                new PrimitiveColumn<uint>("Uint", Enumerable.Range(0, length).Select(i => (uint)i)),
                new PrimitiveColumn<ulong>("Ulong", Enumerable.Range(0, length).Select(i => (ulong)i)),
                new PrimitiveColumn<ushort>("Ushort", Enumerable.Range(0, length).Select(i => (ushort)i)),
            });

            if (!withNulls)
            {
                return frame;
            }

            // Punch a single null into the middle row of every column.
            int nullRow = length / 2;
            for (int columnIndex = 0; columnIndex < frame.ColumnCount; columnIndex++)
            {
                frame.Column(columnIndex)[nullRow] = null;
            }

            return frame;
        }
示例#5
0
        /// <summary>
        /// Sorts an int column that contains <paramref name="numberOfNulls"/> nulls, first on its
        /// own and then together with the values 0..4, in ascending and descending order.
        /// </summary>
        /// <param name="numberOfNulls">How many nulls are appended before any values.</param>
        public void TestPrimitiveColumnSort(int numberOfNulls)
        {
            PrimitiveColumn<int> column = new PrimitiveColumn<int>("Int", 0);
            Assert.Equal(0, column.NullCount);

            column.AppendMany(null, numberOfNulls);
            Assert.Equal(numberOfNulls, column.NullCount);

            // A column made up only of nulls must still sort.
            PrimitiveColumn<int> allNullsSorted = column.Sort() as PrimitiveColumn<int>;
            Assert.Equal(numberOfNulls, allNullsSorted.NullCount);
            Assert.Null(allNullsSorted[0]);

            // Add the values 0..4 after the nulls; the null count must not move.
            int next = 0;
            while (next < 5)
            {
                column.Append(next);
                next++;
            }
            Assert.Equal(numberOfNulls, column.NullCount);

            // Ascending: smallest value first, a null at index 9.
            // NOTE(review): index 9 holds a null only if at least 5 nulls were appended - confirm against the test data.
            PrimitiveColumn<int> ascending = column.Sort() as PrimitiveColumn<int>;
            Assert.Equal(0, ascending[0]);
            Assert.Null(ascending[9]);

            // Descending: largest value first, index 9 is still a null.
            PrimitiveColumn<int> descending = column.Sort(false) as PrimitiveColumn<int>;
            Assert.Equal(4, descending[0]);
            Assert.Null(descending[9]);
        }
        /// <summary>
        /// Checks length, null count and per-row validity after bulk-appending nulls and values
        /// with <c>AppendMany</c>, and after overwriting individual rows.
        /// </summary>
        public void TestAppendMany()
        {
            PrimitiveColumn<int> column = new PrimitiveColumn<int>("Int1");

            // Five nulls: every row so far is invalid.
            column.AppendMany(null, 5);
            Assert.Equal(5, column.NullCount);
            Assert.Equal(5, column.Length);
            int row = 0;
            while (row < column.Length)
            {
                Assert.False(column.IsValid(row));
                row++;
            }

            // Five copies of the value 5: rows 5..9 are valid, the null count is unchanged.
            column.AppendMany(5, 5);
            Assert.Equal(5, column.NullCount);
            Assert.Equal(10, column.Length);
            row = 5;
            while (row < column.Length)
            {
                Assert.True(column.IsValid(row));
                row++;
            }

            // Overwriting a null with a value drops the null count by one...
            column[2] = 10;
            Assert.Equal(4, column.NullCount);
            Assert.True(column.IsValid(2));

            // ...and overwriting a value with null raises it again.
            column[7] = null;
            Assert.Equal(5, column.NullCount);
            Assert.False(column.IsValid(7));
        }
示例#7
0
        /// <summary>
        /// Builds the numeric-and-string test frame and appends a "Bool" column that is true on
        /// even rows.
        /// </summary>
        /// <param name="length">Number of rows in every column.</param>
        /// <returns>The frame with the bool column added as its last column.</returns>
        public static DataFrame MakeDataFrameWithAllColumnTypes(int length)
        {
            DataFrame frame = MakeDataFrameWithNumericAndStringColumns(length);

            var evenRowFlags = Enumerable.Range(0, length).Select(row => row % 2 == 0);
            BaseColumn flags = new PrimitiveColumn<bool>("Bool", evenRowFlags);

            // Inserting at ColumnCount appends the column at the end.
            frame.InsertColumn(frame.ColumnCount, flags);
            return frame;
        }
示例#8
0
        /// <summary>
        /// Checks that <c>NullCount</c> is tracked correctly for primitive and string columns across
        /// construction, appends and indexed assignments.
        /// </summary>
        public void TestNullCounts()
        {
            // Ten values plus one appended null.
            PrimitiveColumn<int> seededColumn = new PrimitiveColumn<int>("Int1", Enumerable.Range(0, 10).Select(v => v));
            seededColumn.Append(null);
            Assert.Equal(1, seededColumn.NullCount);

            PrimitiveColumn<int> intColumn = new PrimitiveColumn<int>("Int2");
            Assert.Equal(0, intColumn.NullCount);

            PrimitiveColumn<int> presizedIntColumn = new PrimitiveColumn<int>("Int3", 10);
            Assert.Equal(0, presizedIntColumn.NullCount);

            // Test null counts with assignments on Primitive Columns
            intColumn.Append(null);
            intColumn.Append(1);
            Assert.Equal(1, intColumn.NullCount);

            // value -> value: unchanged
            intColumn[1] = 10;
            Assert.Equal(1, intColumn.NullCount);

            // value -> null: one more
            intColumn[1] = null;
            Assert.Equal(2, intColumn.NullCount);

            // null -> value: one fewer
            intColumn[1] = 5;
            Assert.Equal(1, intColumn.NullCount);

            // null -> null: row 0 was already null, so nothing changes
            intColumn[0] = null;
            Assert.Equal(1, intColumn.NullCount);

            // Test null counts with assignments on String Columns
            StringColumn stringColumn = new StringColumn("String", 0);
            Assert.Equal(0, stringColumn.NullCount);

            StringColumn presizedStringColumn = new StringColumn("String1", 5);
            Assert.Equal(0, presizedStringColumn.NullCount);

            StringColumn filledStringColumn = new StringColumn("String", Enumerable.Range(0, 10).Select(v => v.ToString()));
            Assert.Equal(0, filledStringColumn.NullCount);

            StringColumn allNullStringColumn = new StringColumn("String", Enumerable.Range(0, 10).Select(v => (string)null));
            Assert.Equal(10, allNullStringColumn.NullCount);

            // Same sequence of transitions as for the primitive column above.
            stringColumn.Append(null);
            Assert.Equal(1, stringColumn.NullCount);

            stringColumn.Append("foo");
            Assert.Equal(1, stringColumn.NullCount);

            stringColumn[1] = "bar";
            Assert.Equal(1, stringColumn.NullCount);

            stringColumn[1] = null;
            Assert.Equal(2, stringColumn.NullCount);

            stringColumn[1] = "foo";
            Assert.Equal(1, stringColumn.NullCount);

            stringColumn[0] = null;
            Assert.Equal(1, stringColumn.NullCount);
        }
示例#9
0
        /// <summary>
        /// Builds a frame with two int columns: "Int1" holding 0..length-1 and "Int2" holding
        /// 10..length+9.
        /// </summary>
        /// <param name="length">Number of rows in each column.</param>
        /// <returns>The two-column frame.</returns>
        public static DataFrame MakeDataFrameWithTwoColumns(int length)
        {
            BaseColumn fromZero = new PrimitiveColumn<int>("Int1", Enumerable.Range(0, length).Select(v => v));
            BaseColumn fromTen = new PrimitiveColumn<int>("Int2", Enumerable.Range(10, length).Select(v => v));

            // Start from an empty frame and add the columns one at a time.
            Data.DataFrame frame = new Data.DataFrame();
            frame.InsertColumn(0, fromZero);
            frame.InsertColumn(1, fromTen);

            return frame;
        }
示例#10
0
        /// <summary>
        /// Checks that <c>IsValid</c> reports false for an appended null and true for every row
        /// that was created from a value.
        /// </summary>
        public void TestValidity()
        {
            PrimitiveColumn<int> column = new PrimitiveColumn<int>("Int1", Enumerable.Range(0, 10).Select(v => v));
            column.Append(null);

            // The null was appended after ten values, so it sits at index 10.
            Assert.False(column.IsValid(10));

            // Every row before the last one came from a value.
            long row = 0;
            while (row < column.Length - 1)
            {
                Assert.True(column.IsValid(row));
                row++;
            }
        }
示例#11
0
        /// <summary>
        /// Builds the numeric-and-string test frame and appends a "Bool" column that is true on
        /// even rows.
        /// </summary>
        /// <param name="length">Number of rows in every column.</param>
        /// <param name="withNulls">
        /// Forwarded to the numeric-and-string helper; when true, row <c>length / 2</c> of the bool
        /// column is also set to null.
        /// </param>
        /// <returns>The frame with the bool column added as its last column.</returns>
        public static DataFrame MakeDataFrameWithAllMutableColumnTypes(int length, bool withNulls = true)
        {
            DataFrame frame = MakeDataFrameWithNumericAndStringColumns(length, withNulls);

            BaseColumn flags = new PrimitiveColumn<bool>("Bool", Enumerable.Range(0, length).Select(row => row % 2 == 0));
            frame.InsertColumn(frame.ColumnCount, flags);

            if (!withNulls)
            {
                return frame;
            }

            // The null is written through the local reference after the column has been inserted.
            flags[length / 2] = null;
            return frame;
        }
示例#12
0
        /// <summary>
        /// Verifies that name-based lookup still returns the very same column instances after a
        /// column has been appended and the first column has been removed.
        /// </summary>
        public void InsertAndRemoveColumnTests()
        {
            DataFrame dataFrame = MakeDataFrameWithAllMutableColumnTypes(10);
            BaseColumn intColumn = new PrimitiveColumn<int>("IntColumn", Enumerable.Range(0, 10).Select(x => x));
            BaseColumn charColumn = dataFrame["Char"];

            // Append at the end, then drop column 0 so the positions of the remaining columns change.
            dataFrame.InsertColumn(dataFrame.ColumnCount, intColumn);
            dataFrame.RemoveColumn(0);

            BaseColumn intColumnAfter = dataFrame["IntColumn"];
            BaseColumn charColumnAfter = dataFrame["Char"];

            // Assert.Same checks reference identity and reports both objects on failure.
            Assert.Same(intColumn, intColumnAfter);
            Assert.Same(charColumn, charColumnAfter);
        }
示例#13
0
        /// <summary>
        /// Builds a frame from an Arrow record batch holding one non-nullable Int32 column with the
        /// values 0..9, then checks that the column's read-only data buffers match the values read
        /// through the indexer and that every bit of its null bitmap buffers is set.
        /// </summary>
        public void TestPrimitiveColumnGetReadOnlyBuffers()
        {
            RecordBatch recordBatch = new RecordBatch.Builder()
                                      .Append("Column1", false, col => col.Int32(array => array.AppendRange(Enumerable.Range(0, 10)))).Build();
            DataFrame df = new DataFrame(recordBatch);

            PrimitiveColumn<int> column = df["Column1"] as PrimitiveColumn<int>;

            IEnumerable<ReadOnlyMemory<int>> buffers = column.GetReadOnlyDataBuffers();
            IEnumerable<ReadOnlyMemory<byte>> nullBitMaps = column.GetReadOnlyNullBitMapBuffers();

            // Reads bit (index mod 8) of a bitmap byte.
            bool GetBit(byte curBitMap, int index)
            {
                return ((curBitMap >> (index & 7)) & 1) != 0;
            }

            // Index of the buffer currently being checked.
            long i = 0;

            // IEnumerator<T> is IDisposable; dispose both enumerators once the walk is finished.
            using (IEnumerator<ReadOnlyMemory<int>> bufferEnumerator = buffers.GetEnumerator())
            using (IEnumerator<ReadOnlyMemory<byte>> nullBitMapsEnumerator = nullBitMaps.GetEnumerator())
            {
                // Walk data buffers and bitmap buffers in lock step.
                while (bufferEnumerator.MoveNext() && nullBitMapsEnumerator.MoveNext())
                {
                    ReadOnlyMemory<int> dataBuffer = bufferEnumerator.Current;
                    ReadOnlyMemory<byte> nullBitMap = nullBitMapsEnumerator.Current;

                    ReadOnlySpan<int> span = dataBuffer.Span;
                    for (int j = 0; j < span.Length; j++)
                    {
                        // Each buffer has a max length of int.MaxValue
                        Assert.Equal(span[j], column[j + i * int.MaxValue]);
                    }

                    ReadOnlySpan<byte> bitMapSpan = nullBitMap.Span;
                    // No nulls in this column, so each bit must be set
                    for (int j = 0; j < bitMapSpan.Length; j++)
                    {
                        for (int k = 0; k < 8; k++)
                        {
                            Assert.True(GetBit(bitMapSpan[j], k));
                        }
                    }
                    i++;
                }
            }
        }
示例#14
0
        /// <summary>
        /// Builds a two-column frame ("Column1" of <typeparamref name="T1"/>, "Column2" of
        /// <typeparamref name="T2"/>) whose rows alternate between 0 and 1, converted to the column
        /// type with <see cref="Convert.ChangeType(object, Type)"/>.
        /// </summary>
        /// <typeparam name="T1">Element type of the first column.</typeparam>
        /// <typeparam name="T2">Element type of the second column.</typeparam>
        /// <param name="length">Number of rows in each column.</param>
        /// <param name="withNulls">When true, row <c>length / 2</c> of both columns is set to null.</param>
        /// <returns>The populated frame.</returns>
        public static DataFrame MakeDataFrame<T1, T2>(int length, bool withNulls = true)
            where T1 : unmanaged
            where T2 : unmanaged
        {
            // Row indices are never negative, so row % 2 is 0 on even rows and 1 on odd rows.
            var firstValues = Enumerable.Range(0, length).Select(row => (T1)Convert.ChangeType(row % 2, typeof(T1)));
            BaseColumn firstColumn = new PrimitiveColumn<T1>("Column1", firstValues);

            var secondValues = Enumerable.Range(0, length).Select(row => (T2)Convert.ChangeType(row % 2, typeof(T2)));
            BaseColumn secondColumn = new PrimitiveColumn<T2>("Column2", secondValues);

            DataFrame frame = new DataFrame(new List<BaseColumn> { firstColumn, secondColumn });

            if (!withNulls)
            {
                return frame;
            }

            int nullRow = length / 2;
            for (int columnIndex = 0; columnIndex < frame.ColumnCount; columnIndex++)
            {
                frame.Column(columnIndex)[nullRow] = null;
            }

            return frame;
        }
示例#15
0
        /// <summary>
        /// Exercises binary operations on a frame of two all-true bool columns: arithmetic and shift
        /// operations must throw, while Equals/And/Or/Xor against a scalar and against a per-column
        /// list of bools must produce the expected cell values.
        /// </summary>
        public void TestBinaryOperationsOnBoolColumn()
        {
            PrimitiveColumn<bool> firstColumn = new PrimitiveColumn<bool>("Bool1", Enumerable.Repeat(true, 10));
            PrimitiveColumn<bool> secondColumn = new PrimitiveColumn<bool>("Bool2", Enumerable.Repeat(true, 10));

            DataFrame df = new DataFrame();
            df.InsertColumn(0, firstColumn);
            df.InsertColumn(1, secondColumn);

            // bool + int should throw
            Assert.Throws<NotSupportedException>(() => df.Add(5));
            // Left shift should throw
            Assert.Throws<NotSupportedException>(() => df.LeftShift(5));

            // One operand per column: true for column 0, false for column 1.
            IReadOnlyList<bool> listOfBools = new List<bool>() { true, false };

            // Equals: true == true for the scalar; column 1 compared against false.
            var equalsScalar = df.Equals(true);
            Assert.Equal(true, equalsScalar[4, 0]);
            var equalsList = df.Equals(listOfBools);
            Assert.Equal(false, equalsList[4, 1]);

            // And: true & true for the scalar; true & false in column 1.
            var andScalar = df.And(true);
            Assert.Equal(true, andScalar[4, 0]);
            var andList = df.And(listOfBools);
            Assert.Equal(false, andList[4, 1]);

            // Or: true | true for the scalar; true | false in column 1.
            var orScalar = df.Or(true);
            Assert.Equal(true, orScalar[4, 0]);
            var orList = df.Or(listOfBools);
            Assert.Equal(true, orList[4, 1]);

            // Xor: true ^ true for the scalar; true ^ false in column 1.
            var xorScalar = df.Xor(true);
            Assert.Equal(false, xorScalar[4, 0]);
            var xorList = df.Xor(listOfBools);
            Assert.Equal(true, xorList[4, 1]);
        }
示例#16
0
        /// <summary>
        /// Covers basic frame construction: inserting columns, the argument checks on
        /// <c>InsertColumn</c> and <c>SetColumn</c>, replacing a column and removing one.
        /// </summary>
        public void ColumnAndTableCreationTest()
        {
            BaseColumn ints = new PrimitiveColumn<int>("IntColumn", Enumerable.Range(0, 10).Select(v => v));
            BaseColumn floats = new PrimitiveColumn<float>("FloatColumn", Enumerable.Range(0, 10).Select(v => (float)v));

            DataFrame frame = new DataFrame();
            frame.InsertColumn(0, ints);
            frame.InsertColumn(1, floats);

            // Shape of the frame and of each column after the two inserts.
            Assert.Equal(10, frame.RowCount);
            Assert.Equal(2, frame.ColumnCount);
            Assert.Equal(10, frame.Column(0).Length);
            Assert.Equal("IntColumn", frame.Column(0).Name);
            Assert.Equal(10, frame.Column(1).Length);
            Assert.Equal("FloatColumn", frame.Column(1).Name);

            // Rejected inserts: a column with 11 rows, a duplicate name, and an index of 10 in a two-column frame.
            BaseColumn tooLong = new PrimitiveColumn<float>("BigColumn", Enumerable.Range(0, 11).Select(v => (float)v));
            BaseColumn duplicateName = new PrimitiveColumn<float>("FloatColumn", Enumerable.Range(0, 10).Select(v => (float)v));
            Assert.Throws<ArgumentException>(() => frame.InsertColumn(2, tooLong));
            Assert.Throws<ArgumentException>(() => frame.InsertColumn(2, duplicateName));
            Assert.Throws<ArgumentOutOfRangeException>(() => frame.InsertColumn(10, duplicateName));

            // None of the failed inserts may have changed the frame.
            Assert.Equal(2, frame.ColumnCount);

            // Setting column 1 to a column named like column 0 is rejected.
            BaseColumn sameNameAsFirst = new PrimitiveColumn<int>("IntColumn", Enumerable.Range(0, 10).Select(v => v));
            Assert.Throws<ArgumentException>(() => frame.SetColumn(1, sameNameAsFirst));

            // A distinct name is accepted and the frame holds the instance that was passed in.
            BaseColumn replacement = new PrimitiveColumn<int>("IntColumn1", Enumerable.Range(0, 10).Select(v => v));
            frame.SetColumn(1, replacement);
            Assert.True(ReferenceEquals(replacement, frame.Column(1)));

            // Removing column 1 leaves the original int column at position 0.
            frame.RemoveColumn(1);
            Assert.Equal(1, frame.ColumnCount);
            Assert.True(ReferenceEquals(ints, frame.Column(0)));
        }
示例#17
0
        /// <summary>
        /// Enumerates every column of the all-types test frame and checks the total number of
        /// values, then spot-checks the values yielded by the string, Arrow string, float and int
        /// columns by concatenating them into a string.
        /// </summary>
        public void TestIEnumerable()
        {
            DataFrame frame = MakeDataFrameWithAllColumnTypes(10);

            // Every column must yield exactly one item per row.
            int seen = 0;
            for (int columnIndex = 0; columnIndex < frame.ColumnCount; columnIndex++)
            {
                BaseColumn current = frame.Column(columnIndex);
                foreach (object item in current)
                {
                    seen++;
                }
            }
            Assert.Equal(10 * frame.ColumnCount, seen);

            // spot check a few column types:

            StringColumn stringColumn = (StringColumn)frame["String"];
            StringBuilder stringText = new StringBuilder();
            foreach (string item in stringColumn)
            {
                stringText.Append(item ?? "<null>");
            }
            Assert.Equal("01234<null>6789", stringText.ToString());

            ArrowStringColumn arrowStringColumn = (ArrowStringColumn)frame["ArrowString"];
            StringBuilder arrowText = new StringBuilder();
            foreach (string item in arrowStringColumn)
            {
                arrowText.Append(item ?? "<null>");
            }
            Assert.Equal("foofoofoofoofoofoofoofoofoofoo", arrowText.ToString());

            PrimitiveColumn<float> floatColumn = (PrimitiveColumn<float>)frame["Float"];
            StringBuilder floatText = new StringBuilder();
            foreach (float? item in floatColumn)
            {
                if (item.HasValue)
                {
                    floatText.Append(item);
                }
                else
                {
                    floatText.Append("<null>");
                }
            }
            Assert.Equal("01234<null>6789", floatText.ToString());

            PrimitiveColumn<int> intColumn = (PrimitiveColumn<int>)frame["Int"];
            StringBuilder intText = new StringBuilder();
            foreach (int? item in intColumn)
            {
                if (item.HasValue)
                {
                    intText.Append(item);
                }
                else
                {
                    intText.Append("<null>");
                }
            }
            Assert.Equal("01234<null>6789", intText.ToString());
        }
示例#18
0
        /// <summary>
        /// Checks every column of a join result against the column it came from in
        /// <paramref name="left"/> or <paramref name="right"/>. Depending on
        /// <paramref name="joinAlgorithm"/>, the source column is compared as is, padded with nulls
        /// up to the join's row count, or cloned through an identity row mapping.
        /// </summary>
        /// <param name="join">The join result to verify.</param>
        /// <param name="left">Left input; its columns occupy the first positions of <paramref name="join"/>.</param>
        /// <param name="right">Right input; its columns follow the left ones.</param>
        /// <param name="joinAlgorithm">The algorithm that produced <paramref name="join"/>.</param>
        private void VerifyJoin(DataFrame join, DataFrame left, DataFrame right, JoinAlgorithm joinAlgorithm)
        {
            // Identity mapping 0..RowCount-1, used to clone a source column row by row.
            PrimitiveColumn<long> mapIndices = new PrimitiveColumn<long>("map", join.RowCount);
            for (long row = 0; row < join.RowCount; row++)
            {
                mapIndices[row] = row;
            }

            // Pads a column that is no longer than the join with nulls; otherwise clones it through the mapping.
            BaseColumn PadOrReindex(BaseColumn column)
            {
                return column.Length <= join.RowCount
                    ? column.Clone(numberOfNullsToAppend: join.RowCount - column.Length)
                    : column.Clone(mapIndices);
            }

            for (int columnIndex = 0; columnIndex < join.ColumnCount; columnIndex++)
            {
                BaseColumn joinColumn = join.Column(columnIndex);

                // The first left.ColumnCount columns come from the left frame, the rest from the right.
                bool comesFromLeft = columnIndex < left.ColumnCount;
                BaseColumn sourceColumn = comesFromLeft
                    ? left.Column(columnIndex)
                    : right.Column(columnIndex - left.ColumnCount);

                BaseColumn expected;
                if (joinAlgorithm == JoinAlgorithm.Left)
                {
                    // Left side is compared unchanged; the right side is padded or re-indexed.
                    expected = comesFromLeft ? sourceColumn : PadOrReindex(sourceColumn);
                }
                else if (joinAlgorithm == JoinAlgorithm.Right)
                {
                    // Mirror image of the left join.
                    expected = comesFromLeft ? PadOrReindex(sourceColumn) : sourceColumn;
                }
                else if (joinAlgorithm == JoinAlgorithm.Inner)
                {
                    expected = sourceColumn.Clone(mapIndices);
                }
                else
                {
                    // Any other algorithm: both sides are padded with nulls up to the join's row count.
                    expected = sourceColumn.Clone(numberOfNullsToAppend: join.RowCount - sourceColumn.Length);
                }

                // Element-wise comparison; every row must compare equal.
                BaseColumn isEqual = joinColumn == expected;
                for (int row = 0; row < join.RowCount; row++)
                {
                    Assert.Equal(true, isEqual[row]);
                }
            }
        }