コード例 #1
0
        /// <summary>
        /// Builds a frame from two columns, then adds a third and removes the first,
        /// checking the column count, names, and storage types after each step.
        /// </summary>
        public void DataFrameColumnManipulations()
        {
            DataList integerList = new DataList<int>("Integer");
            DataList doubleList = new DataList<double>("Double");
            DataList timestampList = new DataList<DateTime>("Timestamp");

            // Initial state: the frame holds exactly the two columns it was constructed with.
            DataFrame table = new DataFrame(integerList, doubleList);
            Assert.IsTrue(table.Columns.Count == 2);
            Assert.IsTrue(table.Columns[0].Name == integerList.Name);
            Assert.IsTrue(table.Columns[0].StorageType == integerList.StorageType);
            Assert.IsTrue(table.Column<object>(integerList.Name).Name == integerList.Name);

            // After adding a column it is expected at the last position (index 2).
            table.AddColumn(timestampList);
            Assert.IsTrue(table.Columns.Count == 3);
            Assert.IsTrue(table.Columns[2].Name == timestampList.Name);
            Assert.IsTrue(table.Columns[2].StorageType == timestampList.StorageType);
            Assert.IsTrue(table.Column<object>(timestampList.Name).Name == timestampList.Name);

            // After removing the first column, the former second column is expected at index 0.
            table.RemoveColumn(integerList.Name);
            Assert.IsTrue(table.Columns.Count == 2);
            Assert.IsTrue(table.Columns[0].Name == doubleList.Name);
            Assert.IsTrue(table.Columns[0].StorageType == doubleList.StorageType);
            Assert.IsTrue(table.Column<object>(doubleList.Name).Name == doubleList.Name);
        }
コード例 #2
0
ファイル: DataFrameTests.cs プロジェクト: pscream/corefxlab
        /// <summary>
        /// Checks the data type of the first result column when scalars of other numeric
        /// types are added to a frame, and that unsupported operand combinations throw
        /// <see cref="NotSupportedException"/>.
        /// </summary>
        public void TestBinaryOperationsWithConversions()
        {
            // NOTE(review): the original comments describe column 0 as an int column;
            // MakeDataFrameWithTwoColumns is defined elsewhere, so confirm against it.
            DataFrame source = DataFrameTests.MakeDataFrameWithTwoColumns(10);

            // Adding a float literal: the first result column is expected to be double.
            DataFrame floatSum = source.Add(5.0f);
            var resultType = floatSum.Column(0).DataType;
            Assert.True(resultType == typeof(double));

            // Adding a decimal literal: the first result column is expected to be decimal.
            DataFrame decimalSum = source.Add(5.0m);
            resultType = decimalSum.Column(0).DataType;
            Assert.True(resultType == typeof(decimal));

            // Adding a bool scalar is expected to throw.
            Assert.Throws<NotSupportedException>(() => source.Add(true));

            // Replace column 0 with a double column; a logical And with a bool scalar
            // is then expected to throw as well.
            var doubleColumn = new PrimitiveColumn<double>("Double1", Enumerable.Range(0, 10).Select(x => (double)x));
            source.SetColumn(0, doubleColumn);
            Assert.Throws<NotSupportedException>(() => source.And(true));
        }
コード例 #3
0
        /// <summary>
        /// Creates a <c>DataFrame</c> with one column per primitive numeric type (plus char),
        /// each holding <paramref name="length"/> values derived from the row index.
        /// </summary>
        /// <param name="length">Number of rows generated for every column.</param>
        /// <param name="withNulls">
        /// When true, the value at row <c>length / 2</c> of every column is replaced with null.
        /// Has no effect when <paramref name="length"/> is zero, because there is no row to overwrite.
        /// </param>
        /// <returns>The populated data frame.</returns>
        public static DataFrame MakeDataFrameWithNumericColumns(int length, bool withNulls = true)
        {
            // Each column stores the row index cast to the column's element type;
            // the char column is offset by 65 so that it starts at 'A'.
            BaseColumn byteColumn    = new PrimitiveColumn<byte>("Byte", Enumerable.Range(0, length).Select(x => (byte)x));
            BaseColumn charColumn    = new PrimitiveColumn<char>("Char", Enumerable.Range(0, length).Select(x => (char)(x + 65)));
            BaseColumn decimalColumn = new PrimitiveColumn<decimal>("Decimal", Enumerable.Range(0, length).Select(x => (decimal)x));
            BaseColumn doubleColumn  = new PrimitiveColumn<double>("Double", Enumerable.Range(0, length).Select(x => (double)x));
            BaseColumn floatColumn   = new PrimitiveColumn<float>("Float", Enumerable.Range(0, length).Select(x => (float)x));
            BaseColumn intColumn     = new PrimitiveColumn<int>("Int", Enumerable.Range(0, length).Select(x => x));
            BaseColumn longColumn    = new PrimitiveColumn<long>("Long", Enumerable.Range(0, length).Select(x => (long)x));
            BaseColumn sbyteColumn   = new PrimitiveColumn<sbyte>("Sbyte", Enumerable.Range(0, length).Select(x => (sbyte)x));
            BaseColumn shortColumn   = new PrimitiveColumn<short>("Short", Enumerable.Range(0, length).Select(x => (short)x));
            BaseColumn uintColumn    = new PrimitiveColumn<uint>("Uint", Enumerable.Range(0, length).Select(x => (uint)x));
            BaseColumn ulongColumn   = new PrimitiveColumn<ulong>("Ulong", Enumerable.Range(0, length).Select(x => (ulong)x));
            BaseColumn ushortColumn  = new PrimitiveColumn<ushort>("Ushort", Enumerable.Range(0, length).Select(x => (ushort)x));

            DataFrame dataFrame = new DataFrame(new List<BaseColumn> {
                byteColumn, charColumn, decimalColumn, doubleColumn, floatColumn, intColumn, longColumn, sbyteColumn, shortColumn, uintColumn, ulongColumn, ushortColumn
            });

            // Guard against length == 0: row index length / 2 would be 0, which does not
            // exist in an empty column.
            if (withNulls && length > 0)
            {
                int nullRow = length / 2;
                for (int i = 0; i < dataFrame.ColumnCount; i++)
                {
                    dataFrame.Column(i)[nullRow] = null;
                }
            }

            return dataFrame;
        }
コード例 #4
0
        /// <summary>
        /// Prints, in milliseconds, how long it takes to compute the mean repeatedly over
        /// a plain double array, a <c>DataList&lt;double&gt;</c>, and the same list accessed
        /// as a column of a <c>DataFrame</c>.
        /// </summary>
        public void Timings()
        {
            int sampleCount = 10000;
            int repetitions = 10000;
            Random random = new Random(1);
            double accumulator = 0.0;

            // Case 1: raw array.
            double[] samples = new double[sampleCount];
            for (int k = 0; k < samples.Length; k++)
            {
                samples[k] = random.NextDouble();
            }
            Stopwatch arrayWatch = Stopwatch.StartNew();
            for (int pass = 0; pass < repetitions; pass++)
            {
                accumulator += samples.Mean();
            }
            arrayWatch.Stop();
            Console.WriteLine(arrayWatch.ElapsedMilliseconds);

            // Case 2: stand-alone data list, filled with further draws from the same generator.
            DataList<double> dataList = new DataList<double>("list");
            for (int k = 0; k < sampleCount; k++)
            {
                dataList.Add(random.NextDouble());
            }
            Stopwatch listWatch = Stopwatch.StartNew();
            for (int pass = 0; pass < repetitions; pass++)
            {
                accumulator += dataList.Mean();
            }
            listWatch.Stop();
            Console.WriteLine(listWatch.ElapsedMilliseconds);

            // Case 3: the same list reached through a frame. The stopwatch is started
            // before the column lookup, so the lookup is part of the measured time.
            DataFrame table = new DataFrame(dataList);
            Stopwatch frameWatch = Stopwatch.StartNew();
            DataColumn<double> frameColumn = table.Column<double>("list");
            for (int pass = 0; pass < repetitions; pass++)
            {
                accumulator += frameColumn.Mean();
            }
            frameWatch.Stop();
            Console.WriteLine(frameWatch.ElapsedMilliseconds);
        }
コード例 #5
0
        /// <summary>
        /// Creates a two-column <c>DataFrame</c> ("Column1" of <typeparamref name="T1"/> and
        /// "Column2" of <typeparamref name="T2"/>) whose values alternate between 0 and 1:
        /// even row indices hold 0 and odd row indices hold 1, converted to the column type.
        /// </summary>
        /// <typeparam name="T1">Element type of the first column.</typeparam>
        /// <typeparam name="T2">Element type of the second column.</typeparam>
        /// <param name="length">Number of rows generated for each column.</param>
        /// <param name="withNulls">
        /// When true, the value at row <c>length / 2</c> of each column is replaced with null.
        /// Has no effect when <paramref name="length"/> is zero, because there is no row to overwrite.
        /// </param>
        /// <returns>The populated data frame.</returns>
        public static DataFrame MakeDataFrame<T1, T2>(int length, bool withNulls = true)
            where T1 : unmanaged
            where T2 : unmanaged
        {
            BaseColumn baseColumn1 = new PrimitiveColumn<T1>("Column1", Enumerable.Range(0, length).Select(x => (T1)Convert.ChangeType(x % 2 == 0 ? 0 : 1, typeof(T1))));
            BaseColumn baseColumn2 = new PrimitiveColumn<T2>("Column2", Enumerable.Range(0, length).Select(x => (T2)Convert.ChangeType(x % 2 == 0 ? 0 : 1, typeof(T2))));
            DataFrame dataFrame = new DataFrame(new List<BaseColumn> {
                baseColumn1, baseColumn2
            });

            // Guard against length == 0: row index length / 2 would be 0, which does not
            // exist in an empty column.
            if (withNulls && length > 0)
            {
                int nullRow = length / 2;
                for (int i = 0; i < dataFrame.ColumnCount; i++)
                {
                    dataFrame.Column(i)[nullRow] = null;
                }
            }

            return dataFrame;
        }
コード例 #6
0
ファイル: DataFrameTests.cs プロジェクト: pscream/corefxlab
        /// <summary>
        /// Covers inserting, replacing, and removing columns on an initially empty
        /// <c>DataFrame</c>, including the argument errors expected for a column of the
        /// wrong length, a duplicate column name, and an out-of-range insert position.
        /// </summary>
        public void ColumnAndTableCreationTest()
        {
            BaseColumn ints = new PrimitiveColumn<int>("IntColumn", Enumerable.Range(0, 10).Select(x => x));
            BaseColumn floats = new PrimitiveColumn<float>("FloatColumn", Enumerable.Range(0, 10).Select(x => (float)x));
            DataFrame frame = new DataFrame();

            // Two successful inserts.
            frame.InsertColumn(0, ints);
            frame.InsertColumn(1, floats);
            Assert.Equal(10, frame.RowCount);
            Assert.Equal(2, frame.ColumnCount);
            Assert.Equal(10, frame.Column(0).Length);
            Assert.Equal("IntColumn", frame.Column(0).Name);
            Assert.Equal(10, frame.Column(1).Length);
            Assert.Equal("FloatColumn", frame.Column(1).Name);

            // Rejected inserts: 11 rows instead of 10, a name already in use, and an index past the end.
            BaseColumn tooLong = new PrimitiveColumn<float>("BigColumn", Enumerable.Range(0, 11).Select(x => (float)x));
            BaseColumn duplicateName = new PrimitiveColumn<float>("FloatColumn", Enumerable.Range(0, 10).Select(x => (float)x));
            Assert.Throws<ArgumentException>(() => frame.InsertColumn(2, tooLong));
            Assert.Throws<ArgumentException>(() => frame.InsertColumn(2, duplicateName));
            Assert.Throws<ArgumentOutOfRangeException>(() => frame.InsertColumn(10, duplicateName));
            Assert.Equal(2, frame.ColumnCount);

            // Replacing column 1 with a column named like column 0 is rejected.
            BaseColumn sameNameInts = new PrimitiveColumn<int>("IntColumn", Enumerable.Range(0, 10).Select(x => x));
            Assert.Throws<ArgumentException>(() => frame.SetColumn(1, sameNameInts));

            // Replacing with a new name succeeds, and the frame holds that exact instance.
            BaseColumn renamedInts = new PrimitiveColumn<int>("IntColumn1", Enumerable.Range(0, 10).Select(x => x));
            frame.SetColumn(1, renamedInts);
            Assert.True(ReferenceEquals(renamedInts, frame.Column(1)));

            // Removing column 1 leaves the original int column instance at index 0.
            frame.RemoveColumn(1);
            Assert.Equal(1, frame.ColumnCount);
            Assert.True(ReferenceEquals(ints, frame.Column(0)));
        }
コード例 #7
0
ファイル: DataFrameTests.cs プロジェクト: pscream/corefxlab
        /// <summary>
        /// Exercises the column computation methods (Abs, All/Any, cumulative operations,
        /// Max/Min/Product/Sum, Round) on a frame with all column types, checking a few
        /// concrete results and which column types are expected to throw.
        /// </summary>
        public void TestComputations()
        {
            DataFrame df = MakeDataFrameWithAllColumnTypes(10);

            // Abs is applied in place.
            df["Int"][0] = -10;
            Assert.Equal(-10, df["Int"][0]);
            df["Int"].Abs();
            Assert.Equal(10, df["Int"][0]);

            // All/Any are expected to be unsupported on every non-bool primitive column.
            string[] nonBoolColumns =
            {
                "Byte", "Char", "Decimal", "Double", "Float", "Int",
                "Long", "Sbyte", "Short", "Uint", "Ulong", "Ushort"
            };
            foreach (string columnName in nonBoolColumns)
            {
                Assert.Throws<NotSupportedException>(() => df[columnName].All());
                Assert.Throws<NotSupportedException>(() => df[columnName].Any());
            }

            bool anyTrue = df["Bool"].Any();
            bool allTrue = df["Bool"].All();
            Assert.True(anyTrue);
            Assert.False(allTrue);

            // Cumulative operations, checked at the last row.
            df["Double"][0] = 100.0;
            df["Double"].CumulativeMax();
            Assert.Equal(100.0, df["Double"][9]);

            df["Float"][0] = -10.0f;
            df["Float"].CumulativeMin();
            Assert.Equal(-10.0f, df["Float"][9]);

            df["Uint"].CumulativeProduct();
            Assert.Equal((uint)0, df["Uint"][9]);

            df["Ushort"].CumulativeSum();
            Assert.Equal((ushort)45, df["Ushort"][9]);

            // Aggregates over the columns as modified above.
            Assert.Equal(100.0, df["Double"].Max());
            Assert.Equal(-10.0f, df["Float"].Min());
            Assert.Equal((uint)0, df["Uint"].Product());
            Assert.Equal((ushort)165, df["Ushort"].Sum());

            // Round is applied in place.
            df["Double"][0] = 100.1;
            Assert.Equal(100.1, df["Double"][0]);
            df["Double"].Round();
            Assert.Equal(100.0, df["Double"][0]);

            // Every column: bool columns are expected to throw NotSupportedException, string
            // columns NotImplementedException, and all remaining columns must not throw.
            for (int index = 0; index < df.ColumnCount; index++)
            {
                BaseColumn current = df.Column(index);
                if (current.DataType == typeof(bool))
                {
                    Assert.Throws<NotSupportedException>(() => current.CumulativeMax());
                    Assert.Throws<NotSupportedException>(() => current.CumulativeMin());
                    Assert.Throws<NotSupportedException>(() => current.CumulativeProduct());
                    Assert.Throws<NotSupportedException>(() => current.CumulativeSum());
                    Assert.Throws<NotSupportedException>(() => current.Max());
                    Assert.Throws<NotSupportedException>(() => current.Min());
                    Assert.Throws<NotSupportedException>(() => current.Product());
                    Assert.Throws<NotSupportedException>(() => current.Sum());
                }
                else if (current.DataType == typeof(string))
                {
                    Assert.Throws<NotImplementedException>(() => current.CumulativeMax());
                    Assert.Throws<NotImplementedException>(() => current.CumulativeMin());
                    Assert.Throws<NotImplementedException>(() => current.CumulativeProduct());
                    Assert.Throws<NotImplementedException>(() => current.CumulativeSum());
                    Assert.Throws<NotImplementedException>(() => current.Max());
                    Assert.Throws<NotImplementedException>(() => current.Min());
                    Assert.Throws<NotImplementedException>(() => current.Product());
                    Assert.Throws<NotImplementedException>(() => current.Sum());
                }
                else
                {
                    current.CumulativeMax();
                    current.CumulativeMin();
                    current.CumulativeProduct();
                    current.CumulativeSum();
                    current.Max();
                    current.Min();
                    current.Product();
                    current.Sum();
                }
            }
        }
コード例 #8
0
        /// <summary>
        /// Groups a ten-row frame by its "Bool" column and checks the results of Count,
        /// First, Head, Tail, Max, Min, Product, and Sum against hard-coded expectations.
        /// </summary>
        public void TestGroupBy()
        {
            DataFrame df = MakeDataFrameWithNumericAndBoolColumns(10);

            // Count: two groups are expected, with 5 entries in the first and 4 in the second.
            DataFrame count = df.GroupBy("Bool").Count();
            Assert.Equal(2, count.RowCount);
            Assert.Equal(5L, count["Int"][0]);
            Assert.Equal(4L, count["Decimal"][1]);
            for (int row = 0; row < count.RowCount; row++)
            {
                long expectedCount = row == 0 ? 5 : 4;
                for (int col = 1; col < count.ColumnCount; col++)
                {
                    Assert.Equal(expectedCount, count.Column(col)[row]);
                }
            }

            // First: row r of the result is compared with row r of the source, for r = 0 and 1.
            DataFrame first = df.GroupBy("Bool").First();
            Assert.Equal(2, first.RowCount);
            for (int row = 0; row < 2; row++)
            {
                for (int col = 0; col < count.ColumnCount; col++)
                {
                    BaseColumn sourceColumn = df.Column(col);
                    BaseColumn resultColumn = first[sourceColumn.Name];
                    Assert.Equal(sourceColumn[row], resultColumn[row]);
                }
            }

            // Head(3): headRows[r] is the result row expected to hold source row r.
            DataFrame head = df.GroupBy("Bool").Head(3);
            List<int> headRows = new List<int> { 0, 3, 1, 4, 2, 5 };
            for (int row = 0; row < 5; row++)
            {
                for (int col = 0; col < count.ColumnCount; col++)
                {
                    BaseColumn sourceColumn = df.Column(col);
                    BaseColumn resultColumn = head[sourceColumn.Name];
                    Assert.Equal(sourceColumn[row].ToString(), resultColumn[headRows[row]].ToString());
                }
            }
            // Source row 5 is compared without ToString(), and the "Bool" column is skipped for it.
            for (int col = 0; col < count.ColumnCount; col++)
            {
                BaseColumn sourceColumn = df.Column(col);
                if (sourceColumn.Name != "Bool")
                {
                    BaseColumn resultColumn = head[sourceColumn.Name];
                    Assert.Equal(sourceColumn[5], resultColumn[headRows[5]]);
                }
            }
            Assert.Equal(6, head.RowCount);

            // Tail(3): source rows 6, 8, 7, 9 are expected at result rows 1, 2, 4, 5.
            DataFrame tail = df.GroupBy("Bool").Tail(3);
            Assert.Equal(6, tail.RowCount);
            List<int> sourceRows = new List<int> { 6, 8, 7, 9 };
            List<int> tailRows = new List<int> { 1, 2, 4, 5 };
            for (int k = 0; k < 4; k++)
            {
                for (int col = 0; col < count.ColumnCount; col++)
                {
                    BaseColumn sourceColumn = df.Column(col);
                    BaseColumn resultColumn = tail[sourceColumn.Name];
                    Assert.Equal(sourceColumn[sourceRows[k]].ToString(), resultColumn[tailRows[k]].ToString());
                }
            }

            // Max: 8 is expected for the first group and 9 for the second ("Bool" and "Char" are skipped).
            DataFrame max = df.GroupBy("Bool").Max();
            Assert.Equal(2, max.RowCount);
            for (int row = 0; row < 2; row++)
            {
                string expectedMax = (row == 0 ? 8L : 9L).ToString();
                for (int col = 0; col < count.ColumnCount; col++)
                {
                    BaseColumn sourceColumn = df.Column(col);
                    bool skipped = sourceColumn.Name == "Bool" || sourceColumn.Name == "Char";
                    if (!skipped)
                    {
                        BaseColumn resultColumn = max[sourceColumn.Name];
                        Assert.Equal(expectedMax, resultColumn[row].ToString());
                    }
                }
            }

            DataFrame min = df.GroupBy("Bool").Min();
            Assert.Equal(2, min.RowCount);

            DataFrame product = df.GroupBy("Bool").Product();
            Assert.Equal(2, product.RowCount);

            DataFrame sum = df.GroupBy("Bool").Sum();
            Assert.Equal(2, sum.RowCount);

            // Min and Product are expected to be 0 and Sum 20 in both groups
            // ("Bool" and "Char" are skipped).
            for (int row = 0; row < 2; row++)
            {
                for (int col = 0; col < count.ColumnCount; col++)
                {
                    BaseColumn sourceColumn = df.Column(col);
                    bool skipped = sourceColumn.Name == "Bool" || sourceColumn.Name == "Char";
                    if (!skipped)
                    {
                        BaseColumn minColumn = min[sourceColumn.Name];
                        Assert.Equal("0", minColumn[row].ToString());

                        BaseColumn productColumn = product[sourceColumn.Name];
                        Assert.Equal("0", productColumn[row].ToString());

                        BaseColumn sumColumn = sum[sourceColumn.Name];
                        Assert.Equal("20", sumColumn[row].ToString());
                    }
                }
            }
        }
コード例 #9
0
        /// <summary>
        /// Asserts that every column of <paramref name="join"/> equals, row by row, the column
        /// expected from <paramref name="left"/> or <paramref name="right"/> for the given join
        /// algorithm. The first <c>left.ColumnCount</c> join columns are compared with the left
        /// frame and the remaining ones with the right frame.
        /// </summary>
        /// <param name="join">The joined frame under test.</param>
        /// <param name="left">The left input of the join.</param>
        /// <param name="right">The right input of the join.</param>
        /// <param name="joinAlgorithm">
        /// The algorithm that produced <paramref name="join"/>; any value other than Left, Right,
        /// or Inner takes the final branch, which pads both sides with nulls.
        /// </param>
        private void VerifyJoin(DataFrame join, DataFrame left, DataFrame right, JoinAlgorithm joinAlgorithm)
        {
            // Identity index map 0..RowCount-1, used to clone a source column down to the join's row count.
            PrimitiveColumn<long> mapIndices = new PrimitiveColumn<long>("map", join.RowCount);
            for (long row = 0; row < join.RowCount; row++)
            {
                mapIndices[row] = row;
            }

            for (int col = 0; col < join.ColumnCount; col++)
            {
                BaseColumn joinColumn = join.Column(col);

                // Pick the source column this join column came from.
                bool fromLeft = col < left.ColumnCount;
                BaseColumn source = fromLeft ? left.Column(col) : right.Column(col - left.ColumnCount);

                BaseColumn expected;
                switch (joinAlgorithm)
                {
                    case JoinAlgorithm.Left:
                        // Left columns are compared as-is; right columns are padded with nulls
                        // when not longer than the join, otherwise cloned through the index map.
                        if (fromLeft)
                        {
                            expected = source;
                        }
                        else
                        {
                            expected = source.Length <= join.RowCount
                                ? source.Clone(numberOfNullsToAppend: join.RowCount - source.Length)
                                : source.Clone(mapIndices);
                        }
                        break;

                    case JoinAlgorithm.Right:
                        // Mirror image of the Left case.
                        if (fromLeft)
                        {
                            expected = source.Length <= join.RowCount
                                ? source.Clone(numberOfNullsToAppend: join.RowCount - source.Length)
                                : source.Clone(mapIndices);
                        }
                        else
                        {
                            expected = source;
                        }
                        break;

                    case JoinAlgorithm.Inner:
                        // Both sides are cloned through the index map.
                        expected = source.Clone(mapIndices);
                        break;

                    default:
                        // Both sides are padded with nulls up to the join's row count.
                        expected = source.Clone(numberOfNullsToAppend: join.RowCount - source.Length);
                        break;
                }

                // Column == column yields a column of per-row comparison results.
                BaseColumn rowMatches = joinColumn == expected;
                for (int row = 0; row < join.RowCount; row++)
                {
                    Assert.Equal(true, rowMatches[row]);
                }
            }
        }
コード例 #10
0
        /// <summary>
        /// Checks that every column of a ten-row frame can be enumerated, then spot-checks
        /// the values yielded by the String, ArrowString, Float, and Int columns by
        /// concatenating them (nulls rendered as "&lt;null&gt;") and comparing to the expected text.
        /// </summary>
        public void TestIEnumerable()
        {
            DataFrame df = MakeDataFrameWithAllColumnTypes(10);

            // Enumerating every column must yield 10 values per column.
            int enumerated = 0;
            for (int col = 0; col < df.ColumnCount; col++)
            {
                BaseColumn current = df.Column(col);
                foreach (object item in current)
                {
                    enumerated++;
                }
            }
            Assert.Equal(10 * df.ColumnCount, enumerated);

            // Spot checks of a few column types.
            StringBuilder text = new StringBuilder();

            StringColumn strings = (StringColumn)df["String"];
            foreach (string item in strings)
            {
                text.Append(item ?? "<null>");
            }
            Assert.Equal("01234<null>6789", text.ToString());

            ArrowStringColumn arrowStrings = (ArrowStringColumn)df["ArrowString"];
            text.Clear();
            foreach (string item in arrowStrings)
            {
                text.Append(item ?? "<null>");
            }
            Assert.Equal("foofoofoofoofoofoofoofoofoofoo", text.ToString());

            PrimitiveColumn<float> floats = (PrimitiveColumn<float>)df["Float"];
            text.Clear();
            foreach (float? item in floats)
            {
                if (item != null)
                {
                    text.Append(item);
                }
                else
                {
                    text.Append("<null>");
                }
            }
            Assert.Equal("01234<null>6789", text.ToString());

            PrimitiveColumn<int> ints = (PrimitiveColumn<int>)df["Int"];
            text.Clear();
            foreach (int? item in ints)
            {
                if (item != null)
                {
                    text.Append(item);
                }
                else
                {
                    text.Append("<null>");
                }
            }
            Assert.Equal("01234<null>6789", text.ToString());
        }