Пример #1
0
        /// <summary>
        /// Groups a five-row map by "cat", summarizes "value2" and "value3" with two
        /// aggregates, and checks the names, data types and values of the result columns.
        /// </summary>
        public void TestSummarizePerColumn()
        {
            var source = DataMap.FromDictionary(new Hashtable()
            {
                { "cat", new int[] { 1, 2, 1, 1, 3 } },
                { "value1", new string[] { "a", "b", "c", "d", "e" } },
                { "value2", new double[] { 10, 20, 30, 40, 50 } },
                { "value3", new double[] { 1, 2, 3, 4, 5 } },
            });

            var keyColumns = new string[] { "cat" };
            var dataColumns = new string[] { "cat", "value1", "value2", "value3" };
            var grouping = new GroupBy(source, keyColumns, dataColumns);

            // One aggregate is given by name ("Max"), the other as a lambda expression string.
            var aggregates = new Dictionary<string, string>()
            {
                { "max", "Max" },
                { "min", "c => c.Min()" }
            };

            var summary = grouping.Summarize(new[] { "value2", "value3" }, aggregates);

            // Result columns are expected to be named "<aggregate>/<column>", after the key column.
            var expectedNames = new[] { "cat", "max/value2", "min/value2", "max/value3", "min/value3" };
            Assert.Equal(expectedNames, summary.ColumnNames.ToArray());

            Assert.Equal(typeof(int), summary["cat"].DataType);
            Assert.Equal(typeof(double), summary["max/value2"].DataType);
            Assert.Equal(typeof(double), summary["min/value3"].DataType);

            var expectedKeys = new int[] { 1, 2, 3 };
            var expectedMaxValue2 = new double[] { 40, 20, 50 };
            var expectedMinValue3 = new double[] { 1, 2, 5 };

            Assert.Equal(expectedKeys, summary["cat"].ToArray<int>());
            Assert.Equal(expectedMaxValue2, summary["max/value2"].ToArray<double>());
            Assert.Equal(expectedMinValue3, summary["min/value3"].ToArray<double>());
        }
Пример #2
0
        /// <summary>
        /// Piles two maps with partially overlapping, uneven-length columns and checks
        /// the resulting row count, column order and per-column contents.
        /// </summary>
        public void TestPileAll()
        {
            var upper = DataMap.FromDictionary(new Dictionary<string, IList>()
            {
                { "foo", new float[] { 1, 2 } },
                { "bar", new string[] { "a", "b", "c" } },
            });

            var lower = DataMap.FromDictionary(new Dictionary<string, IList>()
            {
                { "bar", new string[] { "x", "y" } },
                { "foo", new float[] { 4, 5 } },
                { "baz", new bool[] { true, false, false } }
            });

            var piled = DataMap.PileAll(upper, lower);

            Assert.Equal(6, piled.MaxRowCount);
            Assert.Equal(new string[] { "foo", "bar", "baz" }, piled.ColumnNames.ToArray());

            // The expected arrays below have NaN, empty string and false in the
            // positions where one of the input columns had no value.
            var expectedFoo = new float[] { 1, 2, float.NaN, 4, 5, float.NaN };
            var expectedBar = new string[] { "a", "b", "c", "x", "y", "" };
            var expectedBaz = new bool[] { false, false, false, true, false, false };

            Assert.Equal(expectedFoo, piled["foo"].AsArray<float>());
            Assert.Equal(expectedBar, piled["bar"].AsArray<string>());
            Assert.Equal(expectedBaz, piled["baz"].AsArray<bool>());
        }
Пример #3
0
        /// <summary>
        /// Left-joins two maps on ("key1", "key2") where the left side contains the
        /// key pair (1, "a") twice, and checks column names and joined values.
        /// </summary>
        public void TestLeftJoinWithDuplicateRows()
        {
            // Left side: the key (1, "a") appears in rows 0 and 3; "y" is one element shorter.
            var left = DataMap.FromDictionary(new OrderedDictionary()
            {
                { "key1", new int[] { 1, 2, 3, 1 } },
                { "key2", new string[] { "a", "b", "c", "a" } },
                { "x", new int[] { 10, 20, 30, 40 } },
                { "y", new string[] { "x", "y", "z" } }
            });

            var right = DataMap.FromDictionary(new OrderedDictionary()
            {
                { "key1", new int[] { 99, 1, 2, 4 } },
                { "key2", new string[] { "xxx", "a", "b", "d" } },
                { "x", new double[] { 100, 200, 300, 400 } },
            });

            var joinKeys = new[] { "key1", "key2" };
            var joined = left.LeftJoin(right, joinKeys);

            // Right-side columns whose names clash with the left side are expected with a "_1" suffix.
            var expectedColumns = new[] { "key1", "key2", "x", "y", "key1_1", "key2_1", "x_1" };
            Assert.Equal(expectedColumns, joined.ColumnNames);

            // Left-side columns are expected unchanged.
            Assert.Equal(new int[] { 1, 2, 3, 1 }, joined["key1"].Values);
            Assert.Equal(new string[] { "a", "b", "c", "a" }, joined["key2"].Values);
            Assert.Equal(new int[] { 10, 20, 30, 40 }, joined["x"].Values);
            Assert.Equal(new string[] { "x", "y", "z" }, joined["y"].Values);

            // The key (3, "c") has no match on the right side, hence the NaN.
            Assert.Equal(new double[] { 200, 300, double.NaN, 200 }, joined["x_1"].Values);
        }
Пример #4
0
        /// <summary>
        /// Left-joins a two-row map with a four-row map whose key columns have
        /// different names, and checks the resulting column names and values.
        /// </summary>
        public void TestLeftJoinWithShorterRightSide()
        {
            var left = DataMap.FromDictionary(new OrderedDictionary()
            {
                { "key1", new int[] { 1, 2 } },
                { "key2", new string[] { "a", "b" } },
                { "x", new int[] { 10, 20 } },
                { "y", new string[] { "x", "y" } }
            });

            var right = DataMap.FromDictionary(new OrderedDictionary()
            {
                { "key1xx", new int[] { 99, 1, 2, 4 } },
                { "key2xx", new string[] { "xxx", "a", "b", "d" } },
                { "x", new double[] { 100, 200, 300, 400 } },
            });

            // Key columns are named differently on each side, so both name lists are passed.
            var leftKeys = new[] { "key1", "key2" };
            var rightKeys = new[] { "key1xx", "key2xx" };
            var joined = left.LeftJoin(right, leftKeys, rightKeys);

            // Only the clashing "x" column is expected to receive the "_1" suffix.
            var expectedColumns = new[] { "key1", "key2", "x", "y", "key1xx", "key2xx", "x_1" };
            Assert.Equal(expectedColumns, joined.ColumnNames);

            Assert.Equal(new int[] { 1, 2 }, joined["key1"].Values);
            Assert.Equal(new string[] { "a", "b" }, joined["key2"].Values);
            Assert.Equal(new int[] { 10, 20 }, joined["x"].Values);
            Assert.Equal(new string[] { "x", "y" }, joined["y"].Values);
            Assert.Equal(new double[] { 200, 300 }, joined["x_1"].Values);
        }
Пример #5
0
        /// <summary>
        /// Groups a map by "cat" over a selection of columns (one of which is shorter
        /// than the others) and checks the subsets obtained for keys 1 and 3.
        /// </summary>
        public void TestGroupBy()
        {
            // "value4" has only two elements while the other columns have five.
            var source = DataMap.FromDictionary(new OrderedDictionary()
            {
                { "cat", new int[] { 1, 2, 1, 1, 3 } },
                { "value1", new string[] { "a", "b", "c", "d", "e" } },
                { "value2", new double[] { 10, 20, 30, 40, 50 } },
                { "value3", new double[] { 1, 2, 3, 4, 5 } },
                { "value4", new int[] { 99, 999 } }
            });

            var keyColumns = new string[] { "cat" };
            var dataColumns = new string[] { "value1", "value3", "value4" };
            var grouping = new GroupBy(source, keyColumns, dataColumns);

            // --- Subset for cat == 1 ---
            var subsetForOne = grouping.GetSubset(1);

            Assert.Equal(3, subsetForOne.MaxRowCount);
            Assert.Equal(1, subsetForOne.MinRowCount);
            Assert.Equal(new string[] { "value1", "value3", "value4" }, subsetForOne.ColumnNames);

            Assert.Equal(new string[] { "a", "c", "d" }, subsetForOne["value1"].UnderlyingList);
            Assert.Equal(new double[] { 1, 3, 4 }, subsetForOne["value3"].UnderlyingList);
            Assert.Equal(new int[] { 99 }, subsetForOne["value4"].UnderlyingList);

            // --- Subset for cat == 3 ---
            var subsetForThree = grouping.GetSubset(3);

            Assert.Equal(new string[] { "value1", "value3", "value4" }, subsetForThree.ColumnNames);

            Assert.Equal(1, subsetForThree.MaxRowCount);
            Assert.Equal(0, subsetForThree.MinRowCount);

            Assert.Equal(new string[] { "e" }, subsetForThree["value1"].UnderlyingList);
            Assert.Equal(new double[] { 5 }, subsetForThree["value3"].UnderlyingList);
            Assert.Empty(subsetForThree["value4"].UnderlyingList);
        }
Пример #6
0
        /// <summary>
        /// Concatenates two maps that share the column name "bar" and checks the row
        /// count, the resulting column names and the contents of every column.
        /// </summary>
        public void TestConcatenateAll()
        {
            var first = DataMap.FromDictionary(new Dictionary<string, IList>()
            {
                { "foo", new float[] { 1, 2, 3, 4, 5 } },
                { "bar", new string[] { "a", "b", "c", "d", "e" } },
            });

            var second = DataMap.FromDictionary(new Dictionary<string, IList>()
            {
                { "bar", new string[] { "x", "y", "z" } },
                { "baz", new bool[] { true, false, false } }
            });

            var combined = DataMap.ConcatenateAll(first, second);

            Assert.Equal(5, combined.RowCount);

            // The second "bar" column is expected under the name "bar_1".
            var expectedColumns = new string[] { "foo", "bar", "bar_1", "baz" };
            Assert.Equal(expectedColumns, combined.ColumnNames.ToArray());

            Assert.Equal(new float[] { 1, 2, 3, 4, 5 }, combined.GetAs<float>("foo"));
            Assert.Equal(new string[] { "a", "b", "c", "d", "e" }, combined.GetAs<string>("bar"));
            Assert.Equal(new string[] { "x", "y", "z" }, combined.GetAs<string>("bar_1"));
            Assert.Equal(new bool[] { true, false, false }, combined.GetAs<bool>("baz"));
        }
Пример #7
0
        /// <summary>
        /// Splits a map with a 7-element and a 4-element column into three folds and
        /// checks the training/validation row counts of each fold, plus the exact
        /// contents of the last fold.
        /// </summary>
        public void TestKFold()
        {
            var data = DataMap.FromDictionary(new OrderedDictionary()
            {
                { "x", new int[] { 1, 2, 3, 4, 5, 6, 7 } },
                { "y", new string[] { "a", "b", "c", "d" } }
            });

            var folds = new KFoldSplitter(data, 3).EnumerateFolds().ToArray();

            Assert.Equal(3, folds.Length);

            // Fold 0
            var first = folds[0];
            Assert.Equal(4, first.Training.MaxRowCount);
            Assert.Equal(1, first.Training.MinRowCount);
            Assert.Equal(3, first.Validation.MaxRowCount);
            Assert.Equal(3, first.Validation.MinRowCount);

            // Fold 1
            var second = folds[1];
            Assert.Equal(4, second.Training.MaxRowCount);
            Assert.Equal(3, second.Training.MinRowCount);
            Assert.Equal(3, second.Validation.MaxRowCount);
            Assert.Equal(1, second.Validation.MinRowCount);

            // Fold 2
            var third = folds[2];
            Assert.Equal(6, third.Training.MaxRowCount);
            Assert.Equal(4, third.Training.MinRowCount);
            Assert.Equal(1, third.Validation.MaxRowCount);
            Assert.Equal(0, third.Validation.MinRowCount);

            // Exact contents of the last fold: only "x" reaches the validation part.
            Assert.Equal(new int[] { 1, 2, 3, 4, 5, 6 }, third.Training["x"].UnderlyingList);
            Assert.Equal(new string[] { "a", "b", "c", "d" }, third.Training["y"].UnderlyingList);

            Assert.Equal(new int[] { 7 }, third.Validation["x"].UnderlyingList);
            Assert.Equal(new string[] {}, third.Validation["y"].UnderlyingList);
        }
Пример #8
0
        /// <summary>
        /// Groups a map by two key columns, enumerates the groups and checks the
        /// number of groups as well as the contents of the first and the last one.
        /// </summary>
        public void TestMultipleColumns2()
        {
            var source = DataMap.FromDictionary(new OrderedDictionary()
            {
                { "cat1", new int[]    { 1, 2, 1, 1, 3 } },
                { "cat2", new string[] { "a", "b", "c", "a", "a" } },
                { "value1", new int[]  { 1, 2, 3, 4, 5 } }
            });

            var grouping = new GroupBy(source, new string[] { "cat1", "cat2" });
            var groups = grouping.Groups().ToArray();

            Assert.Equal(4, groups.Length);

            // First group: key (1, "a"), which occurs in rows 0 and 3.
            var head = groups[0];
            Assert.Equal(new[] { "cat1", "cat2", "value1" }, head.ColumnNames);
            Assert.Equal(new[] { 1, 1 }, head["cat1"].Values);
            Assert.Equal(new[] { "a", "a" }, head["cat2"].Values);
            Assert.Equal(new[] { 1, 4 }, head["value1"].Values);

            // Last group: key (3, "a"), which occurs only in row 4.
            var tail = groups[3];
            Assert.Equal(new[] { "cat1", "cat2", "value1" }, tail.ColumnNames);
            Assert.Equal(new[] { 3 }, tail["cat1"].Values);
            Assert.Equal(new[] { "a" }, tail["cat2"].Values);
            Assert.Equal(new[] { 5 }, tail["value1"].Values);
        }
Пример #9
0
        /// <summary>
        /// Selects two of three columns from a map and checks that exactly those
        /// columns are present, in the requested order.
        /// </summary>
        public void TestSelectColumns()
        {
            var source = DataMap.FromDictionary(new Dictionary<string, IList>()
            {
                { "foo", new float[] { 1, 2, 3, 4, 5 } },
                { "bar", new string[] { "a", "b", "c", "d", "e" } },
                { "baz", Enumerable.Range(0, 5).ToArray() }
            });

            // Set the column order explicitly before selecting.
            var columnOrder = new string[] { "foo", "bar", "baz" };
            source.SetOrder(columnOrder);

            DataMap selection = source.SelectColumns("bar", "baz");

            var expectedColumns = new string[] { "bar", "baz" };
            Assert.Equal(expectedColumns, selection.ColumnNames.ToArray());
        }
Пример #10
0
        /// <summary>
        /// Writes a single <c>DataMap</c> to the output: one built from
        /// <c>FromDictionary</c> when that value is set, otherwise an empty map.
        /// </summary>
        protected override void BeginProcessing()
        {
            DataMap map = FromDictionary != null
                ? DataMap.FromDictionary(FromDictionary)
                : new DataMap();

            WriteObject(map);
        }
Пример #11
0
        /// <summary>
        /// Groups a map by two key columns and checks the "value1" contents of the
        /// subset selected by the key pair (1, "a").
        /// </summary>
        public void TestMultipleColumns()
        {
            var source = DataMap.FromDictionary(new Hashtable()
            {
                { "cat1", new int[] { 1, 2, 1, 1, 3 } },
                { "cat2", new string[] { "a", "b", "c", "a", "a" } },
                { "value1", new double[] { 1, 2, 3, 4, 5 } }
            });

            var keyColumns = new string[] { "cat1", "cat2" };
            var grouping = new GroupBy(source, keyColumns);

            // The key pair (1, "a") occurs in rows 0 and 3 of the source.
            var subset = grouping.GetSubset(1, "a");

            var expectedValues = new double[] { 1, 4 };
            Assert.Equal(expectedValues, subset["value1"].UnderlyingList);
        }
Пример #12
0
        /// <summary>
        /// Slices a map with uneven columns using the arguments (1, 2) and checks the
        /// column names, element counts and contents of the slice.
        /// </summary>
        public void TestSlice()
        {
            // "bar" is shorter than "foo", so the slice is expected to hold fewer "bar" elements.
            var source = DataMap.FromDictionary(new Dictionary<string, IList>()
            {
                { "foo", new float[] { 1, 2, 3, 4, 5 } },
                { "bar", new string[] { "a", "b" } }
            });

            var slice = source.Slice(1, 2);

            Assert.Equal(new[] { "foo", "bar" }, slice.ColumnNames);

            var fooColumn = slice["foo"];
            Assert.Equal(2, fooColumn.Count);
            Assert.Equal(new float[] { 2, 3 }, fooColumn.AsList<float>());

            var barColumn = slice["bar"];
            Assert.Equal(1, barColumn.Count);
            Assert.Equal(new string[] { "b" }, barColumn.AsList<string>());
        }
Пример #13
0
        /// <summary>
        /// Sorts a three-column map by "foo" and checks that the column order is kept
        /// and that every column is reordered consistently with the sort key.
        /// </summary>
        public void TestSort()
        {
            var unsorted = DataMap.FromDictionary(new Dictionary<string, IList>()
            {
                { "foo", new string[] { "c", "a", "b" } },
                { "bar", new string[] { "x", "y", "Z" } },
                { "baz", new float[] { 1, 2, 3 } }
            });

            var ordered = unsorted.Sort("foo");

            Assert.Equal(new string[] { "foo", "bar", "baz" }, ordered.ColumnNames);

            // Rows are expected in the order given by "foo": a, b, c.
            var expectedFoo = new string[] { "a", "b", "c" };
            var expectedBar = new string[] { "y", "Z", "x" };
            var expectedBaz = new float[] { 2, 3, 1 };

            Assert.Equal(expectedFoo, ordered["foo"].AsArray<string>());
            Assert.Equal(expectedBar, ordered["bar"].AsArray<string>());
            Assert.Equal(expectedBaz, ordered["baz"].AsArray<float>());
        }
Пример #14
0
        /// <summary>
        /// Takes the top three rows of a map with uneven columns and checks the row
        /// counts, the type of the underlying list and the resulting contents.
        /// </summary>
        public void TestTopRows()
        {
            // "baz" holds only two elements, fewer than the three rows requested.
            var source = DataMap.FromDictionary(new OrderedDictionary()
            {
                { "foo", new float[] { 1, 2, 3, 4, 5 } },
                { "bar", new string[] { "a", "b", "c", "d", "e" } },
                { "baz", new int[] { 10, 20 } }
            });

            var top = source.TopRows(3);

            Assert.Equal(3, top.MaxRowCount);
            Assert.Equal(2, top.MinRowCount);

            // The "foo" column is expected to be backed by a FilteredListView<float>.
            var fooList = top["foo"].UnderlyingList;
            Assert.IsType<FilteredListView<float>>(fooList);

            Assert.Equal(new float[] { 1, 2, 3 }, top["foo"].UnderlyingList);
            Assert.Equal(new int[] { 10, 20 }, top["baz"].UnderlyingList);
        }
Пример #15
0
        /// <summary>
        /// Modifies a column of a group subset in place and checks that the change
        /// is visible in the corresponding rows of the original map.
        /// </summary>
        public void TestUpdateSubset()
        {
            var source = DataMap.FromDictionary(new Hashtable()
            {
                { "cat", new int[] { 1, 2, 1, 1, 3 } },
                { "value1", new string[] { "a", "b", "c", "d", "e" } },
                { "value2", new double[] { 10, 20, 30, 40, 50 } },
                { "value3", new double[] { 1, 2, 3, 4, 5 } }
            });

            var keyColumns = new string[] { "cat" };
            var dataColumns = new string[] { "value1", "value3" };
            var grouping = new GroupBy(source, keyColumns, dataColumns);

            // Multiply "value3" by 100 inside the cat == 1 subset only.
            var subset = grouping.GetSubset(1);
            subset["value3"].ApplyFill<double>((x, i) => x * 100);

            // Rows 0, 2 and 3 (cat == 1) of the source are expected to be scaled; the rest untouched.
            var expected = new double[] { 100, 2, 300, 400, 5 };
            Assert.Equal(expected, source["value3"].UnderlyingList);
        }
Пример #16
0
        /// <summary>
        /// Unstacks "baz" by the values of "bar", keyed on "foo", passing 5 as the
        /// last argument, and checks that extra "na_*" columns appear in the result.
        /// </summary>
        public void TestUnstackMinColumnCount()
        {
            var stacked = DataMap.FromDictionary(new Dictionary<string, IList>()
            {
                { "foo", new string[] { "a", "a", "b" } },
                { "bar", new string[] { "a", "b", "c" } },
                { "baz", new float[] { 1, 2, 3 } }
            });

            var keyColumns = new[] { "foo" };
            var dataColumns = new[] { "baz" };
            var wide = stacked.Unstack("bar", keyColumns, dataColumns, 5);

            // Three value columns come from the distinct "bar" values; two "na" columns follow them.
            var expectedColumns = new[] { "foo", "a_baz", "b_baz", "c_baz", "na_baz", "na_baz_1", };
            Assert.Equal(expectedColumns, wide.ColumnNames);

            Assert.Equal(new string[] { "a", "b" }, wide["foo"].AsList<string>());

            Assert.Equal(new float[] { 1, float.NaN }, wide["a_baz"].AsList<float>());
            Assert.Equal(new float[] { 2, float.NaN }, wide["b_baz"].AsList<float>());
            Assert.Equal(new float[] { float.NaN, 3 }, wide["c_baz"].AsList<float>());

            // The "na" columns are expected to hold only NaN.
            Assert.Equal(new float[] { float.NaN, float.NaN }, wide["na_baz"].AsList<float>());
            Assert.Equal(new float[] { float.NaN, float.NaN }, wide["na_baz_1"].AsList<float>());
        }
Пример #17
0
        /// <summary>
        /// Saves a map to an in-memory stream, loads it back and checks the column
        /// names and the contents of column "a" of the reloaded map.
        /// </summary>
        public void TestSerialization()
        {
            var original = DataMap.FromDictionary(new OrderedDictionary()
            {
                { "a", new int[] { 1, 2, 3, 4, 5 } },
                { "b", new int[] { 100, 200, 300, 400 } }
            });

            using (var buffer = new MemoryStream())
            {
                original.Save(buffer);

                // Rewind so that loading starts from the first written byte.
                buffer.Position = 0;

                var restored = DataMap.Load(buffer);

                var expectedColumns = new[] { "a", "b" };
                var expectedA = new int[] { 1, 2, 3, 4, 5 };

                Assert.Equal(expectedColumns, restored.ColumnNames);
                Assert.Equal(expectedA, restored["a"].ToArray<int>());
            }
        }
Пример #18
0
        /// <summary>
        /// Converts a two-column map with uneven columns to a jagged int array and
        /// checks that it holds one row per element of the longer column.
        /// </summary>
        public void TestToJagged()
        {
            var source = DataMap.FromDictionary(new OrderedDictionary()
            {
                { "a", new int[] { 1, 2, 3, 4, 5 } },
                { "b", new int[] { 100, 200, 300, 400 } }
            });

            var rows = source.ToJagged<int>();

            Assert.Equal(1, rows.Rank);
            Assert.Equal(5, rows.Length);

            // The last row is expected to carry 0 where column "b" has no fifth element.
            var expectedRows = new int[][]
            {
                new int[] { 1, 100 },
                new int[] { 2, 200 },
                new int[] { 3, 300 },
                new int[] { 4, 400 },
                new int[] { 5, 0 }
            };

            for (var i = 0; i < expectedRows.Length; ++i)
            {
                Assert.Equal(expectedRows[i], rows[i]);
            }
        }
Пример #19
0
        /// <summary>
        /// Accumulates one pipeline record.
        /// <para>
        /// When the input wraps an <see cref="IDictionary"/>, it is converted with
        /// <c>DataMap.FromDictionary</c>; the first such map is stored in <c>_dataMap</c>
        /// and later ones are piled onto it.
        /// </para>
        /// <para>
        /// Otherwise each property of the input object contributes one value to the
        /// column of the same name in <c>_data</c>. Columns are padded with <c>null</c>
        /// so that the value of the current record always lands at index
        /// <c>_recordCount</c>, even when earlier records lacked that property.
        /// </para>
        /// </summary>
        protected override void ProcessRecord()
        {
            if (InputObject.BaseObject is IDictionary dict)
            {
                if (_dataMap == null)
                {
                    _dataMap = DataMap.FromDictionary(dict);
                }
                else
                {
                    _dataMap.Pile(DataMap.FromDictionary(dict));
                }

                return;
            }

            foreach (var prop in InputObject.Properties)
            {
                List<object> column;
                if (!_data.TryGetValue(prop.Name, out column))
                {
                    column = new List<object>();
                    _data.Add(prop.Name, column);
                }

                // Fill the gap left by records that did not have this property.
                // This covers a brand-new column (every earlier record is missing) as well
                // as an existing column skipped by some intermediate records; without it
                // the values of later records would shift up to the wrong row.
                while (column.Count < _recordCount)
                {
                    column.Add(null);
                }

                try
                {
                    var value = Utils.StripOffPSObject(prop.Value);
                    column.Add(value);
                }
                catch (Exception)
                {
                    // An exception can occur when you try to get the property value. It will be ignored.
                    // e.g. the ExitCode property of System.Diagnostics.Process
                    column.Add(null);
                }
            }

            ++_recordCount;
        }
Пример #20
0
        /// <summary>
        /// Evaluates the same lambda expression string with <c>CountIf</c> on every
        /// group of a map grouped by "cat" and checks the per-group counts.
        /// </summary>
        public void TestMethodCache()
        {
            var source = DataMap.FromDictionary(new Hashtable()
            {
                { "cat", new int[] { 1, 2, 1, 1, 3 } },
                { "value1", new string[] { "a", "b", "c", "d", "e" } },
                { "value2", new double[] { 10, 20, 30, 40, 50 } },
                { "value3", new double[] { 1, 2, 3, 4, 5 } }
            });

            var counts = new List<int>();
            var keyColumns = new string[] { "cat" };

            // The identical expression string is passed once per group.
            foreach (var group in source.GroupBy(keyColumns))
            {
                counts.Add(group["value2"].CountIf("(x, i) => x <= 30"));
            }

            var expectedCounts = new int[] { 2, 1, 0 };
            Assert.Equal(expectedCounts, counts);
        }
Пример #21
0
        /// <summary>
        /// Unstacks two data columns ("baz" and "baz2") by the values of "bar", keyed
        /// on "foo", and checks the generated column names and their contents.
        /// </summary>
        public void TestUnstack2()
        {
            var stacked = DataMap.FromDictionary(new Dictionary<string, IList>()
            {
                { "foo", new string[] { "a", "a", "b" } },
                { "bar", new string[] { "a", "b", "c" } },
                { "baz", new float[] { 1, 2, 3 } },
                { "baz2", new string[] { "xxx", "yyy", "zzz" } }
            });

            var keyColumns = new[] { "foo" };
            var dataColumns = new[] { "baz", "baz2" };
            var wide = stacked.Unstack("bar", keyColumns, dataColumns);

            // For each distinct "bar" value, one column per data column is expected.
            var expectedColumns = new[] { "foo", "a_baz", "a_baz2", "b_baz", "b_baz2", "c_baz", "c_baz2", };
            Assert.Equal(expectedColumns, wide.ColumnNames);

            Assert.Equal(new string[] { "a", "b" }, wide["foo"].AsList<string>());

            // Float columns: NaN is expected where no source row exists.
            Assert.Equal(new float[] { 1, float.NaN }, wide["a_baz"].AsList<float>());
            Assert.Equal(new float[] { 2, float.NaN }, wide["b_baz"].AsList<float>());
            Assert.Equal(new float[] { float.NaN, 3 }, wide["c_baz"].AsList<float>());

            // String columns: the empty string is expected where no source row exists.
            Assert.Equal(new string[] { "xxx", string.Empty }, wide["a_baz2"].AsList<string>());
            Assert.Equal(new string[] { "yyy", string.Empty }, wide["b_baz2"].AsList<string>());
            Assert.Equal(new string[] { string.Empty, "zzz" }, wide["c_baz2"].AsList<string>());
        }
Пример #22
0
        /// <summary>
        /// Filters a map with a boolean mask computed from the row index and checks
        /// the row count, column names and contents of the filtered map.
        /// </summary>
        public void TestFilter()
        {
            var source = DataMap.FromDictionary(new Dictionary<string, IList>()
            {
                { "foo", new float[] { 1, 2, 3, 4, 5 } },
                { "bar", new string[] { "a", "b", "c", "d", "e" } },
            });

            source.SetOrder(new string[] { "foo", "bar" });

            // The expression selects the rows whose index is even.
            var mask = source["foo"].Apply("(x, i) => i % 2 == 0", typeof(bool));
            var filtered = source.Filter(mask);

            Assert.Equal(3, filtered.RowCount);
            Assert.Equal(new string[] { "foo", "bar" }, filtered.ColumnNames.ToArray());

            Assert.Equal(new float[] { 1, 3, 5 }, filtered.GetAs<float>("foo"));
            Assert.Equal(new string[] { "a", "c", "e" }, filtered.GetAs<string>("bar"));
        }
Пример #23
0
        /// <summary>
        /// Left-joins a map whose key column holds the same value twice with a
        /// one-row map and checks that both left rows receive the matching right value.
        /// </summary>
        public void TestDuplicateValues()
        {
            // Both left rows carry the key 1.
            var left = DataMap.FromDictionary(new OrderedDictionary()
            {
                { "key1", new int[] { 1, 1 } },
                { "x", new int[] { 10, 20 } },
                { "y", new string[] { "x", "y" } }
            });

            var right = DataMap.FromDictionary(new OrderedDictionary()
            {
                { "key1", new int[] { 1 } },
                { "x", new double[] { 200 } }
            });

            var joinKeys = new[] { "key1" };
            var joined = left.LeftJoin(right, joinKeys);

            var expectedColumns = new[] { "key1", "x", "y", "key1_1", "x_1" };
            Assert.Equal(expectedColumns, joined.ColumnNames);

            Assert.Equal(new int[] { 1, 1 }, joined["key1"].Values);
            Assert.Equal(new int[] { 10, 20 }, joined["x"].Values);
            Assert.Equal(new string[] { "x", "y" }, joined["y"].Values);

            // The single right row is expected to be matched by both left rows.
            Assert.Equal(new double[] { 200, 200 }, joined["x_1"].Values);
        }