// Groups a five-row map by "cat" and summarizes "value2" and "value3" with a "max" and a "min"
// aggregation. Checks the "<aggregation>/<column>" result column names, their data types and
// the per-group results.
public void TestSummarizePerColumn()
{
    var source = new Hashtable()
    {
        { "cat", new int[] { 1, 2, 1, 1, 3 } },
        { "value1", new string[] { "a", "b", "c", "d", "e" } },
        { "value2", new double[] { 10, 20, 30, 40, 50 } },
        { "value3", new double[] { 1, 2, 3, 4, 5 } },
    };
    var table = DataMap.FromDictionary(source);

    var keyColumns = new string[] { "cat" };
    var memberColumns = new string[] { "cat", "value1", "value2", "value3" };
    var grouping = new GroupBy(table, keyColumns, memberColumns);

    // One aggregation is given by name ("Max"), the other as a lambda expression string.
    var aggregations = new Dictionary<string, string>()
    {
        { "max", "Max" },
        { "min", "c => c.Min()" }
    };
    var summary = grouping.Summarize(new[] { "value2", "value3" }, aggregations);

    var expectedColumns = new[] { "cat", "max/value2", "min/value2", "max/value3", "min/value3" };
    Assert.Equal(expectedColumns, summary.ColumnNames.ToArray());

    Assert.Equal(typeof(int), summary["cat"].DataType);
    Assert.Equal(typeof(double), summary["max/value2"].DataType);
    Assert.Equal(typeof(double), summary["min/value3"].DataType);

    Assert.Equal(new int[] { 1, 2, 3 }, summary["cat"].ToArray<int>());
    Assert.Equal(new double[] { 40, 20, 50 }, summary["max/value2"].ToArray<double>());
    Assert.Equal(new double[] { 1, 2, 5 }, summary["min/value3"].ToArray<double>());
}
// Piles two maps whose columns differ in name, order and length. The expected result has
// six rows and the columns "foo", "bar", "baz"; cells with no source value are expected
// to be NaN (float), "" (string) or false (bool).
public void TestPileAll()
{
    var upper = DataMap.FromDictionary(new Dictionary<string, IList>()
    {
        { "foo", new float[] { 1, 2 } },
        { "bar", new string[] { "a", "b", "c" } },
    });

    var lower = DataMap.FromDictionary(new Dictionary<string, IList>()
    {
        { "bar", new string[] { "x", "y" } },
        { "foo", new float[] { 4, 5 } },
        { "baz", new bool[] { true, false, false } }
    });

    var piled = DataMap.PileAll(upper, lower);

    Assert.Equal(6, piled.MaxRowCount);
    Assert.Equal(new[] { "foo", "bar", "baz" }, piled.ColumnNames.ToArray());

    var fooValues = piled["foo"].AsArray<float>();
    var expectedFoo = new float[] { 1, 2, float.NaN, 4, 5, float.NaN };
    Assert.Equal(expectedFoo, fooValues);

    var barValues = piled["bar"].AsArray<string>();
    var expectedBar = new[] { "a", "b", "c", "x", "y", "" };
    Assert.Equal(expectedBar, barValues);

    var bazValues = piled["baz"].AsArray<bool>();
    var expectedBaz = new bool[] { false, false, false, true, false, false };
    Assert.Equal(expectedBaz, bazValues);
}
// Left-joins on ("key1", "key2") where the left map contains the key pair (1, "a") twice.
// Left columns must come through unchanged, right columns get a "_1" suffix, and the
// unmatched left row (3, "c") is expected to yield NaN in "x_1".
public void TestLeftJoinWithDuplicateRows()
{
    var left = DataMap.FromDictionary(new OrderedDictionary()
    {
        { "key1", new int[] { 1, 2, 3, 1 } },
        { "key2", new string[] { "a", "b", "c", "a" } },
        { "x", new int[] { 10, 20, 30, 40 } },
        { "y", new string[] { "x", "y", "z" } }
    });

    var right = DataMap.FromDictionary(new OrderedDictionary()
    {
        { "key1", new int[] { 99, 1, 2, 4 } },
        { "key2", new string[] { "xxx", "a", "b", "d" } },
        { "x", new double[] { 100, 200, 300, 400 } },
    });

    var joinKeys = new[] { "key1", "key2" };
    var joined = left.LeftJoin(right, joinKeys);

    var expectedColumns = new[] { "key1", "key2", "x", "y", "key1_1", "key2_1", "x_1" };
    Assert.Equal(expectedColumns, joined.ColumnNames);

    // Left-hand columns keep their original contents.
    Assert.Equal(new int[] { 1, 2, 3, 1 }, joined["key1"].Values);
    Assert.Equal(new string[] { "a", "b", "c", "a" }, joined["key2"].Values);
    Assert.Equal(new int[] { 10, 20, 30, 40 }, joined["x"].Values);
    Assert.Equal(new string[] { "x", "y", "z" }, joined["y"].Values);

    // Both (1, "a") rows pick up the same right-hand value.
    Assert.Equal(new double[] { 200, 300, double.NaN, 200 }, joined["x_1"].Values);
}
// Left-joins a two-row map against a four-row map, using differently named key columns on
// each side ("key1"/"key2" vs. "key1xx"/"key2xx"). Only the clashing "x" column is expected
// to be renamed to "x_1".
// NOTE(review): despite the test name, the left map is the shorter one here - confirm intent.
public void TestLeftJoinWithShorterRightSide()
{
    var left = DataMap.FromDictionary(new OrderedDictionary()
    {
        { "key1", new int[] { 1, 2 } },
        { "key2", new string[] { "a", "b" } },
        { "x", new int[] { 10, 20 } },
        { "y", new string[] { "x", "y" } }
    });

    var right = DataMap.FromDictionary(new OrderedDictionary()
    {
        { "key1xx", new int[] { 99, 1, 2, 4 } },
        { "key2xx", new string[] { "xxx", "a", "b", "d" } },
        { "x", new double[] { 100, 200, 300, 400 } },
    });

    var leftKeys = new[] { "key1", "key2" };
    var rightKeys = new[] { "key1xx", "key2xx" };
    var joined = left.LeftJoin(right, leftKeys, rightKeys);

    var expectedColumns = new[] { "key1", "key2", "x", "y", "key1xx", "key2xx", "x_1" };
    Assert.Equal(expectedColumns, joined.ColumnNames);

    Assert.Equal(new int[] { 1, 2 }, joined["key1"].Values);
    Assert.Equal(new string[] { "a", "b" }, joined["key2"].Values);
    Assert.Equal(new int[] { 10, 20 }, joined["x"].Values);
    Assert.Equal(new string[] { "x", "y" }, joined["y"].Values);
    Assert.Equal(new double[] { 200, 300 }, joined["x_1"].Values);
}
// Groups by "cat" and inspects the subsets for keys 1 and 3. The source includes a short
// column ("value4", two entries), so the subsets are expected to be ragged: "value4" has a
// single entry for key 1 and none for key 3.
public void TestGroupBy()
{
    var table = DataMap.FromDictionary(new OrderedDictionary()
    {
        { "cat", new int[] { 1, 2, 1, 1, 3 } },
        { "value1", new string[] { "a", "b", "c", "d", "e" } },
        { "value2", new double[] { 10, 20, 30, 40, 50 } },
        { "value3", new double[] { 1, 2, 3, 4, 5 } },
        { "value4", new int[] { 99, 999 } }
    });

    var selected = new string[] { "value1", "value3", "value4" };
    var grouping = new GroupBy(table, new string[] { "cat" }, selected);

    // Key 1 matches source rows 0, 2 and 3.
    var subsetForOne = grouping.GetSubset(1);
    Assert.Equal(3, subsetForOne.MaxRowCount);
    Assert.Equal(1, subsetForOne.MinRowCount);
    Assert.Equal(new[] { "value1", "value3", "value4" }, subsetForOne.ColumnNames);
    Assert.Equal(new[] { "a", "c", "d" }, subsetForOne["value1"].UnderlyingList);
    Assert.Equal(new double[] { 1, 3, 4 }, subsetForOne["value3"].UnderlyingList);
    Assert.Equal(new int[] { 99 }, subsetForOne["value4"].UnderlyingList);

    // Key 3 matches only source row 4, which lies beyond the end of "value4".
    var subsetForThree = grouping.GetSubset(3);
    Assert.Equal(new[] { "value1", "value3", "value4" }, subsetForThree.ColumnNames);
    Assert.Equal(1, subsetForThree.MaxRowCount);
    Assert.Equal(0, subsetForThree.MinRowCount);
    Assert.Equal(new[] { "e" }, subsetForThree["value1"].UnderlyingList);
    Assert.Equal(new double[] { 5 }, subsetForThree["value3"].UnderlyingList);
    Assert.Empty(subsetForThree["value4"].UnderlyingList);
}
// Concatenates two maps side by side. The second map's "bar" clashes with the first one's
// and is expected to appear as "bar_1"; every column is expected to keep its own length.
public void TestConcatenateAll()
{
    var first = DataMap.FromDictionary(new Dictionary<string, IList>()
    {
        { "foo", new float[] { 1, 2, 3, 4, 5 } },
        { "bar", new string[] { "a", "b", "c", "d", "e" } },
    });

    var second = DataMap.FromDictionary(new Dictionary<string, IList>()
    {
        { "bar", new string[] { "x", "y", "z" } },
        { "baz", new bool[] { true, false, false } }
    });

    var combined = DataMap.ConcatenateAll(first, second);

    Assert.Equal(5, combined.RowCount);
    Assert.Equal(new[] { "foo", "bar", "bar_1", "baz" }, combined.ColumnNames.ToArray());

    var foo = combined.GetAs<float>("foo");
    Assert.Equal(new float[] { 1, 2, 3, 4, 5 }, foo);

    var bar = combined.GetAs<string>("bar");
    Assert.Equal(new[] { "a", "b", "c", "d", "e" }, bar);

    var renamedBar = combined.GetAs<string>("bar_1");
    Assert.Equal(new[] { "x", "y", "z" }, renamedBar);

    var baz = combined.GetAs<bool>("baz");
    Assert.Equal(new bool[] { true, false, false }, baz);
}
// Splits a ragged map ("x" has seven entries, "y" has four) into three folds and checks the
// row-count bounds of each training/validation part, plus the exact contents of the last fold.
public void TestKFold()
{
    var table = DataMap.FromDictionary(new OrderedDictionary()
    {
        { "x", new int[] { 1, 2, 3, 4, 5, 6, 7 } },
        { "y", new string[] { "a", "b", "c", "d" } }
    });

    var folds = new KFoldSplitter(table, 3).EnumerateFolds().ToArray();
    Assert.Equal(3, folds.Length);

    var firstFold = folds[0];
    Assert.Equal(4, firstFold.Training.MaxRowCount);
    Assert.Equal(1, firstFold.Training.MinRowCount);
    Assert.Equal(3, firstFold.Validation.MaxRowCount);
    Assert.Equal(3, firstFold.Validation.MinRowCount);

    var secondFold = folds[1];
    Assert.Equal(4, secondFold.Training.MaxRowCount);
    Assert.Equal(3, secondFold.Training.MinRowCount);
    Assert.Equal(3, secondFold.Validation.MaxRowCount);
    Assert.Equal(1, secondFold.Validation.MinRowCount);

    var lastFold = folds[2];
    Assert.Equal(6, lastFold.Training.MaxRowCount);
    Assert.Equal(4, lastFold.Training.MinRowCount);
    Assert.Equal(1, lastFold.Validation.MaxRowCount);
    Assert.Equal(0, lastFold.Validation.MinRowCount);

    // The last fold validates on the seventh row only, where "y" has no entry.
    Assert.Equal(new int[] { 1, 2, 3, 4, 5, 6 }, lastFold.Training["x"].UnderlyingList);
    Assert.Equal(new string[] { "a", "b", "c", "d" }, lastFold.Training["y"].UnderlyingList);
    Assert.Equal(new int[] { 7 }, lastFold.Validation["x"].UnderlyingList);
    Assert.Equal(new string[] {}, lastFold.Validation["y"].UnderlyingList);
}
// Groups by the ("cat1", "cat2") pair and enumerates the groups. Four distinct key pairs
// are expected; the first and the last group are checked column by column.
public void TestMultipleColumns2()
{
    var table = DataMap.FromDictionary(new OrderedDictionary()
    {
        { "cat1", new int[] { 1, 2, 1, 1, 3 } },
        { "cat2", new string[] { "a", "b", "c", "a", "a" } },
        { "value1", new int[] { 1, 2, 3, 4, 5 } }
    });

    var grouping = new GroupBy(table, new string[] { "cat1", "cat2" });
    var groups = grouping.Groups().ToArray();

    Assert.Equal(4, groups.Length);

    // First group: key pair (1, "a"), two rows.
    var firstGroup = groups[0];
    Assert.Equal(new[] { "cat1", "cat2", "value1" }, firstGroup.ColumnNames);
    Assert.Equal(new[] { 1, 1 }, firstGroup["cat1"].Values);
    Assert.Equal(new[] { "a", "a" }, firstGroup["cat2"].Values);
    Assert.Equal(new[] { 1, 4 }, firstGroup["value1"].Values);

    // Last group: key pair (3, "a"), one row.
    var lastGroup = groups[3];
    Assert.Equal(new[] { "cat1", "cat2", "value1" }, lastGroup.ColumnNames);
    Assert.Equal(new[] { 3 }, lastGroup["cat1"].Values);
    Assert.Equal(new[] { "a" }, lastGroup["cat2"].Values);
    Assert.Equal(new[] { 5 }, lastGroup["value1"].Values);
}
// Selects "bar" and "baz" from a three-column map and checks that the result contains
// exactly those two columns, in that order.
public void TestSelectColumns()
{
    var columns = new Dictionary<string, IList>()
    {
        { "foo", new float[] { 1, 2, 3, 4, 5 } },
        { "bar", new string[] { "a", "b", "c", "d", "e" } },
        { "baz", Enumerable.Range(0, 5).ToArray() }
    };
    var table = DataMap.FromDictionary(columns);

    // Fix the column order explicitly before selecting.
    table.SetOrder(new[] { "foo", "bar", "baz" });

    DataMap selection = table.SelectColumns("bar", "baz");

    Assert.Equal(new[] { "bar", "baz" }, selection.ColumnNames.ToArray());
}
/// <summary>
/// Creates a <c>DataMap</c> and passes it to <c>WriteObject</c>. The map is built from
/// <c>FromDictionary</c> when that member is non-null; otherwise a new empty map is used.
/// </summary>
protected override void BeginProcessing()
{
    DataMap map = FromDictionary != null
        ? DataMap.FromDictionary(FromDictionary)
        : new DataMap();

    WriteObject(map);
}
// Groups by the ("cat1", "cat2") pair and fetches the subset for the key (1, "a"), which
// matches the first and the fourth source row.
public void TestMultipleColumns()
{
    var source = new Hashtable()
    {
        { "cat1", new int[] { 1, 2, 1, 1, 3 } },
        { "cat2", new string[] { "a", "b", "c", "a", "a" } },
        { "value1", new double[] { 1, 2, 3, 4, 5 } }
    };
    var table = DataMap.FromDictionary(source);

    var grouping = new GroupBy(table, new string[] { "cat1", "cat2" });
    var subset = grouping.GetSubset(1, "a");

    Assert.Equal(new double[] { 1, 4 }, subset["value1"].UnderlyingList);
}
// Calls Slice(1, 2) on a ragged map. "foo" (five entries) is expected to yield two values,
// while "bar" (two entries) is expected to yield only one.
public void TestSlice()
{
    var table = DataMap.FromDictionary(new Dictionary<string, IList>()
    {
        { "foo", new float[] { 1, 2, 3, 4, 5 } },
        { "bar", new string[] { "a", "b" } }
    });

    var slice = table.Slice(1, 2);

    Assert.Equal(new[] { "foo", "bar" }, slice.ColumnNames);

    Assert.Equal(2, slice["foo"].Count);
    Assert.Equal(new float[] { 2, 3 }, slice["foo"].AsList<float>());

    Assert.Equal(1, slice["bar"].Count);
    Assert.Equal(new[] { "b" }, slice["bar"].AsList<string>());
}
// Sorts a three-column map by "foo" and checks that the column order is kept and that the
// rows of the other columns are reordered together with "foo".
public void TestSort()
{
    var unsorted = DataMap.FromDictionary(new Dictionary<string, IList>()
    {
        { "foo", new string[] { "c", "a", "b" } },
        { "bar", new string[] { "x", "y", "Z" } },
        { "baz", new float[] { 1, 2, 3 } }
    });

    var ordered = unsorted.Sort("foo");

    Assert.Equal(new[] { "foo", "bar", "baz" }, ordered.ColumnNames);

    var expectedFoo = new[] { "a", "b", "c" };
    var expectedBar = new[] { "y", "Z", "x" };
    var expectedBaz = new float[] { 2, 3, 1 };

    Assert.Equal(expectedFoo, ordered["foo"].AsArray<string>());
    Assert.Equal(expectedBar, ordered["bar"].AsArray<string>());
    Assert.Equal(expectedBaz, ordered["baz"].AsArray<float>());
}
// Takes the top three rows of a ragged map. Long columns are expected to be cut to three
// entries, the two-entry "baz" column is expected to stay at two, and "foo" is expected
// to be backed by a FilteredListView<float>.
public void TestTopRows()
{
    var table = DataMap.FromDictionary(new OrderedDictionary()
    {
        { "foo", new float[] { 1, 2, 3, 4, 5 } },
        { "bar", new string[] { "a", "b", "c", "d", "e" } },
        { "baz", new int[] { 10, 20 } }
    });

    var top = table.TopRows(3);

    Assert.Equal(3, top.MaxRowCount);
    Assert.Equal(2, top.MinRowCount);

    Assert.IsType<FilteredListView<float>>(top["foo"].UnderlyingList);
    Assert.Equal(new float[] { 1, 2, 3 }, top["foo"].UnderlyingList);
    Assert.Equal(new int[] { 10, 20 }, top["baz"].UnderlyingList);
}
// Multiplies "value3" by 100 through the subset for cat == 1 and checks that the change
// shows up in the original map, and only in the rows belonging to that group.
public void TestUpdateSubset()
{
    var source = new Hashtable()
    {
        { "cat", new int[] { 1, 2, 1, 1, 3 } },
        { "value1", new string[] { "a", "b", "c", "d", "e" } },
        { "value2", new double[] { 10, 20, 30, 40, 50 } },
        { "value3", new double[] { 1, 2, 3, 4, 5 } }
    };
    var table = DataMap.FromDictionary(source);

    var grouping = new GroupBy(table, new string[] { "cat" }, new string[] { "value1", "value3" });
    var subset = grouping.GetSubset(1);

    subset["value3"].ApplyFill<double>((value, index) => value * 100);

    // Rows 0, 2 and 3 have cat == 1; rows 1 and 4 must be untouched.
    var expected = new double[] { 100, 2, 300, 400, 5 };
    Assert.Equal(expected, table["value3"].UnderlyingList);
}
// Unstacks "baz" by the values of "bar", keyed on "foo", passing 5 as the fourth argument.
// Only three distinct "bar" values exist, and the expected result has two extra all-NaN
// columns named "na_baz" and "na_baz_1".
public void TestUnstackMinColumnCount()
{
    var stacked = DataMap.FromDictionary(new Dictionary<string, IList>()
    {
        { "foo", new string[] { "a", "a", "b" } },
        { "bar", new string[] { "a", "b", "c" } },
        { "baz", new float[] { 1, 2, 3 } }
    });

    var wide = stacked.Unstack("bar", new[] { "foo" }, new[] { "baz" }, 5);

    var expectedColumns = new[] { "foo", "a_baz", "b_baz", "c_baz", "na_baz", "na_baz_1", };
    Assert.Equal(expectedColumns, wide.ColumnNames);

    Assert.Equal(new[] { "a", "b" }, wide["foo"].AsList<string>());

    // Value columns derived from the actual "bar" values.
    Assert.Equal(new float[] { 1, float.NaN }, wide["a_baz"].AsList<float>());
    Assert.Equal(new float[] { 2, float.NaN }, wide["b_baz"].AsList<float>());
    Assert.Equal(new float[] { float.NaN, 3 }, wide["c_baz"].AsList<float>());

    // Filler columns contain nothing but NaN.
    Assert.Equal(new float[] { float.NaN, float.NaN }, wide["na_baz"].AsList<float>());
    Assert.Equal(new float[] { float.NaN, float.NaN }, wide["na_baz_1"].AsList<float>());
}
// Saves a two-column map to an in-memory stream, loads it back, and checks the column
// names and the contents of column "a".
public void TestSerialization()
{
    var original = DataMap.FromDictionary(new OrderedDictionary()
    {
        { "a", new int[] { 1, 2, 3, 4, 5 } },
        { "b", new int[] { 100, 200, 300, 400 } }
    });

    using (var buffer = new MemoryStream())
    {
        original.Save(buffer);

        // Rewind so that Load reads from the start of what was just written.
        buffer.Position = 0;
        var restored = DataMap.Load(buffer);

        Assert.Equal(new[] { "a", "b" }, restored.ColumnNames);
        Assert.Equal(new int[] { 1, 2, 3, 4, 5 }, restored["a"].ToArray<int>());
    }
}
// Converts a ragged two-column map to a jagged array with one inner array per row.
// "b" has no fifth entry; the expected last row carries 0 in that position.
public void TestToJagged()
{
    var table = DataMap.FromDictionary(new OrderedDictionary()
    {
        { "a", new int[] { 1, 2, 3, 4, 5 } },
        { "b", new int[] { 100, 200, 300, 400 } }
    });

    var jagged = table.ToJagged<int>();

    Assert.Equal(1, jagged.Rank);
    Assert.Equal(5, jagged.Length);

    var expectedRows = new int[][]
    {
        new int[] { 1, 100 },
        new int[] { 2, 200 },
        new int[] { 3, 300 },
        new int[] { 4, 400 },
        new int[] { 5, 0 },
    };

    for (var row = 0; row < expectedRows.Length; ++row)
    {
        Assert.Equal(expectedRows[row], jagged[row]);
    }
}
/// <summary>
/// Accumulates one input object.
/// <para>
/// When the wrapped base object is an <see cref="IDictionary"/>, it is converted with
/// <c>DataMap.FromDictionary</c>: the first one becomes <c>_dataMap</c>, later ones are piled
/// onto it with <c>Pile</c>.
/// </para>
/// <para>
/// Otherwise each property of the input object is appended to the column of the same name in
/// <c>_data</c>, and <c>_recordCount</c> is incremented. A column is padded with nulls up to
/// the current record index before its value is appended, so a value always lands in the row
/// of the object it came from, both for properties seen for the first time and for properties
/// that were missing from some earlier objects.
/// </para>
/// </summary>
protected override void ProcessRecord()
{
    if (InputObject.BaseObject is IDictionary dict)
    {
        if (_dataMap == null)
        {
            _dataMap = DataMap.FromDictionary(dict);
        }
        else
        {
            _dataMap.Pile(DataMap.FromDictionary(dict));
        }

        return;
    }

    foreach (var prop in InputObject.Properties)
    {
        List<object> column;
        if (!_data.TryGetValue(prop.Name, out column))
        {
            column = new List<object>();
            _data.Add(prop.Name, column);
        }

        // Fill the rows of earlier records that did not provide this property. Without this,
        // a property that is absent from an intermediate object would leave the column short
        // and shift every later value into the wrong row.
        while (column.Count < _recordCount)
        {
            column.Add(null);
        }

        try
        {
            var value = Utils.StripOffPSObject(prop.Value);
            column.Add(value);
        }
        catch (Exception)
        {
            // An exception can occur when you try to get the property value. It will be ignored.
            // e.g. the ExitCode property of System.Diagnostics.Process
            column.Add(null);
        }
    }

    ++_recordCount;
}
// Evaluates the same string lambda with CountIf on "value2" of every group produced by
// grouping on "cat", and checks the per-group counts of values that are <= 30.
public void TestMethodCache()
{
    var source = new Hashtable()
    {
        { "cat", new int[] { 1, 2, 1, 1, 3 } },
        { "value1", new string[] { "a", "b", "c", "d", "e" } },
        { "value2", new double[] { 10, 20, 30, 40, 50 } },
        { "value3", new double[] { 1, 2, 3, 4, 5 } }
    };
    var table = DataMap.FromDictionary(source);

    var counts = new List<int>();
    foreach (var group in table.GroupBy(new string[] { "cat" }))
    {
        counts.Add(group["value2"].CountIf("(x, i) => x <= 30"));
    }

    // cat 1 -> {10, 30, 40}, cat 2 -> {20}, cat 3 -> {50}
    Assert.Equal(new int[] { 2, 1, 0 }, counts);
}
// Unstacks two value columns ("baz" and "baz2") by the values of "bar", keyed on "foo".
// Expected result columns are named "<bar value>_<value column>"; missing cells are
// expected to be NaN for floats and string.Empty for strings.
public void TestUnstack2()
{
    var stacked = DataMap.FromDictionary(new Dictionary<string, IList>()
    {
        { "foo", new string[] { "a", "a", "b" } },
        { "bar", new string[] { "a", "b", "c" } },
        { "baz", new float[] { 1, 2, 3 } },
        { "baz2", new string[] { "xxx", "yyy", "zzz" } }
    });

    var valueColumns = new[] { "baz", "baz2" };
    var wide = stacked.Unstack("bar", new[] { "foo" }, valueColumns);

    var expectedColumns = new[] { "foo", "a_baz", "a_baz2", "b_baz", "b_baz2", "c_baz", "c_baz2", };
    Assert.Equal(expectedColumns, wide.ColumnNames);

    Assert.Equal(new[] { "a", "b" }, wide["foo"].AsList<string>());

    // Numeric value column.
    Assert.Equal(new float[] { 1, float.NaN }, wide["a_baz"].AsList<float>());
    Assert.Equal(new float[] { 2, float.NaN }, wide["b_baz"].AsList<float>());
    Assert.Equal(new float[] { float.NaN, 3 }, wide["c_baz"].AsList<float>());

    // String value column.
    Assert.Equal(new[] { "xxx", string.Empty }, wide["a_baz2"].AsList<string>());
    Assert.Equal(new[] { "yyy", string.Empty }, wide["b_baz2"].AsList<string>());
    Assert.Equal(new[] { string.Empty, "zzz" }, wide["c_baz2"].AsList<string>());
}
// Builds a boolean mask from "foo" with a string lambda that is true for even row indices,
// filters the map with it, and checks that rows 0, 2 and 4 remain in both columns.
public void TestFilter()
{
    var table = DataMap.FromDictionary(new Dictionary<string, IList>()
    {
        { "foo", new float[] { 1, 2, 3, 4, 5 } },
        { "bar", new string[] { "a", "b", "c", "d", "e" } },
    });
    table.SetOrder(new[] { "foo", "bar" });

    var mask = table["foo"].Apply("(x, i) => i % 2 == 0", typeof(bool));
    var filtered = table.Filter(mask);

    Assert.Equal(3, filtered.RowCount);
    Assert.Equal(new[] { "foo", "bar" }, filtered.ColumnNames.ToArray());

    var foo = filtered.GetAs<float>("foo");
    Assert.Equal(new float[] { 1, 3, 5 }, foo);

    var bar = filtered.GetAs<string>("bar");
    Assert.Equal(new[] { "a", "c", "e" }, bar);
}
// Left-joins a map whose two rows share the key value 1 against a one-row map with the same
// key. Both left rows are expected to receive the single right-hand value in "x_1".
public void TestDuplicateValues()
{
    var left = DataMap.FromDictionary(new OrderedDictionary()
    {
        { "key1", new int[] { 1, 1 } },
        { "x", new int[] { 10, 20 } },
        { "y", new string[] { "x", "y" } }
    });

    var right = DataMap.FromDictionary(new OrderedDictionary()
    {
        { "key1", new int[] { 1 } },
        { "x", new double[] { 200 } }
    });

    var joined = left.LeftJoin(right, new[] { "key1" });

    var expectedColumns = new[] { "key1", "x", "y", "key1_1", "x_1" };
    Assert.Equal(expectedColumns, joined.ColumnNames);

    Assert.Equal(new int[] { 1, 1 }, joined["key1"].Values);
    Assert.Equal(new int[] { 10, 20 }, joined["x"].Values);
    Assert.Equal(new string[] { "x", "y" }, joined["y"].Values);
    Assert.Equal(new double[] { 200, 200 }, joined["x_1"].Values);
}