// Verifies that Imputation<double> learns one replacement value per column
// (median, mean, fixed value and mode) and that Transform puts those values
// in place of the NaN entries, producing the expected matrix.
public void learn_test()
{
    #region doc_learn
    // Suppose we have the following double data containing missing values
    // (indicated by Double.NaN values). Let's say we would like to replace
    // those NaN values by imputing likely values at their original locations:
    double[][] data =
    {
        new[] { Double.NaN, 0.5, 0.2, 0.7 },
        new[] { 1.2, 6.2, 1.2, 4.2 },
        new[] { 10, 2.2, -1.1, Double.NaN },
        new[] { 10, Double.NaN, -1.1, 1.0 },
        new[] { 10, 2.2, Double.NaN, 1.0 },
    };

    // Let's create a new data imputation filter:
    var imputation = new Imputation<double>();

    // Let's instruct it to replace NaN values in the first
    // column by their median, values in the second column
    // by their average, values in the third column by a fixed
    // value and values in the last column by their mode:
    imputation[0].Strategy = ImputationStrategy.Median;
    imputation[1].Strategy = ImputationStrategy.Mean;
    imputation[2].Strategy = ImputationStrategy.FixedValue;
    imputation[2].ReplaceWith = 42;
    imputation[3].Strategy = ImputationStrategy.Mode;

    // Learn from the data:
    imputation.Learn(data);

    // Now, let's transform the input data using the
    // data imputation rules we just defined above:
    double[][] result = imputation.Transform(data);

    // The result should be:
    //
    // double[][] expected = new double[][]
    // {
    //     new[] { a,   0.5,  0.2, 0.7 },
    //     new[] { 1.2, 6.2,  1.2, 4.2 },
    //     new[] { 10,  2.2, -1.1, d   },
    //     new[] { 10,  b,   -1.1, 1.0 },
    //     new[] { 10,  2.2,  c,   1.0 },
    // };
    //
    #endregion

    // Reference replacement values, computed independently from the
    // non-missing entries of each column (c is the fixed value set above).
    double a = Measures.Median(new[] { 1.2, 10, 10, 10 });
    double b = Measures.Mean(new[] { 0.5, 6.2, 2.2, 2.2 });
    double c = 42;
    double d = Measures.Mode(new[] { 0.7, 4.2, 1.0, 1.0 });

    // Expected value first, actual value second, so that a failing
    // assertion reports the two values under the right labels.
    Assert.AreEqual(a, imputation[0].ReplaceWith);
    Assert.AreEqual(b, imputation[1].ReplaceWith);
    Assert.AreEqual(c, imputation[2].ReplaceWith);
    Assert.AreEqual(d, imputation[3].ReplaceWith);

    var expected = new double[][]
    {
        new[] { a, 0.5, 0.2, 0.7 },
        new[] { 1.2, 6.2, 1.2, 4.2 },
        new[] { 10, 2.2, -1.1, d },
        new[] { 10, b, -1.1, 1.0 },
        new[] { 10, 2.2, c, 1.0 },
    };

    // Compare with a relative tolerance rather than exact floating-point equality.
    Assert.IsTrue(result.IsEqual(expected, rtol: 1e-10));
}
// Exercises the Imputation filter against a DataTable: checks the default
// strategy of every configured column, customizes three of them, learns from
// the table, and then verifies every cell of the five checked columns.
public void ApplyTest1()
{
    // Build the input table: six typed columns and five rows.
    var buildings = new DataTable("Buildings");

    var schema = new OrderedDictionary<string, Type>
    {
        { "Id", typeof(int) },
        { "Floors", typeof(int) },
        { "Finished", typeof(bool) },
        { "Category", typeof(string) },
        { "Cost (M)", typeof(double) },
        { "Extra", typeof(decimal) }
    };
    buildings.Columns.Add(schema);

    object[][] inputRows =
    {
        new object[] { 0, 19, false, "A", 212.522, 1 },
        new object[] { 1, 5, false, "B", 4.124, 2 },
        new object[] { -1, -1, true, "B", 2.683, 3 },
        new object[] { 3, 5, true, null, Double.NaN, 4 },
        new object[] { 4, -1, false, "C", 2.151, 5 },
    };

    foreach (object[] values in inputRows)
        buildings.Rows.Add(values);

    Assert.AreEqual(0, buildings.Rows[0][0]);
    Assert.AreEqual(-1, buildings.Rows[2][0]);

    // Create an imputation filter over five of the six columns ("Extra" is left out).
    var filter = new Imputation("Id", "Floors", "Finished", "Category", "Cost (M)");

    // Every configured column is expected to start with the FixedValue strategy.
    string[] configured = { "Id", "Floors", "Finished", "Category", "Cost (M)" };
    foreach (string column in configured)
        Assert.AreEqual(ImputationStrategy.FixedValue, filter[column].Strategy);

    Assert.IsFalse(filter.Columns.Contains("Extra"));

    // Customize the strategies for three of the columns.
    var floors = filter["Floors"];
    floors.Strategy = ImputationStrategy.FixedValue;
    floors.ReplaceWith = 42;
    filter["Category"].Strategy = ImputationStrategy.Mode;
    filter["Cost (M)"].Strategy = ImputationStrategy.Median;

    filter.Learn(buildings);

    // Settings and learned replacement values expected after Learn.
    Assert.AreEqual(ImputationStrategy.FixedValue, filter["Floors"].Strategy);
    Assert.AreEqual(-1, filter["Floors"].MissingValue);
    Assert.AreEqual(42, filter["Floors"].ReplaceWith);

    Assert.AreEqual(ImputationStrategy.Mode, filter["Category"].Strategy);
    Assert.AreEqual("B", filter["Category"].ReplaceWith);

    Assert.AreEqual(ImputationStrategy.Median, filter["Cost (M)"].Strategy);
    Assert.AreEqual(3.4034999999999997d, (double)filter["Cost (M)"].ReplaceWith, 1e-10);

    // The test expects six columns to be reported once learning is done.
    Assert.AreEqual(6, filter.Columns.Count);

    DataTable result = filter.Apply(buildings);

    Assert.AreEqual(6, result.Columns.Count);
    Assert.AreEqual(5, result.Rows.Count);

    // Expected output, checked one column at a time, top row to bottom row.
    int[] expectedIds = { 0, 1, -1, 3, 4 };
    for (int row = 0; row < expectedIds.Length; row++)
        Assert.AreEqual(expectedIds[row], result.Rows[row]["Id"]);

    int[] expectedFloors = { 19, 5, 42, 5, 42 };
    for (int row = 0; row < expectedFloors.Length; row++)
        Assert.AreEqual(expectedFloors[row], result.Rows[row]["Floors"]);

    bool[] expectedFinished = { false, false, true, true, false };
    for (int row = 0; row < expectedFinished.Length; row++)
        Assert.AreEqual(expectedFinished[row], result.Rows[row]["Finished"]);

    string[] expectedCategories = { "A", "B", "B", "B", "C" };
    for (int row = 0; row < expectedCategories.Length; row++)
        Assert.AreEqual(expectedCategories[row], result.Rows[row]["Category"]);

    double[] expectedCosts =
    {
        212.52199999999999d,
        4.1239999999999997d,
        2.6829999999999998d,
        3.4034999999999997d,
        2.1509999999999998d,
    };
    for (int row = 0; row < expectedCosts.Length; row++)
        Assert.AreEqual(expectedCosts[row], (double)result.Rows[row]["Cost (M)"], 1e-10);
}
// Same scenario as the DataTable test, but feeding the Imputation filter a
// jagged object array: checks default strategies, customizes three columns,
// learns, transforms, and verifies every cell of the first five columns.
public void object_test()
{
    object[][] rows =
    {
        new object[] { 0, 19, false, "A", 212.522, 1 },
        new object[] { 1, 5, false, "B", 4.124, 2 },
        new object[] { -1, -1, true, "B", 2.683, 3 },
        new object[] { 3, 5, true, null, Double.NaN, 4 },
        new object[] { 4, -1, false, "C", 2.151, 5 },
    };

    // Create an imputation filter over five named columns ("Extra" is left out).
    var filter = new Imputation("Id", "Floors", "Finished", "Category", "Cost (M)");

    // Every configured column is expected to start with the FixedValue strategy.
    string[] configured = { "Id", "Floors", "Finished", "Category", "Cost (M)" };
    foreach (string column in configured)
        Assert.AreEqual(ImputationStrategy.FixedValue, filter[column].Strategy);

    Assert.IsFalse(filter.Columns.Contains("Extra"));

    // Customize the strategies for three of the columns.
    var floors = filter["Floors"];
    floors.Strategy = ImputationStrategy.FixedValue;
    floors.ReplaceWith = 42;
    filter["Category"].Strategy = ImputationStrategy.Mode;
    filter["Cost (M)"].Strategy = ImputationStrategy.Median;

    filter.Learn(rows);

    // Settings and learned replacement values expected after Learn.
    Assert.AreEqual(ImputationStrategy.FixedValue, filter["Floors"].Strategy);
    Assert.AreEqual(-1, filter["Floors"].MissingValue);
    Assert.AreEqual(42, filter["Floors"].ReplaceWith);

    Assert.AreEqual(ImputationStrategy.Mode, filter["Category"].Strategy);
    Assert.AreEqual("B", filter["Category"].ReplaceWith);

    Assert.AreEqual(ImputationStrategy.Median, filter["Cost (M)"].Strategy);
    Assert.AreEqual(3.4034999999999997d, (double)filter["Cost (M)"].ReplaceWith, 1e-10);

    // The test expects six columns to be reported once learning is done.
    Assert.AreEqual(6, filter.Columns.Count);

    object[][] result = filter.Transform(rows);

    Assert.AreEqual(6, result.Columns());
    Assert.AreEqual(5, result.Rows());

    // Expected output, checked one column at a time, top row to bottom row.
    int[] expectedIds = { 0, 1, -1, 3, 4 };
    for (int row = 0; row < expectedIds.Length; row++)
        Assert.AreEqual(expectedIds[row], result[row][0]);

    int[] expectedFloors = { 19, 5, 42, 5, 42 };
    for (int row = 0; row < expectedFloors.Length; row++)
        Assert.AreEqual(expectedFloors[row], result[row][1]);

    bool[] expectedFinished = { false, false, true, true, false };
    for (int row = 0; row < expectedFinished.Length; row++)
        Assert.AreEqual(expectedFinished[row], result[row][2]);

    string[] expectedCategories = { "A", "B", "B", "B", "C" };
    for (int row = 0; row < expectedCategories.Length; row++)
        Assert.AreEqual(expectedCategories[row], result[row][3]);

    double[] expectedCosts =
    {
        212.52199999999999d,
        4.1239999999999997d,
        2.6829999999999998d,
        3.4034999999999997d,
        2.1509999999999998d,
    };
    for (int row = 0; row < expectedCosts.Length; row++)
        Assert.AreEqual(expectedCosts[row], (double)result[row][4], 1e-10);
}