/// <summary>
///   Builds a string imputation filter from column names and data, and checks
///   that transforming that same data (which has no missing entries) yields
///   an identical table.
/// </summary>
public void StringApplyTest3()
{
    // Let's say we have a dataset of US birds:
    string[] columnNames = { "State", "Bird", "Color" };

    string[][] rows =
    {
        new string[] { "Kansas", "Crow", "Black" },
        new string[] { "Ohio", "Pardal", "Yellow" },
        new string[] { "Hawaii", "Penguim", "Black" }
    };

    var imputation = new Imputation<string>(columnNames, rows);

    string[][] transformed = imputation.Transform(rows);

    // The rendered text is not asserted on; the call only has to complete
    // without throwing.
    string rendered = transformed.ToCSharp();

    string[][] expected =
    {
        new[] { "Kansas", "Crow", "Black" },
        new[] { "Ohio", "Pardal", "Yellow" },
        new[] { "Hawaii", "Penguim", "Black" }
    };

    bool sameContents = transformed.IsEqual(expected, rtol: 1e-10);
    Assert.IsTrue(sameContents);
}
/// <summary>
///   Configures a different imputation strategy for each of four double
///   columns (median, mean, fixed value, mode), learns the replacement values
///   from data containing NaN entries, and checks both the learned
///   replacements and the transformed table.
/// </summary>
public void learn_test()
{
    #region doc_learn
    // Suppose we have the following double data containing missing values
    // (indicated by Double.NaN values). Let's say we would like to replace
    // those NaN values by imputing likely values at their original locations:
    double[][] data =
    {
        new[] { Double.NaN, 0.5, 0.2, 0.7 },
        new[] { 1.2, 6.2, 1.2, 4.2 },
        new[] { 10, 2.2, -1.1, Double.NaN },
        new[] { 10, Double.NaN, -1.1, 1.0 },
        new[] { 10, 2.2, Double.NaN, 1.0 },
    };

    // Let's create a new data imputation filter:
    var imputation = new Imputation<double>();

    // Let's instruct it to replace NaN values in the first
    // column by their median, values in the second column
    // by their average, values in the third column by a fixed
    // value and values in the last column by their mode:
    imputation[0].Strategy = ImputationStrategy.Median;
    imputation[1].Strategy = ImputationStrategy.Mean;
    imputation[2].Strategy = ImputationStrategy.FixedValue;
    imputation[2].ReplaceWith = 42;
    imputation[3].Strategy = ImputationStrategy.Mode;

    // Learn from the data:
    imputation.Learn(data);

    // Now, let's transform the input data using the
    // data imputation rules we just defined above:
    double[][] result = imputation.Transform(data);

    // The result should be as shown below, where:
    //   a = median of the non-missing values of the first column,
    //   b = mean of the non-missing values of the second column,
    //   c = 42 (the fixed value set above),
    //   d = mode of the non-missing values of the last column.
    //
    // double[][] expected = new double[][]
    // {
    //     new[] { a,   0.5,  0.2, 0.7 },
    //     new[] { 1.2, 6.2,  1.2, 4.2 },
    //     new[] { 10,  2.2, -1.1, d   },
    //     new[] { 10,  b,   -1.1, 1.0 },
    //     new[] { 10,  2.2,  c,   1.0 },
    // };
    //
    #endregion

    // Reference replacement values, computed from the non-NaN entries of each column.
    double a = Measures.Median(new[] { 1.2, 10, 10, 10 });
    double b = Measures.Mean(new[] { 0.5, 6.2, 2.2, 2.2 });
    double c = 42;
    double d = Measures.Mode(new[] { 0.7, 4.2, 1.0, 1.0 });

    // Expected value goes first and the actual value second, so that a
    // failure message reports them the right way around.
    Assert.AreEqual(a, imputation[0].ReplaceWith);
    Assert.AreEqual(b, imputation[1].ReplaceWith);
    Assert.AreEqual(c, imputation[2].ReplaceWith);
    Assert.AreEqual(d, imputation[3].ReplaceWith);

    // The rendered text is not asserted on; the call only has to complete
    // without throwing.
    string str = result.ToCSharp();

    var expected = new double[][]
    {
        new[] { a, 0.5, 0.2, 0.7 },
        new[] { 1.2, 6.2, 1.2, 4.2 },
        new[] { 10, 2.2, -1.1, d },
        new[] { 10, b, -1.1, 1.0 },
        new[] { 10, 2.2, c, 1.0 },
    };

    Assert.IsTrue(result.IsEqual(expected, rtol: 1e-10));
}
/// <summary>
///   Runs the non-generic imputation filter over a table of boxed values:
///   checks the default strategy of every named column, configures three of
///   them, learns from the table, and verifies the learned settings and each
///   cell of the first five transformed columns.
/// </summary>
public void object_test()
{
    // Each row has six cells; only the first five columns are named when the
    // filter is constructed below.
    object[][] table =
    {
        new object[] { 0, 19, false, "A", 212.522, 1 },
        new object[] { 1, 5, false, "B", 4.124, 2 },
        new object[] { -1, -1, true, "B", 2.683, 3 },
        new object[] { 3, 5, true, null, Double.NaN, 4 },
        new object[] { 4, -1, false, "C", 2.151, 5 },
    };

    // Create a new imputation filter over five named columns
    var filter = new Imputation("Id", "Floors", "Finished", "Category", "Cost (M)");

    // Before any configuration, every named column is expected to use FixedValue.
    foreach (string column in new[] { "Id", "Floors", "Finished", "Category", "Cost (M)" })
    {
        Assert.AreEqual(ImputationStrategy.FixedValue, filter[column].Strategy);
    }

    bool hasExtraColumn = filter.Columns.Contains("Extra");
    Assert.IsFalse(hasExtraColumn);

    // Per-column configuration: fixed value for "Floors", mode for "Category",
    // median for "Cost (M)".
    filter["Floors"].Strategy = ImputationStrategy.FixedValue;
    filter["Floors"].ReplaceWith = 42;
    filter["Category"].Strategy = ImputationStrategy.Mode;
    filter["Cost (M)"].Strategy = ImputationStrategy.Median;

    filter.Learn(table);

    // "Floors": strategy and replacement are as configured; -1 is expected as its missing value.
    Assert.AreEqual(ImputationStrategy.FixedValue, filter["Floors"].Strategy);
    Assert.AreEqual(-1, filter["Floors"].MissingValue);
    Assert.AreEqual(42, filter["Floors"].ReplaceWith);

    // "Category": the replacement is expected to be "B".
    Assert.AreEqual(ImputationStrategy.Mode, filter["Category"].Strategy);
    Assert.AreEqual("B", filter["Category"].ReplaceWith);

    // "Cost (M)": the replacement is compared with a tolerance of 1e-10.
    Assert.AreEqual(ImputationStrategy.Median, filter["Cost (M)"].Strategy);
    Assert.AreEqual(3.4034999999999997d, (double)filter["Cost (M)"].ReplaceWith, 1e-10);

    // After learning, the filter is expected to report six columns.
    Assert.AreEqual(6, filter.Columns.Count);

    object[][] result = filter.Transform(table);

    Assert.AreEqual(6, result.Columns());
    Assert.AreEqual(5, result.Rows());

    // Column 0 ("Id"): expected to match the input values.
    int[] expectedIds = { 0, 1, -1, 3, 4 };
    for (int row = 0; row < expectedIds.Length; row++)
    {
        Assert.AreEqual(expectedIds[row], result[row][0]);
    }

    // Column 1 ("Floors"): the -1 entries are expected to become 42.
    int[] expectedFloors = { 19, 5, 42, 5, 42 };
    for (int row = 0; row < expectedFloors.Length; row++)
    {
        Assert.AreEqual(expectedFloors[row], result[row][1]);
    }

    // Column 2 ("Finished"): expected to match the input values.
    bool[] expectedFinished = { false, false, true, true, false };
    for (int row = 0; row < expectedFinished.Length; row++)
    {
        Assert.AreEqual(expectedFinished[row], result[row][2]);
    }

    // Column 3 ("Category"): the null entry is expected to become "B".
    string[] expectedCategories = { "A", "B", "B", "B", "C" };
    for (int row = 0; row < expectedCategories.Length; row++)
    {
        Assert.AreEqual(expectedCategories[row], result[row][3]);
    }

    // Column 4 ("Cost (M)"): the NaN entry is expected to become 3.4035.
    double[] expectedCosts =
    {
        212.52199999999999d,
        4.1239999999999997d,
        2.6829999999999998d,
        3.4034999999999997d,
        2.1509999999999998d,
    };
    for (int row = 0; row < expectedCosts.Length; row++)
    {
        Assert.AreEqual(expectedCosts[row], (double)result[row][4], 1e-10);
    }
}