// Applies a Discretization filter to the "Cost (M)" column of the shared
// sample table and checks the resulting table shape and per-row values.
public void ApplyTest1()
{
    // Value expected in "Cost (M)" for each row, in row order.
    string[] expectedCosts = { "213", "4", "3", "3", "2" };

    DataTable source = ProjectionFilterTest.CreateTable();

    // Tip: DataGridBox.Show(source) displays the starting data while debugging.

    // Build a discretization filter that targets the single cost column.
    Discretization costFilter = new Discretization("Cost (M)");

    // Run the filter over the whole table.
    DataTable filtered = costFilter.Apply(source);

    // Tip: DataGridBox.Show(filtered) displays the outcome while debugging.

    // The test expects a 5x5 table after filtering.
    Assert.AreEqual(5, filtered.Columns.Count);
    Assert.AreEqual(5, filtered.Rows.Count);

    for (int row = 0; row < expectedCosts.Length; row++)
    {
        Assert.AreEqual(expectedCosts[row], filtered.Rows[row]["Cost (M)"]);
    }
}
// Checks that a generic Discretization<double, int> filter, configured with one
// catch-all rule per column, converts "x" and "y" to the expected integers while
// leaving the unfiltered "z" column unchanged.
public void rule_matching_test()
{
    DataTable input = new DataTable("Sample data");
    input.Columns.Add("x", typeof(double));
    input.Columns.Add("y", typeof(double));
    input.Columns.Add("z", typeof(double));
    input.Rows.Add(0.02, 60.6, 24.2);
    input.Rows.Add(0.92, 50.2, 21.1);
    input.Rows.Add(0.32, 60.9, 19.8);
    input.Rows.Add(2.02, 61.8, 92.4);

    // Create a discretization filter to operate on the first 2 columns
    var target = new Discretization<double, int>("x", "y");

    // "x": the rule matches every value and rounds to the nearest integer,
    // with midpoints rounded away from zero.
    target.Columns["x"].Mapping[x => true] = x => (int)System.Math.Round(x, MidpointRounding.AwayFromZero);

    // "y": the rule matches every value and rounds up only when the fractional
    // part reaches roughly 0.8, truncating otherwise. The constant sits slightly
    // below 0.8, presumably to tolerate floating-point error (e.g. 61.8 - 61).
    target.Columns["y"].Mapping[x => true] = x => ((x - (int)x) >= 0.7999999999999) ? ((int)x + 1) : (int)x;

    DataTable expected = new DataTable("Sample data");
    expected.Columns.Add("x", typeof(double));
    expected.Columns.Add("y", typeof(double));
    expected.Columns.Add("z", typeof(double));
    expected.Rows.Add(0, 60, 24.2);
    expected.Rows.Add(1, 50, 21.1);
    expected.Rows.Add(0, 61, 19.8);
    expected.Rows.Add(2, 62, 92.4);

    DataTable actual = target.Apply(input);

    // Guard against a vacuous pass: without this check the loop below would
    // silently succeed if the filter returned fewer (or zero) rows.
    Assert.AreEqual(expected.Rows.Count, actual.Rows.Count);

    for (int i = 0; i < expected.Rows.Count; i++)
    {
        double ex = (double)expected.Rows[i][0];
        double ey = (double)expected.Rows[i][1];
        double ez = (double)expected.Rows[i][2];

        // The filtered columns are unboxed as int, the untouched column as double.
        double ax = (int)actual.Rows[i][0];
        double ay = (int)actual.Rows[i][1];
        double az = (double)actual.Rows[i][2];

        Assert.AreEqual(ex, ax);
        Assert.AreEqual(ey, ay);
        Assert.AreEqual(ez, az);
    }
}
// Checks that the non-generic Discretization filter rounds the "x" and "y"
// columns to the expected whole numbers (with a 0.8 threshold set on "y")
// while leaving the unfiltered "z" column unchanged.
public void ApplyTest()
{
    DataTable input = new DataTable("Sample data");
    input.Columns.Add("x", typeof(double));
    input.Columns.Add("y", typeof(double));
    input.Columns.Add("z", typeof(double));
    input.Rows.Add(0.02, 60.6, 24.2);
    input.Rows.Add(0.92, 50.2, 21.1);
    input.Rows.Add(0.32, 60.9, 19.8);
    input.Rows.Add(2.02, 61.8, 92.4);

    // Create a discretization filter to operate on the first 2 columns
    Discretization target = new Discretization("x", "y");

    // Only "y" gets an explicit threshold; "x" keeps whatever the filter defaults to.
    target.Columns["y"].Threshold = 0.8;

    DataTable expected = new DataTable("Sample data");
    expected.Columns.Add("x", typeof(double));
    expected.Columns.Add("y", typeof(double));
    expected.Columns.Add("z", typeof(double));
    expected.Rows.Add(0, 60, 24.2);
    expected.Rows.Add(1, 50, 21.1);
    expected.Rows.Add(0, 61, 19.8);
    expected.Rows.Add(2, 62, 92.4);

    DataTable actual = target.Apply(input);

    // Guard against a vacuous pass: without this check the loop below would
    // silently succeed if the filter returned fewer (or zero) rows.
    Assert.AreEqual(expected.Rows.Count, actual.Rows.Count);

    for (int i = 0; i < expected.Rows.Count; i++)
    {
        double ex = (double)expected.Rows[i][0];
        double ey = (double)expected.Rows[i][1];
        double ez = (double)expected.Rows[i][2];

        double ax = (double)actual.Rows[i][0];
        double ay = (double)actual.Rows[i][1];
        double az = (double)actual.Rows[i][2];

        Assert.AreEqual(ex, ax);
        Assert.AreEqual(ey, ay);
        Assert.AreEqual(ez, az);
    }
}
// Verifies the non-generic Discretization filter on a small three-column table:
// "x" and "y" are discretized (with a 0.8 threshold configured for "y") and
// "z", which is not part of the filter, must come back unchanged.
public void ApplyTest()
{
    DataTable input = new DataTable("Sample data");
    input.Columns.Add("x", typeof(double));
    input.Columns.Add("y", typeof(double));
    input.Columns.Add("z", typeof(double));
    input.Rows.Add(0.02, 60.6, 24.2);
    input.Rows.Add(0.92, 50.2, 21.1);
    input.Rows.Add(0.32, 60.9, 19.8);
    input.Rows.Add(2.02, 61.8, 92.4);

    // Create a discretization filter to operate on the first 2 columns
    Discretization target = new Discretization("x", "y");

    // Only the "y" column has its threshold set explicitly.
    target.Columns["y"].Threshold = 0.8;

    DataTable expected = new DataTable("Sample data");
    expected.Columns.Add("x", typeof(double));
    expected.Columns.Add("y", typeof(double));
    expected.Columns.Add("z", typeof(double));
    expected.Rows.Add(0, 60, 24.2);
    expected.Rows.Add(1, 50, 21.1);
    expected.Rows.Add(0, 61, 19.8);
    expected.Rows.Add(2, 62, 92.4);

    DataTable actual = target.Apply(input);

    // The row loop alone cannot detect missing rows (it would simply run fewer
    // iterations), so the row count is pinned first.
    Assert.AreEqual(expected.Rows.Count, actual.Rows.Count);

    for (int i = 0; i < expected.Rows.Count; i++)
    {
        double ex = (double)expected.Rows[i][0];
        double ey = (double)expected.Rows[i][1];
        double ez = (double)expected.Rows[i][2];

        double ax = (double)actual.Rows[i][0];
        double ay = (double)actual.Rows[i][1];
        double az = (double)actual.Rows[i][2];

        Assert.AreEqual(ex, ax);
        Assert.AreEqual(ey, ay);
        Assert.AreEqual(ez, az);
    }
}
// Discretizes the integer "Temperature" column of a tennis table containing
// missing (null) cells into "Hot" / "Mild" / "Cool" labels, then compares every
// cell of the result against a hand-built expected table.
public void missing_values_thresholds_test()
{
    // Source table: "Temperature" is numeric, every other column is text.
    var source = new DataTable("Tennis Example with Missing Values");
    source.Columns.Add("Day", typeof(string));
    source.Columns.Add("Outlook", typeof(string));
    source.Columns.Add("Temperature", typeof(int));
    source.Columns.Add("Humidity", typeof(string));
    source.Columns.Add("Wind", typeof(string));
    source.Columns.Add("PlayTennis", typeof(string));

    object[][] sourceRows =
    {
        new object[] { "D1", "Sunny", 35, "High", "Weak", "No" },
        new object[] { "D2", null, 32, "High", "Strong", "No" },
        new object[] { "D3", null, null, "High", null, "Yes" },
        new object[] { "D4", "Rain", 25, "High", "Weak", "Yes" },
        new object[] { "D5", "Rain", 16, null, "Weak", "Yes" },
        new object[] { "D6", "Rain", 12, "Normal", "Strong", "No" },
        // The temperature is deliberately given as text here, as in the original data.
        new object[] { "D7", "Overcast", "18", "Normal", "Strong", "Yes" },
        new object[] { "D8", null, 27, "High", null, "No" },
        new object[] { "D9", null, 17, "Normal", "Weak", "Yes" },
        new object[] { "D10", null, null, "Normal", null, "Yes" },
        new object[] { "D11", null, 23, "Normal", null, "Yes" },
        new object[] { "D12", "Overcast", 25, null, "Strong", "Yes" },
        new object[] { "D13", "Overcast", 33, null, "Weak", "Yes" },
        new object[] { "D14", "Rain", 24, "High", "Strong", "No" },
    };

    foreach (object[] cells in sourceRows)
    {
        source.Rows.Add(cells);
    }

    Assert.AreEqual(14, source.Rows.Count);
    Assert.AreEqual(6, source.Columns.Count);

    // Three half-open temperature bands, each mapped to a text label.
    var temperatureBands = new Discretization<double, string>()
    {
        { "Temperature", x => x >= 30 && x < 50, "Hot" },
        { "Temperature", x => x >= 20 && x < 30, "Mild" },
        { "Temperature", x => x >= 0 && x < 20, "Cool" },
    };

    DataTable actual = temperatureBands.Apply(source);

    Assert.AreEqual(14, actual.Rows.Count);
    Assert.AreEqual(6, actual.Columns.Count);

    // Expected table: same layout, but "Temperature" now holds text labels.
    var wanted = new DataTable("Tennis Example with Missing Values");
    wanted.Columns.Add("Day", typeof(string));
    wanted.Columns.Add("Outlook", typeof(string));
    wanted.Columns.Add("Temperature", typeof(string));
    wanted.Columns.Add("Humidity", typeof(string));
    wanted.Columns.Add("Wind", typeof(string));
    wanted.Columns.Add("PlayTennis", typeof(string));

    object[][] wantedRows =
    {
        new object[] { "D1", "Sunny", "Hot", "High", "Weak", "No" },
        new object[] { "D2", null, "Hot", "High", "Strong", "No" },
        new object[] { "D3", null, null, "High", null, "Yes" },
        new object[] { "D4", "Rain", "Mild", "High", "Weak", "Yes" },
        new object[] { "D5", "Rain", "Cool", null, "Weak", "Yes" },
        new object[] { "D6", "Rain", "Cool", "Normal", "Strong", "No" },
        new object[] { "D7", "Overcast", "Cool", "Normal", "Strong", "Yes" },
        new object[] { "D8", null, "Mild", "High", null, "No" },
        new object[] { "D9", null, "Cool", "Normal", "Weak", "Yes" },
        new object[] { "D10", null, null, "Normal", null, "Yes" },
        new object[] { "D11", null, "Mild", "Normal", null, "Yes" },
        new object[] { "D12", "Overcast", "Mild", null, "Strong", "Yes" },
        new object[] { "D13", "Overcast", "Hot", null, "Weak", "Yes" },
        new object[] { "D14", "Rain", "Mild", "High", "Strong", "No" },
    };

    foreach (object[] cells in wantedRows)
    {
        wanted.Rows.Add(cells);
    }

    // Cell-by-cell comparison, driven by the expected table's dimensions.
    for (int r = 0; r < wanted.Rows.Count; r++)
    {
        DataRow wantedRow = wanted.Rows[r];
        DataRow actualRow = actual.Rows[r];

        for (int c = 0; c < wanted.Columns.Count; c++)
        {
            Assert.AreEqual(wantedRow[c], actualRow[c]);
        }
    }
}
// End-to-end sample: discretizes the numeric "Temperature" column, codifies the
// resulting table (with NaN as the replacement for missing values), learns a
// C4.5 decision tree, converts it to rules and checks the rule text, the
// codebook symbol counts and the classification error.
public void missing_values_thresholds_test()
{
    #region doc_missing_thresholds
    // In this example, we will be using a modified version of the famous Play Tennis
    // example by Tom Mitchell (1998), where some values have been replaced by missing
    // values. Missing entries are given as null in the table below; the codification
    // step further down is configured to replace missing values with Double.NaN.

    // Note: this example uses DataTables to represent the input data,
    // but this is not required. The same could be performed using plain
    // double[][] matrices and vectors instead.
    DataTable data = new DataTable("Tennis Example with Missing Values");

    // "Temperature" is the only numeric column; all others hold text.
    data.Columns.Add("Day", typeof(string));
    data.Columns.Add("Outlook", typeof(string));
    data.Columns.Add("Temperature", typeof(int));
    data.Columns.Add("Humidity", typeof(string));
    data.Columns.Add("Wind", typeof(string));
    data.Columns.Add("PlayTennis", typeof(string));

    data.Rows.Add("D1", "Sunny", 35, "High", "Weak", "No");
    data.Rows.Add("D2", null, 32, "High", "Strong", "No");
    data.Rows.Add("D3", null, null, "High", null, "Yes");
    data.Rows.Add("D4", "Rain", 25, "High", "Weak", "Yes");
    data.Rows.Add("D5", "Rain", 16, null, "Weak", "Yes");
    data.Rows.Add("D6", "Rain", 12, "Normal", "Strong", "No");
    data.Rows.Add("D7", "Overcast", "18", "Normal", "Strong", "Yes");
    data.Rows.Add("D8", null, 27, "High", null, "No");
    data.Rows.Add("D9", null, 17, "Normal", "Weak", "Yes");
    data.Rows.Add("D10", null, null, "Normal", null, "Yes");
    data.Rows.Add("D11", null, 23, "Normal", null, "Yes");
    data.Rows.Add("D12", "Overcast", 25, null, "Strong", "Yes");
    data.Rows.Add("D13", "Overcast", 33, null, "Weak", "Yes");
    data.Rows.Add("D14", "Rain", 24, "High", "Strong", "No");

    // Columns used as inputs to the learner ("Day" is left out, "PlayTennis" is the output).
    string[] inputNames = new[] { "Outlook", "Temperature", "Humidity", "Wind" };

    // Create a new discretization codebook to convert
    // the numbers above into discrete, string labels:
    var discretization = new Discretization <double, string>()
    {
        { "Temperature", x => x >= 30 && x < 50, "Hot" },
        { "Temperature", x => x >= 20 && x < 30, "Mild" },
        { "Temperature", x => x >= 00 && x < 20, "Cool" },
    };

    // Use the discretization to convert all the data
    DataTable discrete = discretization.Apply(data);

    // Create a new codification codebook to convert
    // the strings above into numeric, integer labels:
    var codebook = new Codification()
    {
        DefaultMissingValueReplacement = Double.NaN
    };

    // Use the codebook to convert all the data
    DataTable symbols = codebook.Apply(discrete);

    // Grab the training input and output instances:
    double[][] inputs = symbols.ToJagged(inputNames);
    int[] outputs = symbols.ToArray <int>("PlayTennis");

    // Create a new learning algorithm
    var teacher = new C45Learning()
    {
        Attributes = DecisionVariable.FromCodebook(codebook, inputNames)
    };

    // Use the learning algorithm to induce a new tree:
    DecisionTree tree = teacher.Learn(inputs, outputs);

    // To get the estimated class labels, we can use
    int[] predicted = tree.Decide(inputs);

    // The classification error (~0.214) can be computed as
    double error = new ZeroOneLoss(outputs).Loss(predicted);

    // Moreover, we may decide to convert our tree to a set of rules:
    DecisionSet rules = tree.ToRules();

    // And using the codebook, we can inspect the tree reasoning:
    string ruleText = rules.ToString(codebook, "PlayTennis", System.Globalization.CultureInfo.InvariantCulture);

    // The output should be:
    string expected = @"No =: (Outlook == Sunny) No =: (Outlook == Rain) && (Wind == Strong) Yes =: (Outlook == Overcast) Yes =: (Outlook == Rain) && (Wind == Weak) ";
    #endregion

    // NOTE(review): as written here the expected literal contains no line breaks,
    // so the Replace below has nothing to substitute. If ToString emits one rule
    // per line, the literal presumably needs its original line breaks restored -
    // confirm against the rule text actually produced.
    expected = expected.Replace("\r\n", Environment.NewLine);
    Assert.AreEqual(expected, ruleText);

    // Number of distinct symbols the codebook is expected to hold per column.
    Assert.AreEqual(14, codebook["Day"].NumberOfSymbols);
    Assert.AreEqual(3, codebook["Outlook"].NumberOfSymbols);
    Assert.AreEqual(3, codebook["Temperature"].NumberOfSymbols);
    Assert.AreEqual(2, codebook["Humidity"].NumberOfSymbols);
    Assert.AreEqual(2, codebook["Wind"].NumberOfSymbols);
    Assert.AreEqual(2, codebook["PlayTennis"].NumberOfSymbols);

    // Every column should carry the NaN replacement configured above and be ordinal.
    foreach (var col in codebook)
    {
        Assert.AreEqual(Double.NaN, col.MissingValueReplacement);
        Assert.AreEqual(CodificationVariable.Ordinal, col.VariableType);
    }

    // 0.2142857... equals 3/14, i.e. three of the fourteen samples misclassified.
    Assert.AreEqual(0.21428571428571427, error, 1e-10);
    Assert.AreEqual(4, tree.NumberOfInputs);
    Assert.AreEqual(2, tree.NumberOfOutputs);

    // The rule set is expected to yield the same error as the tree it came from.
    double newError = ComputeError(rules, inputs, outputs);
    Assert.AreEqual(0.21428571428571427, newError, 1e-10);
}