public void new_method_create_tree()
{
    // Parse the embedded Iris data: split into lines, then split each line on commas.
    string[][] rows = Resources.iris_data
        .Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries)
        .Apply(line => line.Split(','));

    // Columns 0-3 are converted to numeric inputs; column 4 holds the textual labels.
    double[][] inputs = rows.GetColumns(0, 1, 2, 3).To<double[][]>();
    string[] labels = rows.GetColumn(4);

    // Translate the textual labels into integer class labels through a codebook.
    var codebook = new Codification("Output", labels);
    int[] outputs = codebook.Translate("Output", labels);

    // No tree is handed to the learner here: the tree used below is whatever Learn returns.
    var learner = new C45Learning();
    var tree = learner.Learn(inputs, outputs);

    // Estimated class labels for the training samples.
    int[] predicted = tree.Decide(inputs);

    // Zero-one loss with Mean enabled, evaluated on the training data (0.0266).
    var loss = new ZeroOneLoss(outputs) { Mean = true };
    double error = loss.Loss(tree.Decide(inputs));

    // Convert the tree into a rule set and render it using the codebook's label names.
    DecisionSet rules = tree.ToRules();
    string ruleText = rules.ToString(
        codebook,
        "Output",
        System.Globalization.CultureInfo.InvariantCulture);

    // Since no variable names were declared, the rules refer to inputs by column index.
    string expected = @"Iris-setosa =: (2 <= 2.45) Iris-versicolor =: (2 > 2.45) && (3 <= 1.75) && (0 <= 7.05) && (1 <= 2.85) Iris-versicolor =: (2 > 2.45) && (3 <= 1.75) && (0 <= 7.05) && (1 > 2.85) Iris-versicolor =: (2 > 2.45) && (3 > 1.75) && (0 <= 5.95) && (1 > 3.05) Iris-virginica =: (2 > 2.45) && (3 <= 1.75) && (0 > 7.05) Iris-virginica =: (2 > 2.45) && (3 > 1.75) && (0 > 5.95) Iris-virginica =: (2 > 2.45) && (3 > 1.75) && (0 <= 5.95) && (1 <= 3.05) ";

    Assert.AreEqual(0.026666666666666668, error, 1e-10);

    // The rule set is expected to reproduce the same error as the tree itself.
    double newError = ComputeError(rules, inputs, outputs);
    Assert.AreEqual(0.026666666666666668, newError, 1e-10);

    Assert.AreEqual(expected, ruleText);
}
/// <summary>
///   Trains every tree of the forest with C4.5 on a subset of the given samples.
/// </summary>
///
/// <param name="inputs">The input vectors; the number of columns is read from the first row.</param>
/// <param name="output">The class label associated with each input vector.</param>
///
/// <returns>Always zero.</returns>
///
private double run(double[][] inputs, int[] output)
{
    int featureCount = inputs[0].Length;
    int classCount = output.DistinctCount();

    // A coverage ratio of zero selects the square-root rule; otherwise the
    // ratio is applied to the column count. Both results are truncated to int.
    int colsPerTree = (CoverageRatio == 0)
        ? (int)(System.Math.Sqrt(featureCount))
        : (int)(featureCount * CoverageRatio);

    var trees = forest.Trees;

    // NOTE(review): the index array is built once, before the loop, so every
    // tree is trained on the same rows selected by it - confirm this is intended.
    int[] sampleIndices = Classes.Random(output, classCount, trees.Length);

    Parallel.For(0, trees.Length, treeIndex =>
    {
        double[][] sampleInputs = inputs.Get(sampleIndices);
        int[] sampleOutputs = output.Get(sampleIndices);

        var learner = new C45Learning(forest.Trees[treeIndex]);
        learner.MaxVariables = colsPerTree;
        learner.Join = 100;

        learner.Learn(sampleInputs, sampleOutputs);
    });

    return 0;
}
public void IrisDatasetTest()
{
    #region doc_iris
    // This example works through Fisher's famous Iris dataset, where the
    // task is to decide whether the features of an Iris flower belong to
    // an Iris setosa, an Iris versicolor, or an Iris virginica:
    //
    //  - https://en.wikipedia.org/wiki/Iris_flower_data_set
    //
    // Start by loading the dataset as text: one row per line, one field per comma.
    string[][] rows = Resources.iris_data
        .Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries)
        .Apply(line => line.Split(','));

    // The flower features are stored in the first four columns
    double[][] inputs = rows.GetColumns(0, 1, 2, 3).To<double[][]>();

    // The expected flower type is stored in the last column
    string[] labels = rows.GetColumn(4);

    // The labels are text, so they are first converted into integer class
    // labels that are easier to process. A codebook performs that encoding:
    //
    var codebook = new Codification("Output", labels);

    // Use the codebook to translate the labels:
    int[] outputs = codebook.Translate("Output", labels);

    // Give a name to each of the input variables:
    DecisionVariable[] variables = new DecisionVariable[]
    {
        new DecisionVariable("sepal length", DecisionVariableKind.Continuous),
        new DecisionVariable("sepal width", DecisionVariableKind.Continuous),
        new DecisionVariable("petal length", DecisionVariableKind.Continuous),
        new DecisionVariable("petal width", DecisionVariableKind.Continuous),
    };

    // Create the tree for those variables and the 3 classes:
    DecisionTree tree = new DecisionTree(inputs: variables, classes: 3);

    // Attach a C4.5 learner to the tree:
    var learner = new C45Learning(tree);

    // Induce the tree from the data:
    learner.Learn(inputs, outputs);

    // The estimated class labels can be obtained with
    int[] predicted = tree.Decide(inputs);

    // The classification error (0.0266) is the mean zero-one loss:
    var loss = new ZeroOneLoss(outputs) { Mean = true };
    double error = loss.Loss(tree.Decide(inputs));

    // The tree can also be converted into a set of rules:
    DecisionSet rules = tree.ToRules();

    // With the codebook, the reasoning of the tree can be inspected as text:
    string ruleText = rules.ToString(
        codebook,
        "Output",
        System.Globalization.CultureInfo.InvariantCulture);

    // Which gives:
    string expected = @"Iris-setosa =: (petal length <= 2.45) Iris-versicolor =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length <= 7.05) && (sepal width <= 2.85) Iris-versicolor =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length <= 7.05) && (sepal width > 2.85) Iris-versicolor =: (petal length > 2.45) && (petal width > 1.75) && (sepal length <= 5.95) && (sepal width > 3.05) Iris-virginica =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length > 7.05) Iris-virginica =: (petal length > 2.45) && (petal width > 1.75) && (sepal length > 5.95) Iris-virginica =: (petal length > 2.45) && (petal width > 1.75) && (sepal length <= 5.95) && (sepal width <= 3.05) ";
    #endregion

    Assert.AreEqual(0.026666666666666668, error, 1e-10);

    // The tree was declared with four input variables and three classes.
    Assert.AreEqual(4, tree.NumberOfInputs);
    Assert.AreEqual(3, tree.NumberOfOutputs);

    // The rule set is expected to reproduce the same error as the tree itself.
    double newError = ComputeError(rules, inputs, outputs);
    Assert.AreEqual(0.026666666666666668, newError, 1e-10);

    Assert.AreEqual(expected, ruleText);
}
/// <summary>
///   Creates and learns a Decision Tree to recognize the
///   previously loaded dataset using the current settings.
/// </summary>
///
/// <param name="sender">The control that raised the click event (not used).</param>
/// <param name="e">The event arguments (not used).</param>
///
private void btnCreate_Click(object sender, EventArgs e)
{
    // Cast once and guard on the result: `as` yields null both when nothing has
    // been loaded and when the data source is not a DataTable, so this single
    // check replaces the former unchecked `(... as DataTable).ToMatrix(...)`.
    var sourceTable = dgvLearningSource.DataSource as DataTable;
    if (sourceTable == null)
    {
        MessageBox.Show("Please load some data first.");
        return;
    }

    // Finishes and save any pending changes to the given data
    dgvLearningSource.EndEdit();

    // Creates a matrix from the entire source data table
    // (the column names are stored in the columnNames member as a side effect)
    double[,] table = sourceTable.ToMatrix(out columnNames);

    // Get only the input vector values (columns 0 and 1)
    double[][] inputs = table.GetColumns(0, 1).ToArray();

    // Get only the output labels (column 2)
    int[] outputs = table.GetColumn(2).ToInt32();

    // Specify the input variables
    DecisionVariable[] variables =
    {
        new DecisionVariable("x", DecisionVariableKind.Continuous),
        new DecisionVariable("y", DecisionVariableKind.Continuous),
    };

    // Create the C4.5 learning algorithm
    var c45 = new C45Learning(variables);

    // Learn the decision tree using C4.5
    tree = c45.Learn(inputs, outputs);

    // Show the learned tree in the view
    decisionTreeView1.TreeSource = tree;

    // Get the ranges for each variable (X and Y)
    DoubleRange[] ranges = table.GetRange(0);

    // Generate a Cartesian coordinate system covering both ranges in steps of 0.05
    double[][] map = Matrix.Cartesian(
        Vector.Interval(ranges[0], 0.05),
        Vector.Interval(ranges[1], 0.05));

    // Classify each point in the Cartesian coordinate system
    double[] result = map.Apply(tree.Compute).ToDouble();

    // Append the predicted class as an extra column and plot the resulting surface
    double[,] surface = map.ToMatrix().InsertColumn(result);

    CreateScatterplot(zedGraphControl2, surface);

    lbStatus.Text = "Learning finished! Click the other tabs to explore results!";
}