public void new_method_create_tree()
        {
            string[][] text = Resources.iris_data.Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries).Apply(x => x.Split(','));

            double[][] inputs = text.GetColumns(0, 1, 2, 3).To<double[][]>();

            string[] labels = text.GetColumn(4);

            var codebook = new Codification("Output", labels);
            int[] outputs = codebook.Translate("Output", labels);

            // And we can use the C4.5 for learning:
            var teacher = new C45Learning();

            // And finally induce the tree:
            var tree = teacher.Learn(inputs, outputs);

            // To get the estimated class labels, we can use
            int[] predicted = tree.Decide(inputs);

            // And the classification error can be computed as 
            double error = new ZeroOneLoss(outputs) // 0.0266
            {
                Mean = true
            }.Loss(tree.Decide(inputs));

            // Moreover, we may decide to convert our tree to a set of rules:
            DecisionSet rules = tree.ToRules();

            // And using the codebook, we can inspect the tree reasoning:
            string ruleText = rules.ToString(codebook, "Output",
                System.Globalization.CultureInfo.InvariantCulture);

            // The output is:
            string expected = @"Iris-setosa =: (2 <= 2.45)
Iris-versicolor =: (2 > 2.45) && (3 <= 1.75) && (0 <= 7.05) && (1 <= 2.85)
Iris-versicolor =: (2 > 2.45) && (3 <= 1.75) && (0 <= 7.05) && (1 > 2.85)
Iris-versicolor =: (2 > 2.45) && (3 > 1.75) && (0 <= 5.95) && (1 > 3.05)
Iris-virginica =: (2 > 2.45) && (3 <= 1.75) && (0 > 7.05)
Iris-virginica =: (2 > 2.45) && (3 > 1.75) && (0 > 5.95)
Iris-virginica =: (2 > 2.45) && (3 > 1.75) && (0 <= 5.95) && (1 <= 3.05)
";

            Assert.AreEqual(0.026666666666666668, error, 1e-10);

            double newError = ComputeError(rules, inputs, outputs);
            Assert.AreEqual(0.026666666666666668, newError, 1e-10);
            Assert.AreEqual(expected, ruleText);
        }
        private double run(double[][] inputs, int[] output)
        {
            int rows = inputs.Length;
            int cols = inputs[0].Length;
            int classes = output.DistinctCount();

            int colsPerTree;

            if (CoverageRatio == 0)
            {
                colsPerTree = (int)(System.Math.Sqrt(cols));
            }
            else
            {
                colsPerTree = (int)(cols * CoverageRatio);
            }

            var trees = forest.Trees;
            int[] idx = Classes.Random(output, classes, trees.Length);

            Parallel.For(0, trees.Length, i =>
            {
                var x = inputs.Get(idx);
                var y = output.Get(idx);

                var c45 = new C45Learning(forest.Trees[i])
                {
                    MaxVariables = colsPerTree,
                    Join = 100
                };

                c45.Learn(x, y);
            });

            return 0;
        }
        public void IrisDatasetTest()
        {
            #region doc_iris
            // In this example, we will process the famous Fisher's Iris dataset in 
            // which the task is to classify weather the features of an Iris flower 
            // belongs to an Iris setosa, an Iris versicolor, or an Iris virginica:
            //
            //  - https://en.wikipedia.org/wiki/Iris_flower_data_set
            //

            // First, let's load the dataset into an array of text that we can process
            string[][] text = Resources.iris_data.Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries).Apply(x => x.Split(','));

            // The first four columns contain the flower features
            double[][] inputs = text.GetColumns(0, 1, 2, 3).To<double[][]>();

            // The last column contains the expected flower type
            string[] labels = text.GetColumn(4);

            // Since the labels are represented as text, the first step is to convert
            // those text labels into integer class labels, so we can process them
            // more easily. For this, we will create a codebook to encode class labels:
            //
            var codebook = new Codification("Output", labels);

            // With the codebook, we can convert the labels:
            int[] outputs = codebook.Translate("Output", labels);

            // Let's declare the names of our input variables:
            DecisionVariable[] features =
            {
                new DecisionVariable("sepal length", DecisionVariableKind.Continuous), 
                new DecisionVariable("sepal width", DecisionVariableKind.Continuous), 
                new DecisionVariable("petal length", DecisionVariableKind.Continuous), 
                new DecisionVariable("petal width", DecisionVariableKind.Continuous), 
            };

            // Now, we can finally create our tree for the 3 classes:
            var tree = new DecisionTree(inputs: features, classes: 3);

            // And we can use the C4.5 for learning:
            var teacher = new C45Learning(tree);

            // And finally induce the tree:
            teacher.Learn(inputs, outputs);

            // To get the estimated class labels, we can use
            int[] predicted = tree.Decide(inputs);
            
            // And the classification error can be computed as 
            double error = new ZeroOneLoss(outputs) // 0.0266
            {
                Mean = true
            }.Loss(tree.Decide(inputs));

            // Moreover, we may decide to convert our tree to a set of rules:
            DecisionSet rules = tree.ToRules();

            // And using the codebook, we can inspect the tree reasoning:
            string ruleText = rules.ToString(codebook, "Output",
                System.Globalization.CultureInfo.InvariantCulture);

            // The output is:
            string expected = @"Iris-setosa =: (petal length <= 2.45)
Iris-versicolor =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length <= 7.05) && (sepal width <= 2.85)
Iris-versicolor =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length <= 7.05) && (sepal width > 2.85)
Iris-versicolor =: (petal length > 2.45) && (petal width > 1.75) && (sepal length <= 5.95) && (sepal width > 3.05)
Iris-virginica =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length > 7.05)
Iris-virginica =: (petal length > 2.45) && (petal width > 1.75) && (sepal length > 5.95)
Iris-virginica =: (petal length > 2.45) && (petal width > 1.75) && (sepal length <= 5.95) && (sepal width <= 3.05)
";
            #endregion

            Assert.AreEqual(0.026666666666666668, error, 1e-10);
            Assert.AreEqual(4, tree.NumberOfInputs);
            Assert.AreEqual(3, tree.NumberOfOutputs);

            double newError = ComputeError(rules, inputs, outputs);
            Assert.AreEqual(0.026666666666666668, newError, 1e-10);
            Assert.AreEqual(expected, ruleText);
        }
Beispiel #4
0
        /// <summary>
        ///   Creates and learns a Decision Tree to recognize the
        ///   previously loaded dataset using the current settings.
        /// </summary>
        /// 
        private void btnCreate_Click(object sender, EventArgs e)
        {
            if (dgvLearningSource.DataSource == null)
            {
                MessageBox.Show("Please load some data first.");
                return;
            }

            // Finishes and save any pending changes to the given data
            dgvLearningSource.EndEdit();



            // Creates a matrix from the entire source data table
            double[,] table = (dgvLearningSource.DataSource as DataTable).ToMatrix(out columnNames);

            // Get only the input vector values (first two columns)
            double[][] inputs = table.GetColumns(0, 1).ToArray();

            // Get only the output labels (last column)
            int[] outputs = table.GetColumn(2).ToInt32();


            // Specify the input variables
            DecisionVariable[] variables = 
            {
                new DecisionVariable("x", DecisionVariableKind.Continuous),
                new DecisionVariable("y", DecisionVariableKind.Continuous),
            };

            // Create the C4.5 learning algorithm
            var c45 = new C45Learning(variables);

            // Learn the decision tree using C4.5
            tree = c45.Learn(inputs, outputs);

            // Show the learned tree in the view
            decisionTreeView1.TreeSource = tree;


            // Get the ranges for each variable (X and Y)
            DoubleRange[] ranges = table.GetRange(0);

            // Generate a Cartesian coordinate system
            double[][] map = Matrix.Cartesian(
                Vector.Interval(ranges[0], 0.05),
                Vector.Interval(ranges[1], 0.05));

            // Classify each point in the Cartesian coordinate system
            double[] result = map.Apply(tree.Compute).ToDouble();
            double[,] surface = map.ToMatrix().InsertColumn(result);

            CreateScatterplot(zedGraphControl2, surface);

            lbStatus.Text = "Learning finished! Click the other tabs to explore results!";
        }