Error-based pruning.

References: Lior Rokach, Oded Maimon. The Data Mining and Knowledge Discovery Handbook, Chapter 9, Decision Trees. Springer, 2nd ed. 2010, XX, 1285 p. 40 illus. Available at: http://www.ise.bgu.ac.il/faculty/liorr/hbchap9.pdf

Example #1
0
        /// <summary>
        ///   Repeatedly applies <c>ErrorBasedPruning</c> (threshold 0.1) to the tree
        ///   produced by <c>ReducedErrorPruningTest.createNurseryExample</c> until the
        ///   value returned by <c>Run()</c> stops decreasing, then verifies the final
        ///   error and the number of tree nodes before and after pruning.
        /// </summary>
        public void RunTest()
        {
            double[][] inputs;
            int[] outputs;

            int training = 6000;
            DecisionTree tree = ReducedErrorPruningTest.createNurseryExample(out inputs, out outputs, training);

            // Size of the tree before any pruning takes place.
            int nodeCount = 0;
            foreach (var node in tree)
            {
                nodeCount++;
            }

            // Samples from index `training` onwards are handed to the pruner. Both
            // slices use the same index range so inputs and labels stay aligned.
            // NOTE(review): Submatrix's end index is presumably inclusive - confirm.
            int lastIndex = inputs.Length - 1;
            var pruningInputs = inputs.Submatrix(training, lastIndex);
            var pruningOutputs = outputs.Submatrix(training, lastIndex);
            ErrorBasedPruning prune = new ErrorBasedPruning(tree, pruningInputs, pruningOutputs);

            prune.Threshold = 0.1;

            // Keep running pruning passes while each pass strictly lowers the
            // reported error; the first pass always runs because the starting
            // value is +infinity.
            double lastError;
            double error = Double.PositiveInfinity;
            do
            {
                lastError = error;
                error = prune.Run();
            } while (error < lastError);

            // Size of the tree after pruning has converged.
            int nodeCount2 = 0;
            foreach (var node in tree)
            {
                nodeCount2++;
            }

            // The error is a computed floating-point value, so compare it with a
            // small tolerance instead of requiring bit-for-bit equality.
            Assert.AreEqual(0.25459770114942532, error, 1e-10);
            Assert.AreEqual(447, nodeCount);
            Assert.AreEqual(193, nodeCount2);
        }
        /// <summary>
        ///   Repeatedly applies <c>ErrorBasedPruning</c> to <paramref name="tree"/>
        ///   until the value returned by <c>Run()</c> stops decreasing, and reports
        ///   how many nodes the tree contains afterwards.
        /// </summary>
        /// <param name="inputs">Input samples; those from index <paramref name="training"/> onwards are passed to the pruner.</param>
        /// <param name="outputs">Labels, sliced over the same index range as <paramref name="inputs"/>.</param>
        /// <param name="tree">The decision tree handed to the pruner.</param>
        /// <param name="training">Index of the first sample used for pruning.</param>
        /// <param name="threshold">Value assigned to <c>ErrorBasedPruning.Threshold</c>.</param>
        /// <param name="nodeCount2">Receives the number of nodes enumerated from the tree after pruning.</param>
        private static void repeat(double[][] inputs, int[] outputs, 
            DecisionTree tree, int training, double threshold, 
            out int nodeCount2)
        {
            // Both slices use the same index range so inputs and labels stay aligned.
            // NOTE(review): Submatrix's end index is presumably inclusive - confirm.
            int lastIndex = inputs.Length - 1;
            var pruningInputs = inputs.Submatrix(training, lastIndex);
            var pruningOutputs = outputs.Submatrix(training, lastIndex);
            ErrorBasedPruning prune = new ErrorBasedPruning(tree, pruningInputs, pruningOutputs);

            prune.Threshold = threshold;

            // Keep running pruning passes while each pass strictly lowers the
            // reported error; the first pass always runs because the starting
            // value is +infinity.
            double lastError;
            double error = Double.PositiveInfinity;

            do
            {
                lastError = error;
                error = prune.Run();
            } while (error < lastError);

            // Count the nodes that remain once pruning has converged.
            nodeCount2 = 0;
            foreach (var node in tree)
            {
                nodeCount2++;
            }
        }