/// <summary>
        /// Collapses the subtree rooted at <paramref name="removeNode"/> into a single new leaf
        /// attached to the node's parent.
        /// </summary>
        /// <remarks>
        /// Every leaf found below <paramref name="removeNode"/> has its data instances moved out of
        /// <c>data_locations</c> and merged into one set. That merged set is registered in
        /// <c>data_locations</c> under the newly created leaf. If the node has no parent (it is
        /// the root), a warning is written to the console and the tree is left unchanged.
        /// </remarks>
        /// <param name="removeNode">The node whose whole subtree is replaced by one leaf.</param>
        /// <returns>This tree instance, in both the pruned and the untouched (root) case.</returns>
        public DecisionTree replaceNodeByNewLeaf(Node removeNode)
        {
            Node parent = removeNode.getParent();

            if (parent == null)
            {
                Console.WriteLine("Tried to prune root. You sure this Decision Tree makes sense?");
                return(this);
            }

            // Gather the data instances of every leaf in the subtree, breadth-first.
            // A Queue gives O(1) dequeues; the visiting order matches a front-popped list.
            List <DataInstance> total_set = new List <DataInstance>();
            Queue <Node>        pending   = new Queue <Node>();

            pending.Enqueue(removeNode);

            while (pending.Count > 0)
            {
                Node node = pending.Dequeue();

                foreach (Leaf child in node.getLeafChildren())
                {
                    // The old leaf disappears with the subtree, so its entry is dropped
                    // once its instances have been taken over.
                    total_set.AddRange(this.data_locations[child]);
                    this.data_locations.Remove(child);
                }

                // Inner nodes are visited later so that their leafs get collected too.
                foreach (Node childNode in node.getNodeChildren())
                {
                    pending.Enqueue(childNode);
                }
            }

            // Build the replacement leaf from the merged set.
            string prediction = SetHelper.mostCommonClassifier(total_set, this.target_attribute);
            // NOTE(review): when total_set is empty this floating-point division does not throw;
            // it yields NaN (0/0) — confirm callers never prune a subtree that holds no data.
            double uncertainty = (double)SetHelper.subset_errors(total_set, this.target_attribute) / (double)total_set.Count;
            Leaf   newleaf     = this.addUncertainLeaf(removeNode.value_splitter, prediction, parent, uncertainty);

            // Make sure the merged subset can be looked up through the new leaf.
            this.data_locations[newleaf] = total_set;

            // The old node is detached only after the replacement leaf has been created.
            parent.removeChildNode(removeNode);

            return(this);
        }
Exemple #2
0
        /// <summary>
        /// Works through <paramref name="queue"/> from front to back and, for each node, decides
        /// whether to replace its subtree by a single leaf.
        /// </summary>
        /// <remarks>
        /// For every node the estimated error of keeping the subtree (the sum over all of its leafs)
        /// is compared with the estimated error of treating all of the node's data as one set.
        /// The node is pruned via <c>replaceNodeByNewLeaf</c> only when the latter is strictly lower.
        /// Both estimates are an error rate from <c>Calculator.confidenceIntervalExact</c> multiplied
        /// by the set size. Each decision is reported through <c>agent.THINK</c>.
        /// The queue is consumed: every processed node is removed from the list.
        /// </remarks>
        /// <param name="tree">The tree being pruned; its <c>data_locations</c> supplies the data of each leaf.</param>
        /// <param name="queue">
        /// Nodes still to be considered. Must contain at least one node; an empty list fails on the
        /// first index access.
        /// </param>
        /// <param name="target_attribute">Name of the attribute used to count misclassified instances.</param>
        /// <returns>The tree after all queued nodes have been considered.</returns>
        private DecisionTree pruneIterate(DecisionTree tree, List <Node> queue, string target_attribute)
        {
            // A loop is used instead of one recursive call per queued node: C# gives no
            // tail-call guarantee, so recursion depth would grow with the queue length.
            // do/while keeps the original contract of always processing the first entry.
            do
            {
                // Manage queue.
                Node node = queue[0];
                queue.RemoveAt(0);

                agent.THINK("consider-node-for-pruning").finish();

                // All data instances that live in leafs somewhere below this node.
                List <DataInstance> node_set = new List <DataInstance>();

                // Error estimate when the subtree is kept: summed over its leafs.
                double leaf_estimated_errors = 0;
                int    leaf_actual_errors    = 0;

                foreach (Leaf child in SetHelper.all_leaf_children(node))
                {
                    List <DataInstance> leaf_set = tree.data_locations[child];
                    node_set.AddRange(leaf_set);

                    int my_errors = SetHelper.subset_errors(leaf_set, target_attribute);
                    leaf_actual_errors += my_errors;

                    // Error rate scaled back to a number of instances.
                    double errorRate = Calculator.confidenceIntervalExact(my_errors, leaf_set.Count, this.confidence);
                    leaf_estimated_errors += errorRate * leaf_set.Count;
                }

                // Error estimate when the node's whole data set is treated as a single leaf.
                int    node_errors        = SetHelper.subset_errors(node_set, target_attribute);
                double nodeErrorRate      = Calculator.confidenceIntervalExact(node_errors, node_set.Count, this.confidence);
                double nodeEstimatedError = nodeErrorRate * node_set.Count;

                // Snapshot of the figures behind the decision, reported together with it.
                Node parent = node.getParent();
                Dictionary <string, object> state = StateRecording.generateState(
                    "estimated_prune_errors", nodeEstimatedError,
                    "estimated_keep_errors", leaf_estimated_errors,
                    "node_attribute", node.label,
                    "node_data_size", node_set.Count,
                    "node_id", node.identifier,
                    "node_value_splitter", (node.value_splitter != null) ? node.value_splitter : "NULL",
                    "node_threshold", (node is ContinuousNode) ? (double?)(node as ContinuousNode).threshold : null,
                    "parent_id", (parent != null) ? parent.identifier : "NULL",
                    "parent_attribute", (parent != null) ? parent.label : "NULL",
                    "parent_threshold", (parent != null && parent is ContinuousNode) ? (double?)((ContinuousNode)parent).threshold : null);

                // If a node has a lower estimated error than its leafs, it should be pruned.
                if (nodeEstimatedError < leaf_estimated_errors)
                {
                    // We need to prune!
                    this.prepareSnapshot(node);
                    agent.THINK("prune-node").setState(state).finish();

                    tree = tree.replaceNodeByNewLeaf(node);
                }
                else
                {
                    agent.THINK("keep-node").setState(state).finish();
                }
            }
            while (queue.Count > 0);

            return(tree);
        }