/// <summary>
/// Collapses the subtree rooted at <paramref name="removeNode"/> into a single new leaf.
/// Every leaf found below the node has its instances pulled out of <c>data_locations</c>;
/// the combined instances are then registered under the new leaf.
/// </summary>
/// <param name="removeNode">
/// The node to replace. When it has no parent (i.e. it is the root) nothing is changed and a
/// warning is written to the console.
/// </param>
/// <returns>This tree instance, so calls can be chained or reassigned by the caller.</returns>
public DecisionTree replaceNodeByNewLeaf(Node removeNode)
{
    // Look the parent up once; a node without a parent is the root and cannot be replaced.
    Node parent = removeNode.getParent();
    if (parent == null)
    {
        Console.WriteLine("Tried to prune root. You sure this Decision Tree makes sense?");
        return(this);
    }

    // Collect every instance that the new leaf has to cover.
    List<DataInstance> total_set = new List<DataInstance>();

    // Breadth-first walk over the subtree. A real queue gives O(1) dequeues, whereas
    // List.RemoveAt(0) shifts the whole list on every step.
    Queue<Node> pending = new Queue<Node>();
    pending.Enqueue(removeNode);

    while (pending.Count > 0)
    {
        Node node = pending.Dequeue();

        foreach (Leaf child in node.getLeafChildren())
        {
            // Move the leaf's instances into the combined set and drop the old leaf's entry.
            total_set.AddRange(this.data_locations[child]);
            this.data_locations.Remove(child);
        }

        // Visit child nodes as well so that their leaves are collected too.
        foreach (Node childNode in node.getNodeChildren())
        {
            pending.Enqueue(childNode);
        }
    }

    // Make the new leaf from the combined set.
    string prediction = SetHelper.mostCommonClassifier(total_set, this.target_attribute);

    // Error count reported by SetHelper.subset_errors, relative to the size of the set.
    // NOTE(review): if no instances were collected, total_set.Count is 0 and this floating-point
    // division produces NaN or infinity instead of throwing - confirm callers tolerate that.
    double uncertainty = (double)SetHelper.subset_errors(total_set, this.target_attribute) / (double)total_set.Count;

    // The new leaf takes over the removed node's value_splitter and is created with the same parent.
    Leaf newleaf = this.addUncertainLeaf(removeNode.value_splitter, prediction, parent, uncertainty);

    // Make sure we can access this leaf's new subset!
    this.data_locations[newleaf] = total_set;

    // Remove the old node from its parent (done after the new leaf has been created).
    parent.removeChildNode(removeNode);

    return(this);
}
/// <summary>
/// Works through <paramref name="queue"/> from front to back. For each node it compares the
/// estimated error of keeping the node's leaves against the estimated error of replacing the
/// node with a single leaf, and prunes the node when replacing gives the lower estimate.
/// </summary>
/// <param name="tree">The tree whose <c>data_locations</c> are read and which gets pruned.</param>
/// <param name="queue">
/// Nodes still to be considered. Entries are removed from the front as they are processed, so
/// the list is empty when the method returns. An empty list returns <paramref name="tree"/>
/// unchanged.
/// </param>
/// <param name="target_attribute">Attribute name passed to <c>SetHelper.subset_errors</c>.</param>
/// <returns>The tree as returned by the last <c>replaceNodeByNewLeaf</c> call, or the input tree when nothing was pruned.</returns>
private DecisionTree pruneIterate(DecisionTree tree, List<Node> queue, string target_attribute)
{
    // Iterative on purpose: one stack frame per queued node could overflow the stack on big trees.
    while (queue.Count > 0)
    {
        // Manage queue.
        Node node = queue[0];
        queue.RemoveAt(0);
        agent.THINK("consider-node-for-pruning").finish();

        // Lets consider this node: gather all instances below it while summing the leaf estimates.
        List<DataInstance> node_set = new List<DataInstance>();
        double leaf_estimated_errors = 0;

        foreach (Leaf child in SetHelper.all_leaf_children(node))
        {
            List<DataInstance> leaf_set = tree.data_locations[child];
            node_set.AddRange(leaf_set);

            // Estimated error of this leaf: its error rate scaled by its size.
            int my_errors = SetHelper.subset_errors(leaf_set, target_attribute);
            double errorRate = Calculator.confidenceIntervalExact(my_errors, leaf_set.Count, this.confidence);
            leaf_estimated_errors += errorRate * leaf_set.Count;
        }

        // Estimated error of the node when treated as one single set.
        int node_errors = SetHelper.subset_errors(node_set, target_attribute);
        double nodeErrorRate = Calculator.confidenceIntervalExact(node_errors, node_set.Count, this.confidence);
        double nodeEstimatedError = nodeErrorRate * node_set.Count;

        // Resolve the parent and the continuous-node views once instead of per state field.
        Node parent = node.getParent();
        ContinuousNode continuousNode = node as ContinuousNode;
        ContinuousNode continuousParent = parent as ContinuousNode;
        double? node_threshold = (continuousNode != null) ? (double?)continuousNode.threshold : null;
        double? parent_threshold = (continuousParent != null) ? (double?)continuousParent.threshold : null;

        // State that is attached to whichever decision gets reported below.
        Dictionary<string, object> state = StateRecording.generateState(
            "estimated_prune_errors", nodeEstimatedError,
            "estimated_keep_errors", leaf_estimated_errors,
            "node_attribute", node.label,
            "node_data_size", node_set.Count,
            "node_id", node.identifier,
            "node_value_splitter", (node.value_splitter != null) ? node.value_splitter : "NULL",
            "node_threshold", node_threshold,
            "parent_id", (parent != null) ? parent.identifier : "NULL",
            "parent_attribute", (parent != null) ? parent.label : "NULL",
            "parent_threshold", parent_threshold);

        // Compare.
        // If a node has a lower estimated error than its leafs, it should be pruned.
        if (nodeEstimatedError < leaf_estimated_errors)
        {
            // We need to prune!
            this.prepareSnapshot(node);
            agent.THINK("prune-node").setState(state).finish();
            tree = tree.replaceNodeByNewLeaf(node);
        }
        else
        {
            agent.THINK("keep-node").setState(state).finish();
        }
    }

    return(tree);
}