/**
 * An implementation of the recursive decision tree
 * learning algorithm. Given a parent node and an arc
 * number, the method will attach a new decision 'sub'-tree
 * below the parent node.
 *
 * @param parent The parent node for the new decision tree.
 *
 * @param arcNum The arc number (or path) along which the
 *        new subtree will be attached.
 *
 * @return true if an entire subtree was successfully added,
 *         false otherwise.
 */
public bool learnDT(DecisionTreeNode parent, int arcNum)
{
    AttributeMask mask;

    if (parent == null)
    {
        // We have to add at the root.
        mask = new AttributeMask(DatasetUse.getNumAttributes());
    }
    else
    {
        mask = new AttributeMask(parent.getMask());

        // Mask off the specified arc number.
        try
        {
            mask.mask(DatasetUse.getAttributePosition(parent.getLabel()), arcNum);
        }
        catch (Exception e)
        {
            //e.printStackTrace();
            return false;
        }
    }

    // Now, classify the examples at the current position. classifyExamples
    // fills the conclusion array: [0] best training target index,
    // [1] training examples reaching here, [2] training and [3] testing
    // examples correctly classified by the best training target,
    // [4] best testing target index, [5] testing examples reaching here,
    // [6] testing and [7] training examples correctly classified by the
    // best testing target.
    int[] conclusion = new int[8];
    int result = classifyExamples(mask, conclusion, null, null, null);

    Attribute target = DatasetUse.getTargetAttribute();
    int numTargetVals = target.getNumValues();
    String label;

    if (result == DATASET_EMPTY)
    {
        // If no examples reach our current position,
        // we add a leaf with the most common target
        // classification for the parent node.

        // Save testing results.
        int numTestingExamplesReachHere = conclusion[5];
        int bestTestingTargetIndex = conclusion[4];
        int numTestingExamplesCorrectClass = conclusion[6];
        int numTrainingExamplesCorrectClass = conclusion[7];

        classifyExamples(parent.getMask(), conclusion, null, null, null);

        try
        {
            label = target.getAttributeValueByNum(conclusion[0]);
        }
        catch (Exception e)
        {
            return false;
        }

        // We have to grab the counts again for the testing data...
        int[] currTestingCounts = new int[target.getNumValues()];
        getExampleCounts(mask, DatasetUse.getTestingExamples(), currTestingCounts, null);

        // Mask target value and add a leaf to the tree.
        mask.mask(0, conclusion[0]);

        DecisionTreeNode node =
            Tree.addLeafNode(parent, arcNum, label, mask,
                             0, conclusion[0], 0,
                             currTestingCounts[conclusion[0]],
                             numTestingExamplesReachHere,
                             bestTestingTargetIndex,
                             numTestingExamplesCorrectClass,
                             numTrainingExamplesCorrectClass);

        return true;
    }

    if (result == DATASET_IDENT_CONCL)
    {
        // Pure result - we can add a leaf node with the
        // correct target attribute value.
        try
        {
            label = target.getAttributeValueByNum(conclusion[0]);
        }
        catch (Exception e)
        {
            //e.printStackTrace();
            return false;
        }

        // Mask target value and add a leaf to the tree.
        mask.mask(0, conclusion[0]);

        DecisionTreeNode node =
            Tree.addLeafNode(parent, arcNum, label, mask,
                             conclusion[1], conclusion[0], conclusion[2], conclusion[3],
                             conclusion[5], conclusion[4], conclusion[6], conclusion[7]);

        return true;
    }

    // Mixed conclusion - so we have to select
    // an attribute to split on, and then build a
    // new internal node with that attribute.

    // First, generate statistics - this may take a while.
    int[] nodeStats = new int[numTargetVals];
    List<Attribute> availableAtts = generateStats(mask, nodeStats);

    if (availableAtts.Count == 0)
    {
        // No attributes left to split on - so use
        // the most common target value at the current position.
        try
        {
            label = target.getAttributeValueByNum(conclusion[0]);
        }
        catch (Exception e)
        {
            //e.printStackTrace();
            return false;
        }

        mask.mask(0, conclusion[0]);

        DecisionTreeNode node =
            Tree.addLeafNode(parent, arcNum, label, mask,
                             conclusion[1], conclusion[0], conclusion[2], conclusion[3],
                             conclusion[5], conclusion[4], conclusion[6], conclusion[7]);

        return true;
    }

    // Choose an attribute, based on the set of
    // available attributes.
    List<double> results = new List<double>();
    Attribute att = chooseAttribute(availableAtts, nodeStats, results);

    int attPos;

    try
    {
        attPos = DatasetUse.getAttributePosition(att.getName());
    }
    catch (Exception e)
    {
        //e.printStackTrace();
        return false;
    }

    DecisionTreeNode newParent =
        Tree.addInternalNode(parent, arcNum, attPos, att, mask,
                             conclusion[1], conclusion[0], conclusion[2], conclusion[3],
                             conclusion[5], conclusion[4], conclusion[6], conclusion[7]);

    // Now, recursively descend along each branch of the new node.
    for (int j = 0; j < newParent.getArcLabelCount(); j++)
    {
        // Recursive call.
        if (!learnDT(newParent, j))
            return false;
    }

    return true;
}
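// Usage sketch (not part of the original source): a plausible way to grow a
// complete tree is to call learnDT with a null parent, which makes the method
// build from the root; the arc number is assumed to be ignored in that case
// and -1 is passed purely as a placeholder. The wrapper name buildFullTree is
// hypothetical.
public bool buildFullTree()
{
    // learnDT recurses down every arc of each internal node it creates,
    // so a single call from the root grows the entire tree.
    return learnDT(null, -1);
}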
/**
 * An implementation of the recursive decision tree
 * pessimistic pruning algorithm. Given a node, the
 * method will prune all the branches below it.
 *
 * @param node The root node of the tree to prune.
 *
 * @param error A <code>double</code> array of size 1. The
 *        array is used to store the current error value.
 *
 * @return <code>true</code> if an entire subtree was successfully
 *         pruned, or <code>false</code> otherwise.
 */
public bool prunePessimisticDT(DecisionTreeNode node, double[] error)
{
    // Post-order walk through the tree, marking
    // our path as we go along.
    if (node.isLeaf())
    {
        if (node.getTrainingEgsAtNode() == 0)
        {
            error[0] = 0;
            return true;
        }
        else
        {
            // We do the error calculation in two steps -
            // Here we multiply the error value by the number
            // of examples that reach the node. When the method
            // is called recursively, this value will be divided
            // by the number of examples that reach the parent
            // node (thus weighting the error from each child).
            int errors1 = (int)node.getTrainingEgsAtNode() -
                node.getTrainingEgsCorrectClassUsingBestTrainingIndex();
            double p1 = (double)(errors1 + 1.0) / (node.getTrainingEgsAtNode() + 2.0);

            error[0] = node.getTrainingEgsAtNode() * errorBar(p1, node.getTrainingEgsAtNode()) + errors1;

            return true;
        }
    }

    // We're at an internal node, so compute the error
    // of the children and use the result to determine
    // if we prune or not.
    double errorSum = 0;

    for (int i = 0; i < node.getArcLabelCount(); i++)
    {
        // Mark our current path.
        Tree.flagNode(node, i);

        if (!prunePessimisticDT(node.getChild(i), error))
        {
            Tree.flagNode(node, -2);
            return false;
        }

        errorSum += error[0];
    }

    // Mark the node as our current target.
    Tree.flagNode(node, -1);

    // Get the worst-case performance of this node.
    double errorWorst;

    if (node.getTrainingEgsAtNode() == 0)
    {
        error[0] = 0;
        return true;
    }

    int errors = (int)node.getTrainingEgsAtNode() -
        node.getTrainingEgsCorrectClassUsingBestTrainingIndex();
    double p = (double)(errors + 1.0) / (node.getTrainingEgsAtNode() + 2.0);

    errorWorst = (double)node.getTrainingEgsAtNode() * errorBar(p, node.getTrainingEgsAtNode()) + errors;

    DecisionTreeNode newNode = node;

    if (errorWorst < errorSum)
    {
        // We need to "prune" this node to a leaf.
        DecisionTreeNode parent = node.getParent();
        int arcNum = -1;

        if (parent != null)
        {
            arcNum = parent.getChildPosition(node);
        }

        Tree.pruneSubtree(node);

        // Figure out the label for the new leaf.
        String label = null;

        try
        {
            label = DatasetUse.getTargetAttribute().getAttributeValueByNum(node.getTrainingBestTarget());
        }
        catch (Exception e)
        {
            // Should never happen.
            //e.printStackTrace();
        }

        node.getMask().mask(0, node.getTrainingBestTarget());

        newNode = Tree.addLeafNode(parent, arcNum, label, node.getMask(),
                                   node.getTrainingEgsAtNode(),
                                   node.getTrainingBestTarget(),
                                   node.getTrainingEgsCorrectClassUsingBestTrainingIndex(),
                                   node.getTestingEgsCorrectClassUsingBestTrainingIndex(),
                                   node.getTestingEgsAtNode(),
                                   node.getTestingBestTarget(),
                                   node.getTestingEgsCorrectClassUsingBestTestingIndex(),
                                   node.getTrainingEgsCorrectClassUsingBestTestingIndex());
    }

    // Update the count.
    if (newNode.isLeaf())
    {
        error[0] = errorWorst;
    }
    else
    {
        error[0] = errorSum;
    }

    // All finished, unmark the node if it still exists.
    Tree.flagNode(node, -2);

    return true;
}
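// The errorBar helper called above is not shown in this excerpt. As a hedged
// sketch only (the real implementation may differ, e.g. by scaling with a z
// value for a chosen confidence level): pessimistic pruning typically inflates
// the observed error by a confidence bound on a binomial error rate, so a
// hypothetical stand-in could look like this.
private double errorBarSketch(double p, double n)
{
    // One standard deviation of an error-rate estimate computed from n examples.
    return Math.Sqrt(p * (1.0 - p) / n);
}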
/**
 * An implementation of the recursive decision tree
 * reduced error pruning algorithm. Given a node, the
 * method will prune all the branches below it.
 *
 * @param node The root node of the tree to prune.
 *
 * @param error A <code>double</code> array of size 1. The
 *        array is used to store the current error value.
 *
 * @return <code>true</code> if an entire subtree was successfully
 *         pruned, or <code>false</code> otherwise.
 */
public bool pruneReducedErrorDT(DecisionTreeNode node, double[] error)
{
    if (node.isLeaf())
    {
        error[0] = node.getTestingEgsAtNode() -
            node.getTestingEgsCorrectClassUsingBestTrainingIndex();
        return true;
    }

    // We're at an internal node, so compute the error
    // of the children and use the result to determine
    // if we prune or not.
    double errorSum = 0;

    for (int i = 0; i < node.getArcLabelCount(); i++)
    {
        // Mark our current path.
        Tree.flagNode(node, i);

        if (!pruneReducedErrorDT(node.getChild(i), error))
        {
            Tree.flagNode(node, -2);
            return false;
        }

        errorSum += error[0];
    }

    // Mark the node as our current target.
    Tree.flagNode(node, -1);

    // Get the best-case performance of this node.
    double errorBest = node.getTestingEgsAtNode() -
        node.getTestingEgsCorrectClassUsingBestTestingIndex();

    DecisionTreeNode newNode = node;

    if (errorBest < errorSum)
    {
        // We need to "prune" this node to a leaf.
        DecisionTreeNode parent = node.getParent();
        int arcNum = -1;

        if (parent != null)
        {
            arcNum = parent.getChildPosition(node);
        }

        Tree.pruneSubtree(node);

        // Figure out the label for the new leaf.
        String label = null;

        try
        {
            label = DatasetUse.getTargetAttribute().getAttributeValueByNum(node.getTestingBestTarget());
        }
        catch (Exception e)
        {
            // Should never happen.
            //e.printStackTrace();
        }

        node.getMask().mask(0, node.getTestingBestTarget());

        newNode = Tree.addLeafNode(parent, arcNum, label, node.getMask(),
                                   node.getTrainingEgsAtNode(),
                                   node.getTestingBestTarget(),
                                   node.getTrainingEgsCorrectClassUsingBestTestingIndex(),
                                   node.getTestingEgsCorrectClassUsingBestTestingIndex(),
                                   node.getTestingEgsAtNode(),
                                   node.getTestingBestTarget(),
                                   node.getTestingEgsCorrectClassUsingBestTestingIndex(),
                                   node.getTrainingEgsCorrectClassUsingBestTestingIndex());
    }

    // Update the count.
    if (newNode.isLeaf())
    {
        error[0] = errorBest;
    }
    else
    {
        error[0] = errorSum;
    }

    // All finished, unmark the node if it still exists.
    Tree.flagNode(node, -2);

    return true;
}
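// Usage sketch (an assumption, not from the original source): pruning an
// already-learned tree. Both pruning methods share the same calling
// convention - pass the subtree's root node plus a one-element error array
// that carries each subtree's error back up the recursion. The wrapper name
// is hypothetical.
public bool pruneTreeReducedError(DecisionTreeNode root)
{
    double[] error = new double[1];
    return pruneReducedErrorDT(root, error);
}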