/**
 * An implementation of the recursive decision tree learning
 * algorithm. Given a parent node and an arc number, the method
 * attaches a new decision subtree below the parent node along
 * that arc.
 *
 * @param parent The parent node for the new decision subtree,
 *               or null to build the tree from the root.
 *
 * @param arcNum The arc number (or path) along which the
 *               new subtree will be attached.
 *
 * @return true if an entire subtree was successfully added,
 *         false otherwise.
 */
public bool learnDT(DecisionTreeNode parent, int arcNum) {
    AttributeMask mask;

    if (parent == null) {
        // We have to add at the root.
        mask = new AttributeMask(DatasetUse.getNumAttributes());
    } else {
        mask = new AttributeMask(parent.getMask());

        // Mask off the specified arc number.
        try {
            mask.mask(DatasetUse.getAttributePosition(parent.getLabel()), arcNum);
        } catch (Exception e) {
            //e.printStackTrace();
            return false;
        }
    }

    // Now, classify the examples at the current position.
    int[] conclusion = new int[8];
    int result = classifyExamples(mask, conclusion, null, null, null);

    Attribute target = DatasetUse.getTargetAttribute();
    int numTargetVals = target.getNumValues();
    String label;

    if (result == DATASET_EMPTY) {
        // If no examples reach our current position, we add a leaf
        // with the most common target classification for the
        // parent node.

        // Save testing results.
        int numTestingExamplesReachHere = conclusion[5];
        int bestTestingTargetIndex = conclusion[4];
        int numTestingExamplesCorrectClass = conclusion[6];
        int numTrainingExamplesCorrectClass = conclusion[7];

        // Reclassify over the parent's examples to find the most
        // common target value there.
        classifyExamples(parent.getMask(), conclusion, null, null, null);

        try {
            label = target.getAttributeValueByNum(conclusion[0]);
        } catch (Exception e) {
            return false;
        }

        // We have to grab the counts again for the testing data...
        int[] currTestingCounts = new int[target.getNumValues()];
        getExampleCounts(mask, DatasetUse.getTestingExamples(), currTestingCounts, null);

        // Mask the target value and add a leaf to the tree.
        mask.mask(0, conclusion[0]);

        DecisionTreeNode node = Tree.addLeafNode(parent, arcNum, label, mask,
            0, conclusion[0], 0, currTestingCounts[conclusion[0]],
            numTestingExamplesReachHere, bestTestingTargetIndex,
            numTestingExamplesCorrectClass, numTrainingExamplesCorrectClass);

        return true;
    }

    if (result == DATASET_IDENT_CONCL) {
        // Pure result - we can add a leaf node with the
        // correct target attribute value.
        try {
            label = target.getAttributeValueByNum(conclusion[0]);
        } catch (Exception e) {
            //e.printStackTrace();
            return false;
        }

        // Mask the target value and add a leaf to the tree.
        mask.mask(0, conclusion[0]);

        DecisionTreeNode node = Tree.addLeafNode(parent, arcNum, label, mask,
            conclusion[1], conclusion[0], conclusion[2], conclusion[3],
            conclusion[5], conclusion[4], conclusion[6], conclusion[7]);

        return true;
    }

    // Mixed conclusion - so we have to select an attribute to split on,
    // and then build a new internal node with that attribute.

    // First, generate statistics - this may take a while.
    int[] nodeStats = new int[numTargetVals];
    List<Attribute> availableAtts = generateStats(mask, nodeStats);

    if (availableAtts.Count == 0) {
        // No attributes left to split on - so use the most common
        // target value at the current position.
        try {
            label = target.getAttributeValueByNum(conclusion[0]);
        } catch (Exception e) {
            //e.printStackTrace();
            return false;
        }

        mask.mask(0, conclusion[0]);

        DecisionTreeNode node = Tree.addLeafNode(parent, arcNum, label, mask,
            conclusion[1], conclusion[0], conclusion[2], conclusion[3],
            conclusion[5], conclusion[4], conclusion[6], conclusion[7]);

        return true;
    }

    // Choose an attribute, based on the set of available attributes.
    List<double> results = new List<double>();
    Attribute att = chooseAttribute(availableAtts, nodeStats, results);

    int attPos;

    try {
        attPos = DatasetUse.getAttributePosition(att.getName());
    } catch (Exception e) {
        //e.printStackTrace();
        return false;
    }

    DecisionTreeNode newParent = Tree.addInternalNode(parent, arcNum, attPos, att, mask,
        conclusion[1], conclusion[0], conclusion[2], conclusion[3],
        conclusion[5], conclusion[4], conclusion[6], conclusion[7]);

    // Now, recursively descend along each branch of the new node.
    for (int j = 0; j < newParent.getArcLabelCount(); j++) {
        // Recursive call.
        if (!learnDT(newParent, j)) {
            return false;
        }
    }

    return true;
}
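
// Usage sketch (hypothetical, not from the original source): the enclosing
// class, and how Tree and DatasetUse get initialised, are not shown here,
// so the wrapper below only illustrates how the recursion is typically
// started. learnDT applies arcNum to the mask only when parent is non-null;
// otherwise it is just forwarded to the tree-building helpers, so 0 is
// passed here as a placeholder.
public bool buildTree() {
    // A null parent makes learnDT start from a fresh, unmasked
    // AttributeMask covering every attribute, i.e. the root of the tree.
    // A false result means some subtree could not be built.
    return learnDT(null, 0);
}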