/// <summary>
/// Attaches a new leaf node to the supplied node, along
/// the specified arc.
/// </summary>
public DecisionTreeNode addLeafNode(DecisionTreeNode parent,
    int arcNum,
    String label,
    AttributeMask mask,
    int numTrainingExamplesReachHere,
    int bestTrainingTargetIndex,
    int numTrainingEgsCorrectClassUsingBestTrainingIndex,
    int numTestingEgsCorrectClassUsingBestTrainingIndex,
    int numTestingExamplesReachHere,
    int bestTestingTargetIndex,
    int numTestingEgsCorrectClassUsingBestTestingIndex,
    int numTrainingEgsCorrectClassUsingBestTestingIndex)
{
    // Create new leaf node.
    DecisionTreeNode leaf = new DecisionTreeNode(parent, label, null, mask);

    // Set the node statistics.
    leaf.setTrainingStats(numTrainingExamplesReachHere,
        bestTrainingTargetIndex,
        numTrainingEgsCorrectClassUsingBestTrainingIndex,
        numTestingEgsCorrectClassUsingBestTrainingIndex);
    leaf.setTestingStats(numTestingExamplesReachHere,
        bestTestingTargetIndex,
        numTestingEgsCorrectClassUsingBestTestingIndex,
        numTrainingEgsCorrectClassUsingBestTestingIndex);

    // Update the tree statistics.
    TrainingCorrect += numTrainingEgsCorrectClassUsingBestTrainingIndex;
    TestingCorrect += numTestingEgsCorrectClassUsingBestTrainingIndex;

    // Now, attach the new leaf to the supplied node.
    if (parent != null)
    {
        parent.setChild(arcNum, leaf);
    }

    // Add a reference to the new node to the node list.
    Nodes.Add(leaf);

    // Determine if the tree is complete.
    if (findIncompleteNode((DecisionTreeNode)Nodes[0], new int[1]) == null)
    {
        Complete = true;
    }

    return leaf;
}
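// Usage note (a sketch, grounded in the learnDT() calls further down in this
// file): the eight statistic arguments are taken directly from the conclusion
// array filled by classifyExamples(), in the order conclusion[1], [0], [2],
// [3], [5], [4], [6], [7]:
//
//     Tree.addLeafNode(parent, arcNum, label, mask,
//         conclusion[1], conclusion[0], conclusion[2], conclusion[3],
//         conclusion[5], conclusion[4], conclusion[6], conclusion[7]);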
/// <summary>
/// Attaches a new internal node to the supplied node,
/// along the specified arc.
/// </summary>
/// <param name="parent">The node in the current tree to attach
/// the internal node to. If the node is null, the
/// new internal node becomes the root of the tree.</param>
/// <param name="arcNum">The arc number (or attribute value
/// index) along which to attach the new node.</param>
/// <param name="attributePosition">The position of the
/// attribute used to split at the new node, relative
/// to the other attributes in the dataset.</param>
/// <param name="att">The attribute used to split at the new
/// node.</param>
/// <returns>A reference to the new internal node.</returns>
public DecisionTreeNode addInternalNode(DecisionTreeNode parent,
    int arcNum,
    int attributePosition,
    Attribute att,
    AttributeMask mask,
    int numTrainingExamplesReachHere,
    int bestTrainingTargetIndex,
    int numTrainingEgsCorrectClassUsingBestTrainingIndex,
    int numTestingEgsCorrectClassUsingBestTrainingIndex,
    int numTestingExamplesReachHere,
    int bestTestingTargetIndex,
    int numTestingEgsCorrectClassUsingBestTestingIndex,
    int numTrainingEgsCorrectClassUsingBestTestingIndex)
{
    // Create a new internal node.
    DecisionTreeNode internalNode =
        new DecisionTreeNode(parent, att.getName(), att.getValueNames(), mask);

    // Set the node statistics.
    internalNode.setTrainingStats(numTrainingExamplesReachHere,
        bestTrainingTargetIndex,
        numTrainingEgsCorrectClassUsingBestTrainingIndex,
        numTestingEgsCorrectClassUsingBestTrainingIndex);
    internalNode.setTestingStats(numTestingExamplesReachHere,
        bestTestingTargetIndex,
        numTestingEgsCorrectClassUsingBestTestingIndex,
        numTrainingEgsCorrectClassUsingBestTestingIndex);

    // Update the tree statistics.
    InternalNodes++;

    // Now, attach the new internal node to the supplied node.
    if (parent != null)
    {
        parent.setChild(arcNum, internalNode);
    }

    // Add a reference to the new node to the node list.
    Nodes.Add(internalNode);

    return internalNode;
}
private int _trainTrainingCorrectClass;  // Number of training examples correctly
                                         // classified at this node (using the best
                                         // training target index).

#endregion Fields

#region Constructors

public DecisionTreeNode(DecisionTreeNode parent,
    String label,
    Object[] arcLabels,
    AttributeMask mask)
{
    if (label == null || mask == null)
        throw new Exception("Label or attribute mask is null.");

    Parent = parent;
    NodeLabel = label;
    ArcLabels = arcLabels;
    AttMask = mask;

    // The number of possible children for the node is
    // equal to the number of values for this attribute.
    // If the arc label array was null, then this is
    // a leaf node that has no children.
    if (arcLabels != null)
        Children = new DecisionTreeNode[ArcLabels.Length];

    // The node is initially unflagged.
    NodeFlag = -2;
}
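// Construction sketch (the attribute and value names below are hypothetical,
// chosen only for illustration): a null arc-label array produces a leaf, while
// a non-null array sizes the Children slots - one per attribute value.
//
//     // Internal node splitting on an attribute with values {"sunny", "rain"}:
//     DecisionTreeNode split = new DecisionTreeNode(root, "Outlook",
//         new Object[] { "sunny", "rain" }, splitMask);
//
//     // Leaf predicting the target value "yes":
//     DecisionTreeNode leaf = new DecisionTreeNode(split, "yes", null, leafMask);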
/**
 * An implementation of the recursive decision tree
 * learning algorithm. Given a parent node and an arc
 * number, the method will attach a new decision 'sub'-tree
 * below the parent node.
 *
 * @param parent The parent node for the new decision tree.
 *
 * @param arcNum The arc number (or path) along which the
 *        new subtree will be attached.
 *
 * @return true if an entire subtree was successfully added,
 *         false otherwise.
 */
public bool learnDT(DecisionTreeNode parent, int arcNum)
{
    AttributeMask mask;

    if (parent == null)
    {
        // We have to add at the root.
        mask = new AttributeMask(DatasetUse.getNumAttributes());
    }
    else
    {
        mask = new AttributeMask(parent.getMask());

        // Mask off the specified arc number.
        try
        {
            mask.mask(DatasetUse.getAttributePosition(parent.getLabel()), arcNum);
        }
        catch (Exception e)
        {
            //e.printStackTrace();
            return false;
        }
    }

    // Now, classify the examples at the current position.
    int[] conclusion = new int[8];
    int result = classifyExamples(mask, conclusion, null, null, null);

    Attribute target = DatasetUse.getTargetAttribute();
    int numTargetVals = target.getNumValues();
    String label;

    if (result == DATASET_EMPTY)
    {
        // If no examples reach our current position,
        // we add a leaf with the most common target
        // classification for the parent node.

        // Save testing results.
        int numTestingExamplesReachHere = conclusion[5];
        int bestTestingTargetIndex = conclusion[4];
        int numTestingExamplesCorrectClass = conclusion[6];
        int numTrainingExamplesCorrectClass = conclusion[7];

        classifyExamples(parent.getMask(), conclusion, null, null, null);

        try
        {
            label = target.getAttributeValueByNum(conclusion[0]);
        }
        catch (Exception e)
        {
            return false;
        }

        // We have to grab the counts again for the testing data...
        int[] currTestingCounts = new int[target.getNumValues()];
        getExampleCounts(mask, DatasetUse.getTestingExamples(), currTestingCounts, null);

        // Mask target value and add a leaf to the tree.
        mask.mask(0, conclusion[0]);

        DecisionTreeNode node = Tree.addLeafNode(parent, arcNum, label, mask,
            0, conclusion[0], 0, currTestingCounts[conclusion[0]],
            numTestingExamplesReachHere, bestTestingTargetIndex,
            numTestingExamplesCorrectClass, numTrainingExamplesCorrectClass);

        return true;
    }

    if (result == DATASET_IDENT_CONCL)
    {
        // Pure result - we can add a leaf node with the
        // correct target attribute value.
        try
        {
            label = target.getAttributeValueByNum(conclusion[0]);
        }
        catch (Exception e)
        {
            //e.printStackTrace();
            return false;
        }

        // Mask target value and add a leaf to the tree.
        mask.mask(0, conclusion[0]);

        DecisionTreeNode node = Tree.addLeafNode(parent, arcNum, label, mask,
            conclusion[1], conclusion[0], conclusion[2], conclusion[3],
            conclusion[5], conclusion[4], conclusion[6], conclusion[7]);

        return true;
    }

    // Mixed conclusion - so we have to select
    // an attribute to split on, and then build a
    // new internal node with that attribute.

    // First, generate statistics - this may take a while.
    int[] nodeStats = new int[numTargetVals];
    List<Attribute> availableAtts = generateStats(mask, nodeStats);

    if (availableAtts.Count == 0)
    {
        // No attributes left to split on - so use
        // the most common target value at the current position.
        try
        {
            label = target.getAttributeValueByNum(conclusion[0]);
        }
        catch (Exception e)
        {
            //e.printStackTrace();
            return false;
        }

        mask.mask(0, conclusion[0]);

        DecisionTreeNode node = Tree.addLeafNode(parent, arcNum, label, mask,
            conclusion[1], conclusion[0], conclusion[2], conclusion[3],
            conclusion[5], conclusion[4], conclusion[6], conclusion[7]);

        return true;
    }

    // Choose an attribute, based on the set of
    // available attributes.
    List<double> results = new List<double>();
    Attribute att = chooseAttribute(availableAtts, nodeStats, results);

    int attPos;

    try
    {
        attPos = DatasetUse.getAttributePosition(att.getName());
    }
    catch (Exception e)
    {
        //e.printStackTrace();
        return false;
    }

    DecisionTreeNode newParent = Tree.addInternalNode(parent, arcNum, attPos, att, mask,
        conclusion[1], conclusion[0], conclusion[2], conclusion[3],
        conclusion[5], conclusion[4], conclusion[6], conclusion[7]);

    // Now, recursively descend along each branch of the new node.
    for (int j = 0; j < newParent.getArcLabelCount(); j++)
    {
        // Recursive call.
        if (!learnDT(newParent, j))
            return false;
    }

    return true;
}
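// Usage sketch (illustrative, not part of the original source): growing a
// complete tree starts with a null parent, which makes learnDT() build the
// root from a fresh, fully unmasked AttributeMask. The arc number is only
// consulted when a parent exists, so 0 is an arbitrary placeholder here.
//
//     if (!learnDT(null, 0))
//     {
//         // Learning aborted - e.g. an attribute lookup failed partway down.
//     }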
/**
 * Generates statistics (used for splitting) based on the
 * current position in the tree (as defined by an
 * attribute mask).
 *
 * @return A List that contains the available attributes.
 *         Each attribute's internal statistics array is
 *         populated with appropriate data. The supplied
 *         stats array is filled with counts of the number of
 *         examples that fall into each of the target classes
 *         at the current position in the tree.
 */
public List<Attribute> generateStats(AttributeMask mask, int[] stats)
{
    // First, we fill the stats array - this is not the
    // most efficient approach, since we're looping through
    // the data several times.
    getExampleCounts(mask, DatasetUse.getTrainingExamples(), stats, null);

    // Now, we have to go through the attribute mask
    // and locate the attributes that are still available.
    List<Attribute> results = new List<Attribute>();

    // Create a new mask that we can modify.
    AttributeMask newMask = new AttributeMask(mask);

    // We don't use position 0, that's where the target attribute is.
    for (int i = 1; i < mask.getNumAttributes(); i++)
    {
        if (newMask.isMasked(i) == AttributeMask.UNUSED)
        {
            // This attribute is available, so we calculate stats for it.
            Attribute att = null;

            try
            {
                att = DatasetUse.getAttributeByNum(i);
            }
            catch (Exception e)
            {
                // This can't happen!!
                return null;
            }

            int[][] attStats = att.getStatsArray();

            // Modify the mask and fill in the arrays.
            for (int j = 0; j < att.getNumValues(); j++)
            {
                newMask.mask(i, j);
                getExampleCounts(newMask, DatasetUse.getTrainingExamples(), attStats[j], null);
            }

            // Reset the mask.
            newMask.unmask(i);
            results.Add(att);
        }
    }

    return results;
}
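// Sketch of an ID3-style information-gain chooser. This is an assumption: the
// actual chooseAttribute() implementation is not shown in this section, so the
// method below is named chooseAttributeSketch to avoid implying it is the real
// one. It consumes the per-value target counts that generateStats() leaves in
// each attribute's stats array, plus the node-level counts in nodeStats, and
// assumes `results` receives the score computed for each candidate attribute.
// Requires `using System;` for Math.
private Attribute chooseAttributeSketch(List<Attribute> availableAtts,
    int[] nodeStats, List<double> results)
{
    // Entropy of the target distribution before splitting.
    double nodeEntropy = entropy(nodeStats);
    int numExamples = 0;
    foreach (int count in nodeStats) numExamples += count;

    Attribute best = null;
    double bestGain = double.NegativeInfinity;

    foreach (Attribute att in availableAtts)
    {
        // Expected entropy after splitting on this attribute:
        // a weighted sum of the entropy within each branch.
        double remainder = 0.0;
        int[][] attStats = att.getStatsArray();

        for (int v = 0; v < att.getNumValues(); v++)
        {
            int branchTotal = 0;
            foreach (int count in attStats[v]) branchTotal += count;

            if (branchTotal > 0)
                remainder += (branchTotal / (double)numExamples) * entropy(attStats[v]);
        }

        double gain = nodeEntropy - remainder;
        results.Add(gain);

        if (gain > bestGain)
        {
            bestGain = gain;
            best = att;
        }
    }

    return best;
}

// Shannon entropy (in bits) of a vector of class counts.
private static double entropy(int[] counts)
{
    int total = 0;
    foreach (int c in counts) total += c;
    if (total == 0) return 0.0;

    double h = 0.0;
    foreach (int c in counts)
    {
        if (c == 0) continue;
        double p = c / (double)total;
        h -= p * Math.Log(p, 2);
    }
    return h;
}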
/**
 * Classifies all examples in the current set of
 * examples, by target attribute value. The
 * attribute mask determines which examples from the
 * dataset form the current example set.
 *
 * @param mask The current attribute mask that
 *        indicates which examples from the dataset
 *        should be considered.
 *
 * @param conclusion The method expects the parameter
 *        to be an array of size 8. Positions in
 *        the array are filled with the following
 *        values.
 *
 *        <ul>
 *        <li><i>Position 0</i> - Records the
 *        index of the most common target attribute
 *        value from the training dataset.
 *
 *        <li><i>Position 1</i> - Records the number
 *        of training examples from the dataset that
 *        reach the current position.
 *
 *        <li><i>Position 2</i> - Records the number
 *        of training examples from the dataset
 *        that would be correctly classified
 *        <i>if a leaf with the most common training
 *        target classification</i> were added at the
 *        current position.
 *
 *        <li><i>Position 3</i> - Records the number
 *        of testing examples from the dataset
 *        that would be correctly classified
 *        <i>if a leaf with the most common training
 *        target classification</i> were added at the
 *        current position.
 *
 *        <li><i>Position 4</i> - Records the index
 *        of the most common target attribute
 *        value from the testing dataset.
 *
 *        <li><i>Position 5</i> - Records the number
 *        of testing examples from the dataset that
 *        reach the current position.
 *
 *        <li><i>Position 6</i> - Records the number
 *        of testing examples from the dataset
 *        that would be correctly classified
 *        <i>if a leaf with the most common testing
 *        target classification</i> were added at the
 *        current position.
 *
 *        <li><i>Position 7</i> - Records the number
 *        of training examples from the dataset
 *        that would be correctly classified
 *        <i>if a leaf with the most common testing
 *        target classification</i> were added at the
 *        current position.
 *        </ul>
 *
 * @param trainingCounts The method expects the parameter to be
 *        an array with a size equal to the number of
 *        target attribute values. Each position in
 *        the array is filled with a corresponding count
 *        of the number of training examples that fall into
 *        that particular target class, at the current
 *        position in the tree. This parameter can be null
 *        if training count data is not required.
 *
 * @param testingCounts The method expects the parameter to be
 *        an array with a size equal to the number of
 *        target attribute values. Each position in the
 *        array is filled with a corresponding count of
 *        the number of testing examples that fall into
 *        that particular target class, at the current
 *        position in the tree. This parameter can be null
 *        if testing count data is not required.
 *
 * @param examples The method expects the parameter to be
 *        an array with a size equal to the number of
 *        training examples in the dataset. Each entry in
 *        the array is marked with true or false, depending
 *        on whether or not a particular example reaches
 *        the current position.
 *
 * @return DATASET_MIXED_CONCL if the examples have
 *         multiple, different target attribute values.
 *         DATASET_IDENT_CONCL if all the examples have
 *         the same target attribute value.
 *         DATASET_EMPTY if there are no examples in the
 *         current example set.
 *
 *         <p>
 *         If the result is DATASET_IDENT_CONCL, the
 *         index of the single target attribute value
 *         is returned in <code>conclusion[0]</code>. If
 *         the result is DATASET_EMPTY, the index of the
 *         most common target attribute value is returned
 *         in <code>conclusion[0]</code>.
 */
public int classifyExamples(AttributeMask mask,
    int[] conclusion,
    int[] trainingCounts,
    int[] testingCounts,
    bool[] examples)
{
    if (mask == null || conclusion == null)
        throw new Exception("Mask or conclusion array is null.");

    // Determine the number of target attribute values
    // and create some storage space for our counts.
    int[] currTrainingCounts = null;
    int[] currTestingCounts = null;

    if (trainingCounts != null)
        currTrainingCounts = trainingCounts;
    else
        currTrainingCounts = new int[DatasetUse.getTargetAttribute().getNumValues()];

    if (testingCounts != null)
        currTestingCounts = testingCounts;
    else
        currTestingCounts = new int[currTrainingCounts.Length];

    getExampleCounts(mask, DatasetUse.getTrainingExamples(), currTrainingCounts, examples);
    getExampleCounts(mask, DatasetUse.getTestingExamples(), currTestingCounts, null);

    // Init results.
    conclusion[0] = 0;  // Training target attribute value index.
    conclusion[1] = 0;  // Total number of training examples reaching node.
    conclusion[2] = 0;  // Number of training examples correctly classified if this
                        // node were a leaf with most common training target value.
    conclusion[3] = 0;  // Number of testing examples correctly classified if this
                        // node were a leaf with most common training target value.
    conclusion[4] = 0;  // Testing target attribute value index.
    conclusion[5] = 0;  // Total number of testing examples reaching node.
    conclusion[6] = 0;  // Number of testing examples correctly classified if this
                        // node were a leaf with most common testing target value.
    conclusion[7] = 0;  // Number of training examples correctly classified if this
                        // node were a leaf with most common testing target value.

    // Examine the results and determine the conclusion.
    int result = DATASET_EMPTY;

    for (int i = 0; i < currTrainingCounts.Length; i++)
    {
        // Increment # of examples that reach this position.
        conclusion[1] += currTrainingCounts[i];
        conclusion[5] += currTestingCounts[i];

        if (result == DATASET_EMPTY && currTrainingCounts[i] != 0)
            result = DATASET_IDENT_CONCL;
        else if (result == DATASET_IDENT_CONCL && currTrainingCounts[i] != 0)
            result = DATASET_MIXED_CONCL;

        if (currTrainingCounts[i] >= currTrainingCounts[conclusion[0]])
        {
            // This target value is more common in the training set.
            conclusion[0] = i;
            conclusion[2] = currTrainingCounts[i];
            conclusion[3] = currTestingCounts[i];
        }

        if (currTestingCounts[i] >= currTestingCounts[conclusion[4]])
        {
            // This target value is more common in the testing set.
            conclusion[4] = i;
            conclusion[6] = currTestingCounts[i];
            conclusion[7] = currTrainingCounts[i];
        }
    }

    return result;
}
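// Illustrative reading of the conclusion array (the counts below are
// hypothetical, assuming a binary target with training counts {20, 30} and
// testing counts {11, 9}): after
//
//     int[] conclusion = new int[8];
//     int result = classifyExamples(mask, conclusion, null, null, null);
//
// result would be DATASET_MIXED_CONCL with conclusion = {1, 50, 30, 9, 0, 20, 11, 20}:
// 50 training examples reach this position, the most common training target value
// has index 1, and a leaf predicting that value would classify 30 of the 50
// training examples (and 9 of the 20 testing examples) correctly; symmetrically,
// the most common testing value has index 0, covering 11 testing and 20 training
// examples.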
/**
 * Fills the supplied array with the number of examples
 * from the current dataset that fall into each of the
 * target categories (based on the attribute mask).
 *
 * @param mask The mask that determines which examples
 *        reach the current position in the decision
 *        tree.
 *
 * @param examples An iteration over a series of
 *        examples from the current dataset.
 *
 * @param counts The method expects the parameter to be
 *        an array with a size equal to the number of
 *        target attribute values. Each position in
 *        the array is filled with a corresponding count
 *        of the number of examples that fall into
 *        that particular target class, at the current
 *        position in the decision tree.
 *
 * @param reachedHere The method expects the parameter
 *        to be an array with a size equal to the
 *        <i>total</i> number of examples being examined.
 *        Each cell in the array is set to true if the
 *        corresponding example reaches the current
 *        position in the decision tree.
 */
private void getExampleCounts(AttributeMask mask,
    IEnumerator<int[]> examples,
    int[] counts,
    bool[] reachedHere)
{
    // Zero any values currently in counts.
    for (int i = 0; i < counts.Length; i++)
    {
        counts[i] = 0;
    }

    int j = 0;

    // Loop through and get totals.
    while (examples.MoveNext())
    {
        int[] example = examples.Current;

        if (mask.matchMask(example))
        {
            // Increment the appropriate target count (the target
            // value index is stored at position 0 of each example).
            counts[example[0]]++;

            if (reachedHere != null && reachedHere.Length > j)
                reachedHere[j] = true;
        }

        j++;
    }
}
/// <summary>
/// Builds a new mask, copying the state of the
/// supplied mask to the new mask.
/// </summary>
/// <param name="mask">An attribute mask.</param>
public AttributeMask(AttributeMask mask)
{
    Mask = (int[])mask.Mask.Clone();
}
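// Usage sketch (illustrative): the copy constructor clones the underlying int
// array, so masking the copy leaves the original untouched - which is how
// learnDT() and generateStats() explore branches without corrupting the
// parent's mask.
//
//     AttributeMask branchMask = new AttributeMask(parent.getMask());
//     branchMask.mask(attPos, arcNum);   // parent's mask is unchanged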