Example #1
        /// <summary>
        /// Attaches a new leaf node to the supplied node, along
        /// the specified arc.
        /// </summary>
        public DecisionTreeNode addLeafNode(DecisionTreeNode parent,
                                            int arcNum,
                                            String label,
                                            AttributeMask mask,
                                            int numTrainingExamplesReachHere,
                                            int bestTrainingTargetIndex,
                                            int numTrainingEgsCorrectClassUsingBestTrainingIndex,
                                            int numTestingEgsCorrectClassUsingBestTrainingIndex,
                                            int numTestingExamplesReachHere,
                                            int bestTestingTargetIndex,
                                            int numTestingEgsCorrectClassUsingBestTestingIndex,
                                            int numTrainingEgsCorrectClassUsingBestTestingIndex)
        {
            // Create new leaf node.
            DecisionTreeNode leaf =
                new DecisionTreeNode(parent, label, null, mask);

            // Set the node statistics.
            leaf.setTrainingStats(numTrainingExamplesReachHere,
                                  bestTrainingTargetIndex,
                                  numTrainingEgsCorrectClassUsingBestTrainingIndex,
                                  numTestingEgsCorrectClassUsingBestTrainingIndex);

            leaf.setTestingStats(numTestingExamplesReachHere,
                                 bestTestingTargetIndex,
                                 numTestingEgsCorrectClassUsingBestTestingIndex,
                                 numTrainingEgsCorrectClassUsingBestTestingIndex);

            // Update the tree statistics.
            TrainingCorrect += numTrainingEgsCorrectClassUsingBestTrainingIndex;
            TestingCorrect  += numTestingEgsCorrectClassUsingBestTrainingIndex;

            // Now, attach the new leaf to the supplied node.
            if (parent != null)
            {
                parent.setChild(arcNum, leaf);
            }

            // Add a reference to the new node to the node list.
            Nodes.Add(leaf);

            // Determine if the tree is complete.
            if (findIncompleteNode((DecisionTreeNode)Nodes[0], new int[1]) == null)
            {
                Complete = true;
            }

            return leaf;
        }
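
        // Usage sketch (not part of the original source): attaching a leaf as
        // the root of an empty tree. `tree` and the statistics values are
        // illustrative placeholders; when parent is null the arc number is
        // ignored and the leaf is simply recorded as the first node.
        //
        //     AttributeMask rootMask = new AttributeMask(dataset.getNumAttributes());
        //     rootMask.mask(0, 1);   // Pin the target attribute to value index 1.
        //     DecisionTreeNode leaf = tree.addLeafNode(
        //         null, 0, "yes", rootMask,
        //         100, 1, 80, 40,    // Training stats.
        //         50, 1, 42, 78);    // Testing stats.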
Example #2
        /// <summary>
        /// Attaches a new internal node to the supplied node,
        /// along the specified arc.
        /// </summary>
        /// <param name="parent">The node in the current tree to attach
        /// the internal node to.  If the node is null, the
        /// new internal node becomes the root of the tree.</param>
        /// <param name="arcNum">The arc number (or attribute value
        /// index) along which to attach the new node.</param>
        /// <param name="attributePosition">The position of the
        /// attribute used to split at the new node, relative
        /// to the other attributes in the dataset.</param>
        /// <param name="att">The attribute used to split at the new
        /// node.</param>
        /// <returns>A reference to the new internal node.</returns>
        public DecisionTreeNode addInternalNode(DecisionTreeNode parent,
                                                int arcNum,
                                                int attributePosition,
                                                Attribute att,
                                                AttributeMask mask,
                                                int numTrainingExamplesReachHere,
                                                int bestTrainingTargetIndex,
                                                int numTrainingEgsCorrectClassUsingBestTrainingIndex,
                                                int numTestingEgsCorrectClassUsingBestTrainingIndex,
                                                int numTestingExamplesReachHere,
                                                int bestTestingTargetIndex,
                                                int numTestingEgsCorrectClassUsingBestTestingIndex,
                                                int numTrainingEgsCorrectClassUsingBestTestingIndex)
        {
            // Create a new internal node.
            DecisionTreeNode internalNode = new DecisionTreeNode(parent, att.getName(), att.getValueNames(), mask);

            // Set the node statistics.
            internalNode.setTrainingStats(numTrainingExamplesReachHere,
                                          bestTrainingTargetIndex,
                                          numTrainingEgsCorrectClassUsingBestTrainingIndex,
                                          numTestingEgsCorrectClassUsingBestTrainingIndex);

            internalNode.setTestingStats(numTestingExamplesReachHere,
                                         bestTestingTargetIndex,
                                         numTestingEgsCorrectClassUsingBestTestingIndex,
                                         numTrainingEgsCorrectClassUsingBestTestingIndex);

            // Update the tree statistics.
            InternalNodes++;

            // Now, attach the new internal node to the supplied node.
            if (parent != null)
            {
                parent.setChild(arcNum, internalNode);
            }

            // Add a reference to the new node to the node list.
            Nodes.Add(internalNode);

            return internalNode;
        }
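
        // Usage sketch (not part of the original source): creating a root split
        // and hanging a leaf off its first arc. `tree`, `att`, `attPos`, and the
        // masks/statistics are illustrative placeholders.
        //
        //     DecisionTreeNode root = tree.addInternalNode(
        //         null, 0, attPos, att, rootMask,
        //         100, 0, 55, 27, 50, 0, 26, 53);
        //     tree.addLeafNode(root, 0, "no", childMask,
        //         60, 0, 55, 27, 30, 0, 26, 53);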
        private int _trainTrainingCorrectClass; // Number of training examples correctly
                                                // classified using the best training target value.

        #endregion Fields

        #region Constructors

        public DecisionTreeNode(DecisionTreeNode parent,
            String label,
            Object[] arcLabels,
            AttributeMask mask)
        {
            if (label == null || mask == null)
                throw new ArgumentException("Label or attribute mask is null.");

            Parent = parent;
            NodeLabel = label;
            ArcLabels = arcLabels;
            AttMask = mask;

            // The number of possible children for the node is
            // equal to the number of values for this attribute.

            // If the arc label array was null, then this is
            // a leaf node that has no children.
            if (arcLabels != null)
                Children = new DecisionTreeNode[ArcLabels.Length];

            // The node is initially unflagged.
            NodeFlag = -2;
        }
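
        // Construction sketch (not part of the original source): passing a null
        // arcLabels array yields a leaf (no Children array is allocated), while
        // a non-null array yields an internal node with one child slot per
        // attribute value.
        //
        //     DecisionTreeNode leaf  = new DecisionTreeNode(parent, "yes", null, mask);
        //     DecisionTreeNode split = new DecisionTreeNode(parent, att.getName(),
        //                                                   att.getValueNames(), mask);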
        /**
         * An implementation of the recursive decision tree
         * learning algorithm.  Given a parent node and an arc
         * number, the method will attach a new decision 'sub'-tree
         * below the parent node.
         *
         * @param parent The parent node for the new decision tree.
         *
         * @param arcNum The arc number (or path) along which the
         *        new subtree will be attached.
         *
         * @return true if an entire subtree was successfully added,
         *         false otherwise.
         */
        public bool learnDT(DecisionTreeNode parent, int arcNum)
        {
            AttributeMask mask;

            if (parent == null)
            {
                // We have to add at the root.
                mask = new AttributeMask(DatasetUse.getNumAttributes());
            }
            else
            {
                mask = new AttributeMask(parent.getMask());

                // Mask off the specified arc number.
                try
                {
                    mask.mask(DatasetUse.getAttributePosition(parent.getLabel()), arcNum);
                }
                catch (Exception)
                {
                    return false;
                }
            }

            // Now, classify the examples at the current position.
            int[] conclusion = new int[8];
            int result = classifyExamples(mask, conclusion, null, null, null);

            Attribute target = DatasetUse.getTargetAttribute();
            int numTargetVals = target.getNumValues();
            String label;

            if (result == DATASET_EMPTY)
            {
                // If no examples reach our current position,
                // we add a leaf with the most common target
                // classification for the parent node.

                // Save testing results.
                int numTestingExamplesReachHere = conclusion[5];
                int bestTestingTargetIndex = conclusion[4];
                int numTestingExamplesCorrectClass = conclusion[6];
                int numTrainingExamplesCorrectClass = conclusion[7];

                // Reclassify at the parent (non-null here: an empty example
                // set at the root would mean an empty dataset).
                classifyExamples(parent.getMask(), conclusion, null, null, null);

                try
                {
                    label = target.getAttributeValueByNum(conclusion[0]);
                }
                catch (Exception)
                {
                    return false;
                }

                // We have to grab the counts again for the testing data...
                int[] currTestingCounts = new int[target.getNumValues()];
                getExampleCounts(mask, DatasetUse.getTestingExamples(), currTestingCounts, null);

                // Mask target value and add a leaf to the tree.
                mask.mask(0, conclusion[0]);

                DecisionTreeNode node = Tree.addLeafNode(parent,
                                      arcNum,
                                      label,
                                      mask,
                                      0,
                                      conclusion[0],
                                      0,
                                      currTestingCounts[conclusion[0]],
                                      numTestingExamplesReachHere,
                                      bestTestingTargetIndex,
                                      numTestingExamplesCorrectClass,
                                      numTrainingExamplesCorrectClass);

                return true;
            }

            if (result == DATASET_IDENT_CONCL)
            {
                // Pure result - we can add a leaf node with the
                // correct target attribute value.
                try
                {
                    label = target.getAttributeValueByNum(conclusion[0]);
                }
                catch (Exception)
                {
                    return false;
                }

                // Mask target value and add a leaf to the tree.
                mask.mask(0, conclusion[0]);

                DecisionTreeNode node = Tree.addLeafNode(parent,
                                      arcNum,
                                      label,
                                      mask,
                                      conclusion[1],
                                      conclusion[0],
                                      conclusion[2],
                                      conclusion[3],
                                      conclusion[5],
                                      conclusion[4],
                                      conclusion[6],
                                      conclusion[7]);

                return true;
            }

            // Mixed conclusion - so we have to select
            // an attribute to split on, and then build a
            // new internal node with that attribute.

            // First, generate statistics - this may take a while.
            int[] nodeStats = new int[numTargetVals];
            List<Attribute> availableAtts = generateStats(mask, nodeStats);

            if (availableAtts.Count == 0)
            {
                // No attributes left to split on - so use
                // the most common target value at the current position.
                try
                {
                    label = target.getAttributeValueByNum(conclusion[0]);
                }
                catch (Exception)
                {
                    return false;
                }

                mask.mask(0, conclusion[0]);

                DecisionTreeNode node = Tree.addLeafNode(parent,
                                      arcNum,
                                      label,
                                      mask,
                                      conclusion[1],
                                      conclusion[0],
                                      conclusion[2],
                                      conclusion[3],
                                      conclusion[5],
                                      conclusion[4],
                                      conclusion[6],
                                      conclusion[7]);

                return true;
            }

            // Choose an attribute, based on the set of
            // available attributes.
            List<double> results = new List<double>();
            Attribute att = chooseAttribute(availableAtts, nodeStats, results);

            int attPos;

            try
            {
                attPos = DatasetUse.getAttributePosition(att.getName());
            }
            catch (Exception)
            {
                return false;
            }

            DecisionTreeNode newParent = Tree.addInternalNode(parent,
                                      arcNum,
                                      attPos,
                                      att,
                                      mask,
                                      conclusion[1],
                                      conclusion[0],
                                      conclusion[2],
                                      conclusion[3],
                                      conclusion[5],
                                      conclusion[4],
                                      conclusion[6],
                                      conclusion[7]);

            // Now, recursively descend along each branch of the new node.
            for (int j = 0; j < newParent.getArcLabelCount(); j++)
            {
                // Recursive call.
                if (!learnDT(newParent, j)) return false;
            }

            return true;
        }
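
        // Entry-point sketch (not part of the original source): the recursion is
        // started with a null parent, so the first node added becomes the root;
        // the arc number is ignored in that case.
        //
        //     if (!learnDT(null, 0))
        //     {
        //         // Learning aborted (e.g., an attribute lookup threw).
        //     }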
        /**
          * Generates statistics (used for splitting) based on the
          * current position in the tree (as defined by an
          * attribute mask).
          *
          * @return A List that contains the available attributes.
          *         Each attribute's internal statistics array is
          *         populated with appropriate data.  The supplied
          *         stats array is filled with counts of the number of
          *         examples that fall into each of the target classes
          *         at the current position in the tree.
          */
        public List<Attribute> generateStats(AttributeMask mask, int[] stats)
        {
            // First, we fill the stats array - this is not the
            // most efficient approach, since we're looping through
            // the data several times.
            getExampleCounts(mask, DatasetUse.getTrainingExamples(), stats, null);

            // Now, we have to go through the attribute mask
            // and locate the attributes that are still available.
            List<Attribute> results = new List<Attribute>();

            // Create a new mask that we can modify.
            AttributeMask newMask = new AttributeMask(mask);

            // We don't use position 0, that's where the target attribute is.
            for (int i = 1; i < mask.getNumAttributes(); i++)
            {
                if (newMask.isMasked(i) == AttributeMask.UNUSED)
                {
                    // This attribute is available, so we calculate stats for it.
                    Attribute att = null;

                    try
                    {
                        att = DatasetUse.getAttributeByNum(i);
                    }
                    catch (Exception)
                    {
                        // Unreachable in practice: i ranges over the mask's own attributes.
                        return null;
                    }

                    int[][] attStats = att.getStatsArray();

                    // Modify the mask and fill in the arrays.
                    for (int j = 0; j < att.getNumValues(); j++)
                    {
                        newMask.mask(i, j);
                        getExampleCounts(newMask, DatasetUse.getTrainingExamples(), attStats[j], null);
                    }

                    // Reset the mask.
                    newMask.unmask(i);
                    results.Add(att);
                }
            }

            return results;
        }
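
        // Sketch (an assumption, not the original implementation): learnDT passes
        // the attributes returned by generateStats to chooseAttribute. One
        // plausible criterion is ID3-style information gain computed from each
        // attribute's stats array. The method and helper below are hypothetical;
        // only getStatsArray() and the nodeStats layout come from the code above.
        // Assumes `using System;` and a non-empty nodeStats (learnDT only splits
        // on mixed, non-empty nodes).
        private static double entropySketch(int[] counts)
        {
            int total = 0;
            foreach (int c in counts) total += c;
            if (total == 0) return 0.0;

            double h = 0.0;
            foreach (int c in counts)
            {
                if (c == 0) continue;
                double p = (double)c / total;
                h -= p * Math.Log(p, 2.0);
            }
            return h;
        }

        private Attribute chooseAttributeSketch(List<Attribute> availableAtts, int[] nodeStats)
        {
            int numExamples = 0;
            foreach (int c in nodeStats) numExamples += c;

            double baseEntropy = entropySketch(nodeStats);
            Attribute best = null;
            double bestGain = double.NegativeInfinity;

            foreach (Attribute att in availableAtts)
            {
                // Expected entropy after splitting on this attribute: a weighted
                // sum of branch entropies, one branch per attribute value.
                double expected = 0.0;
                int[][] attStats = att.getStatsArray();

                for (int j = 0; j < attStats.Length; j++)
                {
                    int branchTotal = 0;
                    foreach (int c in attStats[j]) branchTotal += c;
                    if (branchTotal == 0) continue;

                    expected += ((double)branchTotal / numExamples) * entropySketch(attStats[j]);
                }

                double gain = baseEntropy - expected;
                if (gain > bestGain)
                {
                    bestGain = gain;
                    best = att;
                }
            }

            return best;
        }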
        /**
         * Classifies all examples in the current set of
         * examples, by target attribute value.  The
         * attribute mask determines which examples from the
         * dataset form the current example set.
         *
         * @param mask The current attribute mask that
         *        indicates which examples from the dataset
         *        should be considered.
         *
         * @param conclusion The method expects the parameter
         *        to be an array of size 8.  Positions in
         *        the array are filled with the following
         *        values.
         *
         *        <ul>
         *            <li><i>Position 0</i> - Records the
         *            index of the most common target attribute
         *            value from the training dataset.
         *
         *            <li><i>Position 1</i> - Records the number
         *            of training examples from the dataset that
         *            reach the current position.
         *
         *            <li><i>Position 2</i> - Records the number
         *            of training examples from the dataset
         *            that would be correctly classified
         *            <i>if a leaf with the most common training
         *            target classification</i> were added at the
         *            current position.
         *
         *            <li><i>Position 3</i> - Records the number
         *            of testing examples from the dataset
         *            that would be correctly classified
         *            <i>if a leaf with the most common training
         *            target classification</i> were added at the
         *            current position.
         *
         *            <li><i>Position 4</i> - Records the index
         *            of the most common target attribute
         *            value from the testing dataset.
         *
         *            <li><i>Position 5</i> - Records the number
         *            of testing examples from the dataset that
         *            reach the current position.
         *
         *            <li><i>Position 6</i> - Records the number
         *            of testing examples from the dataset
         *            that would be correctly classified
         *            <i>if a leaf with the most common testing
         *            target classification</i> were added at the
         *            current position.
         *
         *            <li><i>Position 7</i> - Records the number
         *            of training examples from the dataset
         *            that would be correctly classified
         *            <i>if a leaf with the most common testing
         *            target classification</i> were added at the
         *            current position.
         *        </ul>
         *
         * @param trainingCounts The method expects the parameter to be
         *        an array with a size equal to the number of
         *        target attribute values.  Each position in
         *        the array is filled with a corresponding count
         *        of the number of training examples that fall into
         *        that particular target class, at the current
         *        position in the tree.  This parameter can be null
         *        if training count data is not required.
         *
         * @param testingCounts The method expects the parameter to be
         *        an array with a size equal to the number of
         *        target attribute values.  Each position in the
         *        array is filled with a corresponding count of
         *        the number of testing examples that fall into
         *        that particular target class, at the current
         *        position in the tree.  This parameter can be null
         *        if testing count data is not required.
         *
         * @param examples The method expects the parameter to be
         *        an array with a size equal to the number of
         *        training examples in the dataset.  Each entry in
         *        the array is marked with true or false, depending
         *        on whether or not a particular example reaches
         *        the current position.
         *
         * @return DATASET_MIXED_CONCL if the examples have
         *         multiple, different target attribute values.
         *         DATASET_IDENT_CONCL if all the examples have
         *         the same target attribute value.
         *         DATASET_EMPTY if there are no examples in the
         *         current example set.
         *
         *         <p>
         *         If the result is DATASET_IDENT_CONCL, the
         *         index of the single target attribute value
         *         is returned in <code>conclusion[0]</code>.  If
         *         the result is DATASET_EMPTY, the index of the
         *         most common target attribute value is returned
         *         in <code>conclusion[0]</code>.
         */
        public int classifyExamples(AttributeMask mask,
            int[] conclusion,
            int[] trainingCounts,
            int[] testingCounts,
            bool[] examples)
        {
            if (mask == null || conclusion == null)
                throw new ArgumentException("Mask or conclusion array is null.");

            // Determine the number of target attribute values
            // and create some storage space for our counts.
            int[] currTrainingCounts = null;
            int[] currTestingCounts = null;

            if (trainingCounts != null)
                currTrainingCounts = trainingCounts;
            else
                currTrainingCounts = new
                  int[DatasetUse.getTargetAttribute().getNumValues()];

            if (testingCounts != null)
                currTestingCounts = testingCounts;
            else
                currTestingCounts = new int[currTrainingCounts.Length];

            getExampleCounts(mask, DatasetUse.getTrainingExamples(), currTrainingCounts, examples);
            getExampleCounts(mask, DatasetUse.getTestingExamples(), currTestingCounts, null);

            // Init results.
            conclusion[0] = 0;   // Index of the most common training target value.
            conclusion[1] = 0;   // Total number of training examples reaching this node.
            conclusion[2] = 0;   // Training examples correctly classified if this node were
                                 // a leaf with the most common training target value.
            conclusion[3] = 0;   // Testing examples correctly classified if this node were
                                 // a leaf with the most common training target value.

            conclusion[4] = 0;   // Index of the most common testing target value.
            conclusion[5] = 0;   // Total number of testing examples reaching this node.
            conclusion[6] = 0;   // Testing examples correctly classified if this node were
                                 // a leaf with the most common testing target value.
            conclusion[7] = 0;   // Training examples correctly classified if this node were
                                 // a leaf with the most common testing target value.

            // Examine the results and determine the conclusion.
            int result = DATASET_EMPTY;

            for (int i = 0; i < currTrainingCounts.Length; i++)
            {
                // Increment # of examples that reach this position.
                conclusion[1] += currTrainingCounts[i];
                conclusion[5] += currTestingCounts[i];

                if (result == DATASET_EMPTY && currTrainingCounts[i] != 0)
                    result = DATASET_IDENT_CONCL;
                else if (result == DATASET_IDENT_CONCL && currTrainingCounts[i] != 0)
                    result = DATASET_MIXED_CONCL;

                if (currTrainingCounts[i] >= currTrainingCounts[conclusion[0]])
                {
                    // This target value is more common in the training set.
                    conclusion[0] = i;
                    conclusion[2] = currTrainingCounts[i];
                    conclusion[3] = currTestingCounts[i];
                }

                if (currTestingCounts[i] >= currTestingCounts[conclusion[4]])
                {
                    // This target value is more common in the testing set.
                    conclusion[4] = i;
                    conclusion[6] = currTestingCounts[i];
                    conclusion[7] = currTrainingCounts[i];
                }
            }
            return result;
        }
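
        // Usage sketch (not part of the original source): reading the conclusion
        // array after classifying at a mask. Index meanings follow the comment
        // block above.
        //
        //     int[] conclusion = new int[8];
        //     int result = classifyExamples(mask, conclusion, null, null, null);
        //
        //     if (result == DATASET_IDENT_CONCL)
        //     {
        //         // conclusion[0]: the single target value index reached here;
        //         // conclusion[1]: number of training examples reaching here.
        //     }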
        /**
           * Fills the supplied array with the number of examples
           * from the current dataset that fall into each of the
           * target categories (based on the attribute mask).
           *
           * @param mask The mask that determines which examples
           *        reach the current position in the decision
           *        tree.
           *
           * @param examples An iteration over a series of
           *        examples from the current dataset.
           *
           * @param counts The method expects the parameter to be
           *        an array with a size equal to the number of
           *        target attribute values.  Each position in
           *        the array is filled with a corresponding count
           *        of the number of examples that fall into
           *        that particular target class, at the current
           *        position in the decision tree.
           *
           * @param reachedHere The method expects the parameter
           *        to be an array with a size equal to the
           *        <i>total</i> number of examples being examined.
           *        Each cell in the array is set to true or
           *        false, depending on whether or not the
           *        corresponding example reaches the current
           *        position in the decision tree.
           */
        private void getExampleCounts(AttributeMask mask, IEnumerator<int[]> examples, int[] counts, bool[] reachedHere)
        {
            // Zero any values currently in the counts array.
            Array.Clear(counts, 0, counts.Length);

            int j = 0;

            // Loop through and get totals.
            while (examples.MoveNext())
            {
                int[] example = examples.Current;

                if (mask.matchMask(example))
                {
                    counts[example[0]]++;   // Increment the appropriate target count.
                    if (reachedHere != null && reachedHere.Length > j)
                        reachedHere[j] = true;
                }

                j++;
            }
        }
 /// <summary>
 /// Builds a new mask, copying the state of the
 /// supplied mask to the new mask.
 /// </summary>
 /// <param name="mask">An Attribute mask</param>
 public AttributeMask(AttributeMask mask)
 {
     Mask = (int[])mask.Mask.Clone();
 }
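
 // Usage sketch (not part of the original source): the copy constructor backs
 // the copy-then-mask pattern used by learnDT and generateStats, leaving the
 // original mask untouched.
 //
 //     AttributeMask childMask = new AttributeMask(parent.getMask());
 //     childMask.mask(attPos, arcNum);   // Restrict to a single branch.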