Code Example #1
        /**
         * An implementation of the recursive decision tree
         * learning algorithm.  Given a parent node and an arc
         * number, the method will attach a new decision subtree
         * below the parent node.
         *
         * @param parent The parent node for the new decision tree.
         *
         * @param arcNum The arc number (or path) along which the
         *        new subtree will be attached.
         *
         * @return true if an entire subtree was successfully added,
         *         false otherwise.
         */
        public bool learnDT(DecisionTreeNode parent, int arcNum)
        {
            AttributeMask mask;

            if (parent == null)
            {
                // We have to add at the root.
                mask = new AttributeMask(DatasetUse.getNumAttributes());
            }
            else
            {
                mask = new AttributeMask(parent.getMask());

                // Mask off the specified arc number.
                try
                {
                    mask.mask(DatasetUse.getAttributePosition(parent.getLabel()), arcNum);
                }
                catch (Exception)
                {
                    // Invalid attribute position or arc number.
                    return false;
                }
            }

            // Now, classify the examples at the current position.
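            // classifyExamples fills in the conclusion array; from the
            // way its slots are consumed below, the layout appears to
            // be: [0] best training target index, [1] training examples
            // reaching this node, [2] training examples correct under
            // the best training index, [3] testing examples correct
            // under the best training index, [4] best testing target
            // index, [5] testing examples reaching this node,
            // [6] testing examples correct under the best testing index,
            // [7] training examples correct under the best testing index.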
            int[] conclusion = new int[8];
            int result = classifyExamples(mask, conclusion, null, null, null);

            Attribute target = DatasetUse.getTargetAttribute();
            int numTargetVals = target.getNumValues();
            String label;

            if (result == DATASET_EMPTY)
            {
                // If no examples reach our current position,
                // we add a leaf with the most common target
                // classification for the parent node.  (parent is
                // assumed to be non-null here: an empty dataset at
                // the root would mean there were no examples at all.)

                // Save testing results.
                int numTestingExamplesReachHere = conclusion[5];
                int bestTestingTargetIndex = conclusion[4];
                int numTestingExamplesCorrectClass = conclusion[6];
                int numTrainingExamplesCorrectClass = conclusion[7];

                classifyExamples(parent.getMask(), conclusion, null, null, null);

                try
                {
                    label = target.getAttributeValueByNum(conclusion[0]);
                }
                catch (Exception)
                {
                    return false;
                }

                // We have to grab the counts again for the testing data...
                int[] currTestingCounts = new int[target.getNumValues()];
                getExampleCounts(mask, DatasetUse.getTestingExamples(), currTestingCounts, null);

                // Mask target value and add a leaf to the tree.
                mask.mask(0, conclusion[0]);

                DecisionTreeNode node = Tree.addLeafNode(parent,
                                      arcNum,
                                      label,
                                      mask,
                                      0,
                                      conclusion[0],
                                      0,
                                      currTestingCounts[conclusion[0]],
                                      numTestingExamplesReachHere,
                                      bestTestingTargetIndex,
                                      numTestingExamplesCorrectClass,
                                      numTrainingExamplesCorrectClass);

                return true;
            }

            if (result == DATASET_IDENT_CONCL)
            {
                // Pure result - we can add a leaf node with the
                // correct target attribute value.
                try
                {
                    label = target.getAttributeValueByNum(conclusion[0]);
                }
                catch (Exception)
                {
                    return false;
                }

                // Mask target value and add a leaf to the tree.
                mask.mask(0, conclusion[0]);

                DecisionTreeNode node = Tree.addLeafNode(parent,
                                      arcNum,
                                      label,
                                      mask,
                                      conclusion[1],
                                      conclusion[0],
                                      conclusion[2],
                                      conclusion[3],
                                      conclusion[5],
                                      conclusion[4],
                                      conclusion[6],
                                      conclusion[7]);

                return true;
            }

            // Mixed conclusion - so we have to select
            // an attribute to split on, and then build a
            // new internal node with that attribute.

            // First, generate statistics - this may take a while.
            int[] nodeStats = new int[numTargetVals];
            List<Attribute> availableAtts = generateStats(mask, nodeStats);

            if (availableAtts.Count == 0)
            {
                // No attributes left to split on - so use
                // the most common target value at the current position.
                try
                {
                    label = target.getAttributeValueByNum(conclusion[0]);
                }
                catch (Exception)
                {
                    return false;
                }

                mask.mask(0, conclusion[0]);

                DecisionTreeNode node = Tree.addLeafNode(parent,
                                      arcNum,
                                      label,
                                      mask,
                                      conclusion[1],
                                      conclusion[0],
                                      conclusion[2],
                                      conclusion[3],
                                      conclusion[5],
                                      conclusion[4],
                                      conclusion[6],
                                      conclusion[7]);

                return true;
            }

            // Choose an attribute, based on the set of available
            // attributes.  (The results list is filled in by
            // chooseAttribute, presumably with per-attribute scores,
            // but is not used further here.)
            List<double> results = new List<double>();
            Attribute att = chooseAttribute(availableAtts, nodeStats, results);

            int attPos;

            try
            {
                attPos = DatasetUse.getAttributePosition(att.getName());
            }
            catch (Exception)
            {
                return false;
            }

            DecisionTreeNode newParent = Tree.addInternalNode(parent,
                                      arcNum,
                                      attPos,
                                      att,
                                      mask,
                                      conclusion[1],
                                      conclusion[0],
                                      conclusion[2],
                                      conclusion[3],
                                      conclusion[5],
                                      conclusion[4],
                                      conclusion[6],
                                      conclusion[7]);

            // Now, recursively descend along each branch of the new node.
            for (int j = 0; j < newParent.getArcLabelCount(); j++)
            {
                // Recursive call.
                if (!learnDT(newParent, j)) return false;
            }

            return true;
        }
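
A minimal usage sketch (not part of the original listing): because learnDT creates the root itself when parent is null, a single top-level call grows the entire tree. The buildTree wrapper name below is hypothetical.

        // Hypothetical driver - assumes it lives in the same class as
        // learnDT.  When parent is null, learnDT builds the root, and
        // the arc number is presumably ignored in that case.
        public bool buildTree()
        {
            return learnDT(null, 0);
        }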
Code Example #2
        /**
         * An implementation of the recursive decision tree
         * pessimistic pruning algorithm.  Given a root
         * node, the method will prune all the branches
         * below it.
         *
         * @param node The root node of the tree to prune.
         *
         * @param error A <code>double</code> array of size 1. The
         *        array is used to store the current error value.
         *
         * @return <code>true</code> if an entire subtree was successfully
         *         pruned, or <code>false</code> otherwise.
         */
        public bool prunePessimisticDT(DecisionTreeNode node, double[] error)
        {
            // Post-order walk through the tree, marking
            // our path as we go along.
            if (node.isLeaf())
            {
                if (node.getTrainingEgsAtNode() == 0)
                {
                    error[0] = 0;
                    return true;
                }
                else
                {
                    // We do the error calculation in two steps -
                    // here we weight the error bar by the number of
                    // examples that reach this node and add the raw
                    // misclassification count.  The recursive caller
                    // then sums these weighted values over all of the
                    // children, so each child contributes in proportion
                    // to the examples it covers.
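                    //
                    // For example, with 10 training examples of which
                    // 2 are misclassified: p1 = (2 + 1) / (10 + 2) = 0.25,
                    // so error[0] = 10 * errorBar(0.25, 10) + 2.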
                    int errors1 = (int)node.getTrainingEgsAtNode() - node.getTrainingEgsCorrectClassUsingBestTrainingIndex();
                    double p1 = (double)(errors1 + 1.0) / (node.getTrainingEgsAtNode() + 2.0);

                    error[0] = node.getTrainingEgsAtNode() * errorBar(p1, node.getTrainingEgsAtNode()) + errors1;

                    return true;
                }
            }

            // We're at an internal node, so compute the error
            // of the children and use the result to determine
            // if we prune or not.
            double errorSum = 0;

            for (int i = 0; i < node.getArcLabelCount(); i++)
            {
                // Mark our current path.
                Tree.flagNode(node, i);

                if (!prunePessimisticDT(node.getChild(i), error))
                {
                    Tree.flagNode(node, -2);
                    return false;
                }

                errorSum += error[0];
            }

            // Mark the node as our current target.
            Tree.flagNode(node, -1);

            // Get the worst-case performance of this node.
            double errorWorst;

            if (node.getTrainingEgsAtNode() == 0)
            {
                error[0] = 0;
                return true;
            }

            int errors = (int)node.getTrainingEgsAtNode() - node.getTrainingEgsCorrectClassUsingBestTrainingIndex();
            double p = (double)(errors + 1.0) / (node.getTrainingEgsAtNode() + 2.0);

            errorWorst = (double)node.getTrainingEgsAtNode() * errorBar(p, node.getTrainingEgsAtNode()) + errors;

            DecisionTreeNode newNode = node;

            // Prune when converting this node to a leaf would give
            // strictly fewer estimated errors than keeping its subtree.
            if (errorWorst < errorSum)
            {
                // We need to "prune" this node to a leaf.
                DecisionTreeNode parent = node.getParent();
                int arcNum = -1;

                if (parent != null)
                {
                    arcNum = parent.getChildPosition(node);
                }

                Tree.pruneSubtree(node);

                // Figure out the label for the new leaf.
                String label = null;

                try
                {
                    label = DatasetUse.getTargetAttribute().getAttributeValueByNum(node.getTrainingBestTarget());
                }
                catch (Exception)
                {
                    // Should never happen.
                }

                node.getMask().mask(0, node.getTrainingBestTarget());

                newNode =
                  Tree.addLeafNode(parent, arcNum, label,
                    node.getMask(),
                    node.getTrainingEgsAtNode(),
                    node.getTrainingBestTarget(),
                    node.getTrainingEgsCorrectClassUsingBestTrainingIndex(),
                    node.getTestingEgsCorrectClassUsingBestTrainingIndex(),
                    node.getTestingEgsAtNode(),
                    node.getTestingBestTarget(),
                    node.getTestingEgsCorrectClassUsingBestTestingIndex(),
                    node.getTrainingEgsCorrectClassUsingBestTestingIndex());
            }

            // Update the error estimate handed back to the caller.
            if (newNode.isLeaf())
            {
                error[0] = errorWorst;
            }
            else
            {
                error[0] = errorSum;
            }

            // All finished, unmark the node if it still exists.
            Tree.flagNode(node, -2);

            return true;
        }
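
The errorBar helper is called above but not shown in these excerpts. In C4.5-style pessimistic pruning it is typically the half-width of a normal-approximation confidence interval around the observed error rate p over n examples; the sketch below assumes that convention and is not necessarily the project's actual implementation.

        // A plausible errorBar under the usual normal approximation;
        // the real helper may differ.
        private double errorBar(double p, double n)
        {
            // z of roughly 0.67 matches C4.5's default 25% confidence level.
            const double z = 0.67;
            return z * Math.Sqrt(p * (1.0 - p) / n);
        }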
Code Example #3
        /**
         * An implementation of the recursive decision tree
         * reduced-error pruning algorithm.  Given a root
         * node, the method will prune all the branches
         * below it.
         *
         * @param node The root node of the tree to prune.
         *
         * @param error A <code>double</code> array of size 1. The
         *        array is used to store the current error value.
         *
         * @return <code>true</code> if an entire subtree was successfully
         *         pruned, or <code>false</code> otherwise.
         */
        public bool pruneReducedErrorDT(DecisionTreeNode node, double[] error)
        {
            if (node.isLeaf())
            {
                // A leaf's error is the number of testing examples it
                // misclassifies.
                error[0] = node.getTestingEgsAtNode() - node.getTestingEgsCorrectClassUsingBestTrainingIndex();
                return true;
            }

            // We're at an internal node, so compute the error
            // of the children and use the result to determine
            // if we prune or not.
            double errorSum = 0;

            for (int i = 0; i < node.getArcLabelCount(); i++)
            {
                // Mark our current path.
                Tree.flagNode(node, i);

                if (!pruneReducedErrorDT(node.getChild(i), error))
                {
                    Tree.flagNode(node, -2);
                    return false;
                }

                errorSum += error[0];
            }

            // Mark the node as our current target.
            Tree.flagNode(node, -1);

            // Get the best-case performance of this node.
            double errorBest = node.getTestingEgsAtNode() - node.getTestingEgsCorrectClassUsingBestTestingIndex();

            DecisionTreeNode newNode = node;

            // Prune when the node as a single leaf would misclassify
            // fewer testing examples than its subtree does.
            if (errorBest < errorSum)
            {
                // We need to "prune" this node to a leaf.
                DecisionTreeNode parent = node.getParent();
                int arcNum = -1;

                if (parent != null)
                {
                    arcNum = parent.getChildPosition(node);
                }

                Tree.pruneSubtree(node);

                // Figure out the label for the new leaf.
                String label = null;

                try
                {
                    label = DatasetUse.getTargetAttribute().getAttributeValueByNum(node.getTestingBestTarget());
                }
                catch (Exception)
                {
                    // Should never happen.
                }

                node.getMask().mask(0, node.getTestingBestTarget());

                newNode = Tree.addLeafNode(parent, arcNum, label,
                    node.getMask(),
                    node.getTrainingEgsAtNode(),
                    node.getTestingBestTarget(),
                    node.getTrainingEgsCorrectClassUsingBestTestingIndex(),
                    node.getTestingEgsCorrectClassUsingBestTestingIndex(),
                    node.getTestingEgsAtNode(),
                    node.getTestingBestTarget(),
                    node.getTestingEgsCorrectClassUsingBestTestingIndex(),
                    node.getTrainingEgsCorrectClassUsingBestTestingIndex());
            }

            // Update the error estimate handed back to the caller.
            if (newNode.isLeaf())
            {
                error[0] = errorBest;
            }
            else
            {
                error[0] = errorSum;
            }

            // All finished, unmark the node if it still exists.
            Tree.flagNode(node, -2);

            return true;
        }
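
Both pruning methods share the same calling convention: a one-element double array acts as an out-parameter for the subtree error. Below is a minimal driver sketch, assuming Tree exposes its root through a getRoot() accessor (an assumption, not confirmed by the excerpts above).

        // Hypothetical driver: prune the finished tree in place.
        public bool pruneTree(bool pessimistic)
        {
            double[] error = new double[1];          // receives the subtree error
            DecisionTreeNode root = Tree.getRoot();  // assumed accessor

            return pessimistic
                ? prunePessimisticDT(root, error)
                : pruneReducedErrorDT(root, error);
        }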