예제 #1
0
        /// <summary>
        /// Produces the next tree in the pruning sequence: finds the internal node with the
        /// smallest alpha and returns a copy of <paramref name="tree"/> in which that node
        /// has been collapsed into a leaf. The input tree itself is not pruned.
        /// </summary>
        /// <param name="tree">The tree to derive the pruned copy from.</param>
        /// <returns>The result of <c>PrunedClone</c> for the selected node.</returns>
        private static CARTTree GetSubTree(CARTTree tree)
        {
            var root = tree._root;
            var pending = new Stack<CARTNode>();

            // AccessNonLeaf is expected to push the internal (non-leaf) nodes onto the stack.
            AccessNonLeaf(root, pending);

            CARTNode weakestNode = null;             // internal node holding the smallest alpha
            double smallestAlpha = double.MaxValue;  // smallest alpha seen so far

            while (pending.Count > 0)
            {
                var candidate = pending.Pop();       // one internal node

                // All leaves of the subtree rooted at the candidate.
                var leaves = CARTUtil.GetLeaves(candidate);

                // Squared error when the candidate is treated as a single node.
                var collapsedError = CARTUtil.GetVar(candidate);

                // Squared error of the subtree rooted at the candidate: the sum over its leaves.
                double subtreeError = 0;
                foreach (var leaf in leaves)
                {
                    subtreeError += CARTUtil.GetVar(leaf);
                }

                // Error increase per leaf removed if the subtree were collapsed.
                var alpha = (collapsedError - subtreeError) / (leaves.Count - 1);
                if (alpha < smallestAlpha)
                {
                    smallestAlpha = alpha;
                    weakestNode = candidate;
                }
            }

            // A sequence of subtrees is being built, so the original tree must stay intact:
            // prune a copy rather than the tree itself.
            return PrunedClone(tree, weakestNode);
        }
예제 #2
0
        /// <summary>
        /// Builds a CART decision tree from the sample data, then prunes it.
        /// </summary>
        /// <param name="data">
        /// Sample data; its <c>J</c> and <c>trainSet</c> members drive tree growth, and the
        /// whole object is handed on to <c>Prune</c>.
        /// </param>
        /// <returns>The tree returned by <c>Prune</c>.</returns>
        public static CARTTree Create(CARTData data)
        {
            var grownTree = new CARTTree();
            grownTree._root = new CARTNode();

            // Index list of the input attributes: 0 .. J-2.
            List<int> inputAttributeIndexes = Enumerable.Range(0, data.J - 1).ToList();

            // Grow the full tree from the training set, starting at the empty root.
            Create(grownTree._root, inputAttributeIndexes, data.trainSet);

            return Prune(grownTree, data);
        }
예제 #3
0
        /// <summary>
        /// Tells whether the tree is made of three nodes only, i.e. a root plus two children.
        /// </summary>
        /// <param name="tree">The tree to inspect; the root's children are dereferenced.</param>
        /// <returns>
        /// <c>true</c> when neither child of the root has a left child; otherwise <c>false</c>.
        /// </returns>
        private static bool Is_ThreeNode_Tree(CARTTree tree)
        {
            var leftChild = tree._root.left;
            var rightChild = tree._root.right;

            // Any grandchild on the left side means the tree is larger than three nodes.
            if (leftChild.left != null)
            {
                return false;
            }

            // Same check for the right side.
            if (rightChild.left != null)
            {
                return false;
            }

            return true;
        }
예제 #4
0
        /// <summary>
        /// Copies <paramref name="tree"/> breadth-first into a new tree, turning
        /// <paramref name="node"/> into a leaf in the copy. The original tree is not modified.
        /// </summary>
        /// <param name="tree">The tree to copy.</param>
        /// <param name="node">The internal node that becomes a leaf in the copy.</param>
        /// <returns>A new tree whose root is the copied root node.</returns>
        private static CARTTree PrunedClone(CARTTree tree, CARTNode node)
        {
            // Two queues advance in lockstep: one walks the original tree, the other
            // holds the matching node of the copy.
            var sources = new Queue<CARTNode>();
            sources.Enqueue(tree._root);

            var copyRoot = new CARTNode();
            var copies = new Queue<CARTNode>();
            copies.Enqueue(copyRoot);

            while (sources.Count > 0)
            {
                var source = sources.Dequeue();
                var copy = copies.Dequeue();

                if (source == node)
                {
                    // The node to prune becomes a leaf: its output is the mean of the last
                    // value of every point in its region, and its children are not copied.
                    copy.output = source.region.Sum(p => p.vals.LastOrDefault()) / source.region.Count;
                    copy.region = source.region;
                    continue;
                }

                // Copy the node's own fields.
                copy.Update(source);

                if (source.left == null)
                {
                    // Leaf node: nothing below it to copy.
                    continue;
                }

                // Internal node: create both children in the copy and schedule each
                // original/copy pair for processing.
                copy.left = new CARTNode() { parent = copy };
                copy.right = new CARTNode() { parent = copy };

                sources.Enqueue(source.left);
                copies.Enqueue(copy.left);

                sources.Enqueue(source.right);
                copies.Enqueue(copy.right);
            }

            var clone = new CARTTree() { _root = copyRoot };
            return clone;
        }
예제 #5
0
        /// <summary>
        /// Prunes a fully grown tree: builds the sequence of successively pruned subtrees and
        /// returns the one with the smallest sum of squared errors on the validation set.
        /// </summary>
        /// <param name="tree">The fully grown decision tree; it is the first candidate of the sequence.</param>
        /// <param name="data">Supplies the validation set (<c>verifySet</c>).</param>
        /// <returns>
        /// The candidate with the smallest validation error. On ties the earlier (less pruned)
        /// candidate wins. <c>null</c> if no candidate's error is below <see cref="double.MaxValue"/>.
        /// </returns>
        private static CARTTree Prune(CARTTree tree, CARTData data)
        {
            // Sequence of candidate subtrees, starting with the unpruned tree.
            var list = new List<CARTTree>()
            {
                tree
            };
            var curTree = tree;

            // Keep pruning until only a root with two children is left. A root without
            // children cannot be pruned at all; checking that first also keeps
            // Is_ThreeNode_Tree from dereferencing the missing children.
            while (curTree._root.left != null && !Is_ThreeNode_Tree(curTree))
            {
                curTree = GetSubTree(curTree);
                list.Add(curTree);
            }

            // Use the validation set to pick the final subtree.
            double   min_err   = double.MaxValue;   // smallest validation error so far
            CARTTree best_tree = null;              // candidate that achieved it

            for (int i = 0; i < list.Count; i++)
            {
                var    sub_tree     = list[i];
                double squareErrSum = 0;
                for (int k = 0; k < data.verifySet.Count; k++)
                {
                    var point = data.verifySet[k];

                    // Judge returns the model's predicted output, so the error is the squared
                    // difference from the observed output. The observed output is taken from
                    // the last entry of vals, the same convention PrunedClone uses when it
                    // computes a leaf's output.
                    var residual = point.vals.LastOrDefault() - Judge(point, sub_tree);
                    squareErrSum += residual * residual;
                }
                if (min_err > squareErrSum)
                {
                    min_err   = squareErrSum;
                    best_tree = sub_tree;
                }
            }
            return best_tree;
        }
예제 #6
0
 /// <summary>
 /// Decision: computes the model's output value for the given input by delegating to the
 /// node-based overload, starting at the tree's root.
 /// </summary>
 /// <param name="point">The sample point.</param>
 /// <param name="tree">The decision tree.</param>
 /// <returns>The value returned by the node-based <c>Judge</c> overload for the root.</returns>
 public static double Judge(CARTPoint point, CARTTree tree)
 {
     var startNode = tree._root;
     return Judge(point, startNode);
 }