Пример #1
0
        /// <summary>
        /// Produces the next tree of the pruning sequence: evaluates alpha for every node
        /// delivered by <c>AccessNonLeaf</c>, picks the node with the smallest alpha and
        /// returns <c>PrunedClone(tree, thatNode)</c>.
        /// </summary>
        /// <param name="tree">Tree to examine. It is not pruned here; it is handed to <c>PrunedClone</c>.</param>
        /// <returns>The tree returned by <c>PrunedClone</c> for the node with the smallest alpha.</returns>
        private static CARTTree GetSubTree(CARTTree tree)
        {
            CARTNode weakestNode   = null;             // internal node that owns the smallest alpha
            double   smallestAlpha = double.MaxValue;  // smallest alpha seen so far

            // Presumably fills the stack with the internal (non-leaf) nodes under the root;
            // the helper is not visible here -- confirm against its definition.
            var pending = new Stack<CARTNode>();
            AccessNonLeaf(tree._root, pending);

            while (pending.Count != 0)
            {
                // One internal node per iteration.
                CARTNode candidate = pending.Pop();

                // All leaves of the subtree rooted at the candidate.
                var leaves = CARTUtil.GetLeaves(candidate);

                // Squared error when the candidate is treated as a single node.
                var nodeError = CARTUtil.GetVar(candidate);

                // Squared error of the subtree rooted at the candidate: sum over its leaves.
                double subtreeError = 0;
                foreach (var leaf in leaves)
                {
                    subtreeError += CARTUtil.GetVar(leaf);
                }

                // Error increase caused by collapsing the subtree, per leaf removed.
                var alpha = (nodeError - subtreeError) / (leaves.Count - 1);

                // Strict comparison: on ties the node popped first is kept.
                if (alpha < smallestAlpha)
                {
                    smallestAlpha = alpha;
                    weakestNode   = candidate;
                }
            }

            // The node with the smallest alpha is the one to prune. Because a sequence of
            // subtrees has to be produced, the original tree is left intact and a pruned
            // copy is returned instead. If no node was visited, weakestNode is still null here.
            return PrunedClone(tree, weakestNode);
        }
Пример #2
0
        /// <summary>
        /// Recursively builds the decision (regression) tree below <paramref name="node"/>.
        /// The node becomes a leaf when its region holds a single point, when no split
        /// attribute is left, or when the squared error of the region is below a threshold;
        /// otherwise it is split on the attribute with the smallest loss and both children
        /// are built recursively.
        /// </summary>
        /// <param name="node">Node to fill in; its <c>region</c> is always set to <paramref name="points"/>.</param>
        /// <param name="attrIdxs">Indices of the input attributes still available for splitting. Only read, never modified.</param>
        /// <param name="points">Sample points of this node's region; the last element of each point's <c>vals</c> is used as its output value.</param>
        private static void Create(CARTNode node, List<int> attrIdxs, List<CARTPoint> points)
        {
            node.region = points;

            // According to the CART split strategy a region produced by a split holds at
            // least one sample. With exactly one sample there is nothing to split: the node
            // is a leaf whose output is that sample's output.
            if (points.Count == 1)
            {
                node.output = points[0].vals.LastOrDefault();
                return;
            }

            // Mean output of the region; used as the estimate for every leaf created below.
            var ave = points.Sum(p => p.vals.LastOrDefault()) / points.Count;

            // No attribute left to split on: make this a leaf.
            if (attrIdxs.Count == 0)
            {
                node.output = ave;
                return;
            }

            // Total squared deviation of the outputs from their mean.
            double squareErr = 0;
            foreach (var p in points)
            {
                var diff = p.vals.LastOrDefault() - ave;
                squareErr += diff * diff;
            }

            // Stop splitting when the squared error is small relative to the mean (threshold
            // hard-coded for simplicity). The magnitude of the mean is used so the threshold
            // is never negative: with a zero or negative mean the signed threshold could not
            // be met even by perfectly constant outputs, forcing pointless splits.
            if (squareErr <= System.Math.Abs(ave) / 1000)
            {
                node.output = ave;
                return;
            }

            // Pick the attribute whose split result has the smallest loss.
            // SquareError is defined elsewhere; presumably it evaluates the best split on
            // attribute j -- confirm against CARTUtil.
            TempResult minTemp = null;  // split result with the smallest loss so far
            int        minJ    = 0;     // attribute index belonging to minTemp
            foreach (var j in attrIdxs)
            {
                var temp = CARTUtil.SquareError(j, points);
                if (minTemp == null || temp.lossVal < minTemp.lossVal)
                {
                    minTemp = temp;
                    minJ    = j;
                }
            }

            // Record the chosen split on this node.
            node.j        = minJ;
            node.splitVal = minTemp.splitVal;

            node.left  = new CARTNode { parent = node };
            node.right = new CARTNode { parent = node };

            // The attribute just used is no longer available below this node. Create only
            // reads the list, so both children can share one instance.
            var remainingAttrIdxs = attrIdxs.Where(idx => idx != minJ).ToList();

            // Build the left and right children recursively.
            Create(node.left, remainingAttrIdxs, minTemp.region1);
            Create(node.right, remainingAttrIdxs, minTemp.region2);
        }