Ejemplo n.º 1
0
 /// <summary>
 /// Copies the split attribute index, split value, output value and region of
 /// <paramref name="node"/> into this node. The child and parent links of this
 /// node are not assigned here.
 /// </summary>
 /// <param name="node">Node whose field values are copied.</param>
 public void Update(CARTNode node)
 {
     // The region is copied by plain assignment from the source node.
     region   = node.region;
     output   = node.output;
     splitVal = node.splitVal;
     j        = node.j;
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Produces the next tree of a pruning sequence: among the candidate nodes collected by
        /// <c>AccessNonLeaf</c>, selects the one with the smallest alpha and returns the result of
        /// <c>PrunedClone</c> for that node.
        /// </summary>
        /// <param name="tree">Tree to derive the pruned copy from; this method does not assign to it.</param>
        /// <returns>
        /// The tree returned by <c>PrunedClone</c>. When no candidate was collected, <c>PrunedClone</c>
        /// is called with a <c>null</c> node.
        /// </returns>
        private static CARTTree GetSubTree(CARTTree tree)
        {
            var candidates = new Stack<CARTNode>();
            AccessNonLeaf(tree._root, candidates);

            CARTNode bestNode  = null;              // candidate with the smallest alpha so far
            double   bestAlpha = double.MaxValue;   // smallest alpha so far

            while (candidates.Count > 0)
            {
                var current = candidates.Pop();

                // All leaves of the subtree rooted at the current candidate.
                var leaves = CARTUtil.GetLeaves(current);

                // Squared error if the candidate were treated as a single node.
                double collapsedError = CARTUtil.GetVar(current);

                // Squared error of the subtree: sum over its leaves.
                double subtreeError = 0;
                foreach (var leaf in leaves)
                {
                    subtreeError += CARTUtil.GetVar(leaf);
                }

                // Error increase per leaf removed when collapsing this subtree.
                double alpha = (collapsedError - subtreeError) / (leaves.Count - 1);
                if (alpha < bestAlpha)
                {
                    bestAlpha = alpha;
                    bestNode  = current;
                }
            }

            // The original tree is kept intact because a sequence of subtrees is built;
            // pruning is applied to a copy instead.
            return PrunedClone(tree, bestNode);
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Walks the subtree rooted at <paramref name="node"/> in pre-order and pushes every
 /// internal node (a node that has children) onto <paramref name="stack"/>, except a node
 /// without a parent (the tree root), which is visited but never pushed.
 /// </summary>
 /// <param name="node">Root of the subtree to scan; <c>null</c> and leaf nodes are ignored.</param>
 /// <param name="stack">Receives the internal, non-root nodes that were found.</param>
 private static void AccessNonLeaf(CARTNode node, Stack <CARTNode> stack)
 {
     // A leaf (or a missing node) has no internal descendants to collect.
     if (node == null || node.left == null)
     {
         return;
     }

     // The root is not a candidate itself, but its children still have to be scanned.
     // Testing "has children AND has parent" in one condition stopped the walk at the
     // root, so no node was ever collected when the scan started there.
     if (node.parent != null)
     {
         stack.Push(node);
     }

     AccessNonLeaf(node.left, stack);
     AccessNonLeaf(node.right, stack);
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Computes the sum of squared deviations of the target values in a node's region,
        /// used as the prediction error for regression. The target of a point is the last
        /// element of its <c>vals</c>.
        /// </summary>
        /// <param name="node">
        /// Node whose region is evaluated. For a node with children the deviations are taken
        /// from the mean target of the region; for a leaf they are taken from <c>node.output</c>.
        /// </param>
        /// <returns>The sum of squared deviations over all points of the region.</returns>
        public static double GetVar(CARTNode node)
        {
            // Internal node: centre on the region mean; leaf: centre on the stored output.
            double center = node.left != null
                ? node.region.Sum(p => p.vals.LastOrDefault()) / node.region.Count
                : node.output;

            return node.region.Sum(p => Math.Pow(p.vals.LastOrDefault() - center, 2));
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Creates a copy of <paramref name="tree"/> in which <paramref name="node"/> is turned
        /// into a leaf. The nodes of the original tree are not reassigned; region lists are
        /// shared between the original and the copy.
        /// </summary>
        /// <param name="tree">Tree to copy.</param>
        /// <param name="node">
        /// Node of the original tree to collapse into a leaf. If no visited node equals it
        /// (for example when it is <c>null</c>), the whole tree is copied unchanged.
        /// </param>
        /// <returns>A new tree whose root is the copied root node.</returns>
        private static CARTTree PrunedClone(CARTTree tree, CARTNode node)
        {
            // Two queues advanced in lockstep: the i-th source node corresponds to the i-th copy.
            var sources = new Queue<CARTNode>();
            sources.Enqueue(tree._root);

            var cloneRoot = new CARTNode();
            var copies    = new Queue<CARTNode>();
            copies.Enqueue(cloneRoot);

            while (sources.Count > 0)
            {
                var source = sources.Dequeue();
                var copy   = copies.Dequeue();

                if (source == node)
                {
                    // Pruning point: the copy becomes a leaf predicting the mean target of the
                    // region, and the children of the source are not visited.
                    copy.output = source.region.Sum(p => p.vals.LastOrDefault()) / source.region.Count;
                    copy.region = source.region;
                    continue;
                }

                // Copy the node's own fields (split attribute, split value, output, region).
                copy.Update(source);

                if (source.left == null)
                {
                    // Leaf: nothing below it to copy.
                    continue;
                }

                // Internal node: create empty children now, fill them in when they are dequeued.
                copy.left  = new CARTNode() { parent = copy };
                copy.right = new CARTNode() { parent = copy };

                sources.Enqueue(source.left);
                sources.Enqueue(source.right);

                copies.Enqueue(copy.left);
                copies.Enqueue(copy.right);
            }

            return new CARTTree() { _root = cloneRoot };
        }
Ejemplo n.º 6
0
 /// <summary>
 /// Recursively descends the tree to obtain the model output for a point.
 /// </summary>
 /// <param name="point">Point to evaluate; its <c>vals</c> are indexed by the split attribute of each node.</param>
 /// <param name="node">Node at which the descent starts.</param>
 /// <returns>The <c>output</c> of the leaf reached by following the splits.</returns>
 private static double Judge(CARTPoint point, CARTNode node)
 {
     // A node without children is a leaf and carries the prediction. The test used to be
     // inverted (left != null), which returned the output of internal nodes and
     // dereferenced the null child of a leaf.
     if (node.left == null)
     {
         return node.output;
     }

     // Internal node: values below the split value go left, all others go right.
     return point.vals[node.j] < node.splitVal
         ? Judge(point, node.left)
         : Judge(point, node.right);
 }
Ejemplo n.º 7
0
        /// <summary>
        /// Collects all leaf nodes of the subtree rooted at the given node, in breadth-first order.
        /// </summary>
        /// <param name="node">Root of the subtree; if it has no left child it is itself the only leaf returned.</param>
        /// <returns>A new list containing the leaves of the subtree.</returns>
        public static List <CARTNode> GetLeaves(CARTNode node)
        {
            var leaves  = new List<CARTNode>();
            var pending = new Queue<CARTNode>();
            pending.Enqueue(node);

            while (pending.Count > 0)
            {
                var current = pending.Dequeue();

                if (current.left != null)
                {
                    // Internal node: keep descending through both children.
                    pending.Enqueue(current.left);
                    pending.Enqueue(current.right);
                    continue;
                }

                // No left child: treated as a leaf.
                leaves.Add(current);
            }

            return leaves;
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Recursively grows a regression tree below <paramref name="node"/>.
        /// </summary>
        /// <param name="node">
        /// Node to populate. It always receives <paramref name="points"/> as its region and then
        /// either an output value (leaf) or a split attribute, a split value and two children.
        /// </param>
        /// <param name="attrIdxs">Indexes of the input attributes that may still be used for splitting; not modified.</param>
        /// <param name="points">Sample points of this node's region; the last element of each point's <c>vals</c> is its target.</param>
        private static void Create(CARTNode node, List <int> attrIdxs, List <CARTPoint> points)
        {
            node.region = points;

            // The splitting strategy is expected to leave at least one point on each side.
            // A single point cannot be split further: it becomes a leaf with its own target.
            if (points.Count == 1)
            {
                node.output = points[0].vals.LastOrDefault();
                return;
            }

            // Mean target of the region: the output estimate if this node ends up as a leaf.
            var ave = points.Sum(p => p.vals.LastOrDefault()) / points.Count;

            // No attribute left to split on: leaf.
            if (attrIdxs.Count == 0)
            {
                node.output = ave;
                return;
            }

            // Total squared error of the region around its mean.
            double squareErr = 0;
            foreach (var p in points)
            {
                var diff = p.vals.LastOrDefault() - ave;
                squareErr += diff * diff;
            }

            // Stop splitting when the error is small relative to the mean (hard-coded factor).
            // The magnitude of the mean is used so the threshold is never negative, and "<="
            // lets a region with zero error stop even when the mean is zero; with the signed
            // mean and "<" this stop never fired for regions whose mean target is <= 0.
            if (squareErr <= System.Math.Abs(ave) / 1000)
            {
                node.output = ave;
                return;
            }

            // Pick the attribute whose best split has the smallest loss.
            TempResult minTemp = null;  // best split found so far
            int        minJ    = 0;     // attribute index of that split
            foreach (var j in attrIdxs)
            {
                var temp = CARTUtil.SquareError(j, points);
                if (minTemp == null || temp.lossVal < minTemp.lossVal)
                {
                    minTemp = temp;
                    minJ    = j;
                }
            }

            node.j        = minJ;
            node.splitVal = minTemp.splitVal;

            node.left = new CARTNode()
            {
                parent = node
            };
            node.right = new CARTNode()
            {
                parent = node
            };

            // The chosen attribute is not reused below this node. Both children get the same
            // list instance, which is safe because this method only reads attrIdxs.
            var remainingAttrIdxs = attrIdxs.Where(idx => idx != minJ).ToList();

            // Recursively build the left and right children.
            Create(node.left, remainingAttrIdxs, minTemp.region1);
            Create(node.right, remainingAttrIdxs, minTemp.region2);
        }