/// <summary>
/// Copies the per-node data fields of <paramref name="node"/> into this node:
/// the sample region, the output value, the split value and the split attribute index.
/// Tree links (left / right / parent) are not touched.
/// </summary>
/// <param name="node">The node whose field values are copied.</param>
public void Update(CARTNode node)
{
    region = node.region;
    output = node.output;
    splitVal = node.splitVal;
    j = node.j;
}
/// <summary>
/// Produces the next tree of a pruning sequence: among the candidate internal nodes
/// collected by <c>AccessNonLeaf</c>, finds the one with the smallest alpha and returns
/// a copy of <paramref name="tree"/> in which that node has been collapsed into a leaf.
/// The input tree itself is not modified, so the caller can keep the whole sequence.
/// </summary>
/// <param name="tree">The tree to derive the pruned copy from.</param>
/// <returns>A cloned tree pruned at the node with the smallest alpha.</returns>
private static CARTTree GetSubTree(CARTTree tree)
{
    var candidates = new Stack<CARTNode>();
    AccessNonLeaf(tree._root, candidates);

    CARTNode weakestNode = null;            // internal node that owns the smallest alpha
    double smallestAlpha = double.MaxValue; // smallest alpha seen so far

    while (candidates.Count > 0)
    {
        var current = candidates.Pop();

        // All leaves of the subtree rooted at the current internal node.
        var leaves = CARTUtil.GetLeaves(current);

        // Squared error if the current node were a single node.
        var collapsedError = CARTUtil.GetVar(current);

        // Squared error of the subtree: the sum over its leaves.
        double subtreeError = 0;
        foreach (var leaf in leaves)
        {
            subtreeError += CARTUtil.GetVar(leaf);
        }

        // Error increase per removed leaf when collapsing this subtree.
        var alpha = (collapsedError - subtreeError) / (leaves.Count - 1);
        if (alpha < smallestAlpha)
        {
            smallestAlpha = alpha;
            weakestNode = current;
        }
    }

    // Prune on a copy; weakestNode stays null when no candidate was collected.
    return PrunedClone(tree, weakestNode);
}
/// <summary>
/// Walks the subtree rooted at <paramref name="node"/> in pre-order and pushes every
/// internal node (a node that has children) onto <paramref name="stack"/>, except a node
/// whose <c>parent</c> is null (the root), which is traversed but never pushed.
/// </summary>
/// <remarks>
/// The traversal must descend through the root even though the root itself is not a
/// candidate; gating the recursion on <c>parent != null</c> would stop at the root and
/// leave the stack empty.
/// </remarks>
/// <param name="node">Root of the subtree to scan; a null node is ignored.</param>
/// <param name="stack">Receives the internal, non-root nodes that were found.</param>
private static void AccessNonLeaf(CARTNode node, Stack<CARTNode> stack)
{
    // A missing node or a leaf contributes nothing and has nothing below it.
    if (node == null || node.left == null)
    {
        return;
    }

    // Only non-root internal nodes are recorded.
    if (node.parent != null)
    {
        stack.Push(node);
    }

    // Always keep descending, including from the root.
    AccessNonLeaf(node.left, stack);
    AccessNonLeaf(node.right, stack);
}
/// <summary>
/// Computes the sum of squared deviations of the target values in the node's region,
/// used as the prediction error for regression. The target of a sample is the last
/// element of its <c>vals</c>.
/// </summary>
/// <param name="node">
/// The node to evaluate. For a node with children the deviations are taken from the mean
/// target of its region; for a leaf they are taken from the leaf's <c>output</c>.
/// </param>
/// <returns>The sum of squared deviations over all samples in the node's region.</returns>
public static double GetVar(CARTNode node)
{
    // Reference value: stored output for a leaf, region mean for an internal node.
    double center = node.left == null
        ? node.output
        : node.region.Sum(sample => sample.vals.LastOrDefault()) / node.region.Count;

    return node.region.Sum(sample => Math.Pow(sample.vals.LastOrDefault() - center, 2));
}
/// <summary>
/// Creates a copy of <paramref name="tree"/> in which <paramref name="node"/> is turned
/// into a leaf. The original tree is walked breadth-first while the copy is built in
/// lock-step; the source tree's nodes are not modified, but region lists are shared by
/// reference between the original and the copy.
/// </summary>
/// <param name="tree">The tree to copy.</param>
/// <param name="node">
/// The internal node of <paramref name="tree"/> to collapse. If no visited node equals it
/// (for example when it is null), the result is an unpruned copy.
/// </param>
/// <returns>A new tree whose root is the copied (and pruned) structure.</returns>
private static CARTTree PrunedClone(CARTTree tree, CARTNode node)
{
    var copyRoot = new CARTNode();               // root of the new tree

    var pendingSource = new Queue<CARTNode>();   // nodes of the original tree still to visit
    var pendingCopy = new Queue<CARTNode>();     // their counterparts in the copy, same order
    pendingSource.Enqueue(tree._root);
    pendingCopy.Enqueue(copyRoot);

    while (pendingSource.Count > 0)
    {
        var source = pendingSource.Dequeue();
        var copy = pendingCopy.Dequeue();

        if (source == node)
        {
            // The node to prune becomes a leaf: only the leaf-relevant fields are set
            // and its children are not copied.
            copy.output = source.region.Sum(p => p.vals.LastOrDefault()) / source.region.Count;
            copy.region = source.region;
            continue;
        }

        // Copy the node's own data fields.
        copy.Update(source);

        if (source.left == null)
        {
            // Leaf: nothing below it to copy.
            continue;
        }

        // Internal node: create both children in the copy and schedule the pairs.
        copy.left = new CARTNode() { parent = copy };
        copy.right = new CARTNode() { parent = copy };
        pendingSource.Enqueue(source.left);
        pendingSource.Enqueue(source.right);
        pendingCopy.Enqueue(copy.left);
        pendingCopy.Enqueue(copy.right);
    }

    return new CARTTree() { _root = copyRoot };
}
/// <summary>
/// Recursively walks down the tree from <paramref name="node"/> to obtain the model
/// output for <paramref name="point"/>.
/// </summary>
/// <param name="point">The sample to evaluate; its <c>vals</c> are indexed by attribute.</param>
/// <param name="node">The node at which the descent starts.</param>
/// <returns>The <c>output</c> of the leaf the sample ends up in.</returns>
private static double Judge(CARTPoint point, CARTNode node)
{
    // A node without children is a leaf and carries the prediction.
    // (Testing for "has children" here would return from internal nodes and
    // dereference the null children of leaves.)
    if (node.left == null)
    {
        return node.output;
    }

    // Internal node: values below the split value go left, all others go right.
    var next = point.vals[node.j] < node.splitVal ? node.left : node.right;
    return Judge(point, next);
}
/// <summary>
/// Collects all leaves of the subtree rooted at the given node.
/// </summary>
/// <param name="node">Root of the subtree; if it has no children it is itself the only leaf.</param>
/// <returns>The leaves, in breadth-first (level by level, left before right) order.</returns>
public static List<CARTNode> GetLeaves(CARTNode node)
{
    var leaves = new List<CARTNode>();
    var pending = new Queue<CARTNode>();
    pending.Enqueue(node);

    while (pending.Count > 0)
    {
        var current = pending.Dequeue();

        if (current.left != null)
        {
            // Internal node: visit both children later.
            pending.Enqueue(current.left);
            pending.Enqueue(current.right);
            continue;
        }

        leaves.Add(current);
    }

    return leaves;
}
/// <summary>
/// Recursively grows the regression tree below <paramref name="node"/>.
/// The node becomes a leaf when its region holds a single sample, when no attribute is
/// left to split on, or when the squared error of the region is within a small threshold
/// relative to the magnitude of the mean target; otherwise it is split on the attribute
/// with the smallest loss and both children are built recursively.
/// </summary>
/// <param name="node">The node to fill in; its <c>region</c> is set to <paramref name="points"/>.</param>
/// <param name="attrIdxs">Indices of the input attributes still available for splitting. Only read.</param>
/// <param name="points">
/// The samples that fall into this node. The target of a sample is the last element of its
/// <c>vals</c>. The split strategy is expected to never produce an empty region.
/// </param>
private static void Create(CARTNode node, List<int> attrIdxs, List<CARTPoint> points)
{
    node.region = points;

    // A single sample cannot be split further: the leaf predicts that sample's target.
    if (points.Count == 1)
    {
        node.output = points[0].vals.LastOrDefault();
        return;
    }

    // The mean target is the output estimate for any leaf created below.
    var ave = points.Sum(p => p.vals.LastOrDefault()) / points.Count;

    // No attribute left to split on: make a leaf.
    if (attrIdxs.Count == 0)
    {
        node.output = ave;
        return;
    }

    // Total squared error of the region around its mean.
    double squareErr = 0;
    foreach (var p in points)
    {
        var diff = p.vals.LastOrDefault() - ave;
        squareErr += diff * diff;
    }

    // Stop splitting when the error is negligible. The threshold is hard-coded for
    // simplicity; it is taken from the magnitude of the mean (and compared inclusively)
    // so that it also works when the mean target is zero or negative.
    if (squareErr <= System.Math.Abs(ave) / 1000)
    {
        node.output = ave;
        return;
    }

    // Pick the attribute whose split result has the smallest loss value.
    TempResult minTemp = null; // split result with the smallest loss so far
    int minJ = 0;              // attribute index that produced it
    foreach (var j in attrIdxs)
    {
        var temp = CARTUtil.SquareError(j, points);
        if (minTemp == null || temp.lossVal < minTemp.lossVal)
        {
            minTemp = temp;
            minJ = j;
        }
    }

    node.j = minJ;
    node.splitVal = minTemp.splitVal;
    node.left = new CARTNode() { parent = node };
    node.right = new CARTNode() { parent = node };

    // The chosen attribute is not reused further down. Both children receive the same
    // list, which is safe because this method only reads it.
    var childAttrIdxs = attrIdxs.Where(idx => idx != minJ).ToList();

    Create(node.left, childAttrIdxs, minTemp.region1);
    Create(node.right, childAttrIdxs, minTemp.region2);
}