/// <summary>
/// Produces the next tree in the pruning sequence: finds the internal node with the
/// smallest alpha and returns a pruned copy of <paramref name="tree"/> at that node.
/// The input tree itself is not pruned here; pruning is delegated to PrunedClone.
/// </summary>
/// <param name="tree">Tree whose internal nodes are evaluated as pruning candidates.</param>
/// <returns>The result of PrunedClone for the node with the smallest alpha.</returns>
private static CARTTree GetSubTree(CARTTree tree)
{
    // Collect the candidate nodes (filled in by AccessNonLeaf, starting from the root).
    var candidates = new Stack<CARTNode>();
    AccessNonLeaf(tree._root, candidates);

    double smallestAlpha = double.MaxValue; // smallest alpha seen so far
    CARTNode weakestNode = null;            // internal node that produced it

    while (candidates.Count > 0)
    {
        var current = candidates.Pop();

        // All leaves of the subtree rooted at the current node.
        var subtreeLeaves = CARTUtil.GetLeaves(current);

        // Squared error when the current node is treated as a single node.
        var collapsedError = CARTUtil.GetVar(current);

        // Squared error of the subtree: accumulated over its leaves, in order.
        double subtreeError = 0;
        foreach (var leaf in subtreeLeaves)
        {
            subtreeError += CARTUtil.GetVar(leaf);
        }

        // Error increase per leaf removed if the subtree is collapsed into this node.
        var alpha = (collapsedError - subtreeError) / (subtreeLeaves.Count - 1);

        if (alpha < smallestAlpha)
        {
            smallestAlpha = alpha;
            weakestNode = current;
        }
    }

    // Prune at the node with the smallest alpha. Because a whole sequence of subtrees
    // is generated, the original tree is left intact and a clone is pruned instead.
    return PrunedClone(tree, weakestNode);
}
/// <summary>
/// Recursively builds the decision (regression) tree below <paramref name="node"/>.
/// The node becomes a leaf when its region holds a single point, when no attribute is
/// left to split on, or when the region's squared error is within the stop threshold;
/// otherwise it is split on the attribute with the smallest loss reported by
/// CARTUtil.SquareError and both children are built recursively.
/// </summary>
/// <param name="node">Node to populate; its region is set to <paramref name="points"/>.</param>
/// <param name="attrIdxs">Indexes of the input attributes still available for splitting.</param>
/// <param name="points">Sample points in this node's region; the last entry of each point's vals is used as its output value.</param>
private static void Create(CARTNode node, List<int> attrIdxs, List<CARTPoint> points)
{
    // Splitting stops when the region's squared error is at most |mean| / this divisor.
    const int StopThresholdDivisor = 1000;

    node.region = points;

    // Under the CART split strategy a region holds at least one sample point.
    if (points.Count == 1)
    {
        // A single point cannot be split further: it becomes a leaf with its own output.
        node.output = points[0].vals.LastOrDefault();
        return;
    }

    var ave = points.Sum(p => p.vals.LastOrDefault()) / points.Count;

    // No attribute left to split on: leaf whose output is the mean of the region's outputs.
    if (attrIdxs.Count == 0)
    {
        node.output = ave;
        return;
    }

    // Total squared error of the region around its mean.
    double squareErr = 0;
    foreach (var p in points)
    {
        var deviation = p.vals.LastOrDefault() - ave;
        squareErr += deviation * deviation;
    }

    // Stop when the error is negligible relative to the magnitude of the mean.
    // The absolute value and the inclusive comparison matter: with a signed mean and a
    // strict '<', a zero or negative mean gives a non-positive threshold that can never
    // be met, so even a region with identical outputs would keep being split.
    if (squareErr <= System.Math.Abs(ave) / StopThresholdDivisor)
    {
        node.output = ave;
        return;
    }

    TempResult minTemp = null; // best (smallest-loss) split found so far
    int minJ = 0;              // attribute index of that split
    foreach (var j in attrIdxs)
    {
        var temp = CARTUtil.SquareError(j, points);
        if (minTemp == null || temp.lossVal < minTemp.lossVal)
        {
            minTemp = temp;
            minJ = j;
        }
    }

    // Record the best split on this node and attach the two children.
    node.j = minJ;
    node.splitVal = minTemp.splitVal;
    node.left = new CARTNode() { parent = node };
    node.right = new CARTNode() { parent = node };

    // The chosen attribute is not reused below this node. Create only reads the
    // attribute list, so both children can share the same instance.
    var remainingAttrIdxs = attrIdxs.Where(idx => idx != minJ).ToList();

    // Recursively build the left and right children on their respective regions.
    Create(node.left, remainingAttrIdxs, minTemp.region1);
    Create(node.right, remainingAttrIdxs, minTemp.region2);
}