/// <summary>
/// Produces the next tree of the pruning sequence: finds the node with the
/// smallest alpha among the nodes collected by AccessNonLeaf and returns a
/// copy of the tree in which that node has been collapsed into a leaf.
/// </summary>
/// <param name="tree">Tree to derive the pruned copy from; pruning is applied to a copy made by PrunedClone.</param>
/// <returns>A new tree pruned at the node with the smallest alpha.</returns>
private static CARTTree GetSubTree(CARTTree tree)
{
    // Candidate nodes (internal nodes, per the original comments) gathered from the root.
    var candidates = new Stack<CARTNode>();
    AccessNonLeaf(tree._root, candidates);

    CARTNode weakestNode = null;            // node holding the smallest alpha so far
    double smallestAlpha = double.MaxValue; // smallest alpha so far

    while (candidates.Count > 0)
    {
        var current = candidates.Pop();

        // All leaves of the subtree rooted at the current node.
        var subtreeLeaves = CARTUtil.GetLeaves(current);

        // Squared error when the current node is treated as a single node.
        var nodeError = CARTUtil.GetVar(current);

        // Squared error of the subtree rooted at the current node: sum over its leaves.
        double subtreeError = 0;
        foreach (var leaf in subtreeLeaves)
        {
            subtreeError += CARTUtil.GetVar(leaf);
        }

        // alpha = error increase caused by collapsing the subtree, per leaf removed.
        var errorIncrease = nodeError - subtreeError;
        var alpha = errorIncrease / (subtreeLeaves.Count - 1);

        if (alpha < smallestAlpha)
        {
            smallestAlpha = alpha;
            weakestNode = current;
        }
    }

    // Prune at the node with the smallest alpha. Because a whole sequence of
    // subtrees is needed, the original tree is left alone and a pruned copy is returned.
    return PrunedClone(tree, weakestNode);
}
/// <summary>
/// Builds a CART decision tree from sample data: grows a tree from
/// <c>data.trainSet</c> and then returns the result of pruning it.
/// </summary>
/// <param name="data">Sample data providing the training set, the attribute count <c>J</c>, and the data used by pruning.</param>
/// <returns>The tree returned by Prune for the fully grown tree.</returns>
public static CARTTree Create(CARTData data)
{
    var rootNode = new CARTNode();
    var grownTree = new CARTTree() { _root = rootNode };

    // Indexes of the input attributes: 0 .. J-2.
    var inputAttributeIndexes = Enumerable.Range(0, data.J - 1).ToList();

    // Grow the tree in place, starting from the root.
    Create(rootNode, inputAttributeIndexes, data.trainSet);

    return Prune(grownTree, data);
}
/// <summary>
/// Tells whether the tree consists of exactly three nodes, i.e. a root plus two child nodes.
/// </summary>
/// <remarks>
/// The root's children are dereferenced without a null check, so the root
/// must have both children (a missing left child always throws
/// <see cref="System.NullReferenceException"/>).
/// </remarks>
/// <param name="tree">Tree to inspect.</param>
/// <returns><c>true</c> when neither child of the root has a left child; otherwise <c>false</c>.</returns>
private static bool Is_ThreeNode_Tree(CARTTree tree)
{
    var root = tree._root;
    var leftChild = root.left;
    var rightChild = root.right;

    // A child that itself has a left child means the tree is deeper than three nodes.
    bool hasGrandchildren = leftChild.left != null || rightChild.left != null;
    return !hasGrandchildren;
}
/// <summary>
/// Copies <paramref name="tree"/> breadth-first into a new tree in which
/// <paramref name="node"/> is turned into a leaf: its descendants are not
/// copied, and its output becomes the average of the last value of every
/// point in its region.
/// </summary>
/// <param name="tree">Tree to copy.</param>
/// <param name="node">Node of <paramref name="tree"/> to collapse into a leaf in the copy.</param>
/// <returns>A new tree whose nodes are fresh objects; the collapsed node shares its region reference with the original node.</returns>
private static CARTTree PrunedClone(CARTTree tree, CARTNode node)
{
    // Two queues advanced in lock-step: the n-th entry of one is the
    // original node whose copy is the n-th entry of the other.
    var sourceQueue = new Queue<CARTNode>();
    sourceQueue.Enqueue(tree._root);

    var cloneRoot = new CARTNode();
    var cloneQueue = new Queue<CARTNode>();
    cloneQueue.Enqueue(cloneRoot);

    while (sourceQueue.Count > 0)
    {
        var source = sourceQueue.Dequeue();
        var clone = cloneQueue.Dequeue();

        if (source == node)
        {
            // The node to prune becomes a leaf: set only the fields a leaf
            // needs and do not descend into its children.
            clone.output = source.region.Sum(p => p.vals.LastOrDefault()) / source.region.Count;
            clone.region = source.region;
            continue;
        }

        // Copy the node's own fields.
        clone.Update(source);

        if (source.left == null)
        {
            // Leaf: nothing more to copy.
            continue;
        }

        // Internal node: create both children of the copy and schedule each
        // original child together with its copy.
        clone.left = new CARTNode() { parent = clone };
        clone.right = new CARTNode() { parent = clone };

        sourceQueue.Enqueue(source.left);
        cloneQueue.Enqueue(clone.left);

        sourceQueue.Enqueue(source.right);
        cloneQueue.Enqueue(clone.right);
    }

    return new CARTTree() { _root = cloneRoot };
}
/// <summary>
/// Prunes a fully grown tree: builds a sequence of successively pruned
/// subtrees and returns the one with the smallest sum of squared errors on
/// the validation set.
/// </summary>
/// <param name="tree">Fully grown decision tree.</param>
/// <param name="data">Provides the validation set (<c>verifySet</c>).</param>
/// <returns>
/// The subtree of the sequence with the smallest validation error; on ties
/// the earlier (less pruned) tree wins. <c>null</c> if no subtree has an
/// error below <see cref="double.MaxValue"/> (e.g. all errors are NaN).
/// </returns>
private static CARTTree Prune(CARTTree tree, CARTData data)
{
    // Build the subtree sequence, starting with the unpruned tree.
    var list = new List<CARTTree>() { tree };
    var curTree = tree;

    // Stop once only a root with two leaves is left. A root without
    // children cannot be pruned any further either, and Is_ThreeNode_Tree
    // dereferences the root's children, so that case is checked first.
    while (curTree._root.left != null && !Is_ThreeNode_Tree(curTree))
    {
        curTree = GetSubTree(curTree);
        list.Add(curTree);
    }

    // Pick the subtree with the smallest squared error on the validation set.
    double min_err = double.MaxValue;
    CARTTree best_tree = null;
    for (int i = 0; i < list.Count; i++)
    {
        var sub_tree = list[i];
        double squareErrSum = 0;
        for (int k = 0; k < data.verifySet.Count; k++)
        {
            var point = data.verifySet[k];

            // Judge returns the model's prediction, not an error, so the
            // error is formed here. The target is taken to be the last entry
            // of vals, matching how PrunedClone computes leaf outputs.
            double residual = point.vals.LastOrDefault() - Judge(point, sub_tree);
            squareErrSum += residual * residual;
        }

        if (min_err > squareErrSum)
        {
            min_err = squareErrSum;
            best_tree = sub_tree;
        }
    }

    return best_tree;
}
/// <summary>
/// Decision: computes the model output for a sample by evaluating it
/// against the tree, starting at the tree's root node.
/// </summary>
/// <param name="point">Sample point.</param>
/// <param name="tree">Decision tree.</param>
/// <returns>The value returned by the node-level <c>Judge</c> overload for the root.</returns>
public static double Judge(CARTPoint point, CARTTree tree)
{
    var root = tree._root;
    return Judge(point, root);
}