/// <summary>
/// Builds a CART decision tree from the sample data: grows a tree on
/// <c>data.trainSet</c> and then hands it to <c>Prune</c>.
/// </summary>
/// <param name="data">Sample data; supplies the attribute count <c>J</c>, the training set, and whatever <c>Prune</c> reads from it.</param>
/// <returns>The tree returned by <c>Prune</c>.</returns>
public static CARTTree Create(CARTData data)
{
    var grownTree = new CARTTree { _root = new CARTNode() };

    // Input attribute index list: 0 .. J-2 (J-1 entries).
    // NOTE(review): presumably the remaining column (index J-1) is the output value - confirm.
    var inputAttributeIndexes = Enumerable.Range(0, data.J - 1).ToList();

    // Grow the tree starting from the root using the training set.
    Create(grownTree._root, inputAttributeIndexes, data.trainSet);

    CARTTree prunedTree = Prune(grownTree, data);
    return prunedTree;
}
/// <summary>
/// Pruning: builds a sequence of candidate subtrees starting from the given tree and
/// returns the candidate with the smallest accumulated error on <c>data.verifySet</c>.
/// </summary>
/// <param name="tree">The fully grown decision tree.</param>
/// <param name="data">Provides the validation data set (<c>verifySet</c>).</param>
/// <returns>
/// The candidate with the smallest summed error; on ties the earliest candidate in the
/// sequence wins. Falls back to <paramref name="tree"/> when no candidate yields a
/// comparable error (e.g. every sum is NaN or infinite), so the result is never
/// <c>null</c> for a non-null <paramref name="tree"/>.
/// </returns>
private static CARTTree Prune(CARTTree tree, CARTData data)
{
    // Build the candidate subtree sequence: start with the full tree and keep
    // appending GetSubTree(current) until Is_ThreeNode_Tree reports true.
    var list = new List<CARTTree>() { tree };
    var curTree = tree;
    while (!Is_ThreeNode_Tree(curTree))
    {
        curTree = GetSubTree(curTree);
        list.Add(curTree);
    }

    // Use the validation set to pick the final subtree.
    // Seed with the full tree and +Infinity: the original seeded with null and
    // double.MaxValue, which returned null whenever no error sum compared strictly
    // smaller (NaN, +Infinity, or exactly double.MaxValue).
    double min_err = double.PositiveInfinity;
    CARTTree best_tree = tree;

    foreach (var sub_tree in list)
    {
        // Accumulate Judge's per-sample result over the validation set
        // (treated as a squared error by the surrounding code).
        double squareErrSum = 0;
        for (int k = 0; k < data.verifySet.Count; k++)
        {
            squareErrSum += Judge(data.verifySet[k], sub_tree);
        }

        // Strict comparison keeps the earliest candidate on ties.
        if (squareErrSum < min_err)
        {
            min_err = squareErrSum;
            best_tree = sub_tree;
        }
    }

    return best_tree;
}