示例#1
0
        /// <summary>
        /// Builds a CART decision tree from sample data: grows a tree from
        /// <c>data.trainSet</c> and then hands it to <c>Prune</c>.
        /// </summary>
        /// <param name="data">Sample data; this method reads <c>J</c> and <c>trainSet</c> and passes the whole object on to <c>Prune</c>.</param>
        /// <returns>The tree returned by <c>Prune</c>.</returns>
        public static CARTTree Create(CARTData data)
        {
            // Start from an empty tree that only has a root node.
            var grownTree = new CARTTree
            {
                _root = new CARTNode()
            };

            // Input attribute indices: data.J - 1 consecutive values starting at 0.
            // NOTE(review): presumably the remaining column holds the target value - confirm against CARTData.
            var inputAttrIndexes = Enumerable.Range(0, data.J - 1).ToList();

            // Grow the tree from the root using the training set.
            Create(grownTree._root, inputAttrIndexes, data.trainSet);

            var prunedTree = Prune(grownTree, data);
            return prunedTree;
        }
示例#2
0
        /// <summary>
        /// Pruning: builds a sequence of candidate subtrees from a fully grown tree and
        /// picks the candidate with the smallest accumulated <c>Judge</c> value over the
        /// validation set.
        /// </summary>
        /// <param name="tree">The fully grown decision tree; it is always the first candidate.</param>
        /// <param name="data">Supplies the validation set (<c>verifySet</c>).</param>
        /// <returns>
        /// The candidate with the lowest accumulated error; on ties the earliest candidate in the
        /// sequence wins. Returns <c>null</c> if no candidate's accumulated error is strictly
        /// below <see cref="double.MaxValue"/>.
        /// </returns>
        private static CARTTree Prune(CARTTree tree, CARTData data)
        {
            // Step 1: collect the subtree sequence. Each pass records the current tree and
            // stops once Is_ThreeNode_Tree accepts it; otherwise GetSubTree yields the next one.
            var candidates = new List<CARTTree>();
            for (var current = tree; ; current = GetSubTree(current))
            {
                candidates.Add(current);
                if (Is_ThreeNode_Tree(current))
                {
                    break;
                }
            }

            // Step 2: score every candidate on the validation set and keep the best one.
            CARTTree winner = null;                 // candidate with the lowest error so far
            double lowestError = double.MaxValue;   // its accumulated error

            foreach (var candidate in candidates)
            {
                // Accumulate Judge over all validation samples
                // (the squared error, according to the original comments).
                double accumulatedError = 0;
                for (int sampleIdx = 0; sampleIdx < data.verifySet.Count; sampleIdx++)
                {
                    accumulatedError += Judge(data.verifySet[sampleIdx], candidate);
                }

                // Strict comparison: a later candidate must be strictly better to replace the winner.
                if (accumulatedError < lowestError)
                {
                    lowestError = accumulatedError;
                    winner = candidate;
                }
            }

            return winner;
        }