/// <summary>
/// Entry point for choosing a split of <paramref name="node"/>.
/// If the stop test succeeds the node is returned as-is; otherwise the
/// candidate attributes are scanned. The scan currently performs no work for
/// any attribute, so the non-stop path always returns <c>null</c>.
/// </summary>
/// <param name="node">Node whose subtree is being considered for splitting.</param>
/// <param name="nums">Not read by the current implementation.</param>
/// <param name="isUsed">
/// Per-attribute flags; a value of 1 makes the scan skip that attribute.
/// The last element is not visited by the scan.
/// </param>
/// <returns><paramref name="node"/> when splitting stops, otherwise <c>null</c>.</returns>
public static Node1 FindBestSplit(Node1 node, List<int> nums, int[] isUsed)
{
    // Information entropy of the subtree rooted at this node.
    double nodeEntropy = CalEntropy(node.classCount, node.count);

    bool shouldStop = IfStop(node, nodeEntropy, isUsed);
    if (shouldStop)
    {
        // Splitting stops here: IfStop has already assigned the fields that
        // mark the node as finished, so hand the same node back.
        return node;
    }

    var splitInfo = new SplitInfo1();
    var totalCount = node.count;

    int featureSlots = isUsed.Length - 1;
    for (int featureIdx = 0; featureIdx < featureSlots; featureIdx++)
    {
        // Only attributes not yet used (flag != 1) are inspected.
        if (isUsed[featureIdx] != 1 && types[featureIdx] == 0)
        {
            // Discrete-valued attribute: nothing is implemented for this case yet.
        }
    }

    return null;
}
/// <summary>
/// Decides whether splitting should stop at <paramref name="node"/>.
/// When it should, the node is marked as a result node before returning.
/// </summary>
/// <param name="node">
/// Node under test; its <c>feature_type</c>, <c>features</c>, <c>leafError</c>
/// and <c>leafCount</c> fields are overwritten when the method returns <c>true</c>.
/// </param>
/// <param name="entropy">Entropy of the node; a value of exactly 0 stops splitting.</param>
/// <param name="isUsed">
/// Per-attribute usage flags (0 = still available). The last element stands
/// for the class label rather than an input attribute and is not examined.
/// </param>
/// <returns><c>true</c> if splitting stops at this node, otherwise <c>false</c>.</returns>
public static bool IfStop(Node1 node, double entropy, int[] isUsed)
{
    var classCounts = node.classCount;
    var total = node.count;
    int depth = node.deep;

    // Assume every input attribute has been consumed until an unused one (flag 0) is found.
    // The final slot is the class label, not an input attribute, so it is left out.
    bool allFeaturesUsed = true;
    int featureSlots = isUsed.Length - 1;
    for (int idx = 0; idx < featureSlots && allFeaturesUsed; idx++)
    {
        allFeaturesUsed = isUsed[idx] != 0;
    }

    bool mustStop = depth >= maxDeep
                    || entropy == 0
                    || total < LimitCount
                    || allFeaturesUsed;
    if (!mustStop)
    {
        return false;
    }

    // node.result is used as an index into classCount (presumably the
    // majority class - confirm against where result is assigned); the stored
    // label is that index shifted by one.
    int resultIndex = node.result;
    int label = resultIndex + 1;

    node.feature_type = "result";
    node.features = new List<string>() { label.ToString() };
    // Samples at this node that fall outside the chosen class.
    node.leafError = total - (int)classCounts[resultIndex];
    node.leafCount = 1;
    return true;
}