/// <summary>
/// Searches every input attribute and every boundary between consecutive rows (after sorting the
/// table by that attribute) for the split with the highest score, where the score is
/// <c>EntrophyCalculator.Info(table) - EntrophyCalculator.EntrophyCalc(left, right)</c>.
/// </summary>
/// <param name="education_table">Table to analyse. It is re-sorted in place once per attribute.</param>
/// <param name="index_of_parametr">Receives the index of the winning attribute; reset to 0 on entry.</param>
/// <param name="best_value_for_split">
/// Receives the winning threshold formatted with the parameterless <c>double.ToString()</c>;
/// reset to an empty string on entry and left empty when no candidate is evaluated.
/// </param>
/// <param name="inputs">Number of leading attribute columns to consider.</param>
/// <param name="outputs">Not used by this method.</param>
public static void FindBetterParameter(LearningTable education_table, ref int index_of_parametr, ref string best_value_for_split, int inputs, int outputs)
{
    index_of_parametr = 0;
    best_value_for_split = "";

    // Starting score that the first evaluated candidate is expected to beat.
    double bestScore = -100000;

    for (int attribute = 0; attribute < inputs; attribute++)
    {
        education_table.QuickSortByParam(0, education_table.LearningData.Length - 1, attribute);

        // "boundary" is the index of the upper row of each adjacent pair in the sorted table.
        for (int boundary = 1; boundary < education_table.LearningData.Length; boundary++)
        {
            // Candidate threshold: midpoint between the two neighbouring attribute values.
            double threshold = (education_table.LearningData[boundary - 1][attribute] + education_table.LearningData[boundary][attribute]) / 2.0;

            // Class info for the ranges (0, boundary) and (boundary, LearningClasses.Length);
            // presumably per-class row counts on each side of the boundary - confirm against ClassInfoInit.
            LearningClassInfo[] below = education_table.ClassInfoInit(education_table, 0, boundary);
            LearningClassInfo[] above = education_table.ClassInfoInit(education_table, boundary, education_table.LearningClasses.Length);

            double score = EntrophyCalculator.Info(education_table) - EntrophyCalculator.EntrophyCalc(below, above);

            // Strict comparison: on a tie the earliest candidate is kept.
            if (score > bestScore)
            {
                bestScore = score;
                index_of_parametr = attribute;
                best_value_for_split = threshold.ToString();
            }

            // Zero the counters of both class-info arrays before the next candidate is evaluated.
            for (int c = 0; c < below.Length; c++)
            {
                below[c].number_of_checked = 0;
                above[c].number_of_checked = 0;
            }
        }
    }
}
/// <summary>
/// Computes the base-2 entropy of the class distribution of <paramref name="T"/>: the negated sum of
/// <c>p * log2(p)</c> over all class-info entries, where <c>p</c> is the entry's
/// <c>number_of_checked</c> divided by <c>T.LearningClasses.Length</c>.
/// </summary>
/// <param name="T">Table whose class distribution is measured.</param>
/// <returns>
/// The entropy value. Entries whose proportion is not strictly positive contribute nothing, so a
/// table with no rows yields zero instead of NaN.
/// </returns>
public static double Info(LearningTable T)
{
    LearningClassInfo[] classInfo = T.ClassInfoInit(T);
    double info = 0;

    for (int i = 0; i < classInfo.Length; i++)
    {
        double P = (double)classInfo[i].number_of_checked / T.LearningClasses.Length;

        // "P > 0" instead of "P != 0": for an empty table the division is 0 / 0 = NaN, and NaN
        // compares unequal to zero, so the old check let NaN poison the whole sum. Every ordered
        // comparison with NaN is false, so it is skipped here just like a zero proportion.
        if (P > 0)
        {
            info += P * Math.Log(P, 2);
        }
    }

    return -info;
}
/// <summary>
/// Recursively grows a decision tree below <paramref name="tree_node"/> from the rows of
/// <paramref name="education_table"/>.
/// </summary>
/// <remarks>
/// When fewer than two classes occur in the table the node becomes a leaf carrying the class that
/// occurs (if any). Otherwise the best split is looked up with
/// <c>TreeBuilder.FindBetterParameter</c>, the table is divided with <c>SplitLearningTable</c>, and
/// both halves are processed recursively. If no split candidate exists, or the split leaves one
/// side without rows, the node becomes a leaf labelled with the most frequent class; recursing on
/// an unchanged table would otherwise never terminate.
/// </remarks>
/// <param name="education_table">Rows to learn from at this node.</param>
/// <param name="tree_node">Node to fill in; it is mutated in place.</param>
/// <param name="inputs">Number of input attributes, forwarded to the split search.</param>
/// <param name="outputs">Forwarded unchanged to the split search and the recursive calls.</param>
public void LearningC4_5(LearningTable education_table, Node tree_node, int inputs, int outputs)
{
    LearningClassInfo[] thisClassInfo = education_table.ClassInfoInit(education_table, 0, education_table.LearningClasses.Length);

    // Count the classes that actually occur and remember the most frequent one right away:
    // FindBetterParameter zeroes number_of_checked on the class-info entries it works with, and
    // it is not visible from here whether those entries are shared with this array.
    int presentClasses = 0;
    int majorityIndex = -1;
    for (int i = 0; i < thisClassInfo.Length; i++)
    {
        if (thisClassInfo[i].number_of_checked >= 1)
        {
            presentClasses++;
            if (majorityIndex < 0 || thisClassInfo[i].number_of_checked > thisClassInfo[majorityIndex].number_of_checked)
            {
                majorityIndex = i;
            }
        }
    }

    if (presentClasses < 2)
    {
        // Pure (or empty) table: a leaf labelled with whichever class is present.
        tree_node.is_leaf = true;
        tree_node.rule = new Rule();
        foreach (LearningClassInfo clinf in thisClassInfo)
        {
            if (clinf.number_of_checked > 0)
            {
                tree_node.rule.value = clinf.class_name;
            }
        }
        return;
    }

    int index_of_parametr = 0;
    string best_value_for_split = "";
    TreeBuilder.FindBetterParameter(education_table, ref index_of_parametr, ref best_value_for_split, inputs, outputs);

    // An empty string means the search evaluated no candidate (for example inputs is 0);
    // parsing it would throw FormatException.
    if (string.IsNullOrEmpty(best_value_for_split))
    {
        MakeLeafForClass(tree_node, thisClassInfo[majorityIndex]);
        return;
    }

    Rule splitRule = new Rule();
    splitRule.index_of_param = index_of_parametr;
    // Parsed with the current culture, matching the parameterless ToString() that produced it.
    splitRule.value = (float)Convert.ToDouble(best_value_for_split);

    LearningTable left_table = new LearningTable();
    LearningTable right_table = new LearningTable();
    left_table.SplitLearningTable(education_table, splitRule, ref left_table, ref right_table);

    // A split that leaves one side empty hands the whole table to the other child, which would
    // pick the same split again and recurse without end. Stop with a majority-class leaf instead.
    if (!HasRows(left_table) || !HasRows(right_table))
    {
        MakeLeafForClass(tree_node, thisClassInfo[majorityIndex]);
        return;
    }

    tree_node.is_leaf = false;
    tree_node.rule = splitRule;
    tree_node.left_child = new Node();
    tree_node.right_child = new Node();

    LearningC4_5(left_table, tree_node.left_child, inputs, outputs);
    LearningC4_5(right_table, tree_node.right_child, inputs, outputs);
}

// Marks the node as a leaf whose rule value is the class name of the given class-info entry.
private static void MakeLeafForClass(Node node, LearningClassInfo classInfo)
{
    node.is_leaf = true;
    node.rule = new Rule();
    node.rule.value = classInfo.class_name;
}

// True when the table has at least one entry in LearningClasses, whose Length this method already
// uses as the row count when calling ClassInfoInit.
private static bool HasRows(LearningTable table)
{
    return table.LearningClasses != null && table.LearningClasses.Length > 0;
}