/// <summary>
/// Creates a new <c>EqualTreeTest</c> whose column index (<c>_param</c>),
/// comparison value (<c>_valTest</c>) and column (<c>_testCol</c>) are taken from this instance.
/// </summary>
/// <returns>The newly created test, typed as the base <c>TreeTest</c>.</returns>
public override TreeTest Copy()
{
    // Field values are assigned as-is; nothing is cloned beyond the test object itself.
    return new EqualTreeTest
    {
        _param = this._param,
        _valTest = this._valTest,
        _testCol = this._testCol
    };
}
/// <summary>
/// Builds a test against one randomly chosen column of <paramref name="dataPointMgr"/>.
/// </summary>
/// <remarks>
/// A NUMBER column produces a <c>LessThanEqualTreeTest</c>; a CATEGORY column produces an
/// <c>EqualTreeTest</c>. In both cases the comparison value comes from
/// <c>column.GetTestValue(rando)</c> and the test stores the index of the chosen column.
/// </remarks>
/// <param name="dataPointMgr">Provides the candidate columns through <c>_columns</c>.</param>
/// <param name="rando">Random source used to pick the column; also handed to <c>GetTestValue</c>.</param>
/// <returns>The new test, with <c>_testCol</c> set to the chosen column.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// The chosen column's <c>_type</c> is neither NUMBER nor CATEGORY.
/// </exception>
public static TreeTest TreeTestFactory(DataPointManager dataPointMgr, Random rando)
{
    //TODO this should take a ga_mgr instead of the parts
    //TODO clean up this mess once the DataColumns quit using the TYPE part
    int col_param = rando.Next(dataPointMgr._columns.Count);
    DataColumn column = dataPointMgr._columns[col_param];

    TreeTest output;
    switch (column._type)
    {
        case DataColumn.DataValueTypes.NUMBER:
            LessThanEqualTreeTest test = new LessThanEqualTreeTest();
            test.param = col_param;
            test.valTest = column.GetTestValue(rando);
            output = test;
            break;

        case DataColumn.DataValueTypes.CATEGORY:
            //TODO toss a coin to decide on subsetter
            // The category list was previously fetched and counted here but never used;
            // that dead code (and its unchecked cast to CategoryDataColumn) has been removed.
            EqualTreeTest test_eq = new EqualTreeTest();
            test_eq._param = col_param;
            test_eq._valTest = column.GetTestValue(rando);
            output = test_eq;
            break;

        default:
            throw new ArgumentOutOfRangeException(
                "dataPointMgr",
                "Unsupported column type: " + column._type);
    }

    output._testCol = column;
    return output;
}
/// <summary>
/// Tries to replace the comparison value of the node's test with the candidate value that
/// gives the largest impurity gain on <c>ga_mgr.dataPointMgr._pointsToTest</c>.
/// </summary>
/// <remarks>
/// Only <c>LessThanEqualTreeTest</c> and <c>EqualTreeTest</c> are handled.
/// For a <c>LessThanEqualTreeTest</c> the candidates are midpoints between consecutive distinct
/// non-missing values of the tested column; for an <c>EqualTreeTest</c> they are the distinct
/// non-missing values themselves. Both candidate lists are thinned with <c>TakeEvery</c>.
/// Each candidate is scored by re-running the data through the node's tree.
/// NOTE(review): after this method returns, the node matrices reflect the last candidate that
/// was evaluated, not necessarily the chosen one; callers presumably re-run the tree - confirm.
/// </remarks>
/// <param name="node1_copy">Node whose test value is modified in place.</param>
/// <param name="ga_mgr">Supplies the data points and is passed to each run's results object.</param>
/// <returns>
/// <c>true</c> when a best candidate was found and written to the test; <c>false</c> when the
/// test type is not supported or when no candidate produced a comparable gain (in which case
/// the test's previous value is restored instead of being overwritten with NaN).
/// </returns>
public static bool OptimizeTest(YesNoMissingTreeNode node1_copy, GeneticAlgorithmManager ga_mgr)
{
    double best_split;

    var less_equal_test = node1_copy.Test as LessThanEqualTreeTest;
    if (less_equal_test != null)
    {
        // distinct, sorted, non-missing values of the tested column
        double[] all_uniques = ga_mgr.dataPointMgr._pointsToTest
            .Select(c => c._data[less_equal_test.param])
            .Where(c => !c._isMissing)
            .Select(c => c._value)
            .Distinct()
            .OrderBy(c => c)
            .ToArray();

        // candidate thresholds sit halfway between neighbouring values
        List<double> all_splits = new List<double>();
        for (int i = 1; i < all_uniques.Length; i++)
        {
            all_splits.Add(0.5 * (all_uniques[i] + all_uniques[i - 1]));
        }

        var original_value = less_equal_test.valTest;

        //TODO improve this selection for how many split points to consider
        bool found = TryFindBestSplit(
            node1_copy,
            ga_mgr,
            all_splits.TakeEvery(all_splits.Count / 10 + 1),
            v => less_equal_test.valTest = v,
            out best_split);

        if (!found)
        {
            // nothing to choose from: keep the test as it was rather than storing NaN
            less_equal_test.valTest = original_value;
            return false;
        }

        less_equal_test.valTest = best_split;
        return true;
    }

    var equal_test = node1_copy.Test as EqualTreeTest;
    if (equal_test != null)
    {
        // materialized once so the query is not re-run for the count and again for the loop
        double[] all_uniques = ga_mgr.dataPointMgr._pointsToTest
            .Select(c => c._data[equal_test._param])
            .Where(c => !c._isMissing)
            .Select(c => c._value)
            .Distinct()
            .OrderBy(c => c)
            .ToArray();

        IEnumerable<double> candidates = all_uniques;
        if (all_uniques.Length > 10)
        {
            candidates = all_uniques.TakeEvery(all_uniques.Length / 10 + 1);
        }

        var original_value = equal_test._valTest;

        //TODO improve this selection for how many split points to consider
        bool found = TryFindBestSplit(
            node1_copy,
            ga_mgr,
            candidates,
            v => equal_test._valTest = v,
            out best_split);

        if (!found)
        {
            // nothing to choose from: keep the test as it was rather than storing NaN
            equal_test._valTest = original_value;
            return false;
        }

        equal_test._valTest = best_split;
        return true;
    }

    return false;
}

/// <summary>
/// Applies each candidate value to the test, re-runs the data through the node's tree and
/// reports the candidate with the highest gain, where gain is the node's
/// <c>GiniImpuritySqrt</c> minus the count-weighted mean <c>GiniImpuritySqrt</c> of its sub nodes.
/// </summary>
/// <param name="node">Node whose matrix and sub node matrices are read after each run.</param>
/// <param name="ga_mgr">Supplies the data points and is passed to each run's results object.</param>
/// <param name="candidates">Values to try, in order.</param>
/// <param name="apply_value">Writes a candidate value into the test being tuned.</param>
/// <param name="best_split">The winning candidate, or NaN when the method returns <c>false</c>.</param>
/// <returns><c>true</c> if at least one candidate beat the initial gain of <c>double.MinValue</c>.</returns>
private static bool TryFindBestSplit(
    YesNoMissingTreeNode node,
    GeneticAlgorithmManager ga_mgr,
    IEnumerable<double> candidates,
    Action<double> apply_value,
    out double best_split)
{
    best_split = double.NaN;
    double best_purity = double.MinValue;
    bool found = false;

    foreach (double candidate in candidates)
    {
        //change the test value and find the best purity
        apply_value(candidate);

        var results = new GeneticAlgorithmRunResults(ga_mgr);
        node._tree.ProcessDataThroughTree(ga_mgr.dataPointMgr, results, ga_mgr.dataPointMgr._pointsToTest);

        //check the result of the split
        var gini_d = node.matrix.GiniImpuritySqrt;
        double gini_split = 0.0;
        int count = 0;
        foreach (var sub_node in node._subNodes)
        {
            gini_split += sub_node.matrix._count * sub_node.matrix.GiniImpuritySqrt;
            count += sub_node.matrix._count;
        }

        // count == 0 gives NaN here; the comparison below is then false and the candidate is skipped
        gini_split /= count;

        double gini_gain = gini_d - gini_split;
        if (gini_gain > best_purity)
        {
            best_split = candidate;
            best_purity = gini_gain;
            found = true;
        }
    }

    return found;
}