/// <summary>
/// Builds one sub-partition for every class-variable value exposed by the
/// tree node of <paramref name="partition"/>.
/// </summary>
/// <remarks>
/// For each value a node is created through <c>CreateLeaveNode</c> using the count the
/// tree node reports for that value, a cost handler forwarding to
/// <paramref name="handle_cost"/> is attached, and the node is wrapped in a partition
/// created without a data store.
/// </remarks>
/// <typeparam name="T">Record type held by the partition.</typeparam>
/// <param name="partition">Partition whose tree node supplies the class-variable values and their counts.</param>
/// <param name="handle_cost">Optional cost callback; when null the attached handler returns the gain unchanged.</param>
/// <returns>Dictionary keyed by class-variable value, each entry holding the partition of the new node.</returns>
public static Dictionary<string, DecisionTreePartition<T>> SplitPartitionByClassVariable<T>(DecisionTreePartition<T> partition, DoAttributeCostGainHandle handle_cost) where T : DDataRecord
{
    DecisionTreeNode<T> tree_node = partition.TreeNode;
    Dictionary<string, DecisionTreePartition<T>> sub_partitions_by_class_variable = new Dictionary<string, DecisionTreePartition<T>>();

    foreach (string variable_value in tree_node.ClassVariableValueIterator)
    {
        int record_count = tree_node.FindClassVariableValueCount(variable_value);
        DecisionTreeNode<T> sub_node = CreateLeaveNode(DecisionTree<T>.ClassVariableName, variable_value, tree_node, record_count);

        // Forward cost adjustment to the caller's callback, or pass the gain through when none was given.
        sub_node.HandleCost += (gain, fname) =>
        {
            if (handle_cost != null)
            {
                return handle_cost(gain, fname);
            }
            return gain;
        };

        sub_partitions_by_class_variable[variable_value] = DecisionTreePartition<T>.Create(sub_node);
    }

    return sub_partitions_by_class_variable;
}
/// <summary>
/// Splits starting from this node, which is used as the root: a partition is created
/// from this node and <paramref name="data_store"/> and handed to
/// <c>DecisionTreeMethods.Split</c>.
/// </summary>
/// <param name="feature_names">Feature names passed on to the split routine.</param>
/// <param name="data_store">Records used to create the root partition.</param>
public void Split(HashSet<string> feature_names, IEnumerable<T> data_store)
{
    DecisionTreeMethods.Split<T>(
        DecisionTreePartition<T>.Create(this, data_store),
        feature_names,
        (gain, feature_name) =>
        {
            // HandleCost is checked at call time; with no handler the gain is returned as is.
            if (HandleCost == null)
            {
                return gain;
            }
            return HandleCost(gain, feature_name);
        });
}
/// <summary>
/// Groups the records of <paramref name="partition"/> by their value for
/// <paramref name="variable_name"/> and creates one sub-partition per distinct value.
/// </summary>
/// <remarks>
/// Each sub-partition wraps a node created through <c>DecisionTreeMethods.CreateTreeNode</c>
/// with the partition's tree node passed as the third argument, and carries only the
/// records that share the value. A cost handler forwarding to
/// <paramref name="handle_cost"/> is attached to every new node.
/// </remarks>
/// <typeparam name="T">Record type held by the partition.</typeparam>
/// <param name="partition">Partition supplying the records and the tree node passed to node creation.</param>
/// <param name="variable_name">Name of the variable used to index each record.</param>
/// <param name="handle_cost">Optional cost callback; when null the attached handler returns the gain unchanged.</param>
/// <returns>Dictionary keyed by variable value, each entry holding the partition for that value.</returns>
public static Dictionary<string, DecisionTreePartition<T>> SplitPartitionByFeatureVariable<T>(DecisionTreePartition<T> partition, string variable_name, DoAttributeCostGainHandle handle_cost) where T : DDataRecord
{
    IEnumerable<T> data_store = partition.DataStore;
    DecisionTreeNode<T> tree_node = partition.TreeNode;

    // Pass 1: bucket the records by their value for the chosen variable.
    Dictionary<string, List<T>> sub_data_stores = new Dictionary<string, List<T>>();
    foreach (T rec in data_store)
    {
        string variable_value = rec[variable_name];

        List<T> sub_data_store;
        if (!sub_data_stores.TryGetValue(variable_value, out sub_data_store))
        {
            sub_data_store = new List<T>();
            sub_data_stores[variable_value] = sub_data_store;
        }
        sub_data_store.Add(rec);
    }

    // Pass 2: turn every bucket into a node plus a partition over that bucket.
    Dictionary<string, DecisionTreePartition<T>> sub_partitions = new Dictionary<string, DecisionTreePartition<T>>();
    foreach (KeyValuePair<string, List<T>> entry in sub_data_stores)
    {
        DecisionTreeNode<T> sub_node = DecisionTreeMethods.CreateTreeNode<T>(variable_name, entry.Key, tree_node);

        // Forward cost adjustment to the caller's callback, or pass the gain through when none was given.
        sub_node.HandleCost += (gain, fname) =>
        {
            if (handle_cost != null)
            {
                return handle_cost(gain, fname);
            }
            return gain;
        };

        sub_partitions[entry.Key] = DecisionTreePartition<T>.Create(sub_node, entry.Value);
    }

    return sub_partitions;
}
/// <summary>
/// Captures this node's current split (split variable name and children dictionary),
/// then re-partitions the node by class variable and returns the captured split.
/// </summary>
/// <remarks>
/// NOTE(review): the effect of <c>UpdateSubPartitions</c> on this node's children is not
/// visible here; presumably it replaces them with the class-variable nodes - confirm.
/// </remarks>
/// <returns>
/// Pair of the split variable name and the children dictionary as referenced before
/// the sub-partitions were updated.
/// </returns>
public KeyValuePair<string, Dictionary<string, DecisionTreeNode<T>>> Prune()
{
    // Take the snapshot first so it refers to the state before the update below.
    KeyValuePair<string, Dictionary<string, DecisionTreeNode<T>>> previous_split =
        new KeyValuePair<string, Dictionary<string, DecisionTreeNode<T>>>(mSplitVariableName, mChildren);

    DecisionTreePartition<T> own_partition = DecisionTreePartition<T>.Create(this);

    Dictionary<string, DecisionTreePartition<T>> class_partitions =
        DecisionTreeMethods.SplitPartitionByClassVariable<T>(
            own_partition,
            (gain, feature_name) =>
            {
                // HandleCost is checked at call time; with no handler the gain is returned as is.
                if (HandleCost == null)
                {
                    return gain;
                }
                return HandleCost(gain, feature_name);
            });

    own_partition.UpdateSubPartitions(class_partitions, DecisionTree<T>.ClassVariableName);

    return previous_split;
}