/// <summary>
/// Parses one pipe-delimited line of a serialized tree and adds the element it describes to
/// <paramref name="model"/>.
/// </summary>
/// <remarks>
/// Field layout, as read by this method:
/// <code>
/// NODE|identifier|parentIdentifier|typeOfNode|label|valueSplit[|threshold]
/// LEAF|identifier|parentIdentifier|valueSplit|classifier|certainty
/// </code>
/// The threshold field is only read when typeOfNode is "C" (continuous node); any other
/// typeOfNode is treated as a nominal node. Created nodes are registered in
/// <c>node_identifiers</c> under their identifier so later lines can name them as parent.
/// A parent identifier that is not registered is only accepted when it equals "ROOT", in
/// which case the element is added with a null parent.
/// </remarks>
/// <param name="model">Tree that receives the new node or leaf.</param>
/// <param name="line">One serialized element line.</param>
/// <exception cref="ArgumentNullException"><paramref name="line"/> is null.</exception>
/// <exception cref="FormatException">
/// The line has too few fields, names an unknown parent, has an unknown element type, or
/// contains a number that cannot be parsed.
/// </exception>
private void update_model_with_line(ref DecisionTree model, string line)
{
    if (line == null)
    {
        throw new ArgumentNullException(nameof(line));
    }

    string[] split = line.Split('|');
    require_field_count(split, 3, line);

    string type = split[0];
    string identifier = split[1];
    string parentidentifier = split[2];

    // Single lookup; parent is left null when the identifier is not registered.
    Node parent;
    if (!this.node_identifiers.TryGetValue(parentidentifier, out parent) && parentidentifier != "ROOT")
    {
        // Only the root element is allowed to have no known parent.
        throw new FormatException($"Could not find parent {parentidentifier} of {identifier}");
    }

    if (type == "NODE")
    {
        require_field_count(split, 6, line);
        string typeOfNode = split[3];
        string label = split[4];
        string value_split = split[5];

        Node node;
        if (typeOfNode == "C")
        {
            // Continuous nodes carry one extra field: the split threshold.
            require_field_count(split, 7, line);
            // NOTE(review): parsing uses the current culture, as before. If model files are
            // meant to be portable between machines, writer and reader should both switch
            // to CultureInfo.InvariantCulture together.
            double threshold = double.Parse(split[6]);
            node = model.addContinuousNode(label, value_split, threshold, parent);
        }
        else
        {
            // Nominal node.
            node = model.addNode(label, value_split, parent);
        }

        node.identifier = identifier;
        this.node_identifiers[identifier] = node;
    }
    else if (type == "LEAF")
    {
        require_field_count(split, 6, line);
        string leaf_value_splitter = split[3];
        string leaf_classifier = split[4];
        double leaf_certainty = double.Parse(split[5]);

        Leaf leaf = model.addUncertainLeaf(leaf_value_splitter, leaf_classifier, parent, leaf_certainty);
        leaf.identifier = identifier;
    }
    else
    {
        throw new FormatException($"UNKNOWN TREE ELEMENT:{type}");
    }
}

/// <summary>
/// Throws a descriptive <see cref="FormatException"/> when a serialized line has fewer
/// fields than the element being read requires.
/// </summary>
private static void require_field_count(string[] fields, int required, string line)
{
    if (fields.Length < required)
    {
        throw new FormatException($"Expected at least {required} '|'-separated fields but found {fields.Length} in line: {line}");
    }
}
/// <summary>
/// One recursive step of tree construction: asks <c>findAttributeSplit</c> for an attribute to
/// split <paramref name="set"/> on, adds the matching node under <paramref name="parent"/>, and
/// then turns every subset of that split into a leaf, an estimation leaf, or a further
/// recursive call. Each decision is reported to <c>agent</c> through <c>THINK</c> calls.
/// </summary>
/// <param name="tree">Tree under construction; passed through the helper calls and returned.</param>
/// <param name="set">Instances to be split at this step.</param>
/// <param name="attributes">
/// Attribute name mapped to its kind; the value "continuous" marks an attribute that is split
/// on a threshold.
/// </param>
/// <param name="parent">Node the new element is attached to, or null.</param>
/// <param name="last_split">Value split that led to this step; recorded as "" when null.</param>
/// <returns>The tree after <paramref name="set"/> has been handled.</returns>
private DecisionTree iterate(DecisionTree tree, List<DataInstance> set, Dictionary<string, string> attributes, Node parent, string last_split)
{
    // Sentinel that findAttributeSplit hands back as the attribute name when no split was chosen.
    const string NoSplitFound = "[INTERNAL_VARIABLE]-NOTFOUND";

    agent.THINK("iterate").finish();

    // Gain ratios and candidate thresholds for every attribute.
    Dictionary<string, Dictionary<string, double>> gainData = calculate_attribute_gain_ratios(set, target_attribute, attributes);
    Dictionary<string, double> thresholdByAttribute = gainData["thresholds"];

    // The chosen attribute together with the subsets that splitting on it produces.
    Tuple<string, Dictionary<string, List<DataInstance>>> splitResult = this.findAttributeSplit(tree, set, attributes, gainData, parent, last_split);
    string chosenAttribute = splitResult.Item1;
    Dictionary<string, List<DataInstance>> partitions = splitResult.Item2;
    double splitThreshold = -1000000;

    // This is to come to the same result as J48 [TODO: This has to go at some point]
    // NOTE(review): only the attribute name is swapped here; `partitions` still holds the
    // subsets that findAttributeSplit returned for its own choice.
    if (!have_been_at_root && chosenAttribute == "petal-length")
    {
        have_been_at_root = true;
        Console.WriteLine("Adjust to J48");
        chosenAttribute = "petal-width";
        splitThreshold = thresholdByAttribute[chosenAttribute];
    }

    bool noSplit = chosenAttribute == NoSplitFound;
    bool splitsOnContinuous = !noSplit && attributes[chosenAttribute] == "continuous";

    // Snapshot describing the outcome of the attribute search.
    // NOTE(review): "attribute_was_found" is recorded as "TRUE" exactly when NO attribute was
    // found. Kept as-is because consumers of the recorded state are not visible here.
    string attributeWasFound = noSplit ? "TRUE" : "FALSE";
    double? suggestedThreshold = null;
    if (splitsOnContinuous)
    {
        suggestedThreshold = (double?)thresholdByAttribute[chosenAttribute];
    }
    var parentId = (parent != null) ? parent.identifier : "NULL";
    var parentLabel = (parent != null) ? parent.label : "NULL";
    string previousSplit = last_split ?? "";
    double? parentThreshold = null;
    if (parent != null && parent is ContinuousNode)
    {
        parentThreshold = (double?)((ContinuousNode)parent).threshold;
    }

    Dictionary<string, object> searchState = StateRecording.generateState(
        "attribute_was_found", attributeWasFound,
        "best_attribute", chosenAttribute,
        "suggested_threshold", suggestedThreshold,
        "parent_id", parentId,
        "parent_attribute", parentLabel,
        "previous_value_split", previousSplit,
        "parent_threshold", parentThreshold);

    if (noSplit)
    {
        // No acceptable split for this set: close the branch with an estimation leaf and move up.
        agent.THINK("add-estimation-leaf").setState(searchState).finish();
        return this.addEstimationLeaf(tree, set, parent, last_split);
    }

    // From here on a usable split attribute exists.
    agent.THINK("add-node").setState(searchState).finish();
    if (splitsOnContinuous)
    {
        splitThreshold = thresholdByAttribute[chosenAttribute];
    }

    // Deeper levels work on a copy of the attribute set.
    Dictionary<string, string> remainingAttributes = AttributeHelper.CopyAttributeDictionary(attributes);

    Node newNode;
    if (splitsOnContinuous)
    {
        newNode = tree.addContinuousNode(chosenAttribute, last_split, splitThreshold, parent);
    }
    else
    {
        newNode = tree.addNode(chosenAttribute, last_split, parent);
        // Only a non-continuous attribute is withdrawn from the copy used below this node.
        remainingAttributes.Remove(chosenAttribute);
    }

    // Key = value split, value = the instances that fall under that split.
    foreach (KeyValuePair<string, List<DataInstance>> partition in partitions)
    {
        string splitValue = partition.Key;
        List<DataInstance> members = partition.Value;

        agent.THINK("subset-on-value").finish();

        bool isEmpty = members.Count == 0;
        bool isUniform = !isEmpty && SetHelper.hasUniformClassifier(members, target_attribute);

        string uniformLabel;
        if (isEmpty)
        {
            uniformLabel = "EMPTY SET";
        }
        else if (isUniform)
        {
            uniformLabel = "TRUE";
        }
        else
        {
            uniformLabel = "FALSE";
        }

        double? chosenThreshold = null;
        if (splitsOnContinuous)
        {
            chosenThreshold = (double?)thresholdByAttribute[chosenAttribute];
        }
        var currentNodeId = newNode.identifier;
        var loopParentId = (parent != null) ? parent.identifier : "NULL";
        var loopParentLabel = (parent != null) ? parent.label : "NULL";
        string loopPreviousSplit = last_split ?? "";
        double? loopParentThreshold = null;
        if (parent != null && parent is ContinuousNode)
        {
            loopParentThreshold = (double?)((ContinuousNode)parent).threshold;
        }

        Dictionary<string, object> subsetState = StateRecording.generateState(
            "set_count", members.Count,
            "set_has_uniform_classifier", uniformLabel,
            "chosen_attribute", chosenAttribute,
            "value_split", splitValue,
            "possible_attribute_count", remainingAttributes.Count,
            "chosen_threshold", chosenThreshold,
            "current_node_id", currentNodeId,
            "parent_id", loopParentId,
            "parent_attribute", loopParentLabel,
            "previous_value_split", loopPreviousSplit,
            "parent_threshold", loopParentThreshold);

        if (isEmpty)
        {
            // Nothing falls under this value; skip it.
            agent.THINK("ignore-value").setState(subsetState).finish();
            continue;
        }

        if (!isUniform)
        {
            // Mixed classifiers: split again while attributes remain, otherwise settle on an estimation leaf.
            if (remainingAttributes.Count > 0)
            {
                agent.THINK("iterate-further").setState(subsetState).finish();
                tree = this.iterate(tree, members, remainingAttributes, newNode, splitValue);
            }
            else
            {
                agent.THINK("add-majority-leaf").setState(subsetState).finish();
                tree = this.addEstimationLeaf(tree, members, newNode, splitValue);
            }
            continue;
        }

        // Every instance shares one classifier value, so this subset becomes a leaf.
        string classifierValue = members[0].getProperty(target_attribute);

        // Leaf certainty is the mean instance weight of the subset.
        double weightSum = 0;
        foreach (DataInstance member in members)
        {
            weightSum += member.getWeight();
        }
        double certainty = weightSum / (double)members.Count;

        agent.THINK("add-leaf").setState(subsetState).finish();
        Leaf leaf = tree.addUncertainLeaf(splitValue, classifierValue, newNode, certainty);
        tree.data_locations[leaf] = members;
    }

    // All subsets of this split are handled; control goes back up.
    agent.THINK("end-value-loop").finish();
    if (parent != null)
    {
        agent.THINK("return-tree-to-self").finish();
    }

    return tree;
}