/// <summary>
/// Creates a randomly initialised node, gives it a fresh confusion matrix and registers it
/// (without children) in <paramref name="tree"/>.
/// </summary>
/// <param name="ga_mgr">Supplies the random source, the GA options and the data point manager.</param>
/// <param name="ShouldForceTerminal">When true, a <c>ClassificationTreeNode</c> is always produced.</param>
/// <param name="tree">Tree that receives the new node; its current node count also caps growth.</param>
/// <returns>The node that was created and added to <paramref name="tree"/>.</returns>
public static TreeNode TreeNodeFactory(GeneticAlgorithmManager ga_mgr, bool ShouldForceTerminal, Tree tree)
{
    // The random draw is taken unconditionally so the random sequence advances once per call.
    // NOTE(review): a terminal node is chosen when the draw is GREATER than prob_node_terminal,
    // which (assuming a uniform draw in [0,1)) happens with probability 1 - prob_node_terminal.
    // Confirm that this matches the intended meaning of the option.
    bool rolledTerminal = ga_mgr.rando.NextDouble() > ga_mgr._gaOptions.prob_node_terminal;

    //TODO: consider changing this or using some other scheme to prevent runaway initial trees.
    bool buildDecisionNode = !(rolledTerminal
                               || ShouldForceTerminal
                               || tree._nodes.Count > ga_mgr._gaOptions.max_node_count_for_new_tree);

    TreeNode created;
    if (buildDecisionNode)
    {
        var decision = new YesNoMissingTreeNode();
        decision.CreateRandom(ga_mgr);
        created = decision;
    }
    else
    {
        var leaf = new ClassificationTreeNode();
        leaf.CreateRandom(ga_mgr);
        created = leaf;
    }

    //TODO there might be a better place for this
    // The matrix is sized by the number of classes known to the data point manager.
    created.matrix = new ConfusionMatrix(ga_mgr.dataPointMgr.classes.Length);

    tree.AddNodeWithoutChildren(created);
    return created;
}
/// <summary>
/// Builds a new random <c>YesNoMissingTreeNode</c> with three <c>ClassificationTreeNode</c> children
/// (true / false / missing), passes it to <c>ReplaceOneNodeWithAnother</c> as the replacement for
/// <paramref name="nodeToReplace"/>, and then runs <c>OptimizeTest</c> on it.
/// </summary>
/// <param name="nodeToReplace">The classification node being replaced.</param>
/// <param name="ga_mgr">Manager passed to <c>CreateRandom</c> and <c>OptimizeTest</c>.</param>
/// <returns>The newly created decision node.</returns>
public static TreeNode SplitClassificationNode(ClassificationTreeNode nodeToReplace, GeneticAlgorithmManager ga_mgr)
{
    //TODO unhardcode this: every new leaf currently starts with the same placeholder class.
    const double placeholderClassification = 0.0;

    var decision = new YesNoMissingTreeNode();
    decision.CreateRandom(ga_mgr);

    // Create the three leaves first, each pointing back at the new decision node.
    var whenTrue = new ClassificationTreeNode
    {
        Classification = placeholderClassification,
        _parent = decision
    };
    var whenFalse = new ClassificationTreeNode
    {
        Classification = placeholderClassification,
        _parent = decision
    };
    var whenMissing = new ClassificationTreeNode
    {
        Classification = placeholderClassification,
        _parent = decision
    };

    // Then hang them off the decision node.
    decision._trueNode = whenTrue;
    decision._falseNode = whenFalse;
    decision._missingNode = whenMissing;

    ReplaceOneNodeWithAnother(nodeToReplace, decision);

    // Try to optimize the new test; the success flag is not used here.
    OptimizeTest(decision, ga_mgr);

    return decision;
}
/// <summary>
/// Picks a random tree from the population, picks one of its <c>ClassificationTreeNode</c>s
/// (weighted by <c>_traverseCount</c>), and yields a copy of that tree in which the picked node has
/// been replaced by a new random <c>YesNoMissingTreeNode</c> with three classification children.
/// </summary>
/// <param name="ga_mgr">Supplies the random source; also passed to <c>CreateRandom</c> and <c>OptimizeTest</c>.</param>
/// <param name="treesInPopulation">Candidate trees. The selected tree is copied before it is restructured.</param>
/// <returns>
/// A lazily evaluated sequence with at most one tree. It is empty when the population is empty, when
/// no node with a confusion matrix could be picked, or when that matrix has fewer than two rows.
/// The yielded tree has <c>_source</c> set to "node split", with " w/ op" appended when
/// <c>OptimizeTest</c> returned true.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="treesInPopulation"/> is null (raised on first enumeration, as this is an iterator).
/// </exception>
public static IEnumerable<Tree> SplitNodeAndOptimizeTests(GeneticAlgorithmManager ga_mgr, List<Tree> treesInPopulation)
{
    if (treesInPopulation == null)
    {
        throw new ArgumentNullException("treesInPopulation");
    }

    // Nothing to pick from: previously the indexer below threw ArgumentOutOfRangeException.
    if (treesInPopulation.Count == 0)
    {
        yield break;
    }

    // Grab a random tree.
    Tree tree1 = treesInPopulation[ga_mgr.rando.Next(treesInPopulation.Count)];

    // Pick a classification node, using the traversal count as the selection weight.
    var node_picker = new WeightedSelector<ClassificationTreeNode>(
        tree1.GetNodesOfType<ClassificationTreeNode>()
             .Select(c => Tuple.Create(c, (double)c._traverseCount)));
    ClassificationTreeNode node1 = node_picker.PickRandom(ga_mgr.rando);

    // Structural locations are needed below to find the matching node in the copy.
    tree1.SetStructuralLocationsForNodes();

    // Without a node or its confusion matrix there is nothing to split on. The original code had an
    // empty check here and then dereferenced the matrix anyway, causing a NullReferenceException.
    if (node1 == null || node1.matrix == null)
    {
        yield break;
    }

    // Trap in case there are fewer than 2 "top" rows to split on.
    var matrix_rows = node1.matrix.GetRowsOrderedByCount().ToList();
    if (matrix_rows.Count < 2)
    {
        yield break;
    }

    //TODO improve this structural business to be cleaner and more obvious if it belongs to the Node or Tree
    Tree tree1_copy = tree1.Copy();
    TreeNode node1_copy = tree1_copy.GetNodeAtStructualLocation(node1._structuralLocation);

    // Create a random test that takes the place of the picked classification node.
    //TODO create a proper factory for this code
    var node1_decision = new YesNoMissingTreeNode();
    node1_decision.CreateRandom(ga_mgr);
    node1_decision._parent = node1_copy._parent;

    // The "true" branch keeps the original node's class; the other two branches use class 0.
    //TODO derive the branch classes from the most popular rows of the confusion matrix.
    var item1 = node1.Classification;
    var item2 = 0;
    var item3 = 0;

    // Create the three classification nodes.
    var node1a_class = new ClassificationTreeNode();
    node1a_class.Classification = item1;
    node1a_class._parent = node1_decision;

    var node1b_class = new ClassificationTreeNode();
    node1b_class.Classification = item2;
    node1b_class._parent = node1_decision;

    var node1c_class = new ClassificationTreeNode();
    node1c_class.Classification = item3;
    node1c_class._parent = node1_decision;

    node1_decision._trueNode = node1a_class;
    node1_decision._falseNode = node1b_class;
    node1_decision._missingNode = node1c_class;

    // Swap the old node for the new decision node inside the copied tree.
    //TODO create a "replace node" operation to standardize this code
    tree1_copy.RemoveNodeWithChildren(node1_copy);
    node1_copy.UpdateParentReference(node1_decision);
    tree1_copy.AddNodeWithChildren(node1_decision);

    // Try to optimize the new node's test and record the outcome in the tree's source tag.
    bool opTest = OptimizeTest(node1_decision, ga_mgr);
    tree1_copy._source = "node split";
    if (opTest)
    {
        tree1_copy._source += " w/ op";
    }

    // Return the new tree with that change.
    yield return tree1_copy;
}