/// <summary>
/// Creates a new <see cref="ClassificationTreeNode"/> that carries this node's
/// <c>Classification</c> value. No other member is copied by this method.
/// </summary>
/// <returns>The newly created node.</returns>
public override TreeNode CopyNonLinkingData()
{
    var copy = new ClassificationTreeNode
    {
        Classification = this.Classification
    };

    return copy;
}
/// <summary>
/// Creates a randomly initialised node, gives it a fresh confusion matrix and
/// registers it with <paramref name="tree"/> (without children).
/// </summary>
/// <remarks>
/// A terminal <see cref="ClassificationTreeNode"/> is produced when any of the
/// following holds: the random draw falls below <c>prob_node_terminal</c>,
/// <paramref name="ShouldForceTerminal"/> is set, or the tree already holds more
/// than <c>max_node_count_for_new_tree</c> nodes. Otherwise a
/// <see cref="YesNoMissingTreeNode"/> is produced.
/// </remarks>
/// <param name="ga_mgr">Supplies the random source, the GA options and the class list.</param>
/// <param name="ShouldForceTerminal">When true, a terminal node is always created.</param>
/// <param name="tree">Tree that receives the new node.</param>
/// <returns>The node that was created and added to <paramref name="tree"/>.</returns>
public static TreeNode TreeNodeFactory(GeneticAlgorithmManager ga_mgr, bool ShouldForceTerminal, Tree tree)
{
    // prob_node_terminal is the probability of getting a terminal node, so the
    // draw has to fall BELOW it. The previous '>' comparison produced terminal
    // nodes with probability (1 - prob_node_terminal).
    bool term_node = ga_mgr.rando.NextDouble() < ga_mgr._gaOptions.prob_node_terminal;

    //TODO: consider changing this or using some other scheme to prevent runaway initial trees.
    bool makeTerminal = term_node
        || ShouldForceTerminal
        || tree._nodes.Count > ga_mgr._gaOptions.max_node_count_for_new_tree;

    TreeNode node_output;
    if (makeTerminal)
    {
        var terminal = new ClassificationTreeNode();
        terminal.CreateRandom(ga_mgr);
        node_output = terminal;
    }
    else
    {
        var decision = new YesNoMissingTreeNode();
        decision.CreateRandom(ga_mgr);
        node_output = decision;
    }

    //TODO there might be a better place for this
    node_output.matrix = new ConfusionMatrix(ga_mgr.dataPointMgr.classes.Length);
    tree.AddNodeWithoutChildren(node_output);

    return node_output;
}
/// <summary>
/// Loads every tree found in <paramref name="folderPath"/>, averages the
/// <c>ProbPrediction</c> of the terminal node each tree reaches for every data
/// point, and writes the result to a <c>submission_&lt;ticks&gt;.csv</c> file in
/// the current working directory.
/// </summary>
/// <remarks>
/// Trees whose traversal does not end on a <see cref="ClassificationTreeNode"/>
/// are left out of the average. If no tree contributes for a data point the
/// average is 0.0 / 0, i.e. <c>NaN</c>, and that is what gets written.
/// </remarks>
/// <param name="folderPath">Directory whose files are each read with <c>Tree.ReadFromXmlFile</c>.</param>
public void GeneratePredictionsForDataWithAllTrees(string folderPath)
{
    var treesToTest = new List<Tree>();
    foreach (var file in Directory.GetFiles(folderPath))
    {
        var tree = Tree.ReadFromXmlFile(file);
        treesToTest.Add(tree);
        Debug.WriteLine(tree);
    }

    // One (ID, averaged probability) pair per data point.
    var probs = new List<Tuple<string, double>>();
    foreach (var dataPoint in data_mgr._dataPoints)
    {
        double pred_value = 0.0;
        int count = 0;
        var results = new GeneticAlgorithmRunResults(ga_mgr);

        foreach (var tree in treesToTest)
        {
            var termNode = tree._root.TraverseData(dataPoint, results) as ClassificationTreeNode;
            if (termNode == null)
            {
                // Traversal did not end on a classification node; skip this tree.
                continue;
            }

            pred_value += termNode.ProbPrediction;
            count++;
        }

        probs.Add(Tuple.Create(dataPoint._id, pred_value / count));
    }

    // The file is machine-readable CSV: format numbers with the invariant
    // culture so a comma-decimal locale cannot emit "0,1234" and break the
    // column layout (StreamWriter.WriteLine(format, ...) uses the current culture).
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    using (StreamWriter sw = new StreamWriter("submission_" + DateTime.Now.Ticks + ".csv"))
    {
        sw.WriteLine("ID,PredictedProb");
        foreach (var prob in probs)
        {
            sw.WriteLine(string.Format(invariant, "{0},{1:0.0000}", prob.Item1, prob.Item2));
        }
    }
}
/// <summary>
/// Builds a random tree: a placeholder classification root is split right away,
/// and the resulting classification children are then split breadth-first at
/// random until the queue empties or the node budget is reached.
/// </summary>
/// <param name="ga_mgr">Supplies the random source and the GA options.</param>
/// <returns>The newly built tree.</returns>
public static Tree CreateRandomTree(GeneticAlgorithmManager ga_mgr)
{
    Tree tree = new Tree();

    // Placeholder root; it is replaced by a decision node immediately below.
    ClassificationTreeNode placeholderRoot = new ClassificationTreeNode();
    tree.AddRootToTree(placeholderRoot);

    // Candidates for further splitting, processed first-in first-out.
    var pending = new Queue<ClassificationTreeNode>();

    var firstSplit = SplitClassificationNode(placeholderRoot, ga_mgr);
    foreach (var child in firstSplit._subNodes)
    {
        pending.Enqueue(child as ClassificationTreeNode);
    }

    while (pending.Count > 0 && tree._nodes.Count < ga_mgr._gaOptions.Max_node_count_for_new_tree)
    {
        var candidate = pending.Dequeue();

        // A candidate is split only when the draw exceeds Prob_node_terminal;
        // otherwise it is left as it is.
        bool shouldSplit = ga_mgr.rando.NextDouble() > ga_mgr._gaOptions.Prob_node_terminal;
        if (!shouldSplit)
        {
            continue;
        }

        var replacement = SplitClassificationNode(candidate, ga_mgr);
        foreach (var child in replacement._subNodes)
        {
            pending.Enqueue(child as ClassificationTreeNode);
        }
    }

    return tree;
}
/// <summary>
/// Replaces a classification node with a randomly created
/// <see cref="YesNoMissingTreeNode"/> that has three new classification
/// children (true / false / missing), then runs <c>OptimizeTest</c> on it.
/// </summary>
/// <param name="nodeToReplace">Classification node handed to <c>ReplaceOneNodeWithAnother</c>.</param>
/// <param name="ga_mgr">Passed to <c>CreateRandom</c> and <c>OptimizeTest</c>.</param>
/// <returns>The new decision node.</returns>
public static TreeNode SplitClassificationNode(ClassificationTreeNode nodeToReplace, GeneticAlgorithmManager ga_mgr)
{
    var decision = new YesNoMissingTreeNode();
    decision.CreateRandom(ga_mgr);

    //TODO unhardcode these classification values
    var classForTrue = 0.0;
    var classForFalse = 0.0;
    var classForMissing = 0.0;

    // Three classification children, each pointing back at the decision node.
    var trueChild = new ClassificationTreeNode
    {
        Classification = classForTrue,
        _parent = decision
    };
    var falseChild = new ClassificationTreeNode
    {
        Classification = classForFalse,
        _parent = decision
    };
    var missingChild = new ClassificationTreeNode
    {
        Classification = classForMissing,
        _parent = decision
    };

    decision._trueNode = trueChild;
    decision._falseNode = falseChild;
    decision._missingNode = missingChild;

    ReplaceOneNodeWithAnother(nodeToReplace, decision);

    // Try to optimize the new test; the outcome flag is not used here.
    OptimizeTest(decision, ga_mgr);

    return decision;
}
/// <summary>
/// Walks the tree depth-first from <c>_root</c>. Every node whose
/// <c>_traverseCount</c> is zero is removed together with its children, and its
/// parent (if any) is pointed at a blank classification node instead. Children
/// of a removed node are not visited.
/// </summary>
public void RemoveZeroCountNodes()
{
    var pending = new Stack<TreeNode>();
    pending.Push(_root);

    while (pending.Count > 0)
    {
        TreeNode current = pending.Pop();

        if (current._traverseCount != 0)
        {
            // Node was reached by data: keep it and look at its children.
            foreach (var child in current._subNodes)
            {
                pending.Push(child);
            }
            continue;
        }

        // Blank stand-in: classification -1, with an empty matrix of the same size.
        // NOTE(review): the stand-in is not registered with the tree here -
        // confirm whether UpdateChildReference is expected to take care of that.
        var placeholder = new ClassificationTreeNode
        {
            Classification = -1,
            matrix = new ConfusionMatrix(current.matrix._size),
            _parent = current._parent
        };

        // NOTE(review): when the node has no parent the stand-in is simply dropped.
        if (current._parent != null)
        {
            current._parent.UpdateChildReference(current, placeholder);
        }

        RemoveNodeWithChildren(current);
    }
}
/// <summary>
/// Picks a random tree from the population, picks one of its classification
/// nodes weighted by <c>_traverseCount</c>, and yields a copy of the tree in
/// which that node is replaced by a random <see cref="YesNoMissingTreeNode"/>
/// with three new classification children.
/// </summary>
/// <remarks>
/// Nothing is yielded when the population is empty, when no node could be
/// picked, when the picked node has no confusion matrix, or when that matrix
/// reports fewer than two rows. The picked tree itself is not restructured;
/// only <c>SetStructuralLocationsForNodes</c> is called on it.
/// </remarks>
/// <param name="ga_mgr">Supplies the random source; also passed to <c>CreateRandom</c> and <c>OptimizeTest</c>.</param>
/// <param name="treesInPopulation">Trees to choose from.</param>
/// <returns>Zero or one new tree whose <c>_source</c> is "node split", with " w/ op" appended when <c>OptimizeTest</c> returned true.</returns>
public static IEnumerable<Tree> SplitNodeAndOptimizeTests(GeneticAlgorithmManager ga_mgr, List<Tree> treesInPopulation)
{
    // An empty population would make the indexer below throw.
    if (treesInPopulation.Count == 0)
    {
        yield break;
    }

    Tree tree1 = treesInPopulation[ga_mgr.rando.Next(treesInPopulation.Count)];

    // Selection is weighted by how often each classification node was traversed.
    var node_picker = new WeightedSelector<ClassificationTreeNode>(
        tree1.GetNodesOfType<ClassificationTreeNode>().Select(c => Tuple.Create(c, (double)c._traverseCount))
    );
    ClassificationTreeNode node1 = node_picker.PickRandom(ga_mgr.rando);
    tree1.SetStructuralLocationsForNodes();

    // The original had an empty null check here and then dereferenced the
    // matrix anyway; without a matrix there is nothing to split on.
    if (node1 == null || node1.matrix == null)
    {
        yield break;
    }

    // Trap: fewer than 2 rows means there is nothing worth splitting on.
    var matrix_rows = node1.matrix.GetRowsOrderedByCount().ToList();
    if (matrix_rows.Count < 2)
    {
        yield break;
    }

    //TODO improve this structural business to be cleaner and more obvious if it belongs to the Node or Tree
    Tree tree1_copy = tree1.Copy();
    TreeNode node1_copy = tree1_copy.GetNodeAtStructualLocation(node1._structuralLocation);

    // Replace the classification node with a random decision node.
    //TODO create a proper factory for this code
    var node1_decision = new YesNoMissingTreeNode();
    node1_decision.CreateRandom(ga_mgr);
    node1_decision._parent = node1_copy._parent;

    // The true branch keeps the original classification; the other two are hard-coded to 0.
    var item1 = node1.Classification;
    var item2 = 0;
    var item3 = 0;

    var node1a_class = new ClassificationTreeNode();
    node1a_class.Classification = item1;
    node1a_class._parent = node1_decision;

    var node1b_class = new ClassificationTreeNode();
    node1b_class.Classification = item2;
    node1b_class._parent = node1_decision;

    var node1c_class = new ClassificationTreeNode();
    node1c_class.Classification = item3;
    node1c_class._parent = node1_decision;

    node1_decision._trueNode = node1a_class;
    node1_decision._falseNode = node1b_class;
    node1_decision._missingNode = node1c_class;

    //TODO create a "replace node" operation to standardize this code
    tree1_copy.RemoveNodeWithChildren(node1_copy);
    node1_copy.UpdateParentReference(node1_decision);
    tree1_copy.AddNodeWithChildren(node1_decision);

    // Try to optimize the new test and record whether that succeeded.
    bool opTest = OptimizeTest(node1_decision, ga_mgr);
    tree1_copy._source = "node split";
    if (opTest)
    {
        tree1_copy._source += " w/ op";
    }

    yield return tree1_copy;
}