/// <summary>
/// Creates a new, unlinked <c>ClassificationTreeNode</c> that carries over this
/// node's <c>Classification</c> value.
/// </summary>
/// <returns>The freshly created node; only <c>Classification</c> is assigned on it here.</returns>
public override TreeNode CopyNonLinkingData()
        {
            // No parent/child references are set on the copy in this method.
            return new ClassificationTreeNode
            {
                Classification = this.Classification
            };
        }
示例#2
0
        /// <summary>
        /// Creates a randomly initialised node, gives it a fresh confusion matrix and
        /// registers it with <paramref name="tree"/> through <c>AddNodeWithoutChildren</c>.
        /// </summary>
        /// <param name="ga_mgr">Supplies the random source, the GA options and the class list used to size the matrix.</param>
        /// <param name="ShouldForceTerminal">When true, a <c>ClassificationTreeNode</c> is always produced.</param>
        /// <param name="tree">Tree that receives the new node; its current node count can also force a terminal node.</param>
        /// <returns>
        /// A <c>ClassificationTreeNode</c> when the node is terminal, otherwise a
        /// <c>YesNoMissingTreeNode</c>.
        /// </returns>
        public static TreeNode TreeNodeFactory(GeneticAlgorithmManager ga_mgr, bool ShouldForceTerminal, Tree tree)
        {
            TreeNode node_output;

            // prob_node_terminal is read as "chance that a node is terminal": NextDouble() is
            // uniform on [0, 1), so the draw falls below the threshold with exactly that
            // probability. The previous '>' comparison produced terminals with probability
            // 1 - prob_node_terminal.
            bool term_node = ga_mgr.rando.NextDouble() < ga_mgr._gaOptions.prob_node_terminal;

            //TODO: consider changing this or using some other scheme to prevent runaway initial trees.
            if (term_node || ShouldForceTerminal || tree._nodes.Count > ga_mgr._gaOptions.max_node_count_for_new_tree)
            {
                // terminal: a classification leaf
                var node = new ClassificationTreeNode();
                node.CreateRandom(ga_mgr);

                node_output = node;
            }
            else
            {
                // non-terminal: a yes/no/missing decision node
                var node = new YesNoMissingTreeNode();
                node.CreateRandom(ga_mgr);

                node_output = node;
            }

            //TODO there might be a better place for this
            // one matrix dimension per known class
            node_output.matrix = new ConfusionMatrix(ga_mgr.dataPointMgr.classes.Length);

            tree.AddNodeWithoutChildren(node_output);
            return node_output;
        }
示例#3
0
        /// <summary>
        /// Loads every tree file found in <paramref name="folderPath"/>, runs each data point in
        /// <c>data_mgr._dataPoints</c> through all of them, and writes the per-point average of
        /// the terminal nodes' <c>ProbPrediction</c> to a new <c>submission_&lt;ticks&gt;.csv</c>
        /// file (relative path, so it lands in the current working directory).
        /// </summary>
        /// <param name="folderPath">Directory whose files are each read with <c>Tree.ReadFromXmlFile</c>.</param>
        public void GeneratePredictionsForDataWithAllTrees(string folderPath)
        {
            List<Tree> treesToTest = new List<Tree>();

            foreach (var file in Directory.GetFiles(folderPath))
            {
                var tree = Tree.ReadFromXmlFile(file);
                treesToTest.Add(tree);
                Debug.WriteLine(tree);
            }

            //loop through the data points, and then loop through trees
            //will contain the ID and probability
            var probs = new List<Tuple<string, double>>();

            foreach (var dataPoint in data_mgr._dataPoints)
            {
                double pred_value = 0.0;
                var results = new GeneticAlgorithmRunResults(ga_mgr);
                int count = 0;

                foreach (var tree in treesToTest)
                {
                    var node = tree._root.TraverseData(dataPoint, results);

                    // only traversals that end on a classification node contribute
                    ClassificationTreeNode termNode = node as ClassificationTreeNode;
                    if (termNode == null)
                    {
                        continue;
                    }

                    pred_value += termNode.ProbPrediction;
                    count++;
                }

                // NOTE(review): when no tree contributes, count is 0 and this average is NaN,
                // which is written to the file as-is. Confirm whether a fallback value is wanted.
                probs.Add(Tuple.Create(dataPoint._id, pred_value / count));
            }

            using (StreamWriter sw = new StreamWriter("submission_" + DateTime.Now.Ticks + ".csv"))
            {
                sw.WriteLine("ID,PredictedProb");
                foreach (var prob in probs)
                {
                    // Format with the invariant culture: under a comma-decimal culture the
                    // default formatting would emit "0,1234" and break the comma-separated row.
                    sw.WriteLine(string.Format(
                        System.Globalization.CultureInfo.InvariantCulture,
                        "{0},{1:0.0000}",
                        prob.Item1,
                        prob.Item2));
                }
            }
        }
示例#4
0
        /// <summary>
        /// Builds a new tree: a placeholder root is split immediately, then queued leaves are
        /// split at random until the queue is empty or the tree reaches
        /// <c>Max_node_count_for_new_tree</c> nodes.
        /// </summary>
        /// <param name="ga_mgr">Supplies the random source and the GA options.</param>
        /// <returns>The newly built tree.</returns>
        public static Tree CreateRandomTree(GeneticAlgorithmManager ga_mgr)
        {
            Tree result = new Tree();

            // placeholder root; it only exists so there is something to split
            ClassificationTreeNode placeholder = new ClassificationTreeNode();
            result.AddRootToTree(placeholder);

            TreeNode firstSplit = SplitClassificationNode(placeholder, ga_mgr);

            // leaves waiting for their own chance to be split, processed first-in first-out
            var pending = new Queue<ClassificationTreeNode>();
            foreach (var child in firstSplit._subNodes)
            {
                pending.Enqueue(child as ClassificationTreeNode);
            }

            while (pending.Count > 0 && result._nodes.Count < ga_mgr._gaOptions.Max_node_count_for_new_tree)
            {
                ClassificationTreeNode leaf = pending.Dequeue();

                // a draw above Prob_node_terminal means the leaf gets split; otherwise it stays a leaf
                bool shouldSplit = ga_mgr.rando.NextDouble() > ga_mgr._gaOptions.Prob_node_terminal;
                if (!shouldSplit)
                {
                    continue;
                }

                TreeNode replacement = SplitClassificationNode(leaf, ga_mgr);
                foreach (var child in replacement._subNodes)
                {
                    pending.Enqueue(child as ClassificationTreeNode);
                }
            }

            return result;
        }
示例#5
0
        /// <summary>
        /// Replaces <paramref name="nodeToReplace"/> with a randomly created
        /// <c>YesNoMissingTreeNode</c> that has three new classification children
        /// (true / false / missing), then runs <c>OptimizeTest</c> on the new decision node.
        /// </summary>
        /// <param name="nodeToReplace">Classification node that is swapped out.</param>
        /// <param name="ga_mgr">Passed to <c>CreateRandom</c> and <c>OptimizeTest</c>.</param>
        /// <returns>The new decision node.</returns>
        public static TreeNode SplitClassificationNode(ClassificationTreeNode nodeToReplace, GeneticAlgorithmManager ga_mgr)
        {
            //TODO unhardcode this items
            const double placeholderClassification = 0.0;

            var decision = new YesNoMissingTreeNode();
            decision.CreateRandom(ga_mgr);

            // three leaves, each pointing back at the decision node
            var whenTrue = new ClassificationTreeNode
            {
                Classification = placeholderClassification,
                _parent = decision
            };
            var whenFalse = new ClassificationTreeNode
            {
                Classification = placeholderClassification,
                _parent = decision
            };
            var whenMissing = new ClassificationTreeNode
            {
                Classification = placeholderClassification,
                _parent = decision
            };

            decision._trueNode = whenTrue;
            decision._falseNode = whenFalse;
            decision._missingNode = whenMissing;

            ReplaceOneNodeWithAnother(nodeToReplace, decision);

            // try to optimize the node; the boolean outcome is not used by this method
            OptimizeTest(decision, ga_mgr);

            return decision;
        }
示例#6
0
        /// <summary>
        /// Walks the tree from <c>_root</c> and, for every node whose <c>_traverseCount</c> is
        /// zero, points the parent at a blank classification node (Classification = -1) and
        /// removes the original node together with its children. Nodes with a non-zero count
        /// are kept and their sub-nodes are visited in turn.
        /// </summary>
        public void RemoveZeroCountNodes()
        {
            var pending = new Stack<TreeNode>();
            pending.Push(_root);

            while (pending.Count > 0)
            {
                TreeNode current = pending.Pop();

                if (current._traverseCount != 0)
                {
                    // node was reached at least once: keep it and look at its children
                    foreach (var child in current._subNodes)
                    {
                        pending.Push(child);
                    }

                    continue;
                }

                // never reached: build the stand-in with a matrix of the same size
                var blank = new ClassificationTreeNode
                {
                    Classification = -1,
                    matrix = new ConfusionMatrix(current.matrix._size),
                    _parent = current._parent
                };

                // NOTE(review): when the unreached node has no parent (e.g. the root), the
                // stand-in is not linked anywhere by this method - confirm that is intended.
                if (current._parent != null)
                {
                    current._parent.UpdateChildReference(current, blank);
                }

                // drop the unreached node and everything beneath it
                RemoveNodeWithChildren(current);
            }
        }
示例#7
0
        /// <summary>
        /// Picks a random tree from the population, picks one of its classification nodes
        /// weighted by <c>_traverseCount</c>, and yields a copy of the tree in which that node
        /// has been replaced by a random yes/no/missing decision node with three new
        /// classification children.
        /// </summary>
        /// <param name="ga_mgr">Supplies the random source; also passed to <c>CreateRandom</c> and <c>OptimizeTest</c>.</param>
        /// <param name="treesInPopulation">Population to draw the source tree from.</param>
        /// <returns>
        /// A lazily evaluated sequence with at most one tree. It is empty when the picked node
        /// has no confusion matrix or the matrix yields fewer than two rows.
        /// </returns>
        public static IEnumerable<Tree> SplitNodeAndOptimizeTests(GeneticAlgorithmManager ga_mgr, List<Tree> treesInPopulation)
        {
            //grab a random tree
            Tree tree1 = treesInPopulation[ga_mgr.rando.Next(treesInPopulation.Count)];

            //uses the traversal count for selecting
            var node_picker = new WeightedSelector<ClassificationTreeNode>(
                tree1.GetNodesOfType<ClassificationTreeNode>().Select(c => Tuple.Create(c, (double)c._traverseCount))
                );

            ClassificationTreeNode node1 = node_picker.PickRandom(ga_mgr.rando);

            tree1.SetStructuralLocationsForNodes();

            // Without a matrix there is nothing to split on. Previously this case fell through
            // an empty 'if' and threw a NullReferenceException on the next line; now it produces
            // no result, like the "fewer than 2 rows" case below.
            // NOTE(review): whether PickRandom can return null is not visible here; the null
            // check on node1 is defensive.
            if (node1 == null || node1.matrix == null)
            {
                yield break;
            }

            var matrix_rows = node1.matrix.GetRowsOrderedByCount().ToList();

            //trap is here in case there are fewer than 2 "top" rows to split on
            if (matrix_rows.Count < 2)
            {
                yield break;
            }

            //TODO improve this structural business to be cleaner and more obvious if it belongs to the Node or Tree
            // work on a copy so the tree in the population is left as it is
            Tree tree1_copy = tree1.Copy();
            TreeNode node1_copy = tree1_copy.GetNodeAtStructualLocation(node1._structuralLocation);

            //create a random test for the current node (change from classification to decision)
            //TODO create a proper factory for this code
            var node1_decision = new YesNoMissingTreeNode();

            node1_decision.CreateRandom(ga_mgr);
            node1_decision._parent = node1_copy._parent;

            //create the three classification nodes; the "true" branch keeps the original class
            var node1a_class = new ClassificationTreeNode();
            node1a_class.Classification = node1.Classification;
            node1a_class._parent = node1_decision;

            var node1b_class = new ClassificationTreeNode();
            node1b_class.Classification = 0;
            node1b_class._parent = node1_decision;

            var node1c_class = new ClassificationTreeNode();
            node1c_class.Classification = 0;
            node1c_class._parent = node1_decision;

            node1_decision._trueNode = node1a_class;
            node1_decision._falseNode = node1b_class;
            node1_decision._missingNode = node1c_class;

            //TODO create a "replace node" operation to standardize this code
            // swap the copied leaf for the new decision node inside the copied tree
            tree1_copy.RemoveNodeWithChildren(node1_copy);

            node1_copy.UpdateParentReference(node1_decision);
            tree1_copy.AddNodeWithChildren(node1_decision);

            //try to optimize the node
            bool opTest = OptimizeTest(node1_decision, ga_mgr);

            // record how this tree was produced
            tree1_copy._source = "node split";

            if (opTest)
            {
                tree1_copy._source += " w/ op";
            }

            yield return tree1_copy;
        }