Exemplo n.º 1
0
        /// <summary>
        /// Builds one randomly initialised node, gives it an empty confusion matrix and
        /// registers it with <paramref name="tree"/> (without children).
        /// </summary>
        /// <param name="ga_mgr">Supplies the random generator, the GA options and the class list.</param>
        /// <param name="ShouldForceTerminal">When true a classification (leaf) node is always produced.</param>
        /// <param name="tree">Tree that receives the new node; its current node count can also force a leaf.</param>
        /// <returns>The newly created node.</returns>
        public static TreeNode TreeNodeFactory(GeneticAlgorithmManager ga_mgr, bool ShouldForceTerminal, Tree tree)
        {
            // The draw happens unconditionally so the random stream advances the same way
            // regardless of the other conditions.
            // NOTE(review): the draw counts as "terminal" when it is GREATER than
            // prob_node_terminal - confirm this is the intended direction.
            double draw          = ga_mgr.rando.NextDouble();
            bool   drewTerminal  = draw > ga_mgr._gaOptions.prob_node_terminal;

            // A decision node is only built when nothing asks for a leaf: not the draw, not the
            // caller, and not the size cap that keeps freshly generated trees from running away.
            bool buildDecision = !drewTerminal
                                 && !ShouldForceTerminal
                                 && !(tree._nodes.Count > ga_mgr._gaOptions.max_node_count_for_new_tree);

            TreeNode created;

            if (buildDecision)
            {
                var decision = new YesNoMissingTreeNode();
                decision.CreateRandom(ga_mgr);
                created = decision;
            }
            else
            {
                var leaf = new ClassificationTreeNode();
                leaf.CreateRandom(ga_mgr);
                created = leaf;
            }

            // TODO: the matrix setup may belong somewhere else.
            int classCount = ga_mgr.dataPointMgr.classes.Length;
            created.matrix = new ConfusionMatrix(classCount);

            tree.AddNodeWithoutChildren(created);
            return created;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Creates a new <c>YesNoMissingTreeNode</c> that carries a copy of this node's test.
        /// No parent or child links are set on the returned node.
        /// </summary>
        /// <returns>The unlinked copy.</returns>
        public override TreeNode CopyNonLinkingData()
        {
            return new YesNoMissingTreeNode
            {
                Test = Test.Copy()
            };
        }
Exemplo n.º 3
0
        /// <summary>
        /// Recursively copies this decision node together with its true, false and missing
        /// branches, wiring the copied children to the copied node in both directions.
        /// </summary>
        /// <returns>The copy of this node; its own parent link is left for the caller to set.</returns>
        public override TreeNode ReturnFullyLinkedCopyOfSelf()
        {
            // CopyNonLinkingData on this type produces a YesNoMissingTreeNode, so the cast is safe.
            var clone = (YesNoMissingTreeNode)CopyNonLinkingData();

            // Copy the three sub-trees first (true, false, missing - in that order).
            TreeNode yesBranch     = _trueNode.ReturnFullyLinkedCopyOfSelf();
            TreeNode noBranch      = _falseNode.ReturnFullyLinkedCopyOfSelf();
            TreeNode unknownBranch = _missingNode.ReturnFullyLinkedCopyOfSelf();

            // Then link each branch to the clone, one branch at a time.
            clone._trueNode   = yesBranch;
            yesBranch._parent = clone;

            clone._falseNode = noBranch;
            noBranch._parent = clone;

            clone._missingNode    = unknownBranch;
            unknownBranch._parent = clone;

            return clone;
        }
Exemplo n.º 4
0
        /// <summary>
        /// Replaces a classification leaf with a new random decision node that has three fresh
        /// classification leaves (true / false / missing), then attempts to optimize the new test.
        /// </summary>
        /// <param name="nodeToReplace">Leaf that is swapped out via <c>ReplaceOneNodeWithAnother</c>.</param>
        /// <param name="ga_mgr">Source of randomness and data for creating and optimizing the test.</param>
        /// <returns>The new decision node.</returns>
        public static TreeNode SplitClassificationNode(ClassificationTreeNode nodeToReplace, GeneticAlgorithmManager ga_mgr)
        {
            var decision = new YesNoMissingTreeNode();
            decision.CreateRandom(ga_mgr);

            // TODO: stop hard-coding the class assigned to the new leaves.
            const double yesClass     = 0.0;
            const double noClass      = 0.0;
            const double unknownClass = 0.0;

            // Build the three leaves, each already pointing back at the new decision node.
            var yesLeaf = new ClassificationTreeNode
            {
                Classification = yesClass,
                _parent        = decision
            };
            var noLeaf = new ClassificationTreeNode
            {
                Classification = noClass,
                _parent        = decision
            };
            var unknownLeaf = new ClassificationTreeNode
            {
                Classification = unknownClass,
                _parent        = decision
            };

            decision._trueNode    = yesLeaf;
            decision._falseNode   = noLeaf;
            decision._missingNode = unknownLeaf;

            // Swap the old leaf for the new sub-tree, then try to tune the new test.
            // The outcome of the tuning is not used here.
            ReplaceOneNodeWithAnother(nodeToReplace, decision);
            OptimizeTest(decision, ga_mgr);

            return decision;
        }
Exemplo n.º 5
0
        /// <summary>
        /// Copies a randomly chosen tree from the population, picks (with equal weights) one of
        /// the copy's decision nodes whose test is a <c>LessThanEqualTreeTest</c>, and tries to
        /// optimize that test.
        /// </summary>
        /// <param name="ga_mgr">Supplies the random generator and is passed on to <c>OptimizeTest</c>.</param>
        /// <param name="treesInPopulation">Population to draw the source tree from.</param>
        /// <returns>
        /// The modified copy (tagged "optimize value") when <c>OptimizeTest</c> reports success;
        /// otherwise an empty sequence.
        /// </returns>
        public static IEnumerable <Tree> OptimizeSplitForNode(GeneticAlgorithmManager ga_mgr, List <Tree> treesInPopulation)
        {
            Random rng = ga_mgr.rando;

            // Work on a copy so the tree in the population is left untouched.
            int  chosenIndex = rng.Next(treesInPopulation.Count());
            Tree candidate   = treesInPopulation[chosenIndex].Copy();

            var thresholdNodes = candidate.GetNodesOfType <YesNoMissingTreeNode>()
                                 .Where(n => n.Test is LessThanEqualTreeTest);

            if (thresholdNodes.Any())
            {
                // Every eligible node gets the same weight.
                var equalWeights = thresholdNodes.Select(n => Tuple.Create(n, 1.0));
                var selector     = new WeightedSelector <YesNoMissingTreeNode>(equalWeights);

                YesNoMissingTreeNode chosen = selector.PickRandom(rng);

                // Only hand the copy back when a node was picked and its test was optimized.
                if (chosen != null && OptimizeTest(chosen, ga_mgr))
                {
                    candidate._source = "optimize value";

                    yield return candidate;
                }
            }
        }
Exemplo n.º 6
0
        /// <summary>
        /// Picks a classification leaf from a random tree (weighted by traverse count) and, in a
        /// copy of that tree, replaces the leaf with a new random decision node that has three
        /// classification leaves. The new test is then optimized where possible.
        /// </summary>
        /// <param name="ga_mgr">Supplies the random generator and is passed on to <c>OptimizeTest</c>.</param>
        /// <param name="treesInPopulation">Population to draw the source tree from.</param>
        /// <returns>
        /// The modified copy, tagged "node split" (plus " w/ op" when the test was optimized).
        /// Empty when no leaf could be picked, the picked leaf has no confusion matrix, or the
        /// matrix has fewer than two rows.
        /// </returns>
        public static IEnumerable <Tree> SplitNodeAndOptimizeTests(GeneticAlgorithmManager ga_mgr, List <Tree> treesInPopulation)
        {
            //grab a random tree
            Tree tree1 = treesInPopulation[ga_mgr.rando.Next(treesInPopulation.Count)];

            //uses the traversal count for selecting
            var node_picker = new WeightedSelector <ClassificationTreeNode>(
                tree1.GetNodesOfType <ClassificationTreeNode>().Select(c => Tuple.Create(c, (double)c._traverseCount))
                );

            ClassificationTreeNode node1 = node_picker.PickRandom(ga_mgr.rando);

            tree1.SetStructuralLocationsForNodes();

            //nothing to split when no leaf was picked or the leaf has no confusion matrix;
            //previously this fell through an empty guard and dereferenced null
            if (node1 == null || node1.matrix == null)
            {
                yield break;
            }

            var matrix_rows = node1.matrix.GetRowsOrderedByCount().ToList();

            //trap is here in case there are fewer than 2 "top" rows to split on
            if (matrix_rows.Count < 2)
            {
                yield break;
            }

            //TODO improve this structural business to be cleaner and more obvious if it belongs to the Node or Tree
            //locate, inside the copy, the node that corresponds to the picked leaf
            Tree     tree1_copy = tree1.Copy();
            TreeNode node1_copy = tree1_copy.GetNodeAtStructualLocation(node1._structuralLocation);

            //create a random test for the current node (change from classification to decision)
            //TODO create a proper factory for this code
            var node1_decision = new YesNoMissingTreeNode();

            node1_decision.CreateRandom(ga_mgr);
            node1_decision._parent = node1_copy._parent;

            //the "true" leaf keeps the original classification; the other two are hard-coded to 0
            var item1 = node1.Classification;
            var item2 = 0;
            var item3 = 0;

            //create the three classification nodes (true / false / missing)
            var node1a_class = new ClassificationTreeNode();
            node1a_class.Classification = item1;
            node1a_class._parent        = node1_decision;

            var node1b_class = new ClassificationTreeNode();
            node1b_class.Classification = item2;
            node1b_class._parent        = node1_decision;

            var node1c_class = new ClassificationTreeNode();
            node1c_class.Classification = item3;
            node1c_class._parent        = node1_decision;

            node1_decision._trueNode    = node1a_class;
            node1_decision._falseNode   = node1b_class;
            node1_decision._missingNode = node1c_class;

            //swap the old leaf for the new decision sub-tree inside the copy
            //TODO create a "replace node" operation to standardize this code
            tree1_copy.RemoveNodeWithChildren(node1_copy);

            //presumably repoints the old parent's child link at the new node - confirm
            node1_copy.UpdateParentReference(node1_decision);
            tree1_copy.AddNodeWithChildren(node1_decision);

            //try to optimize the node
            bool opTest = OptimizeTest(node1_decision, ga_mgr);

            tree1_copy._source = "node split";

            if (opTest)
            {
                tree1_copy._source += " w/ op";
            }

            yield return(tree1_copy);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Searches for the test value of a decision node that gives the largest Gini gain on
        /// the manager's test points, and stores that value on the node's test.
        /// </summary>
        /// <remarks>
        /// Only <c>LessThanEqualTreeTest</c> (candidates are midpoints between adjacent distinct
        /// values) and <c>EqualTreeTest</c> (candidates are the distinct values themselves) are
        /// handled. Roughly ten candidates are sampled via <c>TakeEvery</c>.
        /// NOTE(review): the tree is not re-processed after the best value is stored, so node
        /// matrices reflect the last candidate evaluated - confirm callers do not rely on them.
        /// </remarks>
        /// <param name="node1_copy">Decision node whose test is tuned in place.</param>
        /// <param name="ga_mgr">Supplies the data points used for evaluation.</param>
        /// <returns>
        /// True when a better-scoring candidate was found and stored. False when the test type
        /// is not supported or no usable candidate exists, in which case the test value is left
        /// as it was.
        /// </returns>
        public static bool OptimizeTest(YesNoMissingTreeNode node1_copy, GeneticAlgorithmManager ga_mgr)
        {
            if (node1_copy.Test is LessThanEqualTreeTest)
            {
                var test           = (LessThanEqualTreeTest)node1_copy.Test;
                var original_value = test.valTest;

                //distinct, non-missing values of the tested parameter, ascending
                var values      = ga_mgr.dataPointMgr._pointsToTest.Select(c => c._data[test.param]);
                var all_uniques = values.Where(c => !c._isMissing).Select(c => c._value).Distinct().OrderBy(c => c).ToArray();

                //candidate thresholds are the midpoints between adjacent distinct values
                List <double> all_splits = new List <double>();
                for (int i = 1; i < all_uniques.Length; i++)
                {
                    all_splits.Add(0.5 * (all_uniques[i] + all_uniques[i - 1]));
                }

                bool   found_split = false;
                double best_split  = double.NaN;
                double best_purity = double.MinValue;

                //TODO improve this selection for how many split points to consider
                foreach (var split in all_splits.TakeEvery(all_splits.Count / 10 + 1))
                {
                    //change the test value and score the resulting split
                    test.valTest = split;

                    double gini_gain = ComputeGiniGainForCurrentTest(node1_copy, ga_mgr);

                    //a NaN gain never compares greater, so such candidates are skipped
                    if (gini_gain > best_purity)
                    {
                        found_split = true;
                        best_split  = split;
                        best_purity = gini_gain;
                    }
                }

                if (!found_split)
                {
                    //no usable candidate: keep the original value instead of storing NaN
                    test.valTest = original_value;
                    return(false);
                }

                test.valTest = best_split;
                return(true);
            }

            if (node1_copy.Test is EqualTreeTest)
            {
                var test           = (EqualTreeTest)node1_copy.Test;
                var original_value = test._valTest;

                //distinct, non-missing values of the tested parameter, materialized once
                var      values      = ga_mgr.dataPointMgr._pointsToTest.Select(c => c._data[test._param]);
                double[] all_uniques = values.Where(c => !c._isMissing).Select(c => c._value).Distinct().OrderBy(c => c).ToArray();

                //thin out the candidates when there are more than ten
                IEnumerable <double> candidates = all_uniques;
                if (all_uniques.Length > 10)
                {
                    candidates = candidates.TakeEvery(all_uniques.Length / 10 + 1);
                }

                bool   found_split = false;
                double best_split  = double.NaN;
                double best_purity = double.MinValue;

                //TODO improve this selection for how many split points to consider
                foreach (var split in candidates)
                {
                    //change the test value and score the resulting split
                    test._valTest = split;

                    double gini_gain = ComputeGiniGainForCurrentTest(node1_copy, ga_mgr);

                    //a NaN gain never compares greater, so such candidates are skipped
                    if (gini_gain > best_purity)
                    {
                        found_split = true;
                        best_split  = split;
                        best_purity = gini_gain;
                    }
                }

                if (!found_split)
                {
                    //no usable candidate: keep the original value instead of storing NaN
                    test._valTest = original_value;
                    return(false);
                }

                test._valTest = best_split;
                return(true);
            }

            //unsupported test type
            return(false);
        }

        /// <summary>
        /// Runs the test points through the node's tree with the test as currently configured
        /// and returns the node's Gini impurity minus the count-weighted impurity of its
        /// sub-nodes. The result is NaN when the sub-nodes hold no counts.
        /// </summary>
        private static double ComputeGiniGainForCurrentTest(YesNoMissingTreeNode node, GeneticAlgorithmManager ga_mgr)
        {
            var results = new GeneticAlgorithmRunResults(ga_mgr);

            node._tree.ProcessDataThroughTree(ga_mgr.dataPointMgr, results, ga_mgr.dataPointMgr._pointsToTest);

            var gini_d = node.matrix.GiniImpuritySqrt;

            double gini_split = 0.0;
            int    count      = 0;

            foreach (var sub_node in node._subNodes)
            {
                gini_split += sub_node.matrix._count * sub_node.matrix.GiniImpuritySqrt;
                count      += sub_node.matrix._count;
            }

            //floating-point division: a zero count yields NaN rather than throwing
            gini_split /= count;

            return(gini_d - gini_split);
        }