示例#1
0
        /// <summary>
        /// Builds a new <see cref="GeneratedDataColumn"/> with a random scaling factor, a randomly
        /// picked formula and a randomly picked NUMBER base column, then populates its values.
        /// </summary>
        /// <param name="ga_mgr">Supplies the random source (<c>rando</c>) and the data point manager whose columns are sampled.</param>
        /// <returns>The newly created column.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the data point manager holds no column of type NUMBER.
        /// </exception>
        public static GeneratedDataColumn CreateNewRandom(GeneticAlgorithmManager ga_mgr)
        {
            var columns = ga_mgr.dataPointMgr._columns;

            // The base column is chosen by rejection sampling below; without at least one
            // NUMBER column that loop could never terminate, so fail fast instead.
            bool hasNumericColumn = false;
            for (int i = 0; i < columns.Count; i++)
            {
                if (columns[i]._type == DataValueTypes.NUMBER)
                {
                    hasNumericColumn = true;
                    break;
                }
            }

            if (!hasNumericColumn)
            {
                throw new InvalidOperationException(
                    "Cannot create a generated column: no column of type NUMBER is available.");
            }

            var column = new GeneratedDataColumn();

            // scaling is drawn from [-10, 10)
            column._scaling = ga_mgr.rando.NextDouble() * 20 - 10.0;

            // The weights are rebuilt on every call so they could be changed between generations if desired
            var operations = new List <Tuple <GeneratedDataColumn.FormulaOptions, double> >
            {
                Tuple.Create(GeneratedDataColumn.FormulaOptions.INV, 10.0),
                Tuple.Create(GeneratedDataColumn.FormulaOptions.LN, 10.0),
                Tuple.Create(GeneratedDataColumn.FormulaOptions.SQR, 10.0),
                Tuple.Create(GeneratedDataColumn.FormulaOptions.SQRT, 10.0),
                Tuple.Create(GeneratedDataColumn.FormulaOptions.TANH, 10.0),
                Tuple.Create(GeneratedDataColumn.FormulaOptions.NONE, 10.0)
            };
            var operation_picker = WeightedSelector.Create(operations);

            column._formula = operation_picker.PickRandom(ga_mgr.rando);

            // Keep drawing random columns until a NUMBER column comes up
            DataColumn baseColumn;
            do
            {
                int index = ga_mgr.rando.Next(columns.Count);
                baseColumn = columns[index];
            }
            while (baseColumn._type != DataValueTypes.NUMBER);

            column._baseColumn = baseColumn as DoubleDataColumn;
            column._type       = DataValueTypes.NUMBER;

            column.CreateValues(ga_mgr.dataPointMgr);

            return column;
        }
示例#2
0
        /// <summary>
        /// Iterator that produces a single newly generated random tree whose source is labelled "new tree".
        /// </summary>
        /// <param name="ga_mgr">Passed through to the single-argument <c>CreateRandomTree</c> overload.</param>
        /// <param name="treesInPopulation">Not used by this method.</param>
        /// <returns>A lazily evaluated sequence containing exactly one tree.</returns>
        public static IEnumerable <Tree> CreateRandomTree(GeneticAlgorithmManager ga_mgr, List <Tree> treesInPopulation)
        {
            Tree freshTree = CreateRandomTree(ga_mgr);
            freshTree._source = "new tree";

            yield return freshTree;
        }
示例#3
0
        /// <summary>
        /// Creates a randomly initialised node (either a classification node or a yes/no/missing
        /// decision node), gives it a fresh confusion matrix and registers it with <paramref name="tree"/>.
        /// </summary>
        /// <param name="ga_mgr">Supplies the random source, the GA options and the class count.</param>
        /// <param name="ShouldForceTerminal">When true a classification node is always produced.</param>
        /// <param name="tree">Tree the new node is added to (without children).</param>
        /// <returns>The newly created node.</returns>
        public static TreeNode TreeNodeFactory(GeneticAlgorithmManager ga_mgr, bool ShouldForceTerminal, Tree tree)
        {
            // The random draw is made unconditionally, before the other conditions are looked at.
            // NOTE(review): a roll ABOVE prob_node_terminal selects a terminal node here, which looks
            // inverted relative to the option's name - confirm the intended meaning.
            double roll           = ga_mgr.rando.NextDouble();
            bool   rolledTerminal = roll > ga_mgr._gaOptions.prob_node_terminal;

            //TODO: consider changing this or using some other scheme to prevent runaway initial trees.
            bool makeDecisionNode = !(rolledTerminal ||
                                      ShouldForceTerminal ||
                                      tree._nodes.Count > ga_mgr._gaOptions.max_node_count_for_new_tree);

            TreeNode created;

            if (makeDecisionNode)
            {
                var decision = new YesNoMissingTreeNode();
                decision.CreateRandom(ga_mgr);
                created = decision;
            }
            else
            {
                var leaf = new ClassificationTreeNode();
                leaf.CreateRandom(ga_mgr);
                created = leaf;
            }

            //TODO there might be a better place for this
            created.matrix = new ConfusionMatrix(ga_mgr.dataPointMgr.classes.Length);

            tree.AddNodeWithoutChildren(created);

            return created;
        }
示例#4
0
        /// <summary>
        /// Mutation operator: copies a random tree, removes a random node (with its children) from
        /// the copy and puts a forced-terminal node with classification -1 in its place.
        /// </summary>
        /// <param name="ga_mgr">Supplies the random source and is passed to the node factory.</param>
        /// <param name="treesInPopulation">Population the source tree is drawn from.</param>
        /// <returns>
        /// The mutated copy labelled "delete", or nothing when the removed node had no parent
        /// or the copy has no nodes left after the removal.
        /// </returns>
        public static IEnumerable <Tree> DeleteNodeFromTree(GeneticAlgorithmManager ga_mgr, List <Tree> treesInPopulation)
        {
            Random rng = ga_mgr.rando;

            Tree     mutated = treesInPopulation[rng.Next(treesInPopulation.Count)].Copy();
            TreeNode victim  = mutated._nodes[rng.Next(mutated._nodes.Count)];

            // forced-terminal request; the factory also registers the new node with the copy
            var placeholder = TreeNode.TreeNodeFactory(ga_mgr, true, mutated) as ClassificationTreeNode;

            // the placeholder takes over the victim's position in the tree
            placeholder.Classification = -1;
            placeholder._parent        = victim._parent;

            mutated.RemoveNodeWithChildren(victim);

            bool canAttach = placeholder._parent != null && mutated._nodes.Count > 0;
            if (!canAttach)
            {
                yield break;
            }

            placeholder._parent.UpdateChildReference(victim, placeholder);
            mutated._source = "delete";

            yield return mutated;
        }
示例#5
0
        /// <summary>
        /// Replaces the true and false children of this node with nodes freshly produced by
        /// <c>TreeNode.TreeNodeFactory</c> and makes this node their parent.
        /// </summary>
        /// <param name="ga_mgr">Passed through to the node factory.</param>
        public override void FillNodeWithRandomChildrenIfNeeded(GeneticAlgorithmManager ga_mgr)
        {
            // the true branch is completed before the false branch is created
            TreeNode yesChild = TreeNode.TreeNodeFactory(ga_mgr, false, this._tree);
            this._trueNode    = yesChild;
            yesChild._parent  = this;

            TreeNode noChild = TreeNode.TreeNodeFactory(ga_mgr, false, this._tree);
            this._falseNode  = noChild;
            noChild._parent  = this;
        }
        /// <summary>
        /// Nudges <c>valTest</c> by a random fraction of its current value. The fraction is a
        /// random value in [-1, 1) multiplied by the <c>test_value_change</c> option.
        /// </summary>
        /// <param name="mgr">Supplies the random source and the GA options.</param>
        /// <returns>Always true.</returns>
        public override bool ChangeTestValue(GeneticAlgorithmManager mgr)
        {
            //try a simple percent test first
            double signedUnit = mgr.rando.NextDouble() * 2 - 1.0;
            double change     = signedUnit * mgr._gaOptions.test_value_change;

            // relative adjustment: the step is proportional to the current value
            this.valTest += this.valTest * change;

            return true;
        }
示例#7
0
        /// <summary>
        /// Crossover operator: picks two random trees, chooses one yes/no/missing node in each and
        /// swaps the corresponding nodes between copies of the two trees.
        /// </summary>
        /// <param name="ga_mgr">Supplies the random source.</param>
        /// <param name="treesInPopulation">Population the two parent trees are drawn from.</param>
        /// <returns>
        /// The copies (labelled "swap") that still contain nodes after the swap; nothing when either
        /// root is terminal or a node could not be picked in one of the trees.
        /// </returns>
        public static IEnumerable <Tree> SwapNodesBetweenTrees(GeneticAlgorithmManager ga_mgr, List <Tree> treesInPopulation)
        {
            Random rng = ga_mgr.rando;

            Tree firstParent  = treesInPopulation[rng.Next(treesInPopulation.Count)];
            Tree secondParent = treesInPopulation[rng.Next(treesInPopulation.Count)];

            //terminal node for a root means no useful swapping to be done
            bool eitherRootIsTerminal = firstParent._root.IsTerminal || secondParent._root.IsTerminal;
            if (eitherRootIsTerminal)
            {
                yield break;
            }

            // structural locations are assigned on the parents before copying; they are used
            // further down to look up the matching nodes in the copies
            firstParent.SetStructuralLocationsForNodes();
            secondParent.SetStructuralLocationsForNodes();

            Tree firstChild  = firstParent.Copy();
            Tree secondChild = secondParent.Copy();

            firstChild._source  = "swap";
            secondChild._source = "swap";

            //TODO look into a better way to pick nodes
            //TODO make it easier to select a node with equal weight
            var firstPicker = new WeightedSelector <YesNoMissingTreeNode>(
                firstParent.GetNodesOfType <YesNoMissingTreeNode>().Select(n => Tuple.Create(n, 1.0)));
            var secondPicker = new WeightedSelector <YesNoMissingTreeNode>(
                secondParent.GetNodesOfType <YesNoMissingTreeNode>().Select(n => Tuple.Create(n, 1.0)));

            var firstPick  = firstPicker.PickRandom(ga_mgr.rando);
            var secondPick = secondPicker.PickRandom(ga_mgr.rando);

            // trap for trees in which no yes/no/missing node could be picked
            if (firstPick == null || secondPick == null)
            {
                yield break;
            }

            TreeNode firstNode  = firstChild.GetNodeAtStructualLocation(firstPick._structuralLocation);
            TreeNode secondNode = secondChild.GetNodeAtStructualLocation(secondPick._structuralLocation);

            TreeNode.SwapNodesInTrees(firstNode, secondNode);

            //stick both trees into the next gen, skipping any that ended up without nodes
            foreach (Tree child in new[] { firstChild, secondChild })
            {
                if (child._nodes.Count > 0)
                {
                    yield return child;
                }
            }
        }
示例#8
0
        /// <summary>
        /// Replaces this test's category set with a random subset (at least two entries) of the
        /// categories in the codebook of the column at index <c>_param</c>.
        /// </summary>
        /// <param name="mgr">Supplies the random source and the data point manager.</param>
        /// <returns>
        /// True when a new subset was chosen. False, leaving the current values untouched, when the
        /// column is not a <c>CategoryDataColumn</c> or its codebook has fewer than two categories.
        /// </returns>
        public override bool ChangeTestValue(GeneticAlgorithmManager mgr)
        {
            //TODO this should only add/remove a single (or up to X) categories
            var col = mgr.dataPointMgr._columns[this._param] as CategoryDataColumn;

            if (col == null)
            {
                return false;
            }

            // read the categories once; they are needed for both the count and the selection
            var categories = col._codebook.GetCategories().ToList();

            // Random.Next rejects a negative upper bound, so fewer than two categories cannot be handled
            if (categories.Count < 2)
            {
                return false;
            }

            // keeps between 2 and (count - 1) categories; exactly 2 when only 2 exist
            int categories_to_keep = 2 + mgr.rando.Next(categories.Count - 2);

            // random ordering followed by Take yields a random subset
            var selected = categories.OrderBy(c => mgr.rando.NextDouble()).Take(categories_to_keep).ToList();

            // only discard the old values once the replacement is available
            this._values.Clear();
            this._values.AddRange(selected);

            return true;
        }
示例#9
0
        /// <summary>
        /// Gives this node a fresh confusion matrix and zeroes its traverse count, optionally
        /// doing the same for every node below it.
        /// </summary>
        /// <param name="ga_mgr">Supplies the class count used to size the confusion matrix.</param>
        /// <param name="shouldApplyRecursively">When true the reset is propagated through <c>_subNodes</c>.</param>
        public void ResetTrackingDetails(GeneticAlgorithmManager ga_mgr, bool shouldApplyRecursively)
        {
            this._traverseCount = 0;
            this.matrix         = new ConfusionMatrix(ga_mgr.dataPointMgr.classes.Length);

            if (!shouldApplyRecursively)
            {
                return;
            }

            foreach (var child in _subNodes)
            {
                child.ResetTrackingDetails(ga_mgr, true);
            }
        }
示例#10
0
        /// <summary>
        /// Mutation operator: copies a random tree and applies a random value change to one
        /// randomly chosen node of the copy.
        /// </summary>
        /// <param name="ga_mgr">Supplies the random source and is passed to the node's change routine.</param>
        /// <param name="treesInPopulation">Population the source tree is drawn from.</param>
        /// <returns>A sequence containing the mutated copy.</returns>
        public static IEnumerable <Tree> ChangeValueForNode(GeneticAlgorithmManager ga_mgr, List <Tree> treesInPopulation)
        {
            Random rng = ga_mgr.rando;

            Tree mutated = treesInPopulation[rng.Next(treesInPopulation.Count)].Copy();

            int      nodeIndex = rng.Next(mutated._nodes.Count);
            TreeNode target    = mutated._nodes[nodeIndex];

            //node parameter/value change
            target.ApplyRandomChangeToNodeValue(ga_mgr);

            yield return mutated;
        }
示例#11
0
        /// <summary>
        /// Mutation operator: for a random tree, relabels (on a copy) every classification node whose
        /// class differs from the row reported by its confusion matrix's <c>GetRowWithMaxCount</c>.
        /// </summary>
        /// <param name="ga_mgr">Supplies the random source.</param>
        /// <param name="treesInPopulation">Population the source tree is drawn from.</param>
        /// <returns>
        /// The relabelled copy (source "optimize tree classes") when at least one node changed;
        /// otherwise nothing.
        /// </returns>
        public static IEnumerable <Tree> OptimizeClassesForTree(GeneticAlgorithmManager ga_mgr, List <Tree> treesInPopulation)
        {
            Random rng = ga_mgr.rando;

            Tree original = treesInPopulation[rng.Next(treesInPopulation.Count)];
            original.SetStructuralLocationsForNodes();

            Tree candidate = original.Copy();

            // classes are inspected on the original tree; changes are written to the copy
            var leaves = original.GetNodesOfType <ClassificationTreeNode>();

            if (!leaves.Any())
            {
                yield break;
            }

            bool anyRelabelled = false;

            foreach (var leaf in leaves)
            {
                double dominantClass = leaf.matrix.GetRowWithMaxCount();

                if (leaf.Classification == dominantClass)
                {
                    continue;
                }

                // locate the same structural position in the copy
                var twin = candidate.GetNodeAtStructualLocation(leaf._structuralLocation) as ClassificationTreeNode;

                if (twin == null)
                {
                    continue;
                }

                twin.Classification = dominantClass;
                anyRelabelled       = true;
            }

            if (!anyRelabelled)
            {
                yield break;
            }

            candidate._source = "optimize tree classes";
            yield return candidate;
        }
示例#12
0
 /// <summary>
 /// Randomly mutates this node's test: when the random draw is below 0.8 the existing test's
 /// value is changed, otherwise the test is replaced by a new one from <c>TreeTest.TreeTestFactory</c>.
 /// The owning tree's <c>_source</c> is set to describe which of the two happened.
 /// </summary>
 /// <param name="ga_mgr">Supplies the random source and is passed on to the test.</param>
 public override void ApplyRandomChangeToNodeValue(GeneticAlgorithmManager ga_mgr)
 {
     //TODO add some logic here to handle the different test types... maybe pass this into the test next
     bool tweakExistingTest = ga_mgr.rando.NextDouble() < 0.8;

     if (!tweakExistingTest)
     {
         this.Test          = TreeTest.TreeTestFactory(ga_mgr);
         this._tree._source = "new test";
         return;
     }

     // the return value of ChangeTestValue is not used; the label is set regardless
     this.Test.ChangeTestValue(ga_mgr);
     this._tree._source = "new test value";
 }
示例#13
0
        /// <summary>
        /// Builds a random tree: starts with a placeholder classification root, splits it, and then
        /// keeps splitting queued child nodes at random until the queue is empty or the tree reaches
        /// the <c>Max_node_count_for_new_tree</c> option.
        /// </summary>
        /// <param name="ga_mgr">Supplies the random source and the GA options.</param>
        /// <returns>The newly built tree.</returns>
        public static Tree CreateRandomTree(GeneticAlgorithmManager ga_mgr)
        {
            Tree result = new Tree();

            // placeholder root; it is replaced by a decision node right away
            ClassificationTreeNode placeholderRoot = new ClassificationTreeNode();
            result.AddRootToTree(placeholderRoot);

            var decisionRoot = SplitClassificationNode(placeholderRoot, ga_mgr);

            // breadth-first work list of nodes that may still be split
            var pending = new Queue <ClassificationTreeNode>();

            foreach (var child in decisionRoot._subNodes)
            {
                pending.Enqueue(child as ClassificationTreeNode);
            }

            while (pending.Count > 0 && result._nodes.Count < ga_mgr._gaOptions.Max_node_count_for_new_tree)
            {
                ClassificationTreeNode current = pending.Dequeue();

                // a node is split only when the draw exceeds Prob_node_terminal
                bool shouldSplit = ga_mgr.rando.NextDouble() > ga_mgr._gaOptions.Prob_node_terminal;

                if (shouldSplit)
                {
                    var replacement = SplitClassificationNode(current, ga_mgr);

                    foreach (var child in replacement._subNodes)
                    {
                        pending.Enqueue(child as ClassificationTreeNode);
                    }
                }
            }

            return result;
        }
示例#14
0
        /// <summary>
        /// Mutation operator: copies a random tree, picks (with equal weight) one of its
        /// yes/no/missing nodes whose test is a <c>LessThanEqualTreeTest</c> and runs
        /// <c>OptimizeTest</c> on it.
        /// </summary>
        /// <param name="ga_mgr">Supplies the random source and is passed to <c>OptimizeTest</c>.</param>
        /// <param name="treesInPopulation">Population the source tree is drawn from.</param>
        /// <returns>
        /// The copy labelled "optimize value" when <c>OptimizeTest</c> reports success; nothing when
        /// there is no eligible node, no node was picked, or the optimisation reports failure.
        /// </returns>
        public static IEnumerable <Tree> OptimizeSplitForNode(GeneticAlgorithmManager ga_mgr, List <Tree> treesInPopulation)
        {
            Random rng = ga_mgr.rando;

            //pick a random tree and work on a copy of it
            Tree candidate = treesInPopulation[rng.Next(treesInPopulation.Count)].Copy();

            var thresholdNodes = candidate.GetNodesOfType <YesNoMissingTreeNode>()
                                 .Where(n => n.Test is LessThanEqualTreeTest);

            if (!thresholdNodes.Any())
            {
                yield break;
            }

            var picker = new WeightedSelector <YesNoMissingTreeNode>(
                thresholdNodes.Select(n => Tuple.Create(n, 1.0)));

            YesNoMissingTreeNode chosen = picker.PickRandom(rng);

            if (chosen == null)
            {
                yield break;
            }

            if (!OptimizeTest(chosen, ga_mgr))
            {
                yield break;
            }

            candidate._source = "optimize value";
            yield return candidate;
        }
示例#15
0
        /// <summary>
        /// Mutation operator: picks a classification node of a random tree (weighted by traverse
        /// count) and, on a copy of the tree, replaces it with a new yes/no/missing decision node
        /// that has three classification children; the new node's test is then passed to
        /// <c>OptimizeTest</c>.
        /// </summary>
        /// <param name="ga_mgr">Supplies the random source and is passed to node creation and optimisation.</param>
        /// <param name="treesInPopulation">Population the source tree is drawn from.</param>
        /// <returns>
        /// The modified copy labelled "node split" (or "node split w/ op" when the optimisation
        /// reports success). Nothing is returned when no node could be picked, the picked node has
        /// no confusion matrix, the matrix reports fewer than two ordered rows, or the node cannot
        /// be located in the copy.
        /// </returns>
        public static IEnumerable <Tree> SplitNodeAndOptimizeTests(GeneticAlgorithmManager ga_mgr, List <Tree> treesInPopulation)
        {
            //find and grab a tree
            Tree tree1 = treesInPopulation[ga_mgr.rando.Next(treesInPopulation.Count)];

            //uses the traversal count for selecting
            var node_picker = new WeightedSelector <ClassificationTreeNode>(
                tree1.GetNodesOfType <ClassificationTreeNode>().Select(c => Tuple.Create(c, (double)c._traverseCount))
                );

            ClassificationTreeNode node1 = node_picker.PickRandom(ga_mgr.rando);

            tree1.SetStructuralLocationsForNodes();

            // Other operators in this file null-check the picker's result, so a null pick is
            // treated as possible here too; a node without a matrix cannot be analysed either.
            if (node1 == null || node1.matrix == null)
            {
                yield break;
            }

            var matrix_rows = node1.matrix.GetRowsOrderedByCount().ToList();

            //trap is here in case there are fewer than 2 "top" rows to split on
            if (matrix_rows.Count < 2)
            {
                yield break;
            }

            //TODO improve this structural business to be cleaner and more obvious if it belongs to the Node or Tree
            Tree     tree1_copy = tree1.Copy();
            TreeNode node1_copy = tree1_copy.GetNodeAtStructualLocation(node1._structuralLocation);

            // without the matching node in the copy there is nothing to replace
            if (node1_copy == null)
            {
                yield break;
            }

            //TODO create a proper factory for this code
            var node1_decision = new YesNoMissingTreeNode();

            node1_decision.CreateRandom(ga_mgr);
            node1_decision._parent = node1_copy._parent;

            // the true branch keeps the picked node's class; the other two branches start at class 0
            var item1 = node1.Classification;
            var item2 = 0;
            var item3 = 0;

            //create the three classification children (true / false / missing)
            var node1a_class = new ClassificationTreeNode();
            node1a_class.Classification = item1;
            node1a_class._parent        = node1_decision;

            var node1b_class = new ClassificationTreeNode();
            node1b_class.Classification = item2;
            node1b_class._parent        = node1_decision;

            var node1c_class = new ClassificationTreeNode();
            node1c_class.Classification = item3;
            node1c_class._parent        = node1_decision;

            node1_decision._trueNode    = node1a_class;
            node1_decision._falseNode   = node1b_class;
            node1_decision._missingNode = node1c_class;

            //TODO create a "replace node" operation to standardize this code
            tree1_copy.RemoveNodeWithChildren(node1_copy);

            node1_copy.UpdateParentReference(node1_decision);
            tree1_copy.AddNodeWithChildren(node1_decision);

            //try to optimize the new decision node's test
            bool opTest = OptimizeTest(node1_decision, ga_mgr);

            tree1_copy._source = "node split";

            if (opTest)
            {
                tree1_copy._source += " w/ op";
            }

            yield return tree1_copy;
        }
示例#16
0
        /// <summary>
        /// Searches for the test value that gives the largest gain for the given decision node.
        /// Gain is the node's <c>GiniImpuritySqrt</c> minus the count-weighted average of its
        /// sub-nodes' <c>GiniImpuritySqrt</c>. Each candidate value is evaluated by writing it into
        /// the test and re-running the test points through the node's tree.
        /// </summary>
        /// <remarks>
        /// For a <c>LessThanEqualTreeTest</c> the candidates are midpoints between consecutive
        /// distinct non-missing values of the tested parameter; for an <c>EqualTreeTest</c> they are
        /// the distinct non-missing values themselves. In both cases the candidates are thinned with
        /// <c>TakeEvery</c>.
        /// </remarks>
        /// <param name="node1_copy">Decision node whose test is optimised in place.</param>
        /// <param name="ga_mgr">Supplies the data points and is used to build run results.</param>
        /// <returns>
        /// True when a best candidate was found and written to the test. False when the test type is
        /// not supported, or when no candidate produced a comparable gain; in that case the test
        /// value is restored to what it was on entry.
        /// </returns>
        public static bool OptimizeTest(YesNoMissingTreeNode node1_copy, GeneticAlgorithmManager ga_mgr)
        {
            LessThanEqualTreeTest lessThanTest = node1_copy.Test as LessThanEqualTreeTest;

            if (lessThanTest != null)
            {
                var values      = ga_mgr.dataPointMgr._pointsToTest.Select(c => c._data[lessThanTest.param]);
                var all_uniques = values.Where(c => !c._isMissing).Select(c => c._value).Distinct().OrderBy(c => c).ToArray();

                // candidate thresholds are the midpoints between neighbouring distinct values
                List <double> all_splits = new List <double>();
                for (int i = 1; i < all_uniques.Length; i++)
                {
                    all_splits.Add(0.5 * (all_uniques[i] + all_uniques[i - 1]));
                }

                var    original_value = lessThanTest.valTest;
                double best_split     = double.NaN;
                double best_purity    = double.MinValue;
                bool   found          = false;

                //TODO improve this selection for how many split points to consider
                foreach (var split in all_splits.TakeEvery(all_splits.Count / 10 + 1))
                {
                    lessThanTest.valTest = split;

                    double gini_gain = ComputeGiniGainForCurrentTest(node1_copy, ga_mgr);

                    // a NaN gain (e.g. no data reached the sub-nodes) never compares greater
                    if (gini_gain > best_purity)
                    {
                        best_split  = split;
                        best_purity = gini_gain;
                        found       = true;
                    }
                }

                if (!found)
                {
                    // never leave the test holding NaN or an arbitrary trial value
                    lessThanTest.valTest = original_value;
                    return false;
                }

                lessThanTest.valTest = best_split;
                return true;
            }

            EqualTreeTest equalTest = node1_copy.Test as EqualTreeTest;

            if (equalTest != null)
            {
                var values = ga_mgr.dataPointMgr._pointsToTest.Select(c => c._data[equalTest._param]);

                // materialised once: the values are counted and then iterated
                double[] distinct_values = values.Where(c => !c._isMissing).Select(c => c._value).Distinct().OrderBy(c => c).ToArray();

                IEnumerable <double> all_uniques = distinct_values;
                if (distinct_values.Length > 10)
                {
                    all_uniques = distinct_values.TakeEvery(distinct_values.Length / 10 + 1);
                }

                var    original_value = equalTest._valTest;
                double best_split     = double.NaN;
                double best_purity    = double.MinValue;
                bool   found          = false;

                //TODO improve this selection for how many split points to consider
                foreach (var split in all_uniques)
                {
                    equalTest._valTest = split;

                    double gini_gain = ComputeGiniGainForCurrentTest(node1_copy, ga_mgr);

                    if (gini_gain > best_purity)
                    {
                        best_split  = split;
                        best_purity = gini_gain;
                        found       = true;
                    }
                }

                if (!found)
                {
                    equalTest._valTest = original_value;
                    return false;
                }

                equalTest._valTest = best_split;
                return true;
            }

            // unsupported test type
            return false;
        }

        /// <summary>
        /// Runs the test points through the node's tree with the test as currently configured and
        /// returns the node's <c>GiniImpuritySqrt</c> minus the count-weighted average of its
        /// sub-nodes' <c>GiniImpuritySqrt</c>.
        /// </summary>
        private static double ComputeGiniGainForCurrentTest(YesNoMissingTreeNode node, GeneticAlgorithmManager ga_mgr)
        {
            var results = new GeneticAlgorithmRunResults(ga_mgr);

            node._tree.ProcessDataThroughTree(ga_mgr.dataPointMgr, results, ga_mgr.dataPointMgr._pointsToTest);

            var gini_d = node.matrix.GiniImpuritySqrt;

            double gini_split = 0.0;
            int    count      = 0;

            foreach (var subNode in node._subNodes)
            {
                gini_split += subNode.matrix._count * subNode.matrix.GiniImpuritySqrt;
                count      += subNode.matrix._count;
            }

            // count == 0 yields NaN here, which callers treat as "no usable gain"
            gini_split /= count;

            return gini_d - gini_split;
        }
示例#17
0
 /// <summary>
 /// Default implementation: does nothing. Declared virtual so derived node types can override it.
 /// </summary>
 /// <param name="ga_mgr">Unused by the default implementation.</param>
 public virtual void FillNodeWithRandomChildrenIfNeeded(GeneticAlgorithmManager ga_mgr)
 {
     // intentionally empty
 }
示例#18
0
 /// <summary>
 /// Applies a random change to this node's value; each concrete node type supplies its own implementation.
 /// </summary>
 /// <param name="ga_mgr">Genetic algorithm manager made available to the implementation.</param>
 public abstract void ApplyRandomChangeToNodeValue(GeneticAlgorithmManager ga_mgr);
示例#19
0
 /// <summary>
 /// Randomly initialises this node; each concrete node type supplies its own implementation.
 /// </summary>
 /// <param name="ga_mgr">Genetic algorithm manager made available to the implementation.</param>
 public abstract void CreateRandom(GeneticAlgorithmManager ga_mgr);
 /// <summary>
 /// Sets <c>Classification</c> to a value obtained from the data point manager's
 /// <c>GetRandomClassification</c>.
 /// </summary>
 /// <param name="ga_mgr">Supplies the data point manager and the random source.</param>
 public override void CreateRandom(GeneticAlgorithmManager ga_mgr)
 {
     var randomClass = ga_mgr.dataPointMgr.GetRandomClassification(ga_mgr.rando);

     this.Classification = randomClass;
 }
示例#21
0
        /// <summary>
        /// Replaces a classification node with a new random yes/no/missing decision node that has
        /// three classification children (true, false, missing), all with classification 0.0, and
        /// then passes the new node to <c>OptimizeTest</c>.
        /// </summary>
        /// <param name="nodeToReplace">Classification node that is swapped out.</param>
        /// <param name="ga_mgr">Passed to node creation and to <c>OptimizeTest</c>.</param>
        /// <returns>The new decision node.</returns>
        public static TreeNode SplitClassificationNode(ClassificationTreeNode nodeToReplace, GeneticAlgorithmManager ga_mgr)
        {
            var decision = new YesNoMissingTreeNode();
            decision.CreateRandom(ga_mgr);

            //TODO unhardcode this value
            const double defaultClass = 0.0;

            // one leaf per outcome of the test, each pointing back at the decision node
            var whenTrue    = new ClassificationTreeNode { Classification = defaultClass, _parent = decision };
            var whenFalse   = new ClassificationTreeNode { Classification = defaultClass, _parent = decision };
            var whenMissing = new ClassificationTreeNode { Classification = defaultClass, _parent = decision };

            decision._trueNode    = whenTrue;
            decision._falseNode   = whenFalse;
            decision._missingNode = whenMissing;

            ReplaceOneNodeWithAnother(nodeToReplace, decision);

            // the outcome of the optimisation attempt is not used here
            OptimizeTest(decision, ga_mgr);

            return decision;
        }
示例#22
0
 /// <summary>
 /// Default implementation: changes nothing.
 /// </summary>
 /// <param name="mgr">Unused by the default implementation.</param>
 /// <returns>Always false.</returns>
 public virtual bool ChangeTestValue(GeneticAlgorithmManager mgr)
 {
     const bool valueChanged = false;

     return valueChanged;
 }
 /// <summary>
 /// Re-runs <c>CreateRandom</c> on this node and labels the owning tree's source as "new class".
 /// </summary>
 /// <param name="ga_mgr">Passed through to <c>CreateRandom</c>.</param>
 public override void ApplyRandomChangeToNodeValue(GeneticAlgorithmManager ga_mgr)
 {
     // re-randomising the node is all that is done for this node type
     CreateRandom(ga_mgr);

     _tree._source = "new class";
 }
示例#24
0
 /// <summary>
 /// Convenience overload that forwards the manager's data point manager and random source
 /// to the two-argument <c>TreeTestFactory</c>.
 /// </summary>
 /// <param name="ga_mgr">Supplies <c>dataPointMgr</c> and <c>rando</c>.</param>
 /// <returns>Whatever the two-argument overload returns.</returns>
 public static TreeTest TreeTestFactory(GeneticAlgorithmManager ga_mgr)
 {
     var dataSource   = ga_mgr.dataPointMgr;
     var randomSource = ga_mgr.rando;

     return TreeTestFactory(dataSource, randomSource);
 }
示例#25
0
 /// <summary>
 /// Assigns this node a new test obtained from <c>TreeTest.TreeTestFactory</c>.
 /// </summary>
 /// <param name="ga_mgr">Passed through to the test factory.</param>
 public override void CreateRandom(GeneticAlgorithmManager ga_mgr)
 {
     var freshTest = TreeTest.TreeTestFactory(ga_mgr);

     this.Test = freshTest;
 }
 /// <summary>
 /// Stores the manager and creates a confusion matrix sized by the number of classes
 /// in the manager's data point manager.
 /// </summary>
 /// <param name="ga_mgr">Manager kept by this instance and used to size the matrix.</param>
 public GeneticAlgorithmRunResults(GeneticAlgorithmManager ga_mgr)
 {
     this.ga_mgr = ga_mgr;

     var classCount = ga_mgr.dataPointMgr.classes.Length;
     _matrix = new ConfusionMatrix(classCount);
 }
示例#27
0
 /// <summary>
 /// Stores the genetic algorithm manager and caches its data point manager.
 /// </summary>
 /// <param name="geneticAlgorithmManager">Manager whose <c>dataPointMgr</c> is cached in <c>data_mgr</c>.</param>
 public PredictionManager(GeneticAlgorithmManager geneticAlgorithmManager)
 {
     ga_mgr   = geneticAlgorithmManager;
     data_mgr = geneticAlgorithmManager.dataPointMgr;
 }