Exemplo n.º 1
0
        /// <summary>
        /// Builds a new <see cref="GeneratedDataColumn"/> with a random scaling factor, a randomly
        /// chosen formula and a randomly chosen numeric base column, then populates its values.
        /// </summary>
        /// <param name="ga_mgr">
        /// Supplies the random source (<c>rando</c>) and the data point manager whose columns are sampled.
        /// </param>
        /// <returns>The newly created column, with its values already generated.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the data point manager holds no column of type NUMBER.
        /// </exception>
        public static GeneratedDataColumn CreateNewRandom(GeneticAlgorithmManager ga_mgr)
        {
            var availableColumns = ga_mgr.dataPointMgr._columns;

            // The base column is chosen by rejection sampling below; without at least one
            // NUMBER column that loop could never terminate, so fail fast instead.
            bool hasNumericColumn = false;
            for (int i = 0; i < availableColumns.Count; i++)
            {
                if (availableColumns[i]._type == DataValueTypes.NUMBER)
                {
                    hasNumericColumn = true;
                    break;
                }
            }

            if (!hasNumericColumn)
            {
                throw new InvalidOperationException(
                          "Cannot create a generated column: the data set contains no NUMBER column to use as a base.");
            }

            var column = new GeneratedDataColumn();

            //scaling factor is drawn uniformly from [-10, 10)
            column._scaling = ga_mgr.rando.NextDouble() * 20 - 10.0;

            //the list is rebuilt on every call so the weights can change between generations if desired;
            //currently every formula option carries the same weight
            var operations = new List <Tuple <GeneratedDataColumn.FormulaOptions, double> >
            {
                Tuple.Create(GeneratedDataColumn.FormulaOptions.INV, 10.0),
                Tuple.Create(GeneratedDataColumn.FormulaOptions.LN, 10.0),
                Tuple.Create(GeneratedDataColumn.FormulaOptions.SQR, 10.0),
                Tuple.Create(GeneratedDataColumn.FormulaOptions.SQRT, 10.0),
                Tuple.Create(GeneratedDataColumn.FormulaOptions.TANH, 10.0),
                Tuple.Create(GeneratedDataColumn.FormulaOptions.NONE, 10.0),
            };
            var operation_picker = WeightedSelector.Create(operations);

            column._formula = operation_picker.PickRandom(ga_mgr.rando);

            //keep drawing random columns until a NUMBER column comes up
            DataColumn baseColumn;

            do
            {
                int index = ga_mgr.rando.Next(availableColumns.Count);
                baseColumn = availableColumns[index];
            }while (baseColumn._type != DataValueTypes.NUMBER);

            //NOTE(review): assumes every NUMBER column is a DoubleDataColumn; if not, _baseColumn ends up null - confirm
            column._baseColumn = baseColumn as DoubleDataColumn;
            column._type       = DataValueTypes.NUMBER;

            column.CreateValues(ga_mgr.dataPointMgr);

            return(column);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Genetic operation: picks two random trees from the population, copies them and swaps one
        /// randomly chosen <see cref="YesNoMissingTreeNode"/> between the copies.
        /// </summary>
        /// <param name="ga_mgr">Supplies the random source used for every pick.</param>
        /// <param name="treesInPopulation">Population to draw the two parent trees from.</param>
        /// <returns>
        /// Up to two new trees (the modified copies, each tagged with source "swap"). Nothing is
        /// yielded when the population is empty, when either picked tree has a terminal root, or
        /// when no node could be picked from one of the trees.
        /// </returns>
        public static IEnumerable <Tree> SwapNodesBetweenTrees(GeneticAlgorithmManager ga_mgr, List <Tree> treesInPopulation)
        {
            if (treesInPopulation.Count == 0)
            {
                //nothing to pick from
                yield break;
            }

            Random rando = ga_mgr.rando;

            //the two picks are independent, so both may land on the same tree
            Tree tree1 = treesInPopulation[rando.Next(treesInPopulation.Count)];
            Tree tree2 = treesInPopulation[rando.Next(treesInPopulation.Count)];

            if (tree1._root.IsTerminal || tree2._root.IsTerminal)
            {
                //terminal node for a root means no useful swapping to be done
                yield break;
            }

            //structural locations are how a node in the original is matched to its twin in the copy
            tree1.SetStructuralLocationsForNodes();
            tree2.SetStructuralLocationsForNodes();

            //every candidate node gets the same weight
            //TODO look into a better way to pick nodes
            //TODO make it easier to select a node with equal weight
            var tree1_node_picker = new WeightedSelector <YesNoMissingTreeNode>(
                tree1.GetNodesOfType <YesNoMissingTreeNode>().Select(c => Tuple.Create(c, 1.0)));

            var tree2_node_picker = new WeightedSelector <YesNoMissingTreeNode>(
                tree2.GetNodesOfType <YesNoMissingTreeNode>().Select(c => Tuple.Create(c, 1.0)));

            var node1_picked = tree1_node_picker.PickRandom(rando);
            var node2_picked = tree2_node_picker.PickRandom(rando);

            if (node1_picked == null || node2_picked == null)
            {
                //trap exists for those trees where there are no nodes to pick
                yield break;
            }

            //copy only once we know a swap will happen, so the early exits above cost no tree copies
            Tree tree1_copy = tree1.Copy();
            Tree tree2_copy = tree2.Copy();

            tree1_copy._source = "swap";
            tree2_copy._source = "swap";

            TreeNode node1 = tree1_copy.GetNodeAtStructualLocation(node1_picked._structuralLocation);
            TreeNode node2 = tree2_copy.GetNodeAtStructualLocation(node2_picked._structuralLocation);

            TreeNode.SwapNodesInTrees(node1, node2);

            //stick both trees into the next gen, skipping any that ended up with no nodes
            if (tree1_copy._nodes.Count > 0)
            {
                yield return(tree1_copy);
            }
            if (tree2_copy._nodes.Count > 0)
            {
                yield return(tree2_copy);
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Genetic operation: copies a random tree, picks one of its decision nodes whose test is a
        /// <see cref="LessThanEqualTreeTest"/> and tries to optimize that test via <c>OptimizeTest</c>.
        /// </summary>
        /// <param name="ga_mgr">Supplies the random source and is passed on to <c>OptimizeTest</c>.</param>
        /// <param name="treesInPopulation">Population to draw the tree from.</param>
        /// <returns>
        /// The modified copy (tagged with source "optimize value") when <c>OptimizeTest</c> reports
        /// success; otherwise nothing. Nothing is yielded either when the population is empty or the
        /// tree has no eligible node.
        /// </returns>
        public static IEnumerable <Tree> OptimizeSplitForNode(GeneticAlgorithmManager ga_mgr, List <Tree> treesInPopulation)
        {
            if (treesInPopulation.Count == 0)
            {
                //nothing to pick from
                yield break;
            }

            Random rando = ga_mgr.rando;

            //pick a random tree and work on a copy so the original stays untouched
            Tree tree1      = treesInPopulation[rando.Next(treesInPopulation.Count)];
            Tree tree1_copy = tree1.Copy();

            //materialized once: the candidates are both tested for emptiness and handed to the selector
            var nodes_to_choose_from = tree1_copy.GetNodesOfType <YesNoMissingTreeNode>()
                                       .Where(c => c.Test is LessThanEqualTreeTest)
                                       .ToList();

            if (nodes_to_choose_from.Count == 0)
            {
                //no node with a less-than-or-equal test in this tree
                yield break;
            }

            //every candidate gets the same weight
            var node_picker = new WeightedSelector <YesNoMissingTreeNode>(
                nodes_to_choose_from.Select(c => Tuple.Create(c, 1.0))
                );

            YesNoMissingTreeNode node1_copy = node_picker.PickRandom(rando);

            if (node1_copy == null)
            {
                yield break;
            }

            //only hand the tree back when the optimization step reports success
            if (OptimizeTest(node1_copy, ga_mgr))
            {
                tree1_copy._source = "optimize value";

                yield return(tree1_copy);
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Genetic operation: picks a classification (leaf) node from a random tree, weighted by its
        /// traverse count, and replaces it in a copy of the tree with a new random decision node that
        /// has three classification children (true / false / missing). The new node's test is then
        /// passed to <c>OptimizeTest</c>.
        /// </summary>
        /// <param name="ga_mgr">Supplies the random source and is passed on to node creation and <c>OptimizeTest</c>.</param>
        /// <param name="treesInPopulation">Population to draw the tree from.</param>
        /// <returns>
        /// The modified copy, tagged with source "node split" (plus " w/ op" when <c>OptimizeTest</c>
        /// reports success). Nothing is yielded when the population is empty, when no node could be
        /// picked, when the picked node has no matrix, or when its matrix has fewer than two rows.
        /// </returns>
        public static IEnumerable <Tree> SplitNodeAndOptimizeTests(GeneticAlgorithmManager ga_mgr, List <Tree> treesInPopulation)
        {
            if (treesInPopulation.Count == 0)
            {
                //nothing to pick from
                yield break;
            }

            //grab a tree
            Tree tree1 = treesInPopulation[ga_mgr.rando.Next(treesInPopulation.Count)];

            //uses the traversal count for selecting
            var node_picker = new WeightedSelector <ClassificationTreeNode>(
                tree1.GetNodesOfType <ClassificationTreeNode>().Select(c => Tuple.Create(c, (double)c._traverseCount))
                );

            ClassificationTreeNode node1 = node_picker.PickRandom(ga_mgr.rando);

            if (node1 == null)
            {
                //no classification node could be picked from this tree
                yield break;
            }

            tree1.SetStructuralLocationsForNodes();

            if (node1.matrix == null)
            {
                //without a matrix there are no rows to base a split on
                yield break;
            }

            var matrix_rows = node1.matrix.GetRowsOrderedByCount().ToList();

            //trap is here in case there are fewer than 2 "top" rows to split on
            if (matrix_rows.Count < 2)
            {
                yield break;
            }

            //TODO improve this structural business to be cleaner and more obvious if it belongs to the Node or Tree
            Tree     tree1_copy = tree1.Copy();
            TreeNode node1_copy = tree1_copy.GetNodeAtStructualLocation(node1._structuralLocation);

            //create a random test for the current node (change from classification to decision)
            //TODO create a proper factory for this code
            var node1_decision = new YesNoMissingTreeNode();

            node1_decision.CreateRandom(ga_mgr);
            node1_decision._parent = node1_copy._parent;

            //NOTE(review): only the "true" branch keeps the original classification; the other two are
            //hard-coded to 0 rather than derived from matrix_rows - confirm this is intended
            var item1 = node1.Classification;
            var item2 = 0;
            var item3 = 0;

            //create the three classification children
            var node1a_class = new ClassificationTreeNode();
            node1a_class.Classification = item1;
            node1a_class._parent        = node1_decision;

            var node1b_class = new ClassificationTreeNode();
            node1b_class.Classification = item2;
            node1b_class._parent        = node1_decision;

            var node1c_class = new ClassificationTreeNode();
            node1c_class.Classification = item3;
            node1c_class._parent        = node1_decision;

            node1_decision._trueNode    = node1a_class;
            node1_decision._falseNode   = node1b_class;
            node1_decision._missingNode = node1c_class;

            //swap the old leaf out for the new decision subtree
            //TODO create a "replace node" operation to standardize this code
            tree1_copy.RemoveNodeWithChildren(node1_copy);

            node1_copy.UpdateParentReference(node1_decision);
            tree1_copy.AddNodeWithChildren(node1_decision);

            //try to optimize the node
            bool opTest = OptimizeTest(node1_decision, ga_mgr);

            tree1_copy._source = "node split";

            if (opTest)
            {
                tree1_copy._source += " w/ op";
            }

            //return the new tree with that change
            yield return(tree1_copy);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Runs the genetic algorithm for <c>_gaOptions.generations</c> generations: each generation
        /// breeds new trees with randomly chosen genetic operations, scores the combined population,
        /// logs a summary and keeps only the best distinct trees.
        /// </summary>
        /// <param name="treesInPopulation">Starting population; it is scored once before the first generation.</param>
        /// <returns>The population that survives the final generation, best metric first.</returns>
        public List <Tree> ProcessTheNextGeneration(List <Tree> treesInPopulation)
        {
            //score the starting population before any breeding happens
            treesInPopulation = ScoreTreesAndReturnKept(treesInPopulation, 0);

            for (int gen = 0; gen < _gaOptions.generations; gen++)
            {
                Logger.WriteLine("generation: " + gen);

                //rebuilt every generation so the weights may change between generations if desired
                var weightedOperations = new List <Tuple <GeneticOperations.GeneticOperation, double> >
                {
                    Tuple.Create((GeneticOperations.GeneticOperation)GeneticOperations.SwapNodesBetweenTrees, _gaOptions.Prob_ops_swap),
                    Tuple.Create((GeneticOperations.GeneticOperation)GeneticOperations.SplitNodeAndOptimizeTests, _gaOptions.prob_node_split),
                    Tuple.Create((GeneticOperations.GeneticOperation)GeneticOperations.CreateRandomTree, _gaOptions.Prob_ops_new_tree),
                    Tuple.Create((GeneticOperations.GeneticOperation)GeneticOperations.OptimizeSplitForNode, 10.0),
                    //Tuple.Create((GeneticOperations.GeneticOperation)GeneticOperations.OptimizeClassesForTree, 10.0),
                    Tuple.Create((GeneticOperations.GeneticOperation)GeneticOperations.DeleteNodeFromTree, _gaOptions.prob_ops_delete),
                    //Tuple.Create((GeneticOperations.GeneticOperation)GeneticOperations.SuperSplit, 5.0),
                };

                var selector = new WeightedSelector <GeneticOperations.GeneticOperation>(weightedOperations);
                var offspring = new List <Tree>();

                //one randomly chosen operation per population slot, run in parallel
                //NOTE(review): rando is shared by all parallel iterations; System.Random is not
                //documented as thread-safe - confirm whether access needs to be synchronized
                Parallel.For(0, _gaOptions.PopulationSize, slot =>
                {
                    var chosenOperation = selector.PickRandom(rando);
                    var produced        = chosenOperation(this, treesInPopulation).ToList();

                    //offspring is written from several iterations at once, so appends are serialized
                    lock (offspring)
                    {
                        offspring.AddRange(produced);
                    }
                });

                treesInPopulation.AddRange(offspring);
                treesInPopulation = ScoreTreesAndReturnKept(treesInPopulation, gen);

                //log how much of the population each source accounts for
                foreach (var sourceGroup in treesInPopulation.GroupBy(c => c._source))
                {
                    int groupSize = sourceGroup.Count();
                    double share  = 1.0 * groupSize / treesInPopulation.Count;

                    Logger.WriteLine(string.Format("{0} has {1} = {2:0.000}",
                                                   sourceGroup.Key, groupSize,
                                                   share));
                }

                //thin down the herd: best distinct trees first, capped at the configured fraction of the population size
                int keepCount = (int)(_gaOptions.populationSize * _gaOptions.prob_population_to_keep);

                treesInPopulation = treesInPopulation.Distinct()
                                    .OrderByDescending(c => c._prevResults.MetricResult)
                                    .Take(keepCount)
                                    .ToList();

                //one summary line for each of the ten best trees
                Logger.WriteLine("");
                foreach (var candidate in treesInPopulation.Take(10))
                {
                    var results = candidate._prevResults;

                    Logger.WriteLine("{0:0.0000} ({2:0.0000}, {3}, {1})",
                                     results.MetricResult,
                                     candidate._source, results.AverageLoss,
                                     results.tree_nodeCount);
                }

                //full dump of the single best tree, if any survived
                if (treesInPopulation.Count > 0)
                {
                    var best = treesInPopulation[0];

                    Logger.WriteLine("");
                    Logger.WriteLine(best);
                    Logger.WriteLine(best._prevResults);
                }

                OnProgressUpdated(100 * gen / Math.Max(_gaOptions.generations, 1));
            }

            OnProgressUpdated(100);

            return(treesInPopulation);
        }