/// <summary>
/// Builds a new <see cref="GeneratedDataColumn"/> derived from a randomly chosen numeric column.
/// </summary>
/// <remarks>
/// The scaling factor is computed as <c>rando.NextDouble() * 20 - 10.0</c>, the formula is picked
/// with equal weight (10.0 each) from INV, LN, SQR, SQRT, TANH and NONE, and the base column is
/// found by drawing random indices into <c>ga_mgr.dataPointMgr._columns</c> until one whose
/// <c>_type</c> is NUMBER turns up.
/// </remarks>
/// <param name="ga_mgr">Supplies the random source and the data point manager whose columns are sampled.</param>
/// <returns>The new column, after <c>CreateValues</c> has been called on it.</returns>
/// <exception cref="InvalidOperationException">
/// No column of type NUMBER exists, so the random search below could never finish.
/// </exception>
public static GeneratedDataColumn CreateNewRandom(GeneticAlgorithmManager ga_mgr)
{
    var columns = ga_mgr.dataPointMgr._columns;

    // Fail fast: without at least one NUMBER column the sampling loop further down would
    // spin forever (or index out of range when there are no columns at all).
    bool hasNumericColumn = false;
    for (int i = 0; i < columns.Count; i++)
    {
        if (columns[i]._type == DataValueTypes.NUMBER)
        {
            hasNumericColumn = true;
            break;
        }
    }
    if (!hasNumericColumn)
    {
        throw new InvalidOperationException(
            "Cannot create a generated column: no column of type NUMBER is available.");
    }

    var column = new GeneratedDataColumn();
    column._scaling = ga_mgr.rando.NextDouble() * 20 - 10.0;

    // Built on every call so the weights can be changed between generations if desired.
    var operations = new List<Tuple<GeneratedDataColumn.FormulaOptions, double>>
    {
        Tuple.Create(GeneratedDataColumn.FormulaOptions.INV, 10.0),
        Tuple.Create(GeneratedDataColumn.FormulaOptions.LN, 10.0),
        Tuple.Create(GeneratedDataColumn.FormulaOptions.SQR, 10.0),
        Tuple.Create(GeneratedDataColumn.FormulaOptions.SQRT, 10.0),
        Tuple.Create(GeneratedDataColumn.FormulaOptions.TANH, 10.0),
        Tuple.Create(GeneratedDataColumn.FormulaOptions.NONE, 10.0),
    };
    var operation_picker = WeightedSelector.Create(operations);
    column._formula = operation_picker.PickRandom(ga_mgr.rando);

    // Keep drawing random indices until a numeric column is hit; guaranteed to end
    // because of the check at the top of the method.
    DataColumn baseColumn;
    do
    {
        int index = ga_mgr.rando.Next(columns.Count);
        baseColumn = columns[index];
    }
    while (baseColumn._type != DataValueTypes.NUMBER);

    // NOTE(review): a NUMBER column that is not a DoubleDataColumn would leave _baseColumn
    // null here; presumably every NUMBER column is a DoubleDataColumn - confirm.
    column._baseColumn = baseColumn as DoubleDataColumn;
    column._type = DataValueTypes.NUMBER;
    column.CreateValues(ga_mgr.dataPointMgr);

    return column;
}
/// <summary>
/// Crossover operator: copies two randomly chosen trees and swaps one decision node
/// (with whatever hangs beneath it) between the copies.
/// </summary>
/// <param name="ga_mgr">Supplies the random source used for every draw.</param>
/// <param name="treesInPopulation">Population the two parent trees are drawn from.</param>
/// <returns>
/// Up to two new trees whose <c>_source</c> is "swap"; nothing when either parent has a terminal
/// root or when a node could not be picked from either parent.
/// </returns>
public static IEnumerable<Tree> SwapNodesBetweenTrees(GeneticAlgorithmManager ga_mgr, List<Tree> treesInPopulation)
{
    Random rng = ga_mgr.rando;

    // Two independent draws; nothing prevents both from landing on the same tree.
    Tree firstParent = treesInPopulation[rng.Next(treesInPopulation.Count)];
    Tree secondParent = treesInPopulation[rng.Next(treesInPopulation.Count)];

    // A terminal root means there is nothing useful to swap.
    bool bothRootsAreDecisions = !firstParent._root.IsTerminal && !secondParent._root.IsTerminal;
    if (!bothRootsAreDecisions)
    {
        yield break;
    }

    // Structural locations are what let a node picked in a parent be found again in its copy.
    firstParent.SetStructuralLocationsForNodes();
    secondParent.SetStructuralLocationsForNodes();

    Tree firstChild = firstParent.Copy();
    Tree secondChild = secondParent.Copy();
    firstChild._source = "swap";
    secondChild._source = "swap";

    // Every decision node gets the same weight for now.
    //TODO look into a better way to pick nodes
    //TODO make it easier to select a node with equal weight
    var firstCandidates = firstParent.GetNodesOfType<YesNoMissingTreeNode>().Select(n => Tuple.Create(n, 1.0));
    var firstPicker = new WeightedSelector<YesNoMissingTreeNode>(firstCandidates);
    var secondCandidates = secondParent.GetNodesOfType<YesNoMissingTreeNode>().Select(n => Tuple.Create(n, 1.0));
    var secondPicker = new WeightedSelector<YesNoMissingTreeNode>(secondCandidates);

    var pickedInFirst = firstPicker.PickRandom(rng);
    var pickedInSecond = secondPicker.PickRandom(rng);

    // Trap for trees where no node could be picked.
    if (pickedInFirst == null || pickedInSecond == null)
    {
        yield break;
    }

    // Resolve the picked nodes inside the copies and swap them there, leaving the parents intact.
    TreeNode swapTargetInFirst = firstChild.GetNodeAtStructualLocation(pickedInFirst._structuralLocation);
    TreeNode swapTargetInSecond = secondChild.GetNodeAtStructualLocation(pickedInSecond._structuralLocation);
    TreeNode.SwapNodesInTrees(swapTargetInFirst, swapTargetInSecond);

    // Hand both children to the next generation, skipping any that ended up with no nodes.
    foreach (Tree child in new[] { firstChild, secondChild })
    {
        if (child._nodes.Count > 0)
        {
            yield return child;
        }
    }
}
/// <summary>
/// Copies a randomly chosen tree, picks one of its decision nodes whose test is a
/// <c>LessThanEqualTreeTest</c>, and asks <c>OptimizeTest</c> to improve that node.
/// </summary>
/// <param name="ga_mgr">Supplies the random source and is forwarded to <c>OptimizeTest</c>.</param>
/// <param name="treesInPopulation">Population the tree is drawn from.</param>
/// <returns>
/// The modified copy (with <c>_source</c> set to "optimize value") when <c>OptimizeTest</c>
/// returns true; otherwise nothing.
/// </returns>
public static IEnumerable<Tree> OptimizeSplitForNode(GeneticAlgorithmManager ga_mgr, List<Tree> treesInPopulation)
{
    // pick a random tree and work on a copy so the original stays in the population untouched
    Random rando = ga_mgr.rando;
    Tree tree1 = treesInPopulation[rando.Next(treesInPopulation.Count)];
    Tree tree1_copy = tree1.Copy();

    // Materialize once: the filtered query would otherwise be re-run for the emptiness
    // check and again when the selector consumes it.
    List<YesNoMissingTreeNode> nodes_to_choose_from = tree1_copy.GetNodesOfType<YesNoMissingTreeNode>()
        .Where(c => c.Test is LessThanEqualTreeTest)
        .ToList();
    if (nodes_to_choose_from.Count == 0)
    {
        yield break;
    }

    // every candidate node carries the same weight
    var node_picker = new WeightedSelector<YesNoMissingTreeNode>(
        nodes_to_choose_from.Select(c => Tuple.Create(c, 1.0)));
    YesNoMissingTreeNode node1_copy = node_picker.PickRandom(rando);
    if (node1_copy == null)
    {
        yield break;
    }

    // only emit the tree when the optimization reports success
    if (OptimizeTest(node1_copy, ga_mgr))
    {
        tree1_copy._source = "optimize value";
        yield return tree1_copy;
    }
}
/// <summary>
/// Copies a randomly chosen tree and replaces one of its classification nodes with a new
/// random decision node that has three classification children (true / false / missing),
/// then tries to optimize the new node's test.
/// </summary>
/// <param name="ga_mgr">Supplies the random source; also used to build and optimize the new decision node.</param>
/// <param name="treesInPopulation">Population the tree is drawn from.</param>
/// <returns>
/// One new tree whose <c>_source</c> is "node split" (or "node split w/ op" when
/// <c>OptimizeTest</c> returned true). Nothing is returned when no classification node could be
/// picked, when the picked node has no matrix, or when its matrix has fewer than two rows.
/// </returns>
public static IEnumerable<Tree> SplitNodeAndOptimizeTests(GeneticAlgorithmManager ga_mgr, List<Tree> treesInPopulation)
{
    // grab a random tree
    Tree tree1 = treesInPopulation[ga_mgr.rando.Next(treesInPopulation.Count)];

    // classification nodes are weighted by their traversal count
    var node_picker = new WeightedSelector<ClassificationTreeNode>(
        tree1.GetNodesOfType<ClassificationTreeNode>().Select(c => Tuple.Create(c, (double)c._traverseCount)));
    ClassificationTreeNode node1 = node_picker.PickRandom(ga_mgr.rando);

    tree1.SetStructuralLocationsForNodes();

    // Nothing to split when no node was picked (sibling operators null-check PickRandom
    // results the same way) or when the node carries no matrix; previously both cases
    // fell through to a NullReferenceException.
    if (node1 == null || node1.matrix == null)
    {
        yield break;
    }

    // trap is here in case there are fewer than 2 "top" rows to split on
    var matrix_rows = node1.matrix.GetRowsOrderedByCount().ToList();
    if (matrix_rows.Count < 2)
    {
        yield break;
    }

    //TODO improve this structural business to be cleaner and more obvious if it belongs to the Node or Tree
    Tree tree1_copy = tree1.Copy();
    TreeNode node1_copy = tree1_copy.GetNodeAtStructualLocation(node1._structuralLocation);

    // create a random test to take the place of the classification node
    //TODO create a proper factory for this code
    var node1_decision = new YesNoMissingTreeNode();
    node1_decision.CreateRandom(ga_mgr);
    node1_decision._parent = node1_copy._parent;

    // NOTE(review): the original comments talk about using the two most popular classes from
    // the matrix, but the false/missing leaves are hard-coded to 0 - confirm whether
    // matrix_rows was meant to supply these values.
    var item1 = node1.Classification;
    var item2 = 0;
    var item3 = 0;

    // create the three classification leaves hanging off the new decision node
    var node1a_class = new ClassificationTreeNode();
    node1a_class.Classification = item1;
    node1a_class._parent = node1_decision;

    var node1b_class = new ClassificationTreeNode();
    node1b_class.Classification = item2;
    node1b_class._parent = node1_decision;

    var node1c_class = new ClassificationTreeNode();
    node1c_class.Classification = item3;
    node1c_class._parent = node1_decision;

    node1_decision._trueNode = node1a_class;
    node1_decision._falseNode = node1b_class;
    node1_decision._missingNode = node1c_class;

    // swap the old classification node out for the new decision subtree
    //TODO create a "replace node" operation to standardize this code
    tree1_copy.RemoveNodeWithChildren(node1_copy);
    node1_copy.UpdateParentReference(node1_decision);
    tree1_copy.AddNodeWithChildren(node1_decision);

    // try to optimize the new node; the tree is returned either way
    bool opTest = OptimizeTest(node1_decision, ga_mgr);
    tree1_copy._source = "node split";
    if (opTest)
    {
        tree1_copy._source += " w/ op";
    }

    yield return tree1_copy;
}
/// <summary>
/// Runs the evolutionary loop: for each generation, applies randomly chosen genetic operations
/// in parallel, scores the enlarged population, logs statistics, and keeps only the best trees.
/// </summary>
/// <param name="treesInPopulation">Starting population; it is scored once before the first generation.</param>
/// <returns>The population remaining after the last generation, ordered by descending metric result.</returns>
public List<Tree> ProcessTheNextGeneration(List<Tree> treesInPopulation)
{
    // initial scoring before any evolution takes place
    treesInPopulation = ScoreTreesAndReturnKept(treesInPopulation, 0);

    for (int gen = 0; gen < _gaOptions.generations; gen++)
    {
        var offspringThisGen = new List<Tree>();
        Logger.WriteLine("generation: " + gen);

        // Rebuilt every generation so the probabilities may change between generations if desired.
        var weightedOperations = new List<Tuple<GeneticOperations.GeneticOperation, double>>
        {
            Tuple.Create<GeneticOperations.GeneticOperation, double>(GeneticOperations.SwapNodesBetweenTrees, _gaOptions.Prob_ops_swap),
            Tuple.Create<GeneticOperations.GeneticOperation, double>(GeneticOperations.SplitNodeAndOptimizeTests, _gaOptions.prob_node_split),
            Tuple.Create<GeneticOperations.GeneticOperation, double>(GeneticOperations.CreateRandomTree, _gaOptions.Prob_ops_new_tree),
            Tuple.Create<GeneticOperations.GeneticOperation, double>(GeneticOperations.OptimizeSplitForNode, 10.0),
            //Tuple.Create<GeneticOperations.GeneticOperation, double>(GeneticOperations.OptimizeClassesForTree, 10.0),
            Tuple.Create<GeneticOperations.GeneticOperation, double>(GeneticOperations.DeleteNodeFromTree, _gaOptions.prob_ops_delete),
            //Tuple.Create<GeneticOperations.GeneticOperation, double>(GeneticOperations.SuperSplit, 5.0),
        };
        var operationSelector = new WeightedSelector<GeneticOperations.GeneticOperation>(weightedOperations);

        // NOTE(review): rando is shared by every parallel iteration here (and, it appears, by
        // the operations themselves via the manager); if it is a System.Random that is not safe
        // for concurrent use - confirm.
        Parallel.For(0, _gaOptions.PopulationSize, slot =>
        {
            GeneticOperations.GeneticOperation chosen = operationSelector.PickRandom(rando);
            List<Tree> produced = chosen(this, treesInPopulation).ToList();

            //TODO determine if this is needed
            lock (offspringThisGen)
            {
                offspringThisGen.AddRange(produced);
            }
        });

        treesInPopulation.AddRange(offspringThisGen);
        treesInPopulation = ScoreTreesAndReturnKept(treesInPopulation, gen);

        // log how many trees each operation contributed, and what share of the population that is
        foreach (var bySource in treesInPopulation.GroupBy(t => t._source))
        {
            int members = bySource.Count();
            double share = 1.0 * members / treesInPopulation.Count;
            Logger.WriteLine(string.Format("{0} has {1} = {2:0.000}", bySource.Key, members, share));
        }

        // thin down the herd: best trees first, capped at the configured share of the population size
        int survivorLimit = (int)(_gaOptions.populationSize * _gaOptions.prob_population_to_keep);
        treesInPopulation = treesInPopulation
            .Distinct()
            .OrderByDescending(t => t._prevResults.MetricResult)
            .Take(survivorLimit)
            .ToList();

        // output some info on the ten best trees
        Logger.WriteLine("");
        foreach (Tree candidate in treesInPopulation.Take(10))
        {
            Logger.WriteLine("{0:0.0000} ({2:0.0000}, {3}, {1})",
                candidate._prevResults.MetricResult,
                candidate._source,
                candidate._prevResults.AverageLoss,
                candidate._prevResults.tree_nodeCount);
        }

        // dump the single best tree in full, when there is one
        if (treesInPopulation.Count > 0)
        {
            Tree best = treesInPopulation[0];
            Logger.WriteLine("");
            Logger.WriteLine(best);
            Logger.WriteLine(best._prevResults);
        }

        OnProgressUpdated(100 * gen / Math.Max(_gaOptions.generations, 1));
    }

    OnProgressUpdated(100);

    return treesInPopulation;
}