/// <summary>
/// Creates an agent with separate initial and final mutation rates.
/// The current <c>Mutation_Rate</c> starts at <paramref name="initial_MutationRate"/>
/// and both "successful mutation" counters start at zero.
/// </summary>
/// <param name="generator_agent">Stored in <c>Generator_Agent</c>.</param>
/// <param name="mutator_agent">Stored in <c>Mutator_Agent</c>.</param>
/// <param name="game_state_evaluators">Stored in <c>Game_State_Evaluators</c> (the list is not copied).</param>
/// <param name="replacement_rule">Stored in <c>Replacement_Rule</c>.</param>
/// <param name="genome_length">Stored in <c>Genome_Length</c>.</param>
/// <param name="num_generations">Stored in <c>Num_Generations</c>.</param>
/// <param name="initial_MutationRate">Stored in <c>Initial_MutationRate</c> and used as the starting <c>Mutation_Rate</c>.</param>
/// <param name="final_MutationRate">Stored in <c>Final_MutationRate</c>.</param>
/// <param name="num_evaluation_repetitions">Stored in <c>Num_Evaluation_Repetitions</c>.</param>
public PolicyBased_RHE_Agent(
    PD_AI_Macro_Agent_Base generator_agent,
    PD_AI_Macro_Agent_Base mutator_agent,
    List<PD_GameStateEvaluator> game_state_evaluators,
    P_RHE_Replacement_Rule replacement_rule,
    int genome_length,
    int num_generations,
    double initial_MutationRate,
    double final_MutationRate,
    int num_evaluation_repetitions
)
{
    // collaborating agents
    Generator_Agent = generator_agent;
    Mutator_Agent = mutator_agent;

    // evaluation and replacement configuration
    Game_State_Evaluators = game_state_evaluators;
    Replacement_Rule = replacement_rule;
    Num_Evaluation_Repetitions = num_evaluation_repetitions;

    // evolution dimensions
    Genome_Length = genome_length;
    Num_Generations = num_generations;

    // mutation rate bounds; the current rate begins at the initial value
    Initial_MutationRate = initial_MutationRate;
    Final_MutationRate = final_MutationRate;
    Mutation_Rate = initial_MutationRate;

    // per-run statistics start from zero
    Num_Successful_Mutations_ThisTime = 0;
    Num_Successful_Mutations_Different_FirstAction_ThisTime = 0;
}
/// <summary>
/// Creates an agent with a single mutation rate: <paramref name="mutation_rate"/>
/// is forwarded as both the initial and the final mutation rate of the
/// nine-parameter constructor.
/// </summary>
/// <param name="generator_agent">Forwarded unchanged.</param>
/// <param name="mutator_agent">Forwarded unchanged.</param>
/// <param name="gameStateEvaluators">Forwarded unchanged as the evaluator list.</param>
/// <param name="replacement_rule">Forwarded unchanged.</param>
/// <param name="genome_length">Forwarded unchanged.</param>
/// <param name="num_generations">Forwarded unchanged.</param>
/// <param name="mutation_rate">Used for both the initial and the final mutation rate.</param>
/// <param name="num_evaluation_repetitions">Forwarded unchanged.</param>
public PolicyBased_RHE_Agent(
    PD_AI_Macro_Agent_Base generator_agent,
    PD_AI_Macro_Agent_Base mutator_agent,
    List<PD_GameStateEvaluator> gameStateEvaluators,
    P_RHE_Replacement_Rule replacement_rule,
    int genome_length,
    int num_generations,
    double mutation_rate,
    int num_evaluation_repetitions
)
    : this(
        generator_agent: generator_agent,
        mutator_agent: mutator_agent,
        game_state_evaluators: gameStateEvaluators,
        replacement_rule: replacement_rule,
        genome_length: genome_length,
        num_generations: num_generations,
        initial_MutationRate: mutation_rate,
        final_MutationRate: mutation_rate,
        num_evaluation_repetitions: num_evaluation_repetitions
    )
{
}
/// <summary>
/// Appends new genes to <c>Genome</c> by letting <paramref name="defaultPolicyAgent"/>
/// play forward on a randomized copy of <paramref name="gameState"/>.
/// The macro actions it produces are grouped by the turn index at which they
/// were requested; one gene is added per turn that received at least one macro.
/// </summary>
/// <param name="randomness_provider">Random source passed to the state copy, the agent and macro application.</param>
/// <param name="gameState">State to start from; only a copy of it is advanced here.</param>
/// <param name="pathFinder">Passed through to the agent.</param>
/// <param name="defaultPolicyAgent">Agent that supplies every new macro action.</param>
public void Expand(
    Random randomness_provider,
    PD_Game gameState,
    PD_AI_PathFinder pathFinder,
    PD_AI_Macro_Agent_Base defaultPolicyAgent
)
{
    PD_Game rolloutState = gameState.Request_Randomized_Copy(randomness_provider);

    // turns [firstTurn, turnLimit) are the ones still missing from the genome
    int firstTurn = rolloutState.game_state_counter.turn_index;
    int turnLimit = firstTurn + MaxGenomeLength - Genome.Count;

    // one (initially empty) bucket of macros per missing turn
    var macrosByTurn = new Dictionary<int, List<PD_MacroAction>>();
    for (int turn = firstTurn; turn < turnLimit; turn++)
    {
        macrosByTurn[turn] = new List<PD_MacroAction>();
    }

    // roll the copy forward, filing every macro under the turn it was requested in
    for (
        int activeTurn = firstTurn;
        rolloutState.GQ_Is_Ongoing() && activeTurn < turnLimit;
        activeTurn = rolloutState.game_state_counter.turn_index
    )
    {
        PD_MacroAction producedMacro = defaultPolicyAgent.GetNextMacroAction(
            randomness_provider,
            rolloutState,
            pathFinder
        );
        macrosByTurn[activeTurn].Add(producedMacro);
        rolloutState.Apply_Macro_Action(randomness_provider, producedMacro);
    }

    // turn the non-empty buckets into genes, in ascending turn order
    for (int turn = firstTurn; turn < turnLimit; turn++)
    {
        List<PD_MacroAction> turnMacros = macrosByTurn[turn];
        if (turnMacros.Count == 0)
        {
            continue;
        }

        Genome.Add(new RH_Gene(turn, turnMacros));
    }
}
/// <summary>
/// Replays the genome <c>NumSimulationsForEvaluation</c> times, each time on a fresh
/// randomized copy of <paramref name="initial_GameState"/>, and stores in
/// <c>EvaluationScores</c> the per-evaluator average of the resulting state scores.
/// </summary>
/// <remarks>
/// If a replay ends with the game still ongoing and <c>IsComplete()</c> false,
/// <c>Expand</c> is called, so the genome may grow while it is being evaluated.
/// </remarks>
/// <param name="randomness_provider">Random source passed to state copies, genes and expansion.</param>
/// <param name="initial_GameState">State every simulation starts from; only copies are advanced.</param>
/// <param name="pathFinder">Passed through to genes and expansion.</param>
/// <param name="defaultPolicyAgent">Passed through to genes and expansion.</param>
/// <param name="gameStateEvaluators">Evaluators; <c>EvaluationScores[j]</c> belongs to evaluator <c>j</c>.</param>
public void EvaluateSelf(
    Random randomness_provider,
    PD_Game initial_GameState,
    PD_AI_PathFinder pathFinder,
    PD_AI_Macro_Agent_Base defaultPolicyAgent,
    List<PD_GameStateEvaluator> gameStateEvaluators
)
{
    // one zeroed score slot and one running total per evaluator
    EvaluationScores = new List<double>();
    int evaluatorCount = gameStateEvaluators.Count;
    double[] scoreTotals = new double[evaluatorCount];
    for (int slot = 0; slot < evaluatorCount; slot++)
    {
        EvaluationScores.Add(0.0);
    }

    int simulation = 0;
    while (simulation < NumSimulationsForEvaluation)
    {
        PD_Game simulatedState = initial_GameState.Request_Randomized_Copy(randomness_provider);

        // replay every gene for as long as the game has not ended
        foreach (var gene in Genome)
        {
            if (!simulatedState.GQ_Is_Ongoing())
            {
                continue;
            }

            gene.ApplySelfOnGameState(
                randomness_provider,
                simulatedState,
                pathFinder,
                defaultPolicyAgent
            );
        }

        // genome ran out before the game did: grow it from the reached state
        if (simulatedState.GQ_Is_Ongoing() && !IsComplete())
        {
            Expand(
                randomness_provider,
                simulatedState,
                pathFinder,
                defaultPolicyAgent
            );
        }

        for (int e = 0; e < evaluatorCount; e++)
        {
            scoreTotals[e] += gameStateEvaluators[e].EvaluateGameState(simulatedState);
        }

        simulation++;
    }

    // average the accumulated scores over the number of simulations
    for (int e = 0; e < evaluatorCount; e++)
    {
        EvaluationScores[e] = scoreTotals[e] / (double)NumSimulationsForEvaluation;
    }
}
/// <summary>
/// Rebuilds <c>MacroActions</c> around one randomly chosen mutation index, working
/// on a randomized copy of <paramref name="initial_gameState"/>.
/// </summary>
/// <remarks>
/// While <c>Is_GameState_CorrectTurn_And_Ongoing</c> holds for the copy:
/// <list type="bullet">
/// <item>actions before the mutation index are replayed (applied, skipped, or preceded
/// by an inserted default-policy action, as decided by <c>Calculate_Macro_Application_Type</c>);</item>
/// <item>the action at the mutation index is replaced by one from <paramref name="mutatorAgent"/>;</item>
/// <item>every later action is supplied by <paramref name="defaultPolicyAgent"/>.</item>
/// </list>
/// The resulting list replaces <c>MacroActions</c>.
/// </remarks>
/// <param name="randomness_provider">Random source for the state copy, the mutation index, the agents and macro application.</param>
/// <param name="initial_gameState">State the gene is replayed from; only a copy is advanced.</param>
/// <param name="pathFinder">Passed through to the agents.</param>
/// <param name="defaultPolicyAgent">Supplies inserted actions and all actions after the mutation point.</param>
/// <param name="mutatorAgent">Supplies the replacement action at the mutation point.</param>
/// <exception cref="System.Exception">
/// The application type is <c>Error</c>, or the default policy agent returned null.
/// </exception>
/// <exception cref="System.InvalidOperationException">
/// The application type is a value this method does not know how to handle.
/// </exception>
public void MutateSelf(
    Random randomness_provider,
    PD_Game initial_gameState,
    PD_AI_PathFinder pathFinder,
    PD_AI_Macro_Agent_Base defaultPolicyAgent,
    PD_AI_Macro_Agent_Base mutatorAgent
)
{
    PD_Game mutation_GameState = initial_gameState.Request_Randomized_Copy(randomness_provider);

    int numberOfMacros = MacroActions.Count;
    int mutationIndex = randomness_provider.Next(numberOfMacros);

    List<PD_MacroAction> mutated_MacroActions = new List<PD_MacroAction>();

    // first apply the non-mutated actions
    int counter = 0;
    while (Is_GameState_CorrectTurn_And_Ongoing(mutation_GameState))
    {
        if (counter < mutationIndex)
        {
            // counter < mutation index => simply copy the existing macros into the mutated macros list.
            PD_MacroAction currentMacro = MacroActions[counter];
            RH_State_Type stateType = Calculate_State_Type(mutation_GameState);
            RH_Macro_Type macroType = Calculate_Macro_Type(currentMacro);
            RH_Macro_Apply_Type response = Calculate_Macro_Application_Type(
                stateType,
                macroType
            );

            switch (response)
            {
                case RH_Macro_Apply_Type.Normal:
                    // store the macro and then apply it
                    mutated_MacroActions.Add(MacroActions[counter]);
                    ApplyMacroOnGameState(
                        randomness_provider,
                        mutation_GameState,
                        MacroActions[counter]
                    );
                    counter++;
                    break;

                case RH_Macro_Apply_Type.Insert:
                    PD_MacroAction actionToInsert = defaultPolicyAgent.GetNextMacroAction(
                        randomness_provider,
                        mutation_GameState,
                        pathFinder
                    );
                    // first correct the macro actions themselves: the existing list gains
                    // an element, so the mutation index shifts right by one to keep
                    // pointing at the same stored action
                    MacroActions.Insert(counter, actionToInsert);
                    mutated_MacroActions.Add(actionToInsert);
                    ApplyMacroOnGameState(
                        randomness_provider,
                        mutation_GameState,
                        actionToInsert
                    );
                    counter++;
                    mutationIndex++;
                    break;

                case RH_Macro_Apply_Type.Skip:
                    // keep the skipped action, but do not apply it!
                    mutated_MacroActions.Add(MacroActions[counter]);
                    counter++;
                    break;

                case RH_Macro_Apply_Type.Error:
                    throw new System.Exception("error here!");

                default:
                    // Without this arm an unexpected value would change neither the
                    // counter nor the game state, and the loop would never terminate.
                    throw new System.InvalidOperationException(
                        "Unexpected macro application type: " + response
                    );
            }
        }
        else if (counter == mutationIndex)
        {
            // counter == mutation index => ask the mutator to provide a new action
            var replacement_MacroAction = mutatorAgent.GetNextMacroAction(
                randomness_provider,
                mutation_GameState,
                pathFinder
            );
            mutated_MacroActions.Add(replacement_MacroAction);
            ApplyMacroOnGameState(
                randomness_provider,
                mutation_GameState,
                replacement_MacroAction
            );
            counter++;
        }
        else
        {
            // counter > mutation index => ask the default policy agent to provide a new action
            var nextMacro = defaultPolicyAgent.GetNextMacroAction(
                randomness_provider,
                mutation_GameState,
                pathFinder
            );
            if (nextMacro == null)
            {
                throw new System.Exception("problem here");
            }
            ApplyMacroOnGameState(
                randomness_provider,
                mutation_GameState,
                nextMacro
            );
            mutated_MacroActions.Add(nextMacro);
            counter++;
        }
    }

    MacroActions = mutated_MacroActions;
}
/// <summary>
/// Plays this gene's macro actions onto <paramref name="gameStateToApplySelfOn"/>
/// for as long as <c>Is_GameState_CorrectTurn_And_Ongoing</c> holds.
/// </summary>
/// <remarks>
/// For each step, <c>Calculate_Macro_Application_Type</c> decides whether the stored
/// macro is applied, skipped, or preceded by a macro obtained from
/// <paramref name="defaultPolicyAgent"/>. An obtained macro is inserted into
/// <c>MacroActions</c>, so this method can modify the gene. When the stored macros
/// are exhausted, the macro considered for the step is <c>null</c>.
/// </remarks>
/// <param name="randomness_provider">Random source for the agent and macro application.</param>
/// <param name="gameStateToApplySelfOn">State that is advanced in place.</param>
/// <param name="pathFinder">Passed through to the agent.</param>
/// <param name="defaultPolicyAgent">Supplies macros for the insert case.</param>
/// <exception cref="System.Exception">
/// The application type is <c>Error</c> or any other unhandled value.
/// </exception>
public void ApplySelfOnGameState(
    Random randomness_provider,
    PD_Game gameStateToApplySelfOn,
    PD_AI_PathFinder pathFinder,
    PD_AI_Macro_Agent_Base defaultPolicyAgent
)
{
    int cursor = 0;

    while (Is_GameState_CorrectTurn_And_Ongoing(gameStateToApplySelfOn))
    {
        // past the end of the stored macros there is nothing to offer: use null
        PD_MacroAction storedMacro = cursor < MacroActions.Count
            ? MacroActions[cursor]
            : null;

        RH_State_Type stateKind = Calculate_State_Type(gameStateToApplySelfOn);
        RH_Macro_Type macroKind = Calculate_Macro_Type(storedMacro);
        // value is not used below; the read is kept so behaviour stays unchanged
        int turnIndexSnapshot = gameStateToApplySelfOn.game_state_counter.turn_index;
        RH_Macro_Apply_Type verdict = Calculate_Macro_Application_Type(stateKind, macroKind);

        // ACT ACCORDINGLY!
        if (verdict == RH_Macro_Apply_Type.Normal)
        {
            ApplyMacroOnGameState(
                randomness_provider,
                gameStateToApplySelfOn,
                storedMacro
            );
        }
        else if (verdict == RH_Macro_Apply_Type.Insert)
        {
            var fillerMacro = defaultPolicyAgent.GetNextMacroAction(
                randomness_provider,
                gameStateToApplySelfOn,
                pathFinder
            );
            // result is not used; the call is kept so behaviour stays unchanged
            Calculate_Macro_Type(fillerMacro);
            MacroActions.Insert(cursor, fillerMacro);
            ApplyMacroOnGameState(
                randomness_provider,
                gameStateToApplySelfOn,
                fillerMacro
            );
        }
        else if (verdict != RH_Macro_Apply_Type.Skip)
        {
            // Error, or any value that is not handled above
            throw new System.Exception("something wrong here!");
        }

        // Normal, Insert and Skip all move on to the next stored position
        cursor++;
    }
}
/// <summary>
/// Mutates the genome gene by gene while replaying it on a randomized copy of
/// <paramref name="gameState"/>, guaranteeing an attempt at one mutation, and then
/// expands the genome if it is still incomplete.
/// </summary>
/// <remarks>
/// First pass: each gene is mutated when a draw from <paramref name="randomnessProvider"/>
/// falls below <paramref name="mutationRate"/>, then applied to the replay state.
/// If no gene was mutated, the genome is replayed again on a fresh copy and the gene
/// at one random index is mutated before it is applied.
/// Finally, if the replay state that matches the final genome is still ongoing and
/// <c>IsComplete()</c> is false, <c>Expand</c> is called from that state.
/// Previously the first-pass state was always used here, even when the fallback pass
/// had produced a newer one.
/// NOTE(review): in either pass a gene is only visited while the replay state is
/// ongoing, so a game that ends early can still leave the genome unmutated.
/// </remarks>
/// <param name="randomness_provider">Random source for state copies and for the gene-level operations.</param>
/// <param name="gameState">State the genome is replayed from; only copies are advanced.</param>
/// <param name="pathFinder">Passed through to genes and expansion.</param>
/// <param name="defaultPolicyAgent">Passed through to genes and expansion.</param>
/// <param name="mutatorAgent">Passed through to the gene-level mutation.</param>
/// <param name="mutationRate">Threshold compared against a <c>NextDouble()</c> draw per gene.</param>
/// <param name="randomnessProvider">Random source for the per-gene mutation draws and the fallback index.</param>
public void MutateSelf(
    Random randomness_provider,
    PD_Game gameState,
    PD_AI_PathFinder pathFinder,
    PD_AI_Macro_Agent_Base defaultPolicyAgent,
    PD_AI_Macro_Agent_Base mutatorAgent,
    double mutationRate,
    Random randomnessProvider
)
{
    // replay state that corresponds to the genome as it stands at the end of this method
    PD_Game finalGameState = gameState.Request_Randomized_Copy(randomness_provider);
    bool atLeastOneMutation = false;

    // first pass: mutate each gene with probability mutationRate, then apply it
    for (int i = 0; i < Genome.Count; i++)
    {
        if (finalGameState.GQ_Is_Ongoing())
        {
            double mutationChance = randomnessProvider.NextDouble();
            if (mutationChance < mutationRate)
            {
                atLeastOneMutation = true;
                Genome[i].MutateSelf(
                    randomness_provider,
                    finalGameState,
                    pathFinder,
                    defaultPolicyAgent,
                    mutatorAgent
                );
            }

            // mutated or not, the gene is applied so the next gene sees the right state
            Genome[i].ApplySelfOnGameState(
                randomness_provider,
                finalGameState,
                pathFinder,
                defaultPolicyAgent
            );
        }
    }

    // fallback pass: nothing mutated, so force a mutation at one random index
    if (atLeastOneMutation == false)
    {
        // the first-pass state no longer matches the genome once a gene is mutated here,
        // so replay from a fresh copy and keep that copy as the final state
        finalGameState = gameState.Request_Randomized_Copy(randomness_provider);
        int randomMutationIndex = randomnessProvider.Next(Genome.Count);

        for (int i = 0; i < Genome.Count; i++)
        {
            if (finalGameState.GQ_Is_Ongoing())
            {
                if (i == randomMutationIndex)
                {
                    Genome[i].MutateSelf(
                        randomness_provider,
                        finalGameState,
                        pathFinder,
                        defaultPolicyAgent,
                        mutatorAgent
                    );
                }

                Genome[i].ApplySelfOnGameState(
                    randomness_provider,
                    finalGameState,
                    pathFinder,
                    defaultPolicyAgent
                );
            }
        }
    }

    // grow the genome from the state reached by the final (mutated) genome
    if (finalGameState.GQ_Is_Ongoing() && IsComplete() == false)
    {
        Expand(
            randomness_provider,
            finalGameState,
            pathFinder,
            defaultPolicyAgent
        );
    }
}