/// <summary>
/// Records a lost battle: applies the agent's "lose" reward and reports
/// the defending pokémon as the winner.
/// </summary>
/// <param name="agentAi">The learning agent that receives the lose reward.</param>
/// <returns>The current <c>defender</c> pokémon, i.e. the winner of the battle.</returns>
private Pokémon LoseThisBattle(IntelligentPokéAgent agentAi)
{
    // Penalise the agent for losing.
    agentAi.ApplyRewardLose();

    // The defender is the winner when the agent loses.
    return defender;
}
/// <summary>
/// Records a won battle: applies the agent's "win" reward and reports
/// the agent's pokémon as the winner.
/// </summary>
/// <param name="agentAi">The learning agent that receives the win reward.</param>
/// <returns>The current <c>agent</c> pokémon, i.e. the winner of the battle.</returns>
private Pokémon WinThisBattle(IntelligentPokéAgent agentAi)
{
    // Reward the agent for winning.
    agentAi.ApplyRewardWin();

    // The agent's own pokémon is the winner.
    return agent;
}
/// <summary>
/// Entry point of the experiment. Repeats a fixed number of iterations; in each
/// iteration a fresh <c>IntelligentPokéAgent</c> fights a fixed number of battles
/// against a <c>RandomPokéAgent</c>. The per-battle rewards are summed across
/// iterations, averaged, rounded to whole numbers and written to
/// "rewarddata_averages.txt" as "battleIndex, average" lines.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // How many times the whole experiment is repeated.
    int numIterations = 30;
    // How many battles are fought inside each repetition.
    int numBattles = 20000;

    // Per-battle accumulators (arrays start zero-filled): the reward total over
    // all iterations and, later, the average derived from it.
    int[] rewardTotals = new int[numBattles];
    double[] rewardAverages = new double[numBattles];

    for (int iteration = 1; iteration <= numIterations; ++iteration)
    {
        // Win counters for this iteration.
        int agentWins = 0;
        int opponentWins = 0;

        // A brand-new learner per iteration, initialised before any battle.
        IntelligentPokéAgent agentAi = new IntelligentPokéAgent();
        agentAi.EstimateRewards();

        // The opponent policy used for every battle of this iteration.
        RandomPokéAgent opponentAi = new RandomPokéAgent();

        // Candidate pokémon for the agent side.
        List<Pokémon> agents = new List<Pokémon>
        {
            RentalPokémon.RentalVenusaur,
            RentalPokémon.RentalBlastoise,
            RentalPokémon.RentalCharizard,
            RentalPokémon.RentalPorygon,
            RentalPokémon.RentalGengar
        };

        // Candidate pokémon for the opponent side.
        List<Pokémon> opponents = new List<Pokémon>
        {
            RentalPokémon.RentalVenusaur,
            RentalPokémon.RentalBlastoise,
            RentalPokémon.RentalCharizard,
            RentalPokémon.RentalPorygon,
            RentalPokémon.RentalGengar
        };

        Battle testBattle = new Battle();

        for (int battleIndex = 0; battleIndex < numBattles; ++battleIndex)
        {
            agentAi.StartNewBattleEpisode();

            // Exploration rate shrinks linearly from EPSILON at the first battle
            // towards zero as battleIndex approaches numBattles.
            agentAi.variableEpsilon = IntelligentPokéAgent.EPSILON - (battleIndex / (double)numBattles) * IntelligentPokéAgent.EPSILON;

            // Draw the agent's pokémon, then keep drawing an opponent until its
            // species differs from the agent's (no mirror matches).
            Pokémon agent = agents[rnd.Next(agents.Count)];
            Pokémon opponent;
            do
            {
                opponent = opponents[rnd.Next(opponents.Count)];
            }
            while (opponent.Species.Name == agent.Species.Name);

            // Both combatants are healed before the battle starts.
            agent.Heal();
            opponent.Heal();

            Console.WriteLine("~~~~~ITERATION " + iteration + ", BATTLE " + (battleIndex + 1) + "~~~~~");
            Console.WriteLine("A wild " + opponent.Species.Name + " appears! Go, " + agent.Species.Name + "!");

            Pokémon winner = testBattle.DoBattle(agent, agentAi, opponent, opponentAi);

            Console.WriteLine("The winner is " + winner.Species.Name + ", with " + winner.RemainingHealth + " HP left!\n");

            // Any winner other than the agent's pokémon counts for the opponent.
            if (winner == agent)
            {
                agentWins++;
            }
            else
            {
                opponentWins++;
            }

            // Heal again once the battle is over.
            agent.Heal();
            opponent.Heal();
        }

        // Summary of this iteration.
        Console.WriteLine("Out of " + numBattles + " battles:");
        Console.WriteLine("\tThe agent won " + agentWins + " times.");
        Console.WriteLine("\tThe opponent won " + opponentWins + " times.");

        // Fold this iteration's per-battle rewards into the running totals.
        for (int battleIndex = 0; battleIndex < numBattles; ++battleIndex)
        {
            rewardTotals[battleIndex] += agentAi.myBattleRewards[battleIndex];
        }
    }

    // Average each battle slot over all iterations; the result is rounded and
    // truncated to a whole number before being stored.
    for (int battleIndex = 0; battleIndex < numBattles; ++battleIndex)
    {
        rewardAverages[battleIndex] = (int)Math.Round(rewardTotals[battleIndex] / (double)numIterations);
    }

    // Write one "index, average" line per battle slot.
    string outputFileAvg = "rewarddata_averages.txt";
    using (System.IO.StreamWriter outputWriter = new System.IO.StreamWriter(outputFileAvg))
    {
        for (int battleIndex = 0; battleIndex < numBattles; ++battleIndex)
        {
            outputWriter.WriteLine("" + battleIndex + ", " + rewardAverages[battleIndex]);
        }
    }
}
/// <summary>
/// Fights one battle between the agent's pokémon and the defender's pokémon,
/// feeding rewards to the learning agent after every turn, until one of the two
/// has fainted.
/// </summary>
/// <param name="agentPokémon">Pokémon controlled by the learning agent; stored in <c>agent</c>.</param>
/// <param name="agentAi">Learning agent that picks the agent's moves and receives rewards.</param>
/// <param name="defenderPokémon">Opposing pokémon; stored in <c>defender</c>.</param>
/// <param name="defenderAi">Policy that picks the defender's moves.</param>
/// <returns>The defender if the agent's pokémon fainted, otherwise the agent's pokémon.</returns>
public Pokémon DoBattle(Pokémon agentPokémon, IntelligentPokéAgent agentAi, Pokémon defenderPokémon, IPokéAgent defenderAi)
{
    // Remember the combatants for the duration of the battle.
    agent = agentPokémon;
    defender = defenderPokémon;

    // Let the learner derive its starting state from this battle.
    agentAi.ResetState(this);

    // Log both combatants.
    Console.WriteLine("\tLevel " + agent.Level + " " + agent.Species.Name + " has " + agent.RemainingHealth + " health.");
    Console.WriteLine("\tLevel " + defender.Level + " " + defender.Species.Name + " has " + defender.RemainingHealth + " health.");

    bool weFainted = false;
    bool theyFainted = false;

    // One pass of this loop is one round; it repeats until somebody faints.
    do
    {
        agentAi.StartNewTurnEpisode();

        // The faster pokémon moves first; on a speed tie the agent moves first.
        bool agentMovesFirst = ComparePokémonSpeed(agent, defender) >= 0;

        if (agentMovesFirst)
        {
            // Agent attacks.
            agentReward = DoTurn(agent, defender, agentAi);
            agentAi.ApplyRewardDealDamage(agentReward);

            theyFainted = defender.IsFainted;
            if (!theyFainted)
            {
                // Defender is still standing and strikes back.
                agentReward = DoTurn(defender, agent, defenderAi);
                agentAi.ApplyRewardTakeDamage(agentReward);

                weFainted = agent.IsFainted;
            }
        }
        else
        {
            // Defender attacks.
            agentReward = DoTurn(defender, agent, defenderAi);
            agentAi.ApplyRewardTakeDamage(agentReward);

            weFainted = agent.IsFainted;
            if (!weFainted)
            {
                // Agent is still standing and strikes back.
                agentReward = DoTurn(agent, defender, agentAi);
                agentAi.ApplyRewardDealDamage(agentReward);

                theyFainted = defender.IsFainted;
            }
        }

        // A faint ends the battle and earns the extra win/lose reward
        // before the learner is updated for this round.
        if (weFainted)
        {
            agentAi.ApplyRewardLose();
        }
        if (theyFainted)
        {
            agentAi.ApplyRewardWin();
        }

        agentAi.LearnerUpdate(this);
    }
    while (!weFainted && !theyFainted);

    // Whoever is still standing is the winner.
    return weFainted ? defender : agent;
}