private void NextEpisode(SMDPAgent agent)
{
    config = RLConfig.Load("RLConfig.xml");
    if (this.Episodes > config.numEpisodes)
    {
        return;
    }
    Console.WriteLine("Reached NextEpisode() {0} times", cnt++);

    if (this.Episodes == 0)
    {
        // First episode: reset the running statistics.
        fitness = 1;
        sum = 0;
        counter = 0;
    }
    this.Episodes++;

    // The exploration rate is taken from the config's lambda field.
    agent.setEpsilon(config.lambda);

    // Evaluate the current policy; its fitness doubles as the reward
    // passed on to endEpisode.
    fitness = RLGameWorld.FitnessValue(agent);
    reward = fitness;
    sum += fitness;

    // Every count_fit episodes, log the average fitness over that window.
    if (this.Episodes % count_fit == 0)
    {
        counter++;
        average_fitness = sum / count_fit;
        // recordPerformance(this.Episodes, fitness);
        recordPerformance(counter, average_fitness);
        sum = 0;
    }

    //this.Previous = this.Current;
    //this.Current = this.Next;
    //this.Next = new KPEnvironment(this.Episode + 1);
}
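// NextEpisode() assumes an RLConfig loaded from XML and a recordPerformance()
// logger, neither of which is shown in this section. The sketch below is a
// hypothetical minimal version: it assumes the config exposes the
// numEpisodes, lambda, and learningMethod members used above, and that
// performance rows are appended to a CSV file named performance.csv. The
// file name and any member not referenced above are assumptions, not the
// project's actual implementation.
public class RLConfig
{
    public int numEpisodes;
    public double lambda;
    public string learningMethod;

    public static RLConfig Load(string path)
    {
        // XmlSerializer needs a public parameterless constructor and
        // public fields/properties to round-trip the config.
        var serializer = new System.Xml.Serialization.XmlSerializer(typeof(RLConfig));
        using (var stream = System.IO.File.OpenRead(path))
        {
            return (RLConfig)serializer.Deserialize(stream);
        }
    }
}

// Hypothetical logger: append one "index,fitness" row per recorded window.
private void recordPerformance(int index, double value)
{
    System.IO.File.AppendAllText("performance.csv",
        string.Format("{0},{1}{2}", index, value, Environment.NewLine));
}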
public void OneEpisode(SMDPAgent agent)
{
    // Call the evaluator for one episode, then notify any subscribers.
    epi++;
    this.NextEpisode(agent);
    if (this.OnEpisode == null)
    {
        // Console.WriteLine("OnEpisode is null !!!!!!");
        return;
    }
    this.OnEpisode(this);
}
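// OnEpisode acts as a per-episode callback. Its declaration is not shown in
// this section; a minimal sketch, assuming it is a delegate field on the
// environment class (KPEnvironment, per the commented-out code above), with
// a hypothetical delegate type:
public delegate void EpisodeHandler(KPEnvironment env);
public EpisodeHandler OnEpisode;

// Usage sketch: a caller could subscribe a progress printer before training,
// assuming Episodes and fitness are visible to it.
// env.OnEpisode = e =>
//     Console.WriteLine("Episode {0}: fitness {1}", e.Episodes, e.fitness);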
public static double FitnessValue(SMDPAgent policy)
{
    int cycles = 0;
    config = RLConfig.Load("RLConfig.xml");

    // Install the policy on every player, according to the configured method.
    switch (config.learningMethod)
    {
        case "Q_Learning":
            for (int i = 0; i < players.Count; i++)
            {
                players[i].qlPolicy = (Q_LearningAgent)policy;
            }
            break;
        case "SARSA":
            for (int i = 0; i < players.Count; i++)
            {
                players[i].saPolicy = (SarsaAgent)policy;
            }
            break;
        default:
            Console.WriteLine("Unknown learning method: {0}", config.learningMethod);
            break;
    }

    // Run simulation cycles until the episode ends, then reset the flag.
    do
    {
        timer.RunCycle();
        cycles++;
    } while (!kref.episodeEnded);
    kref.episodeEnded = false;

    // Dividing by 10 scales the cycle count (presumably to seconds,
    // assuming 10 simulation cycles per second).
    return cycles / 10.0;
}
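// Putting the pieces together: a minimal driver sketch, assuming a
// KPEnvironment instance and a concrete SMDPAgent subclass. The
// constructors shown here are assumptions; only OneEpisode() and
// RLConfig.Load() come from this section.
public static void TrainAgent()
{
    RLConfig config = RLConfig.Load("RLConfig.xml");
    SMDPAgent agent = new SarsaAgent();        // hypothetical default ctor
    KPEnvironment env = new KPEnvironment(0);  // hypothetical starting episode
    for (int e = 0; e < config.numEpisodes; e++)
    {
        env.OneEpisode(agent);  // evaluates fitness and fires OnEpisode
    }
}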