/// <summary>
/// Runs every training episode in the set, one after another.
/// </summary>
/// <remarks>
/// After each episode this method:
/// saves a quality snapshot into <c>TrainingSessions</c> when the episode ended in a
/// terminal state and the 1-based episode number is a multiple of
/// <c>Environment.QualitySaveFrequency</c>;
/// decays epsilon when <c>UseDecayingEpsilon</c> is set;
/// and calls <c>OnTrainingEpisodeCompleted</c> unless <paramref name="overrideBaseEvents"/> is true.
/// </remarks>
/// <param name="epsilon">Starting epsilon for the exploration/exploitation decision; passed to each episode and possibly decayed between episodes</param>
/// <param name="overrideBaseEvents">Forwarded to each episode; when true, this method does not call <c>OnTrainingEpisodeCompleted</c></param>
protected virtual void RunTrainingSet(double epsilon, bool overrideBaseEvents)
{
    var episode = 0;

    while (episode < NumberOfTrainingEpisodes)
    {
        var outcome = RunTrainingEpisode(epsilon, overrideBaseEvents);
        var state = outcome.finalState;
        var moves = outcome.moves;

        // Episodes are reported to the environment using 1-based numbering.
        var episodeNumber = episode + 1;

        // The terminal-state check runs first; the save-frequency test is only
        // evaluated when the episode actually ended in a terminal state.
        if (Environment.IsTerminalState(state, moves, MaximumAllowedMoves))
        {
            if (episodeNumber % Environment.QualitySaveFrequency == 0)
            {
                TrainingSessions.Add(Environment.SaveQualityForEpisode(episodeNumber, moves, Score));
            }
        }

        if (UseDecayingEpsilon)
        {
            epsilon = DecayEpsilon(episode, epsilon);
        }

        if (!overrideBaseEvents)
        {
            // The last argument reports whether the final state, reduced modulo
            // StatesPerPhase, is one of the environment's terminal states.
            OnTrainingEpisodeCompleted(
                episode,
                NumberOfTrainingEpisodes,
                _trainingEpisodeStartPoint,
                moves,
                Score,
                Environment.TerminalStates.Contains(state % Environment.StatesPerPhase));
        }

        ++episode;
    }
}
/// <summary>
/// Rebuilds <c>TrainingSessions</c> by replaying every saved training session of the
/// loaded agent, then collapsing sessions with identical outcomes into one entry.
/// </summary>
/// <remarks>
/// Each saved quality table is run on a converted copy of the agent, from the agent's
/// start position. A run that throws is recorded as not succeeded. Sessions with the same
/// moves, score and success flag are merged into one entry carrying the episode range.
/// The merged entries are sorted by success, then moves, then first episode (all
/// descending), and the first one becomes <c>SelectedSession</c>.
/// </remarks>
public void InitializeSessions()
{
    var replayed = new List<TrainingSessionEx>();
    var savedSessions = _agent.TrainingSessions.OrderBy(e => e.Episode).ToList();

    var agent = MazeUtilities.ConvertLoadedAgent(_agent);
    agent.Environment = MazeUtilities.CopyEnvironment(_agent.Environment);
    agent.AgentCompleted += Agent_AgentCompleted;

    // Replay from the highest episode down to the lowest, so that `replayed`
    // ends up in descending episode order.
    foreach (var saved in Enumerable.Reverse(savedSessions))
    {
        var replay = new TrainingSessionEx
        {
            Episode = saved.Episode,
            Moves = saved.Moves,
            Quality = saved.Quality,
            Score = saved.Score
        };

        // Reset the counters for this run.
        // NOTE(review): presumably Agent_AgentCompleted fills these in; confirm.
        _moves = 0;
        _score = 0;

        agent.Environment.QualityTable = replay.Quality;

        bool succeeded;
        try
        {
            agent.Run(agent.StartPosition);
            succeeded = true;
        }
        catch
        {
            // Any failure during the replay marks the session as not succeeded.
            succeeded = false;
        }

        replay.Succeeded = succeeded;

        // The saved figures are replaced with what the replay actually produced.
        replay.Moves = _moves;
        replay.Score = _score;

        replayed.Add(replay);
    }

    // Merge sessions with the same outcome. Within a group the elements keep the
    // descending episode order of `replayed`, so First() is the latest episode
    // and Last() the earliest.
    var merged = replayed
        .GroupBy(g => new { g.Moves, g.Score, g.Succeeded })
        .Select(bucket =>
        {
            return new TrainingSessionEx()
            {
                MinEpisode = bucket.Last().Episode,
                MaxEpisode = bucket.First().Episode,
                Episode = bucket.Last().Episode,
                Moves = bucket.Key.Moves,
                Score = bucket.Key.Score,
                Succeeded = bucket.Key.Succeeded,
                Quality = bucket.First().Quality
            };
        });

    var bySuccess = merged.OrderByDescending(s => s.Succeeded);
    var byMoves = bySuccess.ThenByDescending(m => m.Moves);
    var ranked = byMoves.ThenByDescending(e => e.MinEpisode).ToList();

    TrainingSessions = ranked;
    SelectedSession = TrainingSessions.FirstOrDefault();

    agent.AgentCompleted -= Agent_AgentCompleted;
}