Exemplo n.º 1
0
        /// <summary>
        /// Runs all training episodes in sequence, periodically saving a quality snapshot
        /// and reporting the completion of each episode.
        /// </summary>
        /// <param name="epsilon">Exploration/exploitation value passed to each episode; replaced by the result of <c>DecayEpsilon</c> after every episode when <c>UseDecayingEpsilon</c> is set</param>
        /// <param name="overrideBaseEvents">Forwarded to each episode; when true, <c>OnTrainingEpisodeCompleted</c> is not called from this method</param>
        protected virtual void RunTrainingSet(double epsilon, bool overrideBaseEvents)
        {
            int episode = 0;

            while (episode < NumberOfTrainingEpisodes)
            {
                var outcome    = RunTrainingEpisode(epsilon, overrideBaseEvents);
                var finalState = outcome.finalState;
                var moveCount  = outcome.moves;

                // A snapshot is taken only for episodes that ended in a terminal state,
                // and then only on every QualitySaveFrequency-th episode (1-based count).
                if (Environment.IsTerminalState(finalState, moveCount, MaximumAllowedMoves))
                {
                    if ((episode + 1) % Environment.QualitySaveFrequency == 0)
                    {
                        var snapshot = Environment.SaveQualityForEpisode(episode + 1, moveCount, Score);
                        TrainingSessions.Add(snapshot);
                    }
                }

                // The decayed value feeds the next episode.
                if (UseDecayingEpsilon)
                {
                    epsilon = DecayEpsilon(episode, epsilon);
                }

                if (!overrideBaseEvents)
                {
                    OnTrainingEpisodeCompleted(
                        episode,
                        NumberOfTrainingEpisodes,
                        _trainingEpisodeStartPoint,
                        moveCount,
                        Score,
                        Environment.TerminalStates.Contains(finalState % Environment.StatesPerPhase));
                }

                ++episode;
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Replays every saved training session's quality table through a converted copy of
        /// the loaded agent, merges sessions whose replays produced the same moves, score and
        /// success flag, and publishes the ordered result to <c>TrainingSessions</c> and
        /// <c>SelectedSession</c>.
        /// </summary>
        public void InitializeSessions()
        {
            var replayed = new List <TrainingSessionEx>();
            var sessions = _agent.TrainingSessions.OrderBy(e => e.Episode).ToList();
            var agent    = MazeUtilities.ConvertLoadedAgent(_agent);

            agent.Environment     = MazeUtilities.CopyEnvironment(_agent.Environment);
            agent.AgentCompleted += Agent_AgentCompleted;

            // The handler is detached in the finally block so that an unexpected failure
            // anywhere below cannot leave it subscribed to the agent.
            try
            {
                // Walk from the highest episode down; the replay list therefore ends up in
                // descending episode order, which the grouping below relies on.
                for (int i = sessions.Count - 1; i >= 0; --i)
                {
                    // Moves and Score are not copied from the stored session: they are
                    // always taken from the replay counters after the run (see below).
                    var session = new TrainingSessionEx
                    {
                        Episode = sessions[i].Episode,
                        Quality = sessions[i].Quality
                    };

                    // Reset the replay counters before each run.
                    // NOTE(review): presumably Agent_AgentCompleted fills these in - confirm.
                    _moves = 0;
                    _score = 0;

                    agent.Environment.QualityTable = session.Quality;

                    try
                    {
                        agent.Run(agent.StartPosition);
                        session.Succeeded = true;
                    }
                    catch
                    {
                        // Any exception thrown by the run marks this session as failed.
                        session.Succeeded = false;
                    }

                    session.Moves = _moves;
                    session.Score = _score;

                    replayed.Add(session);
                }

                // Collapse sessions with identical replay outcomes into a single entry.
                // Items inside a group keep their source order (descending episode), so the
                // first item is the highest episode and the last item is the lowest.
                var selection = replayed
                                .GroupBy(g => new
                                {
                                    g.Moves,
                                    g.Score,
                                    g.Succeeded
                                })
                                .Select(t =>
                                {
                                    var highest = t.First();
                                    var lowest  = t.Last();

                                    return new TrainingSessionEx()
                                    {
                                        MinEpisode = lowest.Episode,
                                        MaxEpisode = highest.Episode,
                                        Episode    = lowest.Episode,
                                        Moves      = t.Key.Moves,
                                        Score      = t.Key.Score,
                                        Succeeded  = t.Key.Succeeded,
                                        Quality    = highest.Quality
                                    };
                                });

                // Succeeded sessions first, then by moves and by first episode, both descending.
                TrainingSessions = selection
                                   .OrderByDescending(s => s.Succeeded)
                                   .ThenByDescending(m => m.Moves)
                                   .ThenByDescending(e => e.MinEpisode).ToList();

                SelectedSession = TrainingSessions.FirstOrDefault();
            }
            finally
            {
                agent.AgentCompleted -= Agent_AgentCompleted;
            }
        }