Esempio n. 1
0
        /// <summary>
        /// Checks that two returns recorded for the same state (2 and 4) are
        /// reported back as their mean (3) by <c>AverageReturnFrom</c>.
        /// </summary>
        public void Averages_AreCorrect()
        {
            // Arrange
            // NOTE(review): the constructor argument 5 is presumably the number of
            // states being tracked; confirm against StateReturns.
            const int state = 4;
            var sut = new StateReturns(5);
            sut.Add(state, 2);
            sut.Add(state, 4);

            // Act
            var average = sut.AverageReturnFrom(state);

            // Assert
            Assert.AreEqual(3, average);
        }
Esempio n. 2
0
        /// <summary>
        /// Generates one episode from <paramref name="environment"/> and uses it to
        /// refresh the stored value estimates.
        /// </summary>
        /// <remarks>
        /// The episode is walked from its second-to-last step back to step 0 while a
        /// running, undiscounted sum of the rewards attached to the following steps is
        /// maintained. Whenever the current step is the first visit to its state, the
        /// running sum is recorded in <c>_returns</c> and <c>_values</c> for that state
        /// is overwritten with the average of all returns recorded so far.
        /// NOTE(review): this has the shape of first-visit Monte Carlo prediction;
        /// confirm against the enclosing class.
        /// </remarks>
        /// <param name="environment">Environment the episode is generated from.</param>
        private void ImproveEstimates(RandomWalkEnvironment environment)
        {
            var episode = RandomWalkEpisode.Generate(environment);

            // Time steps Length-2, ..., 1, 0: the last step has no successor reward.
            var timeStepsNewestFirst = Enumerable.Range(0, episode.Length - 1).Reverse();
            double returnSoFar = 0.0;

            foreach (var time in timeStepsNewestFirst)
            {
                var current = episode.Steps[time].State;

                // The reward for leaving step `time` is stored on step `time + 1`.
                returnSoFar += episode.Steps[time + 1].Reward;

                // Only the earliest occurrence of a state contributes a return.
                if (episode.TimeOfFirstVisit(current) != time)
                {
                    continue;
                }

                _returns.Add(current, returnSoFar);
                _values[current] = _returns.AverageReturnFrom(current);
            }
        }