/// <summary>
/// Verifies that <c>StateReturns.Add</c> throws <see cref="IndexOutOfRangeException"/>
/// for a negative state index and for an index equal to the constructor argument.
/// </summary>
public void BadState_Throws()
{
    // Presumably the constructor argument is the number of states, making 0..4
    // the valid indices -- confirm against StateReturns.
    const int size = 5;
    var tracker = new StateReturns(size);

    // Index below the lower bound.
    Assert.Throws<IndexOutOfRangeException>(() => tracker.Add(-1, 1));

    // Index equal to the constructor argument.
    Assert.Throws<IndexOutOfRangeException>(() => tracker.Add(size, 1));
}
/// <summary>
/// Verifies that after recording the returns 2 and 4 for the same state,
/// <c>AverageReturnFrom</c> reports 3 for that state.
/// </summary>
public void Averages_AreCorrect()
{
    const int state = 4;
    var tracker = new StateReturns(5);

    tracker.Add(state, 2);
    tracker.Add(state, 4);
    var average = tracker.AverageReturnFrom(state);

    // (2 + 4) / 2 == 3
    Assert.AreEqual(3, average);
}
/// <summary>
/// Generates one episode from <paramref name="environment"/> and uses it to update the
/// recorded returns in <c>_returns</c> and the per-state estimates in <c>_values</c>.
/// </summary>
/// <remarks>
/// The episode is traversed from its second-to-last step back to step 0 while summing
/// rewards without discounting. A state's return is recorded only at the time step that
/// <c>TimeOfFirstVisit</c> reports for it. NOTE(review): this looks like first-visit
/// Monte Carlo prediction -- confirm against the surrounding class.
/// </remarks>
/// <param name="environment">Environment handed to <c>RandomWalkEpisode.Generate</c>.</param>
private void ImproveEstimates(RandomWalkEnvironment environment)
{
    var episode = RandomWalkEpisode.Generate(environment);
    var accumulatedReward = 0.0;

    // Time steps Length-2, Length-3, ..., 0; the final step is only ever read
    // through the "time + 1" reward lookup below.
    var timeStepsLatestFirst = Enumerable.Range(0, episode.Length - 1).Reverse();

    foreach (var time in timeStepsLatestFirst)
    {
        var visited = episode.Steps[time].State;

        // The reward stored on the following step is added to the running total;
        // presumably it is the reward received on leaving this step -- confirm.
        accumulatedReward += episode.Steps[time + 1].Reward;

        // Skip later revisits: only the first occurrence of a state contributes.
        if (episode.TimeOfFirstVisit(visited) != time)
        {
            continue;
        }

        _returns.Add(visited, accumulatedReward);
        _values[visited] = _returns.AverageReturnFrom(visited);
    }
}