Пример #1
0
        /// <summary>
        /// Re-initialises the value table to zeros and refines it by replaying episodes of
        /// <paramref name="environment"/>. Each update looks up to <c>_numSteps</c> rewards
        /// ahead (undiscounted) and, when that window ends before the episode does, adds the
        /// current estimate of the state at the end of the window.
        /// </summary>
        /// <param name="environment">Environment supplying <c>NumPositions</c>, <c>Reset()</c> and <c>Step()</c>.</param>
        /// <param name="episodeLimit">Number of episodes to run; 10000 when <c>null</c>.</param>
        /// <returns>The <c>_values</c> array after the last episode.</returns>
        public double[] Estimate(RandomWalkEnvironment environment, int? episodeLimit = null)
        {
            _values = Enumerable.Repeat(0.0, environment.NumPositions).ToArray();

            var episodeCount = episodeLimit.GetValueOrDefault(10000);

            for (var episode = 0; episode < episodeCount; episode++)
            {
                // visited[k] is the state at time k; rewardLog[k] is the reward received on
                // entering it. Index 0 of rewardLog is a placeholder so both lists line up.
                var visited = new List<int> { environment.Reset() };
                var rewardLog = new List<double> { 0.0 };

                // Stays at int.MaxValue until the environment reports the episode is done.
                var terminalTime = int.MaxValue;
                var step = 0;
                int updateTime;

                do
                {
                    // Time index of the state being updated; negative while fewer than
                    // _numSteps transitions have been observed.
                    updateTime = step - _numSteps + 1;

                    if (step < terminalTime)
                    {
                        var (nextState, reward, done) = environment.Step();
                        visited.Add(nextState);
                        rewardLog.Add(reward);

                        if (done)
                        {
                            terminalTime = step + 1;
                        }
                    }

                    if (updateTime >= 0)
                    {
                        var windowEnd = updateTime + _numSteps;

                        // Sum the rewards following updateTime, clipped to what was recorded.
                        var target = 0.0;
                        for (var k = updateTime + 1; k <= windowEnd && k < rewardLog.Count; k++)
                        {
                            target += rewardLog[k];
                        }

                        // Bootstrap only when the window ends before the episode does.
                        if (windowEnd < terminalTime)
                        {
                            target += Value(visited[windowEnd]);
                        }

                        var focus = visited[updateTime];
                        var estimate = Value(focus);
                        _values[focus] = estimate + _learningRate * (target - estimate);
                    }

                    step++;
                } while (updateTime < terminalTime - 1);
            }

            return _values;
        }
Пример #2
0
        /// <summary>
        /// Re-initialises every entry of the value table to 0.5 and refines it by replaying
        /// episodes of <paramref name="environment"/>, applying a one-step update after each
        /// transition: the state's value moves toward <c>reward + Value(nextState)</c> by a
        /// fraction <c>_learningRate</c> (no discounting).
        /// </summary>
        /// <param name="environment">Environment supplying <c>NumPositions</c>, <c>Reset()</c> and <c>Step()</c>.</param>
        /// <param name="episodeLimit">Number of episodes to run; 10000 when <c>null</c>.</param>
        /// <returns>The <c>_values</c> array after the last episode.</returns>
        public double[] Estimate(RandomWalkEnvironment environment, int? episodeLimit = null)
        {
            _values = Enumerable.Repeat(0.5, environment.NumPositions).ToArray();

            var episodeCount = episodeLimit.GetValueOrDefault(10000);

            for (var episode = 0; episode < episodeCount; episode++)
            {
                var current = environment.Reset();
                var finished = false;

                // At least one step is always taken per episode.
                while (!finished)
                {
                    var (successor, reward, isTerminal) = environment.Step();

                    // NOTE(review): Value is defined outside this block; it is read once here
                    // on the assumption that it is a side-effect-free lookup - confirm.
                    var estimate = Value(current);
                    var tdError = reward + Value(successor) - estimate;
                    _values[current] = estimate + _learningRate * tdError;

                    current = successor;
                    finished = isTerminal;
                }
            }

            return _values;
        }