示例#1
0
        /// <summary>
        /// Evaluates interim performance for each learning rate: for every rate, 50 runs of
        /// <c>NumEpisodesForInterim</c> episodes are executed, the reward sum per episode is
        /// averaged within each run, and those per-run averages are averaged again.
        /// </summary>
        /// <param name="learningRates">Learning rates to evaluate; one result is produced per rate, in order.</param>
        /// <param name="createAgentFunc">Factory invoked once per run to obtain the agent for a learning rate.</param>
        /// <returns>A lazily evaluated sequence holding one averaged score per learning rate.</returns>
        private static IEnumerable<double> GatherInterimPerformance(
            IEnumerable<double> learningRates,
            Func<double, ICliffWalkingAgent> createAgentFunc)
        {
            const int runsPerRate = 50;

            // A single environment instance is shared by all runs and reset before each one.
            var environment = new CliffWalkingEnvironment();

            foreach (var learningRate in learningRates)
            {
                var perRunAverages = new double[runsPerRate];

                for (var run = 0; run < perRunAverages.Length; run++)
                {
                    perRunAverages[run] = AverageRewardOfOneRun(learningRate);
                }

                yield return perRunAverages.Average();
            }

            // Executes one run for the given rate and returns its mean reward sum per episode.
            double AverageRewardOfOneRun(double rate)
            {
                environment.Reset();
                var runAgent = createAgentFunc(rate);

                runAgent.ImproveEstimates(environment, out var diagnostics, NumEpisodesForInterim);

                return diagnostics.RewardSumPerEpisode.Average();
            }
        }
示例#2
0
        /// <summary>
        /// Evaluates asymptotic performance for each learning rate: a single run of
        /// <c>NumEpisodesForAsymptote</c> episodes is executed per rate and the average reward
        /// sum per episode of that run is reported. The elapsed time of every run is written
        /// to the console.
        /// </summary>
        /// <param name="learningRates">Learning rates to evaluate; one result is produced per rate, in order.</param>
        /// <param name="createAgentFunc">Factory invoked once per rate to obtain the agent.</param>
        /// <returns>A lazily evaluated sequence holding one average per learning rate.</returns>
        private static IEnumerable<double> GatherAsymptoticPerformance(
            IEnumerable<double> learningRates,
            Func<double, ICliffWalkingAgent> createAgentFunc)
        {
            // The same environment instance is reused and reset before each rate's run.
            var environment = new CliffWalkingEnvironment();

            foreach (var learningRate in learningRates)
            {
                yield return MeasureSingleRun(learningRate);
            }

            // Runs one timed training session and returns its mean reward sum per episode.
            double MeasureSingleRun(double rate)
            {
                environment.Reset();
                var runAgent = createAgentFunc(rate);

                // Timing starts after agent creation so only the training itself is measured.
                var timer = Stopwatch.StartNew();

                runAgent.ImproveEstimates(environment, out var diagnostics, NumEpisodesForAsymptote);

                Console.WriteLine($"ran {NumEpisodesForAsymptote} episodes in {timer.Elapsed}");

                return diagnostics.RewardSumPerEpisode.Average();
            }
        }
示例#3
0
        /// <summary>
        /// Walks the environment by always taking the highest-valued action for the current
        /// position and yields every position visited, starting with the one returned by
        /// <c>env.Reset()</c> and ending with the position at which the environment reports done.
        /// </summary>
        /// <param name="env">Environment to reset and step through.</param>
        /// <param name="values">Source of the action values consulted at each position.</param>
        /// <returns>A lazily evaluated sequence of visited positions, in visiting order.</returns>
        private static IEnumerable<Position> GreedyPath(
            CliffWalkingEnvironment env, IStateActionValues values)
        {
            var position = env.Reset();
            yield return position;

            bool episodeOver;

            // At least one step is always taken; each resulting position is yielded,
            // including the final one.
            do
            {
                // Ascending stable sort + Last(): among equally valued actions the one
                // listed last wins.
                var rankedActions = values
                                    .ActionValues(position)
                                    .OrderBy(candidate => candidate.Item2);
                var greedyAction = rankedActions.Last().Item1;

                (position, _, episodeOver) = env.Step(greedyAction);

                yield return position;
            }
            while (!episodeOver);
        }