/// <summary>
/// Measures early-learning ("interim") performance for each learning rate:
/// trains a fresh agent for NumEpisodesForInterim episodes, averages the
/// per-episode reward sums, and repeats over a fixed number of independent
/// runs to smooth out stochastic variation. Yields one averaged value per
/// learning rate, in input order (lazily, via iterator).
/// </summary>
/// <param name="learningRates">Learning rates to evaluate.</param>
/// <param name="createAgentFunc">Factory producing a fresh agent for a given learning rate.</param>
/// <returns>Mean interim reward per learning rate.</returns>
private static IEnumerable<double> GatherInterimPerformance(
    IEnumerable<double> learningRates,
    Func<double, ICliffWalkingAgent> createAgentFunc)
{
    const int numRuns = 50;
    var env = new CliffWalkingEnvironment();

    foreach (var rate in learningRates)
    {
        // Accumulate the first-N-episode reward average across runs,
        // then divide once; same result as collecting into a list and
        // calling Average().
        var totalOfRunAverages = 0.0;

        for (var run = 0; run < numRuns; run++)
        {
            env.Reset();

            // A brand-new agent per run so no learning leaks between runs.
            var agent = createAgentFunc(rate);
            agent.ImproveEstimates(env, out var diags, NumEpisodesForInterim);

            totalOfRunAverages += diags.RewardSumPerEpisode.Average();
        }

        yield return totalOfRunAverages / numRuns;
    }
}
/// <summary>
/// Measures long-run ("asymptotic") performance for each learning rate:
/// trains a single fresh agent for NumEpisodesForAsymptote episodes on a
/// reset environment and yields the mean reward sum per episode. Training
/// wall-clock time is logged to the console. Results are yielded lazily,
/// one per learning rate, in input order.
/// </summary>
/// <param name="learningRates">Learning rates to evaluate.</param>
/// <param name="createAgentFunc">Factory producing a fresh agent for a given learning rate.</param>
/// <returns>Mean per-episode reward per learning rate.</returns>
private static IEnumerable<double> GatherAsymptoticPerformance(
    IEnumerable<double> learningRates,
    Func<double, ICliffWalkingAgent> createAgentFunc)
{
    var env = new CliffWalkingEnvironment();

    foreach (var rate in learningRates)
    {
        env.Reset();
        var agent = createAgentFunc(rate);

        // Time the full training pass; this is the expensive step.
        var sw = Stopwatch.StartNew();
        agent.ImproveEstimates(env, out var diags, NumEpisodesForAsymptote);
        Console.WriteLine($"ran {NumEpisodesForAsymptote} episodes in {sw.Elapsed}");

        yield return diags.RewardSumPerEpisode.Average();
    }
}
/// <summary>
/// Walks the environment greedily with respect to the given action-value
/// estimates, yielding every position visited from the reset start position
/// up to and including the terminal position. At each step the action with
/// the highest estimated value is taken (ties resolved in favour of the
/// later entry, matching the stable OrderBy + Last combination).
/// NOTE(review): if the greedy policy cycles without reaching a terminal
/// state, this iterator never completes — presumably callers rely on the
/// estimates being good enough to terminate.
/// </summary>
/// <param name="env">Environment to walk; reset at the start.</param>
/// <param name="values">State-action value estimates driving action choice.</param>
/// <returns>The sequence of positions along the greedy path, terminal included.</returns>
private static IEnumerable<Position> GreedyPath(
    CliffWalkingEnvironment env, IStateActionValues values)
{
    var position = env.Reset();

    while (true)
    {
        yield return position;

        // Stable sort ascending by value, then take the last element:
        // highest value wins, later-listed action wins ties.
        var greedyAction = values
            .ActionValues(position)
            .OrderBy(av => av.Item2)
            .Last()
            .Item1;

        var (nextPosition, _, done) = env.Step(greedyAction);
        position = nextPosition;

        if (done)
        {
            // Emit the terminal position before finishing.
            yield return position;
            yield break;
        }
    }
}