/// <summary>
/// Writes a text grid of per-position average values to the console.
/// </summary>
/// <remarks>
/// Rows are emitted from y = 3 down to y = 0, one console line per row.
/// Each row holds the cells for x = 0..11, separated by single spaces.
/// </remarks>
/// <param name="values">Source of the values rendered for every grid position.</param>
public static void RenderAverageValues(IStateActionValues values)
{
    const int columnCount = 12;
    const int rowCount = 4;

    // Highest y first, so the row with y = 0 is printed last.
    for (var row = rowCount - 1; row >= 0; row--)
    {
        var cells = new string[columnCount];
        for (var column = 0; column < columnCount; column++)
        {
            cells[column] = RenderAverageValueAtPosition(values, new Position(column, row));
        }

        System.Console.WriteLine(string.Join(" ", cells));
    }
}
/// <summary>
/// Formats the largest action value recorded for <paramref name="pos"/>.
/// </summary>
/// <param name="values">Lookup that yields the (action, value) pairs for a position.</param>
/// <param name="pos">Position whose action values are inspected.</param>
/// <returns>
/// The maximum value formatted as <c>0000.0</c> (non-negative) or <c>-000.0</c>
/// (negative), or a single space when the position has no action values.
/// </returns>
private static string RenderHighestValueAtPosition(IStateActionValues values, Position pos)
{
    // Two-section format: the first section is used for non-negative numbers,
    // the second for negative ones.
    const string cellFormat = "{0:0000.0;-000.0}";

    var candidates = values
        .ActionValues(pos)
        .Select(entry => entry.Item2)
        .ToList();

    if (candidates.Count > 0)
    {
        return string.Format(cellFormat, candidates.Max());
    }

    // Nothing recorded for this position: emit the placeholder.
    return " ";
}
/// <summary>
/// Lazily walks the environment from its reset position, always taking the
/// action with the highest value, and yields every position visited.
/// </summary>
/// <remarks>
/// This is an iterator, so <c>env.Reset()</c> and every <c>env.Step</c> call run
/// only while the result is being enumerated. The sequence ends after the
/// environment reports that the episode is done; the position returned by that
/// last step is the final element. Nothing here limits the number of steps.
/// </remarks>
/// <param name="env">Environment that is reset and then stepped through.</param>
/// <param name="values">Action values used to choose the action at each position.</param>
/// <returns>The visited positions in order, starting with the reset position.</returns>
private static IEnumerable<Position> GreedyPath(
    CliffWalkingEnvironment env,
    IStateActionValues values)
{
    var position = env.Reset();

    while (true)
    {
        yield return position;

        // Sort ascending and take the last entry, i.e. the highest-valued action.
        var rankedActions = values
            .ActionValues(position)
            .OrderBy(entry => entry.Item2);
        var greedyAction = rankedActions.Last().Item1;

        var (nextPosition, _, finished) = env.Step(greedyAction);
        position = nextPosition;

        if (finished)
        {
            break;
        }
    }

    // The position reached by the terminating step has not been yielded yet.
    yield return position;
}