/// <summary>
/// Renders the largest action value recorded for <paramref name="pos"/> as text.
/// </summary>
/// <param name="values">Source of the (action, value) pairs for a position.</param>
/// <param name="pos">Position whose action values are inspected.</param>
/// <returns>
/// The maximum of the second tuple items formatted with the pattern
/// <c>0000.0;-000.0</c> (first section for non-negative numbers, second for
/// negative ones), or a single-space string when there are no action values.
/// </returns>
private static string RenderHighestValueAtPosition(IStateActionValues values, Position pos)
{
    // Materialize once so the sequence is enumerated a single time.
    var candidates = values.ActionValues(pos).Select(entry => entry.Item2).ToArray();

    if (candidates.Length > 0)
    {
        var highest = candidates.Max();
        return $"{highest:0000.0;-000.0}";
    }

    // NOTE(review): this placeholder is one character wide while formatted values
    // are typically six characters wide - confirm the width is intended.
    return " ";
}
/// <summary>
/// Lazily walks the environment from its reset position, always taking the
/// action with the highest value, and yields every position visited.
/// </summary>
/// <param name="env">Environment that is reset and then stepped through.</param>
/// <param name="values">Source of the (action, value) pairs for a position.</param>
/// <returns>
/// The position returned by <c>env.Reset()</c>, followed by the observation from
/// each step, ending with the observation of the step that reported done.
/// </returns>
/// <remarks>
/// Nothing here bounds the number of steps: enumeration only finishes once
/// <c>env.Step</c> reports done.
/// </remarks>
private static IEnumerable<Position> GreedyPath(
    CliffWalkingEnvironment env,
    IStateActionValues values)
{
    var here = env.Reset();

    while (true)
    {
        yield return here;

        // OrderBy is a stable sort, so when several actions share the top value
        // the one enumerated last by ActionValues is the one chosen.
        var ranked = values.ActionValues(here).OrderBy(entry => entry.Item2);
        var greedyAction = ranked.Last().Item1;

        // The middle element of the step result is not needed here.
        var (next, _, finished) = env.Step(greedyAction);
        here = next;

        if (finished)
        {
            break;
        }
    }

    // Emit the final position reached by the step that ended the episode.
    yield return here;
}