/// <summary>
/// Console entry point: trains a <c>SarsaCliffWalker</c> and then a
/// <c>QLearningCliffWalker</c> on the same <c>CliffWalkingEnvironment</c>,
/// printing the average values, highest values and greedy path for each.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    // Third argument handed to both ImproveEstimates calls.
    // NOTE(review): presumably the number of training episodes - confirm against the agents.
    const int Iterations = 10000;

    var environment = new CliffWalkingEnvironment();

    // --- SARSA agent (reported under the "td 0" labels) ---
    var sarsa = new SarsaCliffWalker();
    var sarsaValues = sarsa.ImproveEstimates(environment, out _, Iterations);

    System.Console.WriteLine("td 0 avg Values:");
    StateActionValuesConsoleRenderer.RenderAverageValues(sarsaValues);

    System.Console.WriteLine("td 0 highest Values:");
    StateActionValuesConsoleRenderer.RenderHighestValues(sarsaValues);

    System.Console.WriteLine("");
    System.Console.WriteLine("td 0 Greedy path:");
    var sarsaGreedyPath = GreedyPath(environment, sarsaValues);
    ConsolePathRenderer.RenderPath(sarsaGreedyPath);

    // --- Q-learning agent, trained on the same environment instance ---
    var qLearner = new QLearningCliffWalker();
    var qLearnerValues = qLearner.ImproveEstimates(environment, out _, Iterations);

    System.Console.WriteLine("");
    System.Console.WriteLine("q learning avg Values:");
    StateActionValuesConsoleRenderer.RenderAverageValues(qLearnerValues);

    System.Console.WriteLine("q learning highest Values:");
    StateActionValuesConsoleRenderer.RenderHighestValues(qLearnerValues);

    System.Console.WriteLine("");
    System.Console.WriteLine("q learning Greedy path:");
    var qLearnerGreedyPath = GreedyPath(environment, qLearnerValues);
    ConsolePathRenderer.RenderPath(qLearnerGreedyPath);
}
/// <summary>
/// Checks that, after a <c>SarsaCliffWalker</c> has run <c>ImproveEstimates</c>
/// with its default settings, the returned values hold at least one action
/// value for position (0, 0).
/// </summary>
public void AfterImprovingEstimates_StartingPosition_HasActionValues()
{
    // Arrange
    var environment = new CliffWalkingEnvironment();
    var walker = new SarsaCliffWalker();

    // Act: the diagnostics output is not inspected by this test.
    var learnedValues = walker.ImproveEstimates(environment, out _);

    // Assert
    var startPosition = new Position(0, 0);
    var startActionValues = learnedValues.ActionValues(startPosition);
    Assert.IsNotEmpty(startActionValues);
}