Example #1
0
        /// <summary>
        /// Trains a SARSA agent and a Q-learning agent on the cliff-walking
        /// environment, then prints each agent's learned action values
        /// (average and highest per state) and its greedy path to the console.
        /// </summary>
        static void Main(string[] args)
        {
            // Same training budget for both agents so the outputs are comparable.
            const int numEpisodes = 10000;

            var env        = new CliffWalkingEnvironment();
            var sarsaAgent = new SarsaCliffWalker();

            // Diagnostics are not inspected here, so discard the out value.
            var values = sarsaAgent.ImproveEstimates(env, out _, numEpisodes);

            System.Console.WriteLine("td 0 avg Values:");
            StateActionValuesConsoleRenderer.RenderAverageValues(values);
            System.Console.WriteLine("td 0 highest Values:");
            StateActionValuesConsoleRenderer.RenderHighestValues(values);
            System.Console.WriteLine("");
            System.Console.WriteLine("td 0 Greedy path:");
            ConsolePathRenderer.RenderPath(GreedyPath(env, values));

            var qAgent  = new QLearningCliffWalker();
            var qValues = qAgent.ImproveEstimates(env, out _, numEpisodes);

            System.Console.WriteLine("");
            System.Console.WriteLine("q learning avg Values:");
            StateActionValuesConsoleRenderer.RenderAverageValues(qValues);
            System.Console.WriteLine("q learning highest Values:");
            StateActionValuesConsoleRenderer.RenderHighestValues(qValues);
            System.Console.WriteLine("");
            System.Console.WriteLine("q learning Greedy path:");
            ConsolePathRenderer.RenderPath(GreedyPath(env, qValues));
        }
Example #2
0
        /// <summary>
        /// Verifies that after training, the SARSA agent has recorded at least
        /// one action value for the starting position (0, 0).
        /// </summary>
        public void AfterImprovingEstimates_StartingPosition_HasActionValues()
        {
            // Arrange
            var env   = new CliffWalkingEnvironment();
            var agent = new SarsaCliffWalker();

            // Act — diagnostics are not needed for this assertion, so discard them.
            var values = agent.ImproveEstimates(env, out _);

            // Assert
            Assert.IsNotEmpty(values.ActionValues(new Position(0, 0)));
        }