Beispiel #1
0
        /// <summary>
        /// Console entry point. Runs a <c>SarsaCliffWalker</c> and then a
        /// <c>QLearningCliffWalker</c> against one shared <c>CliffWalkingEnvironment</c>
        /// and, for each agent, prints the average values, the highest values and the
        /// greedy path computed from the estimates returned by <c>ImproveEstimates</c>.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            // Passed as the last argument of ImproveEstimates for both agents.
            // NOTE(review): presumably the number of training episodes — confirm
            // against the ImproveEstimates signature.
            const int trainingBudget = 10000;

            var env        = new CliffWalkingEnvironment();
            var sarsaAgent = new SarsaCliffWalker();

            // The diagnostics out-parameter is never read here, so it is discarded.
            var values = sarsaAgent.ImproveEstimates(env, out _, trainingBudget);

            System.Console.WriteLine("td 0 avg Values:");
            StateActionValuesConsoleRenderer.RenderAverageValues(values);
            System.Console.WriteLine("td 0 highest Values:");
            StateActionValuesConsoleRenderer.RenderHighestValues(values);
            System.Console.WriteLine("");
            System.Console.WriteLine("td 0 Greedy path:");
            ConsolePathRenderer.RenderPath(GreedyPath(env, values));

            // The second agent reuses the same environment instance and the same budget.
            var qAgent  = new QLearningCliffWalker();
            var qValues = qAgent.ImproveEstimates(env, out _, trainingBudget);

            System.Console.WriteLine("");
            System.Console.WriteLine("q learning avg Values:");
            StateActionValuesConsoleRenderer.RenderAverageValues(qValues);
            System.Console.WriteLine("q learning highest Values:");
            StateActionValuesConsoleRenderer.RenderHighestValues(qValues);
            System.Console.WriteLine("");
            System.Console.WriteLine("q learning Greedy path:");
            ConsolePathRenderer.RenderPath(GreedyPath(env, qValues));
        }
Beispiel #2
0
        /// <summary>
        /// Collects average reward sums for a <c>SarsaCliffWalker</c> and a
        /// <c>QLearningCliffWalker</c> (both constructed with arguments 0.1, 0.1) on a
        /// shared <c>CliffWalkingEnvironment</c>, plots both series against the episode
        /// index on one chart, and then calls <c>Show</c> on the plotter.
        /// </summary>
        public static void Run()
        {
            const int episodeCount = 100;

            var environment = new CliffWalkingEnvironment();
            var sarsa = new SarsaCliffWalker(0.1, 0.1);
            var qLearning = new QLearningCliffWalker(0.1, 0.1);

            // Sarsa is evaluated first, then Q-learning, both on the same environment instance.
            var sarsaRewards = CollectAverageRewardSums(sarsa, environment, episodeCount);
            var qLearningRewards = CollectAverageRewardSums(qLearning, environment, episodeCount);

            var plotter = new Plotter();
            var chart = plotter.Plt;

            chart.Title("Average total reward per episode");

            // X axis: episode indices 0 .. episodeCount - 1 as doubles.
            var episodeAxis = new double[episodeCount];
            for (var episode = 0; episode < episodeCount; episode++)
            {
                episodeAxis[episode] = episode;
            }

            chart.PlotScatter(episodeAxis, sarsaRewards, label: "TD 0 (Sarsa)");
            chart.PlotScatter(episodeAxis, qLearningRewards, label: "Q learning");

            chart.XLabel("Episodes");
            chart.YLabel("Average total reward");
            chart.Legend();

            plotter.Show();
        }