/// <summary>
/// Console entry point. Runs a <c>SarsaCliffWalker</c> and then a
/// <c>QLearningCliffWalker</c> against the same <c>CliffWalkingEnvironment</c>,
/// and for each prints the average values, the highest values and the greedy path.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    // Third argument passed to both ImproveEstimates calls.
    // NOTE(review): presumably the number of training episodes — confirm against ImproveEstimates.
    const int improveEstimatesCount = 10000;

    var cliffWorld = new CliffWalkingEnvironment();

    // Sarsa agent: estimate values first, then report.
    // The diagnostics out-value is produced by the call but not used here.
    var sarsaWalker = new SarsaCliffWalker();
    var sarsaValues = sarsaWalker.ImproveEstimates(cliffWorld, out var sarsaDiagnostics, improveEstimatesCount);

    System.Console.WriteLine("td 0 avg Values:");
    StateActionValuesConsoleRenderer.RenderAverageValues(sarsaValues);
    System.Console.WriteLine("td 0 highest Values:");
    StateActionValuesConsoleRenderer.RenderHighestValues(sarsaValues);
    System.Console.WriteLine("");
    System.Console.WriteLine("td 0 Greedy path:");
    var sarsaGreedyPath = GreedyPath(cliffWorld, sarsaValues);
    ConsolePathRenderer.RenderPath(sarsaGreedyPath);

    // Q-learning agent: same environment instance, same report layout.
    // Again, the diagnostics out-value is not used here.
    var qWalker = new QLearningCliffWalker();
    var qLearningValues = qWalker.ImproveEstimates(cliffWorld, out var qLearningDiagnostics, improveEstimatesCount);

    System.Console.WriteLine("");
    System.Console.WriteLine("q learning avg Values:");
    StateActionValuesConsoleRenderer.RenderAverageValues(qLearningValues);
    System.Console.WriteLine("q learning highest Values:");
    StateActionValuesConsoleRenderer.RenderHighestValues(qLearningValues);
    System.Console.WriteLine("");
    System.Console.WriteLine("q learning Greedy path:");
    var qLearningGreedyPath = GreedyPath(cliffWorld, qLearningValues);
    ConsolePathRenderer.RenderPath(qLearningGreedyPath);
}
/// <summary>
/// Collects average reward sums for a <c>SarsaCliffWalker</c> and a
/// <c>QLearningCliffWalker</c> on one <c>CliffWalkingEnvironment</c> over
/// 100 episodes, then shows both series as scatter plots on a single chart.
/// </summary>
public static void Run()
{
    const int numEpisodes = 100;

    var cliffWorld = new CliffWalkingEnvironment();
    var sarsaWalker = new SarsaCliffWalker(0.1, 0.1);
    var qWalker = new QLearningCliffWalker(0.1, 0.1);

    // Sarsa is evaluated before Q-learning, both against the same environment instance.
    var sarsaSeries = CollectAverageRewardSums(sarsaWalker, cliffWorld, numEpisodes);
    var qLearningSeries = CollectAverageRewardSums(qWalker, cliffWorld, numEpisodes);

    var plotter = new Plotter();
    var chart = plotter.Plt;

    chart.Title("Average total reward per episode");

    // X axis: episode indices 0 .. numEpisodes - 1 as doubles.
    var episodeAxis = new double[numEpisodes];
    for (var episode = 0; episode < episodeAxis.Length; episode++)
    {
        episodeAxis[episode] = episode;
    }

    chart.PlotScatter(episodeAxis, sarsaSeries, label: "TD 0 (Sarsa)");
    chart.PlotScatter(episodeAxis, qLearningSeries, label: "Q learning");

    chart.XLabel("Episodes");
    chart.YLabel("Average total reward");
    chart.Legend();

    plotter.Show();
}