Exemplo n.º 1
0
        /// <summary>
        /// Runs the Monte Carlo value estimator against a random-walk environment
        /// for 10, 100 and 1000 episodes and plots each set of estimates next to
        /// the reference values (1/6 .. 5/6) on a single scatter chart.
        /// </summary>
        public static void EstimateStateValuesWithMonteCarlo()
        {
            // NOTE(review): presumably 5 states with start state 3 - confirm against RandomWalkEnvironment.
            var walk = new RandomWalkEnvironment(5, 3);

            // Reference values the estimates are compared with; plotted under the "actual" label.
            double[] referenceValues = { 1.0 / 6, 2.0 / 6, 3.0 / 6, 4.0 / 6, 5.0 / 6 };

            // The same estimator instance is used for all three runs, in increasing episode count.
            var estimator     = new McValueEstimator();
            var after10       = estimator.Estimate(walk, 10);
            var after100      = estimator.Estimate(walk, 100);
            var after1000     = estimator.Estimate(walk, 1000);

            var chartHost = new Plotter();
            var chart     = chartHost.Plt;

            // One x position per plotted value.
            double[] xs = { 1, 2, 3, 4, 5 };

            chart.Title("MC random walk estimates after X episodes");

            chart.PlotScatter(xs, referenceValues, label: "actual");
            chart.PlotScatter(xs, after10, label: "10");
            chart.PlotScatter(xs, after100, label: "100");
            chart.PlotScatter(xs, after1000, label: "1000");

            chart.Legend();

            chartHost.Show();
        }
Exemplo n.º 2
0
        /// <summary>
        /// Compares a Monte Carlo estimator with three TD(0) estimators (learning
        /// rates 0.05, 0.10 and 0.20) on a random-walk environment. For every
        /// episode count from 0 to 99 each estimator is run 100 times, the error
        /// returned by <c>AvgRmsError</c> against the reference values is averaged
        /// over those runs, and the four resulting curves are plotted.
        /// </summary>
        private static void CompareMcAndTd0()
        {
            // Number of distinct episode counts evaluated (0 .. 99) and runs averaged per count.
            const int episodeCounts = 100;
            const int runsPerCount  = 100;

            // NOTE(review): presumably 5 states with start state 3 - confirm against RandomWalkEnvironment.
            var walk = new RandomWalkEnvironment(5, 3);

            // Reference values every estimate is scored against.
            double[] referenceValues = { 1.0 / 6, 2.0 / 6, 3.0 / 6, 4.0 / 6, 5.0 / 6 };

            var mc     = new McValueEstimator();
            var tdSlow = new Td0ValueEstimator(0.05);
            var tdMid  = new Td0ValueEstimator(0.10);
            var tdFast = new Td0ValueEstimator(0.20);

            // Mean error per episode count, one curve per estimator.
            var mcCurve     = new double[episodeCounts];
            var tdSlowCurve = new double[episodeCounts];
            var tdMidCurve  = new double[episodeCounts];
            var tdFastCurve = new double[episodeCounts];

            for (var episodes = 0; episodes < episodeCounts; episodes++)
            {
                // Per-run errors for the current episode count.
                var mcRunErrors     = new double[runsPerCount];
                var tdSlowRunErrors = new double[runsPerCount];
                var tdMidRunErrors  = new double[runsPerCount];
                var tdFastRunErrors = new double[runsPerCount];

                for (var run = 0; run < runsPerCount; run++)
                {
                    // Estimators are invoked in a fixed order: MC, then TD with 0.05, 0.10, 0.20.
                    var mcValues     = mc.Estimate(walk, episodes);
                    var tdSlowValues = tdSlow.Estimate(walk, episodes);
                    var tdMidValues  = tdMid.Estimate(walk, episodes);
                    var tdFastValues = tdFast.Estimate(walk, episodes);

                    mcRunErrors[run]     = AvgRmsError(referenceValues, mcValues);
                    tdSlowRunErrors[run] = AvgRmsError(referenceValues, tdSlowValues);
                    tdMidRunErrors[run]  = AvgRmsError(referenceValues, tdMidValues);
                    tdFastRunErrors[run] = AvgRmsError(referenceValues, tdFastValues);
                }

                mcCurve[episodes]     = mcRunErrors.Average();
                tdSlowCurve[episodes] = tdSlowRunErrors.Average();
                tdMidCurve[episodes]  = tdMidRunErrors.Average();
                tdFastCurve[episodes] = tdFastRunErrors.Average();
            }

            var chartHost = new Plotter();
            var chart     = chartHost.Plt;

            chart.Title("MC and TD0 RMS error vs num episodes");

            // X axis: the episode counts 0 .. 99 as doubles.
            var xs = new double[episodeCounts];
            for (var k = 0; k < episodeCounts; k++)
            {
                xs[k] = k;
            }

            chart.PlotScatter(xs, mcCurve, label: "mc");
            chart.PlotScatter(xs, tdSlowCurve, label: "td, learning rate: 0.05");
            chart.PlotScatter(xs, tdMidCurve, label: "td, learning rate: 0.10");
            chart.PlotScatter(xs, tdFastCurve, label: "td, learning rate: 0.20");

            chart.Legend(location: legendLocation.upperRight);

            chartHost.Show();
        }