コード例 #1
0
        /// <summary>
        /// Builds the form and its learning components, then immediately starts the
        /// training thread and <c>timer1</c>.
        /// </summary>
        /// <remarks>
        /// Created in order: the simulator, two IGMN instances (each seeded with a
        /// five-element vector of ones), two tabular approximators over a 50x50x50x50 grid,
        /// and the random number generator. <c>isRunning</c> is set before the thread starts.
        /// </remarks>
        public RL_TDlambda_acrobot()
        {
            // Grid description shared by both tabular approximators: the first two
            // dimensions span [-PI, PI], the last two span [-3*PI, 3*PI].
            const int binsPerDimension = 50;
            const double narrowLimit = Math.PI;
            const double wideLimit = 3 * Math.PI;

            InitializeComponent();
            sim = new AcrobotEnvironmentSimulator2();

            double[] firstOnes = { 1, 1, 1, 1, 1 };
            igmn1 = new IGMN(new Vector(firstOnes));

            double[] secondOnes = { 1, 1, 1, 1, 1 };
            igmn2 = new IGMN(new Vector(secondOnes));

            // Each approximator gets its own freshly allocated arrays; nothing is shared
            // between the two instances.
            int[] firstBins = { binsPerDimension, binsPerDimension, binsPerDimension, binsPerDimension };
            double[] firstLower = { -narrowLimit, -narrowLimit, -wideLimit, -wideLimit };
            double[] firstUpper = { narrowLimit, narrowLimit, wideLimit, wideLimit };
            tab1 = new TabularApproximator(firstBins, firstLower, firstUpper);

            int[] secondBins = { binsPerDimension, binsPerDimension, binsPerDimension, binsPerDimension };
            double[] secondLower = { -narrowLimit, -narrowLimit, -wideLimit, -wideLimit };
            double[] secondUpper = { narrowLimit, narrowLimit, wideLimit, wideLimit };
            tab2 = new TabularApproximator(secondBins, secondLower, secondUpper);

            r = new Random();

            // The flag is raised before the worker starts so that it is already set
            // when train() begins executing.
            isRunning = true;
            ThreadStart trainingEntry = new ThreadStart(train);
            trainingThread = new Thread(trainingEntry);
            trainingThread.Start();

            timer1.Start();
        }
コード例 #2
0
        /// <summary>
        /// Runs a 300-step rollout on a fresh simulator, choosing each action through
        /// <c>GetAction</c>, and renders the acrobot pose after every step into a bitmap.
        /// </summary>
        /// <remarks>
        /// Poses are laid out 100 per row, 10 pixels apart horizontally, with rows 150 pixels
        /// apart. The first link is drawn in black and the second in red. The summed reward is
        /// drawn at (20, 20) and written to the console; when it exceeds
        /// <c>bestsumreward</c>, that field is updated and the bitmap is saved as
        /// "acrobot.png".
        /// </remarks>
        /// <returns>
        /// The rendered 1200x1200 bitmap. It is not disposed here, so the caller is
        /// responsible for disposing it.
        /// </returns>
        protected Bitmap VisualizeAcrobot()
        {
            const int res = 1200;
            const int stepCount = 300;
            const int posesPerRow = 100;
            // Draw every N-th step; 1 means every step is drawn.
            const int drawEvery = 1;
            // Pixels per unit of link length.
            const float z = 50;

            Bitmap bm = new Bitmap(res, res);
            double sumreward = 0;

            // Graphics wraps a native GDI+ handle; dispose it deterministically, and before
            // the bitmap is saved or handed back to the caller.
            using (Graphics g = Graphics.FromImage(bm))
            {
                AcrobotEnvironmentSimulator2 simm = new AcrobotEnvironmentSimulator2();

                // NOTE(review): presumably { angle1, angle2, velocity1, velocity2 } -- confirm
                // against the simulator's state layout.
                double[] state = new double[] { -Math.PI, 0, 0, 0 };

                for (int i = 0; i < stepCount; ++i)
                {
                    double M;
                    double q_act; // required by GetAction's signature; not used here
                    double reward;
                    GetAction(state, out M, out q_act, true);
                    simm.Simulate(state, new double[] { M }, dT, out state, out reward);
                    sumreward += reward;

                    if (i % drawEvery != 0)
                    {
                        continue;
                    }

                    // Anchor point of this pose (integer arithmetic selects column and row).
                    float zx = 75 + (i % posesPerRow) * 10;
                    float zy = 150 + 150 * (i / posesPerRow);

                    // The pose is read from the simulator's own Angle/Length properties.
                    // With sin for x and -cos for y, an angle of 0 points toward the top
                    // of the bitmap.
                    float e1x = zx + (float)(z * simm.Length1 * Math.Sin(simm.Angle1));
                    float e1y = zy - (float)(z * simm.Length1 * Math.Cos(simm.Angle1));

                    float e2x = e1x + (float)(z * simm.Length2 * Math.Sin(simm.Angle1 + simm.Angle2));
                    float e2y = e1y - (float)(z * simm.Length2 * Math.Cos(simm.Angle1 + simm.Angle2));

                    // Stock pens are shared objects owned by System.Drawing, so nothing is
                    // allocated (or leaked) per iteration.
                    g.DrawLine(Pens.Black, zx, zy, e1x, e1y);
                    g.DrawLine(Pens.Red, e1x, e1y, e2x, e2y);
                }

                using (Font captionFont = new Font("Arial", 20))
                {
                    g.DrawString("sum reward: " + sumreward, captionFont, Brushes.Black, 20, 20);
                }
            }

            Console.Out.WriteLine(sumreward);
            if (bestsumreward < sumreward)
            {
                bestsumreward = sumreward;
                bm.Save("acrobot.png");
            }
            return bm;
        }