/// <summary>
/// Builds the form, creates the simulator and the function approximators,
/// then immediately starts the training thread and the form timer.
/// </summary>
public RL_TDlambda_acrobot()
{
    InitializeComponent();

    sim = new AcrobotEnvironmentSimulator2();

    // Both IGMN instances are constructed from a five-component vector of ones.
    igmn1 = new IGMN(new Vector(new double[] { 1, 1, 1, 1, 1 }));
    igmn2 = new IGMN(new Vector(new double[] { 1, 1, 1, 1, 1 }));

    // Tabular approximators: 50 cells per dimension; the first two dimensions
    // span [-pi, pi] and the last two span [-3*pi, 3*pi].
    double narrowBound = Math.PI;
    double wideBound = 3 * Math.PI;

    // Fresh arrays are allocated for each approximator so the two instances
    // never share argument storage.
    tab1 = new TabularApproximator(
        new int[] { 50, 50, 50, 50 },
        new double[] { -narrowBound, -narrowBound, -wideBound, -wideBound },
        new double[] { narrowBound, narrowBound, wideBound, wideBound });
    tab2 = new TabularApproximator(
        new int[] { 50, 50, 50, 50 },
        new double[] { -narrowBound, -narrowBound, -wideBound, -wideBound },
        new double[] { narrowBound, narrowBound, wideBound, wideBound });

    r = new Random();

    // The flag is set before the thread starts so the thread never observes
    // it unset at startup.
    isRunning = true;
    ThreadStart trainingEntry = new ThreadStart(train);
    trainingThread = new Thread(trainingEntry);
    trainingThread.Start();

    timer1.Start();
}
/// <summary>
/// Runs a 300-step rollout on a fresh simulator, starting from the state
/// { -pi, 0, 0, 0 } and choosing each action through <c>GetAction</c>, and
/// draws the two-link pose after every step onto a 1200x1200 bitmap.
/// </summary>
/// <remarks>
/// Poses are laid out on a grid of 100 per row, 10 px apart horizontally and
/// 150 px apart vertically. The first link is drawn in black and the second
/// in red. The accumulated reward is drawn in the top-left corner and written
/// to the console. When it exceeds <c>bestsumreward</c>, that field is updated
/// and the image is saved as "acrobot.png".
/// </remarks>
/// <returns>The rendered bitmap; the caller owns it and should dispose it.</returns>
protected Bitmap VisualizeAcrobot()
{
    const int res = 1200;
    const int stepCount = 300;
    const int posesPerRow = 100;
    const float z = 50; // pixel scale applied to the link lengths

    Bitmap bm = new Bitmap(res, res);
    AcrobotEnvironmentSimulator2 simm = new AcrobotEnvironmentSimulator2();
    double sumreward = 0;
    double[] state = new double[] { -Math.PI, 0, 0, 0 };

    // Graphics and Font wrap native GDI+ handles, so they are released
    // deterministically. Pens.* and Brushes.* are shared system objects that
    // must not be disposed, which avoids allocating a Pen on every step.
    using (Graphics g = Graphics.FromImage(bm))
    using (Font captionFont = new Font("Arial", 20))
    {
        for (int i = 0; i < stepCount; ++i)
        {
            double M;
            double q_act;
            double reward;

            // NOTE(review): the meaning of the final 'true' argument is defined
            // by GetAction, which is not visible here - confirm.
            GetAction(state, out M, out q_act, true);
            simm.Simulate(state, new double[] { M }, dT, out state, out reward);
            sumreward += reward;

            // Anchor point of this pose on the grid (integer arithmetic, as before).
            float zx = 75 + (i % posesPerRow) * 10;
            float zy = 150 + 150 * (i / posesPerRow);

            // Link end points; y is subtracted because bitmap y grows downwards.
            float e1x = zx + (float)(z * simm.Length1 * Math.Sin(simm.Angle1));
            float e1y = zy - (float)(z * simm.Length1 * Math.Cos(simm.Angle1));
            float e2x = e1x + (float)(z * simm.Length2 * Math.Sin(simm.Angle1 + simm.Angle2));
            float e2y = e1y - (float)(z * simm.Length2 * Math.Cos(simm.Angle1 + simm.Angle2));

            g.DrawLine(Pens.Black, zx, zy, e1x, e1y);
            g.DrawLine(Pens.Red, e1x, e1y, e2x, e2y);
        }

        g.DrawString("sum reward: " + sumreward, captionFont, Brushes.Black, 20, 20);
    }

    Console.Out.WriteLine(sumreward);

    // Keep a snapshot on disk only when this rollout beats the best one so far.
    if (bestsumreward < sumreward)
    {
        bestsumreward = sumreward;
        bm.Save("acrobot.png");
    }

    return bm;
}