/// <summary>
/// Builds the form, creates the inverted-pendulum simulator together with the
/// IGMN and tabular approximators, then starts the training thread and
/// <c>timer1</c>.
/// </summary>
public RL_TDlambda()
{
    InitializeComponent();

    sim = new InvertedPendulumEnvironmentSimulator();

    // Both IGMN instances are constructed from a 3-element vector of ones.
    igmn1 = new IGMN(new Vector(new double[] { 1, 1, 1 }));
    igmn2 = new IGMN(new Vector(new double[] { 1, 1, 1 }));

    // Symmetric bounds of the 200 x 200 table: [-PI, PI] on the first axis and
    // [-1.5*PI, 1.5*PI] on the second.
    // NOTE(review): presumably angle and angular velocity - confirm against the simulator.
    double firstAxisBound = Math.PI;
    double secondAxisBound = 1.5 * Math.PI;

    // Each approximator receives its own freshly allocated arrays, so nothing is
    // shared between tab1 and tab2.
    tab1 = new TabularApproximator(
        new int[] { 200, 200 },
        new double[] { -firstAxisBound, -secondAxisBound },
        new double[] { firstAxisBound, secondAxisBound });
    tab2 = new TabularApproximator(
        new int[] { 200, 200 },
        new double[] { -firstAxisBound, -secondAxisBound },
        new double[] { firstAxisBound, secondAxisBound });

    r = new Random();

    // The flag is raised before the thread starts so that train() never observes
    // it unset on entry.
    // NOTE(review): the thread is created as a foreground thread (the .NET default);
    // confirm something clears isRunning when the form closes.
    isRunning = true;
    ThreadStart trainingEntryPoint = new ThreadStart(train);
    trainingThread = new Thread(trainingEntryPoint);
    trainingThread.Start();

    timer1.Start();
}
/// <summary>
/// Builds the form, creates the acrobot simulator together with the IGMN and
/// tabular approximators, then starts the training thread and <c>timer1</c>.
/// </summary>
public RL_TDlambda_acrobot()
{
    InitializeComponent();

    sim = new AcrobotEnvironmentSimulator2();

    // Both IGMN instances are constructed from a 5-element vector of ones.
    igmn1 = new IGMN(new Vector(new double[] { 1, 1, 1, 1, 1 }));
    igmn2 = new IGMN(new Vector(new double[] { 1, 1, 1, 1, 1 }));

    // Symmetric bounds of the 50^4 table: [-PI, PI] on the first two axes and
    // [-3*PI, 3*PI] on the last two.
    // NOTE(review): presumably two joint angles followed by two angular velocities - confirm.
    double narrowBound = Math.PI;
    double wideBound = 3 * Math.PI;

    // Each approximator receives its own freshly allocated arrays, so nothing is
    // shared between tab1 and tab2.
    tab1 = new TabularApproximator(
        new int[] { 50, 50, 50, 50 },
        new double[] { -narrowBound, -narrowBound, -wideBound, -wideBound },
        new double[] { narrowBound, narrowBound, wideBound, wideBound });
    tab2 = new TabularApproximator(
        new int[] { 50, 50, 50, 50 },
        new double[] { -narrowBound, -narrowBound, -wideBound, -wideBound },
        new double[] { narrowBound, narrowBound, wideBound, wideBound });

    r = new Random();

    // The flag is raised before the thread starts so that train() never observes
    // it unset on entry.
    // NOTE(review): the thread is created as a foreground thread (the .NET default);
    // confirm something clears isRunning when the form closes.
    isRunning = true;
    ThreadStart trainingEntryPoint = new ThreadStart(train);
    trainingThread = new Thread(trainingEntryPoint);
    trainingThread.Start();

    timer1.Start();
}