示例#1
0
        /// <summary>
        /// Runs <c>InitializeComponent</c>, creates the inverted-pendulum simulator,
        /// two IGMN models and two tabular approximators, then starts the training
        /// thread (entry point <c>train</c>) and <c>timer1</c>.
        /// </summary>
        public RL_TDlambda()
        {
            InitializeComponent();

            sim = new InvertedPendulumEnvironmentSimulator();

            // Each IGMN is constructed from its own 3-element vector of ones.
            // NOTE(review): what this vector parameterizes is defined by IGMN — confirm there.
            igmn1 = new IGMN(new Vector(new[] { 1.0, 1.0, 1.0 }));
            igmn2 = new IGMN(new Vector(new[] { 1.0, 1.0, 1.0 }));

            // Symmetric bounds of the two table dimensions: [-pi, pi] and [-1.5*pi, 1.5*pi].
            // NOTE(review): presumably angle and angular velocity — verify against the simulator.
            const double narrowBound = Math.PI;
            const double wideBound = 1.5 * Math.PI;

            // 200 x 200 cells per table; every table gets freshly allocated arrays
            // so that no array instance is shared between tab1 and tab2.
            tab1 = new TabularApproximator(
                new[] { 200, 200 },
                new[] { -narrowBound, -wideBound },
                new[] { narrowBound, wideBound });
            tab2 = new TabularApproximator(
                new[] { 200, 200 },
                new[] { -narrowBound, -wideBound },
                new[] { narrowBound, wideBound });

            r = new Random();

            // The flag is raised before the thread starts.
            // NOTE(review): presumably train() polls isRunning — confirm.
            isRunning = true;
            ThreadStart trainingEntry = train;
            trainingThread = new Thread(trainingEntry);
            trainingThread.Start();

            timer1.Start();
        }
        /// <summary>
        /// Runs <c>InitializeComponent</c>, creates the acrobot simulator,
        /// two IGMN models and two tabular approximators, then starts the training
        /// thread (entry point <c>train</c>) and <c>timer1</c>.
        /// </summary>
        public RL_TDlambda_acrobot()
        {
            InitializeComponent();

            sim = new AcrobotEnvironmentSimulator2();

            // Each IGMN is constructed from its own 5-element vector of ones.
            // NOTE(review): what this vector parameterizes is defined by IGMN — confirm there.
            igmn1 = new IGMN(new Vector(new[] { 1.0, 1.0, 1.0, 1.0, 1.0 }));
            igmn2 = new IGMN(new Vector(new[] { 1.0, 1.0, 1.0, 1.0, 1.0 }));

            // Symmetric bounds: the first two table dimensions span [-pi, pi],
            // the last two span [-3*pi, 3*pi].
            // NOTE(review): presumably two joint angles followed by two angular
            // velocities — verify against the simulator.
            const double narrowBound = Math.PI;
            const double wideBound = 3 * Math.PI;

            // 50 cells along each of the four dimensions; every table gets freshly
            // allocated arrays so that no array instance is shared between tab1 and tab2.
            tab1 = new TabularApproximator(
                new[] { 50, 50, 50, 50 },
                new[] { -narrowBound, -narrowBound, -wideBound, -wideBound },
                new[] { narrowBound, narrowBound, wideBound, wideBound });
            tab2 = new TabularApproximator(
                new[] { 50, 50, 50, 50 },
                new[] { -narrowBound, -narrowBound, -wideBound, -wideBound },
                new[] { narrowBound, narrowBound, wideBound, wideBound });

            r = new Random();

            // The flag is raised before the thread starts.
            // NOTE(review): presumably train() polls isRunning — confirm.
            isRunning = true;
            ThreadStart trainingEntry = train;
            trainingThread = new Thread(trainingEntry);
            trainingThread.Start();

            timer1.Start();
        }