/// <summary>
/// Converts a full table index triple into the corresponding state and action objects.
/// </summary>
/// <param name="i">Action index; forwarded to <c>GetAction</c>.</param>
/// <param name="j">First state index; forwarded to <c>GetState</c>.</param>
/// <param name="k">Second state index; forwarded to <c>GetState</c>.</param>
/// <param name="state">Receives the state produced by <c>GetState(j, k, ...)</c>.</param>
/// <param name="action">Receives the action produced by <c>GetAction(i, ...)</c>.</param>
public static void GetStateAndAction(int i, int j, int k, out InvertedPendulumState state, out InvertedPendulumAction action)
 {
     // The two lookups are independent of each other; the action part is resolved first here.
     GetAction(i, out action);
     GetState(j, k, out state);
 }
 /// <summary>
 /// Maps an action onto its index along the action axis of the table.
 /// </summary>
 /// <param name="action">Action whose <c>action</c> member is converted.</param>
 /// <param name="i">Receives <c>action.action + 1</c>.</param>
 public static void GetActionIndices(InvertedPendulumAction action, out int i)
 {
     // Shift by one so that the lowest action value used in this file (-1) lands on index 0.
     const int IndexOffset = 1;
     i = action.action + IndexOffset;
 }
        // Table of stored values, indexed as [action index, first state index, second state index].
        // The action axis has 3 slots (actions -1, 0, 1 shifted to 0..2 by GetActionIndices);
        // each state axis has LEN slots.
        // NOTE(review): per the original trailing note the state axes presumably span
        // -pi..pi and -10..10 respectively — confirm against GetStateIndices.
        public float[, ,] value = new float[3, LEN, LEN];//-1,0,1;-pi..pi;-10..10

        /// <summary>
        /// Resolves a (state, action) pair into the three indices used to address the table.
        /// </summary>
        /// <param name="state">State passed to <c>GetStateIndices</c>.</param>
        /// <param name="action">Action passed to <c>GetActionIndices</c>.</param>
        /// <param name="i">Receives the action index.</param>
        /// <param name="j">Receives the first state index.</param>
        /// <param name="k">Receives the second state index.</param>
        public static void GetIndices(InvertedPendulumState state, InvertedPendulumAction action, out int i, out int j, out int k)
        {
            // Action and state lookups do not depend on each other.
            GetActionIndices(action, out i);
            GetStateIndices(state, out j, out k);
        }
        /// <summary>
        /// Scans the three actions (-1, 0, 1) for the given state and reports the one with the
        /// largest stored value, together with that value.
        /// </summary>
        /// <param name="state">State to evaluate; cast to <c>InvertedPendulumState</c>.</param>
        /// <param name="action">Receives an <c>InvertedPendulumAction</c> for the chosen action.</param>
        /// <param name="retval">
        /// Receives the largest stored value found, or <c>float.MinValue</c> if no entry
        /// compares greater than that seed.
        /// </param>
        public void GetBestActionAndUtilityForState(ReinforcementLearningState state, out ReinforcementLearningAction action, out float retval)
        {
            int stateRow;
            int stateCol;
            GetStateIndices((InvertedPendulumState)state, out stateRow, out stateCol);

            float best = float.MinValue;
            int bestAction = 0;

            int candidate = -1;
            while (candidate <= 1)
            {
                int actionSlot;
                GetActionIndices(new InvertedPendulumAction(candidate), out actionSlot);

                float stored = value[actionSlot, stateRow, stateCol];
                // Strict comparison: on a tie the earlier candidate is kept.
                if (stored > best)
                {
                    best = stored;
                    bestAction = candidate;
                }

                ++candidate;
            }

            // Tie-break: if the entry at action index 1 (action 0) matches the maximum, prefer action 0.
            if (value[1, stateRow, stateCol] == best)
            {
                bestAction = 0;
            }

            retval = best;
            action = new InvertedPendulumAction(bestAction);
        }        
 /// <summary>
 /// Builds the action that corresponds to an index along the action axis.
 /// </summary>
 /// <param name="i">Action index.</param>
 /// <param name="action">Receives a new <c>InvertedPendulumAction</c> constructed from <c>i - 1</c>.</param>
 public static void GetAction(int i, out InvertedPendulumAction action)
 {
     // Undo the +1 shift applied by GetActionIndices.
     int actionValue = i - 1;
     action = new InvertedPendulumAction(actionValue);
 }
Beispiel #6
0
        /// <summary>
        /// Click handler: calls <c>adp.Train()</c>, then draws 100000 random (state, action) pairs,
        /// evaluates each with the store held by <c>adp</c>, and feeds the resulting 4-component
        /// vectors to the IGMN model behind <c>esigmn</c>. Both viewers are invalidated.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button5_Click(object sender, EventArgs e)
        {
            const int SampleCount = 100000;

            adp.Train();
            customControl21.Invalidate();

            Random rng = new Random();

            // Fetched after Train() has run.
            InvertedPendulumQStore tabularStore = (InvertedPendulumQStore)adp.qstore;

            int drawn = 0;
            while (drawn < SampleCount)
            {
                // First component is uniform in [-pi, pi), second in [-3, 3).
                // NOTE(review): the (float) cast applies only to the random factor, not to the
                // product with Math.PI — kept as-is; confirm whether that was intended.
                double firstComponent = (float)(2 * rng.NextDouble() - 1) * Math.PI;
                double secondComponent = (2 * rng.NextDouble() - 1) * 3;
                InvertedPendulumState state = new InvertedPendulumState(firstComponent, secondComponent);

                // rng.Next(3) yields 0..2, shifted to the action values -1, 0, 1.
                int actionValue = rng.Next(3) - 1;
                InvertedPendulumAction action = new InvertedPendulumAction(actionValue);

                ((InvertedPendulumESIGMNQStore)esigmn.qFunction).igmn.Train(
                    new Vector(new double[]
                    {
                        state.a,
                        state.w,
                        action.action,
                        tabularStore.Evaluate(state, action)
                    }));

                ++drawn;
            }

            // Disabled alternative: train on every cell of the table instead of random samples.
            //for (int j = 0; j < tabularStore.value.GetLength(1); ++j)
            //{
            //    for (int k = 0; k < tabularStore.value.GetLength(2); ++k)
            //    {
            //        for (int i = 0; i < tabularStore.value.GetLength(0); ++i)
            //        {
            //            InvertedPendulumState state;
            //            InvertedPendulumAction action;
            //            InvertedPendulumQStore.GetStateAndAction(i, j, k, out state, out action);
            //            ((InvertedPendulumESIGMNQStore)esigmn.qFunction).igmn.Train(new Vector(new double[] { state.a, state.w, action.action, tabularStore.value[i, j, k] }));
            //        }
            //    }
            //}

            esigmnViewer1.Invalidate();
        }