/// <summary>
/// Returns the index of the entry in <paramref name="states"/> that is closest to
/// <paramref name="curstate"/>, measured by squared Euclidean distance over the
/// <c>a</c> and <c>w</c> members.
/// </summary>
/// <param name="states">Candidate states to search.</param>
/// <param name="curstate">State the candidates are compared against.</param>
/// <returns>
/// Index of the first candidate with the smallest distance. Returns 0 when the list is
/// empty or when no candidate has a finite distance.
/// </returns>
private int GetClosestStateIndex(List<InvertedPendulumState> states, InvertedPendulumState curstate)
 {
     // Seed with +infinity (a double) instead of float.MaxValue: the distances are doubles,
     // so any finite distance must be able to win, even one larger than float.MaxValue.
     double mindist = double.PositiveInfinity;
     int closeststate = 0;
     for (int i = 0; i < states.Count; ++i)
     {
         InvertedPendulumState state = states[i];
         double da = state.a - curstate.a;
         double dw = state.w - curstate.w;
         double dist = da * da + dw * dw;

         // Strict comparison keeps the earliest index on ties.
         if (dist < mindist)
         {
             mindist = dist;
             closeststate = i;
         }
     }
     return closeststate;
 }
 /// <summary>
 /// Converts a triple of table indices into the corresponding state and action by
 /// delegating to <c>GetState</c> (for <paramref name="j"/>, <paramref name="k"/>) and
 /// <c>GetAction</c> (for <paramref name="i"/>).
 /// </summary>
 /// <param name="i">Action index, passed to <c>GetAction</c>.</param>
 /// <param name="j">First state index, passed to <c>GetState</c>.</param>
 /// <param name="k">Second state index, passed to <c>GetState</c>.</param>
 /// <param name="state">Receives the state produced by <c>GetState</c>.</param>
 /// <param name="action">Receives the action produced by <c>GetAction</c>.</param>
 public static void GetStateAndAction(int i, int j, int k, out InvertedPendulumState state, out InvertedPendulumAction action)
 {
     GetState(j, k, out state);
     GetAction(i, out action);
 }
 /// <summary>
 /// Maps a continuous state onto grid indices.
 /// </summary>
 /// <remarks>
 /// <paramref name="j"/> is derived from <c>state.a</c> and wraps around modulo <c>LEN</c>,
 /// so <c>a = -π</c> maps to 0 and <c>a = π</c> wraps back to 0.
 /// <paramref name="k"/> is derived from <c>state.w</c> with <c>w = -3</c> mapping to 0,
 /// and is clamped to <c>[0, LEN - 1]</c>.
 /// </remarks>
 /// <param name="state">State whose <c>a</c> and <c>w</c> members are discretized.</param>
 /// <param name="j">Receives the wrapped index for <c>a</c>, always in <c>[0, LEN - 1]</c>.</param>
 /// <param name="k">Receives the clamped index for <c>w</c>, always in <c>[0, LEN - 1]</c>.</param>
 public static void GetStateIndices(InvertedPendulumState state, out int j, out int k)
 {
     // Floor (not truncate toward zero) so values just below -π fall into the last bin
     // after wrapping, instead of being pulled into bin 0 by the integer cast.
     int rawJ = (int)Math.Floor((state.a / Math.PI + 1) * LEN / 2);

     // C# '%' keeps the sign of the dividend; add LEN and reduce again to get a
     // non-negative result for any number of wraps.
     j = ((rawJ % LEN) + LEN) % LEN;

     k = (int)((state.w / 3 + 1) * LEN / 2);
     if (k > LEN - 1)
     {
         k = LEN - 1;
     }
     if (k < 0)
     {
         k = 0;
     }
 }
        // Value table with dimensions [3, LEN, LEN]; presumably indexed as [action index, j, k]
        // to match the (i, j, k) triples used by the index helpers — verify against callers.
        // NOTE(review): the trailing comment lists -10..10 for the last axis, but the visible
        // GetState/GetStateIndices helpers scale w by 3 — confirm which range is current.
        public float[, ,] value = new float[3, LEN, LEN];//-1,0,1;-pi..pi;-10..10

        /// <summary>
        /// Converts a state/action pair into table indices by delegating to
        /// <c>GetStateIndices</c> (for <paramref name="j"/>, <paramref name="k"/>) and
        /// <c>GetActionIndices</c> (for <paramref name="i"/>).
        /// </summary>
        /// <param name="state">State to discretize.</param>
        /// <param name="action">Action to discretize.</param>
        /// <param name="i">Receives the action index from <c>GetActionIndices</c>.</param>
        /// <param name="j">Receives the first state index from <c>GetStateIndices</c>.</param>
        /// <param name="k">Receives the second state index from <c>GetStateIndices</c>.</param>
        public static void GetIndices(InvertedPendulumState state, InvertedPendulumAction action, out int i, out int j, out int k)
        {
            GetStateIndices(state, out j, out k);
            GetActionIndices(action, out i);
        }
 /// <summary>
 /// Builds the state that corresponds to a pair of grid indices.
 /// </summary>
 /// <remarks>
 /// Index 0 maps to <c>a = -π</c> and <c>w = -3</c>; index <c>LEN / 2</c> maps to 0 on both
 /// axes. The index-to-fraction step is carried out in single precision.
 /// </remarks>
 /// <param name="j">Grid index for the <c>a</c> axis.</param>
 /// <param name="k">Grid index for the <c>w</c> axis.</param>
 /// <param name="state">Receives the newly constructed state.</param>
 public static void GetState(int j, int k, out InvertedPendulumState state)
 {
     // Integer half-grid size, converted to float so the divisions below are float divisions.
     float halfLen = (float)(LEN / 2);

     double aCoord = (j / halfLen - 1) * Math.PI;
     double wCoord = (k / halfLen - 1) * 3;

     state = new InvertedPendulumState(aCoord, wCoord);
 }
 /// <summary>
 /// Feeds one training sample built from the given state, action and value to
 /// <c>igmn.Train</c>.
 /// </summary>
 /// <param name="state">State supplying the <c>a</c> and <c>w</c> components of the sample.</param>
 /// <param name="action">Action supplying the <c>action</c> component of the sample.</param>
 /// <param name="value">Value stored as the last component of the sample.</param>
 public void ReplaceValue(InvertedPendulumState state, InvertedPendulumAction2 action, float value)
 {
     // TODO (carried over from the original author): "delta??" — intent not recoverable from this block.
     double[] sample = { state.a, state.w, action.action, value };
     igmn.Train(new Vector(sample));
 }
 /// <summary>
 /// Copies the <c>a</c> and <c>w</c> members of <paramref name="state"/> into this instance.
 /// </summary>
 /// <param name="state">State whose values are copied.</param>
 internal void SetState(InvertedPendulumState state)
 {
     a = state.a;
     w = state.w;
 }
        /// <summary>
        /// Creates a new <c>InvertedPendulumState</c> from this instance's current
        /// <c>a</c> and <c>w</c> values.
        /// </summary>
        /// <returns>The newly constructed state.</returns>
        public ReinforcementLearningState State()
        {
            return new InvertedPendulumState(a, w);
        }
// Example #9
// 0
        /// <summary>
        /// Click handler: trains <c>adp</c>, then trains the IGMN held by <c>esigmn.qFunction</c>
        /// on randomly sampled (state, action) pairs labelled with <c>adp.qstore</c>'s
        /// evaluation, and invalidates the two viewer controls.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        private void button5_Click(object sender, EventArgs e)
        {
            // Number of random samples fed to the IGMN.
            const int sampleCount = 100000;

            adp.Train();
            customControl21.Invalidate();

            Random r = new Random();

            InvertedPendulumQStore QStore = (InvertedPendulumQStore)adp.qstore;

            // Resolve the target store once; the cast does not depend on the loop variable.
            InvertedPendulumESIGMNQStore esigmnStore = (InvertedPendulumESIGMNQStore)esigmn.qFunction;

            for (int sample = 0; sample < sampleCount; ++sample)
            {
                // a is drawn from [-π, π) and w from [-3, 3). The (float) cast on the first
                // draw is kept from the original code so the sampled values are unchanged.
                InvertedPendulumState state = new InvertedPendulumState((float)(2 * r.NextDouble() - 1) * Math.PI, (2 * r.NextDouble() - 1) * 3);
                InvertedPendulumAction action = new InvertedPendulumAction(r.Next(3) - 1);

                esigmnStore.igmn.Train(new Vector(new double[] { state.a, state.w, action.action, QStore.Evaluate(state, action) }));
            }

            // A disabled alternative in the original swept every [i, j, k] entry of QStore.value
            // (via InvertedPendulumQStore.GetStateAndAction) instead of sampling randomly.

            esigmnViewer1.Invalidate();
        }
        /// <summary>
        /// Rebuilds <c>Ma</c>, one sparse matrix per action, by sampling the environment, and
        /// then saves the result to "mmatrices.dat".
        /// </summary>
        /// <remarks>
        /// For every grid cell and each of the three actions (-1, 0, 1), 1000 start points are
        /// drawn at random inside the cell and stepped once through a scratch
        /// <c>InvertedPendulumEnvironment</c>. The cell each sample lands in has 0.001 added
        /// to <c>Ma[action][from, to]</c>.
        /// </remarks>
        public void GenerateMMatrices()
        {
            const int actionCount = 3;
            const int samplesPerCell = 1000;
            const float sampleWeight = 0.001f;

            InvertedPendulumEnvironment simulator = new InvertedPendulumEnvironment();
            int gridSize = InvertedPendulumQStore.LEN;
            int cellCount = gridSize * gridSize;

            Ma = new SparseMatrix[actionCount];
            for (int actionIndex = 0; actionIndex < actionCount; ++actionIndex)
            {
                Ma[actionIndex] = new SparseMatrix(cellCount);
            }

            // Flat cell index is k * gridSize + j; iterating k outermost visits the cells in
            // increasing flat-index order.
            for (int k = 0; k < gridSize; ++k)
            {
                for (int j = 0; j < gridSize; ++j)
                {
                    int fromCell = k * gridSize + j;

                    InvertedPendulumState cellOrigin;
                    InvertedPendulumQStore.GetState(j, k, out cellOrigin);

                    for (int actionIndex = 0; actionIndex < actionCount; ++actionIndex)
                    {
                        int action = actionIndex - 1;

                        for (int sample = 0; sample < samplesPerCell; ++sample)
                        {
                            // Random start point offset from the cell origin by up to one cell width.
                            simulator.a = cellOrigin.a + r.NextDouble() / gridSize * 2 * Math.PI;
                            simulator.w = cellOrigin.w + r.NextDouble() / gridSize * 2 * 3;

                            simulator.Step(action);

                            InvertedPendulumState landed = new InvertedPendulumState(simulator.a, simulator.w);
                            int toJ, toK;
                            InvertedPendulumQStore.GetStateIndices(landed, out toJ, out toK);

                            int toCell = toK * gridSize + toJ;

                            Ma[actionIndex][fromCell, toCell] += sampleWeight;
                        }
                    }
                }
            }

            SaveMMatrices("mmatrices.dat");
        }