예제 #1
0
 /// <summary>
 /// Builds the form: creates a fresh <c>InvertedPendulumEnvironment</c>, hands it to
 /// <c>customControl11</c> and keeps the same instance in <c>ws</c>, then turns on
 /// form-level key preview and enables <c>timer1</c>.
 /// </summary>
 public Form3()
 {
     InitializeComponent();

     // The control and the form share one and the same environment instance.
     InvertedPendulumEnvironment environment = new InvertedPendulumEnvironment();
     customControl11.Workspace = environment;
     ws = environment;

     // Let the form see key events before the focused child control does.
     KeyPreview = true;

     timer1.Enabled = true;
 }
예제 #2
0
 /// <summary>
 /// Builds the form with two independent <c>InvertedPendulumEnvironment</c> instances:
 /// one shown by <c>customControl11</c> (kept in <c>ws</c>) and one shown by
 /// <c>customControl12</c> (kept in <c>oppWs</c>). Also turns on form-level key preview.
 /// </summary>
 public Form1()
 {
     InitializeComponent();

     // First environment: shared between customControl11 and the ws field.
     InvertedPendulumEnvironment ownEnvironment = new InvertedPendulumEnvironment();
     customControl11.Workspace = ownEnvironment;
     ws = ownEnvironment;

     // Second environment: shared between customControl12 and the oppWs field.
     InvertedPendulumEnvironment opponentEnvironment = new InvertedPendulumEnvironment();
     customControl12.Workspace = opponentEnvironment;
     oppWs = opponentEnvironment;

     // Let the form see key events before the focused child control does.
     KeyPreview = true;
 }
        /// <summary>
        /// Rebuilds <c>Ma</c> as three sampled state-transition matrices (one per action
        /// -1, 0, +1) over the LEN x LEN grid of <c>InvertedPendulumQStore</c>, then saves
        /// them via <c>SaveMMatrices("mmatrices.dat")</c>.
        /// </summary>
        /// <remarks>
        /// For every grid cell and every action the test environment is started 1000 times
        /// from the cell's state plus a random offset, stepped once, and the cell it lands
        /// in receives a weight of 0.001 (1000 samples x 0.001 = 1 per row and action).
        /// </remarks>
        public void GenerateMMatrices()
        {
            const int actionCount = 3;
            const int samplesPerCell = 1000;

            InvertedPendulumEnvironment simulator = new InvertedPendulumEnvironment();
            int gridSize = InvertedPendulumQStore.LEN;
            int cellCount = gridSize * gridSize;

            Ma = new SparseMatrix[actionCount];
            for (int a = 0; a < actionCount; ++a)
            {
                Ma[a] = new SparseMatrix(cellCount);
            }

            for (int fromIndex = 0; fromIndex < cellCount; ++fromIndex)
            {
                // Flat index layout: index = row * gridSize + column.
                int column;
                int row = Math.DivRem(fromIndex, gridSize, out column);

                InvertedPendulumState origin;
                InvertedPendulumQStore.GetState(column, row, out origin);

                for (int a = 0; a < actionCount; ++a)
                {
                    // Array slots 0..2 correspond to actions -1..+1.
                    int action = a - 1;

                    for (int sample = 0; sample < samplesPerCell; ++sample)
                    {
                        // Random offset in [0, 2*PI/LEN) for a and [0, 6/LEN) for w
                        // (presumably one grid cell's width on each axis - confirm against
                        // InvertedPendulumQStore).
                        simulator.a = origin.a + r.NextDouble() / gridSize * 2 * Math.PI;
                        simulator.w = origin.w + r.NextDouble() / gridSize * 2 * 3;

                        simulator.Step(action);

                        int toColumn, toRow;
                        InvertedPendulumQStore.GetStateIndices(
                            new InvertedPendulumState(simulator.a, simulator.w),
                            out toColumn,
                            out toRow);

                        Ma[a][fromIndex, toRow * gridSize + toColumn] += 0.001f;
                    }
                }
            }

            SaveMMatrices("mmatrices.dat");
        }
        /// <summary>
        /// Rebuilds <c>R</c> with one entry per cell of the LEN x LEN grid of
        /// <c>InvertedPendulumQStore</c>: the value returned by the environment's
        /// <c>Reward()</c> when the environment is placed exactly on that cell's state.
        /// </summary>
        private void GenerateRVector()
        {
            int gridSize = InvertedPendulumQStore.LEN;
            int cellCount = gridSize * gridSize;

            InvertedPendulumEnvironment probe = new InvertedPendulumEnvironment();
            R = new Vector(cellCount);

            int cell = 0;
            while (cell < cellCount)
            {
                // Flat index layout: cell = row * gridSize + column.
                InvertedPendulumState cellState;
                InvertedPendulumQStore.GetState(cell % gridSize, cell / gridSize, out cellState);

                probe.a = cellState.a;
                probe.w = cellState.w;
                R.Elements[cell] = probe.Reward();

                ++cell;
            }
        }
        /// <summary>
        /// Estimates a sampled transition model over a set of prototype states and fills
        /// <c>Q</c> with one vector per action.
        /// </summary>
        /// <remarks>
        /// Steps performed:
        /// 1. Collect 100 prototype states by resetting a test environment and store them in <c>states</c>.
        /// 2. Draw 10000 samples per prototype: reset the environment, map the state to its
        ///    closest prototype, accumulate its reward, and for each action (-1, 0, +1) record
        ///    which prototype the environment lands on after one step.
        /// 3. Divide the accumulated transitions and rewards by the number of samples that
        ///    fell on each prototype.
        /// 4. Average the three action matrices, scale by the discount 0.99, and hand
        ///    (I - M) and R to <c>SolveLinearEquation2</c> (presumably solving
        ///    (I - M) * utility = R - confirm against SparseMatrix).
        /// 5. Store <c>Ma[a].MatrixMultiplyRight(utility)</c> in <c>Q[a]</c>.
        /// Prototypes that never receive a sample keep an all-zero row and a zero reward.
        /// </remarks>
        public void Train()
        {
            const int actionCount = 3;          // actions are encoded as slot - 1: -1, 0, +1
            const int prototypeCount = 100;
            const int samplesPerState = 10000;
            const float discount = 0.99f;

            InvertedPendulumEnvironment testenv = new InvertedPendulumEnvironment();

            states = new List<InvertedPendulumState>();
            for (int i = 0; i < prototypeCount; ++i)
            {
                testenv.Reset();
                InvertedPendulumState newstate = (InvertedPendulumState)testenv.State();
                states.Add(newstate);
            }

            int statenum = states.Count;

            // Compute state-transition probabilities and expected rewards.
            Vector R = new Vector(statenum);

            SparseMatrix[] Ma = new SparseMatrix[actionCount];
            for (int i = 0; i < actionCount; ++i)
            {
                Ma[i] = new SparseMatrix(statenum);
            }

            // Exact per-prototype sample counts. Kept as integers so that the later
            // normalisation is a single division instead of a float reciprocal that is
            // re-derived (and re-rounded) on every sample.
            int[] visits = new int[statenum];

            for (int ii = 0; ii < statenum * samplesPerState; ++ii)
            {
                testenv.Reset();
                InvertedPendulumState curstate = (InvertedPendulumState)testenv.State();
                int from = GetClosestStateIndex(states, curstate);

                R.Elements[from] += testenv.Reward();

                for (int i = 0; i < actionCount; ++i)
                {
                    int action = i - 1;

                    // Every action is tried from the very same sampled state.
                    testenv.SetState(curstate);
                    testenv.Step(action);
                    InvertedPendulumState tostate = (InvertedPendulumState)testenv.State();
                    int to = GetClosestStateIndex(states, tostate);

                    Ma[i][from, to] += 1;
                }

                ++visits[from];
            }

            // counts[s] = 1 / (number of samples on s), or 0 when s was never sampled.
            Vector counts = new Vector(statenum);
            for (int s = 0; s < statenum; ++s)
            {
                counts.Elements[s] = visits[s] > 0 ? 1f / visits[s] : 0f;
            }

            // Turn accumulated hit counts into relative frequencies
            // (assumes Multiply(Vector) scales row s by counts[s] - confirm against SparseMatrix).
            for (int i = 0; i < actionCount; ++i)
            {
                Ma[i].Multiply(counts);
            }

            // Turn accumulated rewards into per-prototype averages.
            R.Multiply(counts);

            // Transition matrix of the uniform random policy, scaled by the discount factor.
            SparseMatrix M = (Ma[0] + Ma[1] + Ma[2]);
            M.Multiply(1f / actionCount);
            M.Multiply(discount);

            SparseMatrix IM = SparseMatrix.Identity(statenum) - M;
            Vector utility = IM.SolveLinearEquation2(R);

            for (int i = 0; i < actionCount; ++i)
            {
                Q[i] = Ma[i].MatrixMultiplyRight(utility);
            }
        }