/// <summary>
/// Initializes the form's components, creates a new
/// <c>InvertedPendulumEnvironment</c> that is assigned both to
/// <c>customControl11.Workspace</c> and to <c>ws</c>, turns on
/// <c>KeyPreview</c> and enables <c>timer1</c>.
/// </summary>
public Form3()
{
    InitializeComponent();

    // One environment instance is shared by the control and the form.
    var environment = new InvertedPendulumEnvironment();
    customControl11.Workspace = environment;
    ws = environment;

    // Let the form see key events before its child controls do.
    KeyPreview = true;

    timer1.Enabled = true;
}
/// <summary>
/// Initializes the form's components and creates two separate
/// <c>InvertedPendulumEnvironment</c> instances: one assigned to
/// <c>customControl11.Workspace</c> and <c>ws</c>, the other to
/// <c>customControl12.Workspace</c> and <c>oppWs</c>. Finally turns on
/// <c>KeyPreview</c>.
/// </summary>
public Form1()
{
    InitializeComponent();

    // First environment: shared by customControl11 and the ws field.
    var ownEnvironment = new InvertedPendulumEnvironment();
    customControl11.Workspace = ownEnvironment;
    ws = ownEnvironment;

    // Second, independent environment: shared by customControl12 and oppWs.
    var opponentEnvironment = new InvertedPendulumEnvironment();
    customControl12.Workspace = opponentEnvironment;
    oppWs = opponentEnvironment;

    // Let the form see key events before its child controls do.
    KeyPreview = true;
}
/// <summary>
/// Builds one transition matrix per action by random sampling, stores the
/// three matrices in <c>Ma</c> and then calls
/// <c>SaveMMatrices("mmatrices.dat")</c>.
/// </summary>
/// <remarks>
/// The state space is treated as a LEN x LEN grid
/// (<c>InvertedPendulumQStore.LEN</c>), flattened as
/// <c>index = k * LEN + j</c>. For every grid state and each action
/// (-1, 0, +1) the environment is started 1000 times from a randomly
/// jittered copy of that state, stepped once, and the entry
/// [source, destination] of the action's matrix is increased by 0.001.
/// </remarks>
public void GenerateMMatrices()
{
    const int ActionCount = 3;
    const int SamplesPerState = 1000;

    var simulator = new InvertedPendulumEnvironment();
    int gridSize = InvertedPendulumQStore.LEN;
    int cellCount = gridSize * gridSize;

    Ma = new SparseMatrix[ActionCount];
    for (int slot = 0; slot < ActionCount; ++slot)
    {
        Ma[slot] = new SparseMatrix(cellCount);
    }

    for (int source = 0; source < cellCount; ++source)
    {
        // Unflatten the index: j is the fast-running coordinate, k the slow one.
        int sourceJ = source % gridSize;
        int sourceK = source / gridSize;

        InvertedPendulumState origin;
        InvertedPendulumQStore.GetState(sourceJ, sourceK, out origin);

        for (int slot = 0; slot < ActionCount; ++slot)
        {
            // Matrix slots 0..2 correspond to actions -1, 0, +1.
            int action = slot - 1;

            for (int sample = 0; sample < SamplesPerState; ++sample)
            {
                // Random offset in [0, 2*PI/LEN) on a and [0, 6/LEN) on w;
                // presumably one grid cell wide in each dimension - confirm
                // against InvertedPendulumQStore.
                simulator.a = origin.a + r.NextDouble() / gridSize * 2 * Math.PI;
                simulator.w = origin.w + r.NextDouble() / gridSize * 2 * 3;
                simulator.Step(action);

                var landed = new InvertedPendulumState(simulator.a, simulator.w);
                int landedJ, landedK;
                InvertedPendulumQStore.GetStateIndices(landed, out landedJ, out landedK);

                int destination = landedK * gridSize + landedJ;

                // 1000 samples of weight 0.001 each add up to roughly 1 per row.
                Ma[slot][source, destination] += 0.001f;
            }
        }
    }

    SaveMMatrices("mmatrices.dat");
}
/// <summary>
/// Fills <c>R</c> with one reward per grid state: each state of the
/// LEN x LEN grid (<c>InvertedPendulumQStore.LEN</c>) is loaded into a
/// scratch environment and the value of its <c>Reward()</c> is stored at the
/// state's flattened index (<c>k * LEN + j</c>).
/// </summary>
private void GenerateRVector()
{
    int gridSize = InvertedPendulumQStore.LEN;
    int cellCount = gridSize * gridSize;
    var probe = new InvertedPendulumEnvironment();

    R = new Vector(cellCount);

    for (int index = 0; index < cellCount; ++index)
    {
        // Unflatten the index: j is the fast-running coordinate, k the slow one.
        InvertedPendulumState cell;
        InvertedPendulumQStore.GetState(index % gridSize, index / gridSize, out cell);

        // Put the scratch environment exactly into this grid state.
        probe.a = cell.a;
        probe.w = cell.w;

        R.Elements[index] = probe.Reward();
    }
}
/// <summary>
/// Samples a set of representative states, estimates per-action transition
/// weights and rewards between them by simulation, solves a linear system
/// for a utility vector and stores one result vector per action in
/// <c>Q</c>.
/// </summary>
/// <remarks>
/// Steps, as performed by the code:
/// 1. 100 states obtained from <c>Reset()</c> are stored in <c>states</c>.
/// 2. For <c>states.Count * 10000</c> further resets, the reset state is
///    mapped to its closest stored state; the reward is accumulated and,
///    for each action (-1, 0, +1), the transition to the closest stored
///    successor state is counted.
/// 3. Accumulated values are combined with the reciprocal visit counts via
///    <c>Multiply</c>.
/// 4. <c>(I - 0.99 * 0.333333 * (Ma0 + Ma1 + Ma2)) * utility = R</c> is
///    solved with <c>SolveLinearEquation2</c>, and
///    <c>Q[i] = Ma[i].MatrixMultiplyRight(utility)</c>.
/// </remarks>
public void Train()
{
    const int ActionCount = 3;
    const int SampledStateCount = 100;

    var simulator = new InvertedPendulumEnvironment();

    // Collect the representative states from fresh resets.
    states = new List<InvertedPendulumState>();
    for (int n = 0; n < SampledStateCount; ++n)
    {
        simulator.Reset();
        var sampled = (InvertedPendulumState)simulator.State();
        states.Add(sampled);
    }

    int stateCount = states.Count;

    // Compute the state-transition probabilities and the expected rewards.
    Vector rewards = new Vector(stateCount);
    SparseMatrix[] transitions = new SparseMatrix[ActionCount];
    for (int slot = 0; slot < ActionCount; ++slot)
    {
        transitions[slot] = new SparseMatrix(stateCount);
    }

    // Holds 1/n for every state that was the origin of n samples
    // (1, 1/2, 1/3, ...); stays 0 for states never visited.
    Vector counts = new Vector(stateCount);

    int sampleCount = stateCount * 10000;
    for (int sample = 0; sample < sampleCount; ++sample)
    {
        simulator.Reset();
        var start = (InvertedPendulumState)simulator.State();
        int from = GetClosestStateIndex(states, start);

        rewards.Elements[from] += simulator.Reward();

        for (int slot = 0; slot < ActionCount; ++slot)
        {
            // Matrix slots 0..2 correspond to actions -1, 0, +1.
            int action = slot - 1;

            // Rewind to the same start state before trying each action.
            simulator.SetState(start);
            simulator.Step(action);

            var successor = (InvertedPendulumState)simulator.State();
            int to = GetClosestStateIndex(states, successor);
            transitions[slot][from, to] += 1;
        }

        // Update the reciprocal visit count: 1/n -> 1/(n + 1), or 1 on first visit.
        float reciprocal = (float)counts.Elements[from];
        if (reciprocal == 0)
        {
            reciprocal = 1;
        }
        else
        {
            reciprocal = 1 / (1 / reciprocal + 1);
        }
        counts.Elements[from] = reciprocal;
    }

    // Presumably turns the accumulated sums into per-visit averages by scaling
    // with 1/n - confirm the semantics of Multiply(Vector).
    for (int slot = 0; slot < ActionCount; ++slot)
    {
        transitions[slot].Multiply(counts);
    }
    rewards.Multiply(counts);

    // Sum of the three action matrices scaled by 0.333333 and then by 0.99
    // (presumably a discount factor - confirm).
    SparseMatrix combined = transitions[0] + transitions[1] + transitions[2];
    combined.Multiply(0.333333f);
    combined.Multiply(0.99f);

    SparseMatrix system = SparseMatrix.Identity(stateCount) - combined;
    Vector utility = system.SolveLinearEquation2(rewards);

    for (int slot = 0; slot < ActionCount; ++slot)
    {
        Q[slot] = transitions[slot].MatrixMultiplyRight(utility);
    }
}