// Linear nearest-neighbour search over the discretized states.
// Distance is squared Euclidean in (a, w); note the angle is compared
// linearly, so wrap-around at +/-pi is not accounted for.
private int GetClosestStateIndex(List<InvertedPendulumState> states, InvertedPendulumState curstate)
{
    double mindist = double.MaxValue;
    int closeststate = 0;
    for (int i = 0; i < states.Count; ++i)
    {
        InvertedPendulumState state = states[i];
        double dist = (state.a - curstate.a) * (state.a - curstate.a)
                    + (state.w - curstate.w) * (state.w - curstate.w);
        if (mindist > dist)
        {
            mindist = dist;
            closeststate = i;
        }
    }
    return closeststate;
}
// Decodes an (action, angle, angular-velocity) index triple into a state/action pair.
public static void GetStateAndAction(int i, int j, int k, out InvertedPendulumState state, out InvertedPendulumAction action)
{
    GetState(j, k, out state);
    GetAction(i, out action);
}
// Maps a continuous state onto grid indices. The angle index j wraps around
// (modulo LEN); adding LEN * 1000 keeps the operand of % non-negative.
// The angular-velocity index k is clamped to [0, LEN - 1].
public static void GetStateIndices(InvertedPendulumState state, out int j, out int k)
{
    j = ((int)((state.a / Math.PI + 1) * LEN / 2) + LEN * 1000) % LEN;
    k = (int)((state.w / 3 + 1) * LEN / 2);
    if (k > LEN - 1) k = LEN - 1;
    if (k < 0) k = 0;
}
// Q-table: action in {-1, 0, 1} x angle in [-pi, pi] x angular velocity in [-3, 3]
// (the velocity range matches the factor of 3 used in GetState/GetStateIndices).
public float[,,] value = new float[3, LEN, LEN];

public static void GetIndices(InvertedPendulumState state, InvertedPendulumAction action, out int i, out int j, out int k)
{
    GetStateIndices(state, out j, out k);
    GetActionIndices(action, out i);
}
// Inverse of GetStateIndices: returns the state at the lower corner of grid cell (j, k).
public static void GetState(int j, int k, out InvertedPendulumState state)
{
    double a = (j / (float)(LEN / 2) - 1) * Math.PI;
    double w = (k / (float)(LEN / 2) - 1) * 3;
    state = new InvertedPendulumState(a, w);
}
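// The two mappings above are near-inverses: GetState returns the lower corner
// of a grid cell, and GetStateIndices truncates a continuous state back to its
// cell. The round-trip check below is a sketch, not part of the original code;
// it assumes it lives in the same class, so LEN and the static mapping methods
// are directly accessible.
public static void CheckStateIndexRoundTrip()
{
    int mismatches = 0;
    for (int j = 0; j < LEN; ++j)
    {
        for (int k = 0; k < LEN; ++k)
        {
            InvertedPendulumState state;
            GetState(j, k, out state);

            int j2, k2;
            GetStateIndices(state, out j2, out k2);

            // Each cell corner is expected to map back to the same cell;
            // floating-point rounding at cell boundaries may cause rare
            // off-by-one mismatches, which this loop only counts.
            if (j2 != j || k2 != k) ++mismatches;
        }
    }
    Console.WriteLine("Round-trip mismatches (floating-point edge cases): " + mismatches);
}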
public void ReplaceValue(InvertedPendulumState state, InvertedPendulumAction2 action, float value)
{
    // TODO: delta??
    // igmn.Train(new Vector(new double[] { state.a, state.w, action.action, value }));
}
internal void SetState(InvertedPendulumState state)
{
    this.a = state.a;
    this.w = state.w;
}
public ReinforcementLearningState State()
{
    return new InvertedPendulumState(a, w);
}
private void button5_Click(object sender, EventArgs e)
{
    adp.Train();
    customControl21.Invalidate();

    Random r = new Random();
    InvertedPendulumQStore QStore = (InvertedPendulumQStore)adp.qstore;

    // Distill the tabular Q-function into the IGMN approximator by training
    // on uniformly sampled (state, action, Q-value) tuples.
    for (int aa = 0; aa < 100000; ++aa)
    {
        InvertedPendulumState state = new InvertedPendulumState(
            (2 * r.NextDouble() - 1) * Math.PI,
            (2 * r.NextDouble() - 1) * 3);
        InvertedPendulumAction action = new InvertedPendulumAction(r.Next(3) - 1);
        ((InvertedPendulumESIGMNQStore)esigmn.qFunction).igmn.Train(
            new Vector(new double[] { state.a, state.w, action.action, QStore.Evaluate(state, action) }));
    }

    // Alternative left for reference: sweep the Q-table cell by cell instead of sampling.
    //for (int j = 0; j < QStore.value.GetLength(1); ++j)
    //{
    //    for (int k = 0; k < QStore.value.GetLength(2); ++k)
    //    {
    //        for (int i = 0; i < QStore.value.GetLength(0); ++i)
    //        {
    //            InvertedPendulumState state;
    //            InvertedPendulumAction action;
    //            InvertedPendulumQStore.GetStateAndAction(i, j, k, out state, out action);
    //            ((InvertedPendulumESIGMNQStore)esigmn.qFunction).igmn.Train(
    //                new Vector(new double[] { state.a, state.w, action.action, QStore.value[i, j, k] }));
    //        }
    //    }
    //}

    esigmnViewer1.Invalidate();
}
// Estimates the per-action state-transition matrices Ma by Monte Carlo:
// each grid cell is sampled 1000 times, so every observed transition adds
// 1/1000 = 0.001 to the corresponding probability.
public void GenerateMMatrices()
{
    InvertedPendulumEnvironment testws = new InvertedPendulumEnvironment();
    int statenum = InvertedPendulumQStore.LEN * InvertedPendulumQStore.LEN;

    Ma = new SparseMatrix[3];
    for (int i = 0; i < 3; ++i)
    {
        Ma[i] = new SparseMatrix(statenum);
    }

    for (int ii = 0; ii < statenum; ++ii)
    {
        int j = ii % InvertedPendulumQStore.LEN;
        int k = ii / InvertedPendulumQStore.LEN;
        InvertedPendulumState state;
        InvertedPendulumQStore.GetState(j, k, out state);

        for (int i = 0; i < 3; ++i)
        {
            int action = i - 1;
            for (int i1 = 0; i1 < 1000; ++i1)
            {
                // Sample a start state uniformly within cell (j, k);
                // GetState returned the cell's lower corner.
                testws.a = state.a + r.NextDouble() / InvertedPendulumQStore.LEN * 2 * Math.PI;
                testws.w = state.w + r.NextDouble() / InvertedPendulumQStore.LEN * 2 * 3;
                testws.Step(action);

                int j2, k2;
                InvertedPendulumState state2 = new InvertedPendulumState(testws.a, testws.w);
                InvertedPendulumQStore.GetStateIndices(state2, out j2, out k2);
                int ii2 = k2 * InvertedPendulumQStore.LEN + j2;
                Ma[i][ii, ii2] += 0.001f;
            }
        }
    }

    SaveMMatrices("mmatrices.dat");
}
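// One natural consumer of the Ma matrices is model-based value iteration on
// the same grid. The sketch below is an assumption, not part of the original
// code: it presumes a reward vector R indexed like the rows of Ma, a
// hypothetical discount factor of 0.95, and that SparseMatrix entries are
// readable through the same [row, col] indexer used in GenerateMMatrices.
// Iterating densely over a sparse matrix is wasteful, but it keeps the
// sketch free of invented SparseMatrix API.
public double[] ValueIterationSketch(double[] R, int sweeps)
{
    int statenum = InvertedPendulumQStore.LEN * InvertedPendulumQStore.LEN;
    const double gamma = 0.95;
    double[] V = new double[statenum];

    for (int sweep = 0; sweep < sweeps; ++sweep)
    {
        double[] Vnext = new double[statenum];
        for (int ii = 0; ii < statenum; ++ii)
        {
            // Bellman backup: V(s) = R(s) + gamma * max_a sum_s' P(s'|s,a) V(s')
            double best = double.MinValue;
            for (int i = 0; i < 3; ++i)
            {
                double expected = 0;
                for (int ii2 = 0; ii2 < statenum; ++ii2)
                {
                    expected += Ma[i][ii, ii2] * V[ii2];
                }
                if (expected > best) best = expected;
            }
            Vnext[ii] = R[ii] + gamma * best;
        }
        V = Vnext;
    }
    return V;
}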