/// <summary>
/// Converts a triple of table indices into the state and the action they stand for.
/// </summary>
/// <param name="i">Action index; passed on to <c>GetAction</c>.</param>
/// <param name="j">First state index; passed on to <c>GetState</c>.</param>
/// <param name="k">Second state index; passed on to <c>GetState</c>.</param>
/// <param name="state">Receives the state produced by <c>GetState(j, k, ...)</c>.</param>
/// <param name="action">Receives the action produced by <c>GetAction(i, ...)</c>.</param>
public static void GetStateAndAction(
    int i,
    int j,
    int k,
    out InvertedPendulumState state,
    out InvertedPendulumAction action)
{
    // The state is resolved first, the action second.
    GetState(j, k, out state);
    GetAction(i, out action);
}
/// <summary>
/// Maps an action to its index along the first dimension of the value table.
/// </summary>
/// <param name="action">Action whose <c>action</c> field is converted.</param>
/// <param name="i">Receives <c>action.action + 1</c>, so action values -1, 0, 1 become 0, 1, 2.</param>
public static void GetActionIndices(InvertedPendulumAction action, out int i)
{
    int rawAction = action.action;

    // Shift by one so the lowest action value (-1) lands on index 0.
    i = rawAction + 1;
}
/// <summary>
/// Value table addressed as <c>value[i, j, k]</c>: <c>i</c> is the action index
/// (see <c>GetActionIndices</c>), <c>j</c> and <c>k</c> are the state indices
/// (see <c>GetStateIndices</c>).
/// </summary>
// Author's note on the three axes, in order: -1,0,1; -pi..pi; -10..10
// (presumably the action values and the ranges of the two state variables —
// confirm against GetStateIndices).
public float[, ,] value = new float[3, LEN, LEN];

/// <summary>
/// Computes all three table indices for a (state, action) pair.
/// </summary>
/// <param name="state">State to convert; passed on to <c>GetStateIndices</c>.</param>
/// <param name="action">Action to convert; passed on to <c>GetActionIndices</c>.</param>
/// <param name="i">Receives the action index.</param>
/// <param name="j">Receives the first state index.</param>
/// <param name="k">Receives the second state index.</param>
public static void GetIndices(
    InvertedPendulumState state,
    InvertedPendulumAction action,
    out int i,
    out int j,
    out int k)
{
    GetStateIndices(state, out j, out k);
    GetActionIndices(action, out i);
}
/// <summary>
/// Finds the action with the highest stored value for the given state.
/// </summary>
/// <remarks>
/// The three candidate actions -1, 0 and 1 are examined in that order and the first
/// strictly greatest entry wins. Afterwards, if the entry for action 0 equals the
/// maximum, action 0 is chosen instead, so ties always resolve towards action 0.
/// If no entry is greater than <see cref="float.MinValue"/>, the result is action 0
/// with a utility of <see cref="float.MinValue"/>.
/// </remarks>
/// <param name="state">State to evaluate; it is cast to <c>InvertedPendulumState</c>.</param>
/// <param name="action">Receives a new <c>InvertedPendulumAction</c> holding the selected action value.</param>
/// <param name="retval">Receives the table value of the selected action.</param>
public void GetBestActionAndUtilityForState(ReinforcementLearningState state, out ReinforcementLearningAction action, out float retval)
{
    int stateRow;
    int stateCol;
    GetStateIndices((InvertedPendulumState)state, out stateRow, out stateCol);

    float bestUtility = float.MinValue;
    int bestAction = 0;

    for (int candidate = -1; candidate <= 1; ++candidate)
    {
        int slot;
        GetActionIndices(new InvertedPendulumAction(candidate), out slot);

        float utility = value[slot, stateRow, stateCol];
        if (utility > bestUtility)
        {
            bestUtility = utility;
            bestAction = candidate;
        }
    }

    // Index 1 is the slot of action 0 (GetActionIndices adds one to the action value).
    // Exact equality is intended: bestUtility is a copy of a table entry, so this
    // only detects a tie with the action-0 entry and prefers action 0 in that case.
    if (value[1, stateRow, stateCol] == bestUtility)
    {
        bestAction = 0;
    }

    action = new InvertedPendulumAction(bestAction);
    retval = bestUtility;
}
/// <summary>
/// Builds the action that corresponds to an action index of the value table.
/// </summary>
/// <param name="i">Action index; indices 0, 1, 2 yield action values -1, 0, 1.</param>
/// <param name="action">Receives a new <c>InvertedPendulumAction</c> constructed from <c>i - 1</c>.</param>
public static void GetAction(int i, out InvertedPendulumAction action)
{
    // Undo the +1 shift applied when an action is turned into an index.
    int actionValue = i - 1;
    action = new InvertedPendulumAction(actionValue);
}
/// <summary>
/// Click handler for button5. Runs <c>adp.Train()</c> and repaints <c>customControl21</c>,
/// then trains the <c>igmn</c> model behind <c>esigmn.qFunction</c> on 100000 randomly
/// sampled points of the form { state.a, state.w, action.action, QStore.Evaluate(state, action) },
/// and finally repaints <c>esigmnViewer1</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void button5_Click(object sender, EventArgs e)
{
    adp.Train();
    customControl21.Invalidate();

    Random r = new Random();
    InvertedPendulumQStore QStore = (InvertedPendulumQStore)adp.qstore;

    for (int aa = 0; aa < 100000; ++aa)
    {
        // First state component: uniform sample scaled by pi, i.e. roughly -pi..pi.
        // NOTE(review): the (float) cast applies to the random factor only, before the
        // multiplication by Math.PI — confirm that this precision loss is intended.
        double sampledA = (float)(2 * r.NextDouble() - 1) * Math.PI;

        // Second state component: uniform sample in -3..3.
        double sampledW = (2 * r.NextDouble() - 1) * 3;

        InvertedPendulumState state = new InvertedPendulumState(sampledA, sampledW);

        // r.Next(3) yields 0, 1 or 2, so the action value is -1, 0 or 1.
        InvertedPendulumAction action = new InvertedPendulumAction(r.Next(3) - 1);

        InvertedPendulumESIGMNQStore approximator = (InvertedPendulumESIGMNQStore)esigmn.qFunction;
        approximator.igmn.Train(new Vector(new double[]
        {
            state.a,
            state.w,
            action.action,
            QStore.Evaluate(state, action)
        }));
    }

    // Disabled alternative: train on every cell of the table instead of random samples.
    //for (int j = 0; j < QStore.value.GetLength(1); ++j)
    //{
    //    for (int k = 0; k < QStore.value.GetLength(2); ++k)
    //    {
    //        for (int i = 0; i < QStore.value.GetLength(0); ++i)
    //        {
    //            InvertedPendulumState state;
    //            InvertedPendulumAction action;
    //            InvertedPendulumQStore.GetStateAndAction(i, j, k, out state, out action);
    //            ((InvertedPendulumESIGMNQStore)esigmn.qFunction).igmn.Train(new Vector(new double[] { state.a, state.w, action.action, QStore.value[i, j, k] }));
    //        }
    //    }
    //}

    esigmnViewer1.Invalidate();
}