/// <summary>
/// Performs one Q-learning step: updates the utility table entry for the
/// previous (state, action) pair using the reward read from <paramref name="ws"/>,
/// then selects and returns the next action via epsilon-greedy.
/// </summary>
/// <param name="ws">Workspace holding the utility table and current observations.</param>
/// <returns>The action indices chosen for the current state.</returns>
public int[] Step(Workspace ws)
{
    State state = CalcStateFromWorkspace(ws);
    float reward = GetRewardFromWorkspace(ws);
    Action action = EpsilonGreedy(state, epsilon, ws);

    // Skip the update on the first step of an episode — no previous transition yet.
    if ((prevState != null) && (prevAction != null))
    {
        // max_a' Q(s', a') over the 4 candidate actions in the new state.
        // (The original also tracked the argmax in a dead local `maxp`; it was
        // never read, so it has been removed.)
        float Qtp1max = float.MinValue;
        for (int i = 0; i < 4; ++i)
        {
            if (Qtp1max < ws.uti[i, state.States[1], state.States[0]])
            {
                Qtp1max = ws.uti[i, state.States[1], state.States[0]];
            }
        }

        // Standard Q-learning update: Q(s,a) += alpha * (r + gamma * max_a' Q(s',a') - Q(s,a)).
        float Qt = ws.uti[prevAction.Actions[0], prevState.States[1], prevState.States[0]];
        float deltaQ = alpha * (reward + discount * Qtp1max - Qt);
        ws.uti[prevAction.Actions[0], prevState.States[1], prevState.States[0]] += deltaQ;
    }

    // Remember this transition for the next step's update.
    prevAction = action;
    prevState = state;
    return action.Actions;
}
/// <summary>
/// Records a visited state, the reward received there, and the action taken.
/// </summary>
/// <param name="state">State observed at this step.</param>
/// <param name="reward">Reward received in that state.</param>
/// <param name="action">Action selected in that state.</param>
public StateActionReward(State state, float reward, Action action)
{
    this.action = action;
    this.reward = reward;
    this.state = state;
}
/// <summary>
/// Flattens a state and an optional action into one input vector:
/// state components first, then action components. Action slots are
/// left at 0 when <paramref name="action"/> is null.
/// </summary>
/// <param name="state">State providing the leading components.</param>
/// <param name="action">Action providing the trailing components; may be null.</param>
/// <returns>A vector of length state.States.Length + actionDimension.</returns>
private float[] InputFromStateAction(State state, Action action)
{
    float[] input = new float[state.States.Length + actionDimension];
    // BUG FIX: iterate over the vector being filled (input.Length), not
    // fixArray.Length — a mismatched fixArray length would either leave the
    // tail of `input` unfilled or index past the end of it.
    for (int i = 0; i < input.Length; ++i)
    {
        if (i < state.States.Length)
        {
            input[i] = state.States[i];
        }
        else if (action != null)
        {
            input[i] = action.Actions[i - state.States.Length];
        }
    }
    return input;
}
/// <summary>
/// Returns the greedy action for <paramref name="state"/>: the action index
/// with the highest utility among the 4 candidates in the workspace table.
/// </summary>
/// <param name="state">State whose coordinates index the utility table.</param>
/// <param name="ws">Workspace holding the utility table.</param>
/// <returns>The action built from the best-scoring index (0 if none beats float.MinValue).</returns>
private Action GetBestAction(State state, Workspace ws)
{
    int bestIndex = 0;
    float bestQ = float.MinValue;
    for (int candidate = 0; candidate < 4; ++candidate)
    {
        float q = ws.uti[candidate, state.States[1], state.States[0]];
        if (q > bestQ)
        {
            bestQ = q;
            bestIndex = candidate;
        }
    }
    return ActionFromInput(new int[] { bestIndex });
}
/// <summary>
/// Epsilon-greedy action selection: with probability <paramref name="epsilon"/>
/// explores with a uniformly random action, otherwise exploits the greedy action.
/// </summary>
/// <param name="state">Current state used for greedy selection.</param>
/// <param name="epsilon">Exploration probability in [0, 1].</param>
/// <param name="ws">Workspace holding the utility table.</param>
/// <returns>The selected action.</returns>
private Action EpsilonGreedy(State state, float epsilon, Workspace ws)
{
    // BUG FIX: the original returned the greedy action when r.NextDouble() < epsilon,
    // making epsilon the EXPLOITATION rate — a small epsilon meant mostly-random
    // behavior. By the standard epsilon-greedy convention, epsilon is the
    // exploration rate, so the branches are swapped here. If the surrounding code
    // deliberately tuned epsilon as a greedy probability, revisit this.
    if (r.NextDouble() < epsilon)
    {
        // Explore: independent uniform draw in each action dimension.
        int[] data = new int[actionDimension];
        for (int i = 0; i < actionDimension; ++i)
        {
            data[i] = r.Next(4);
        }
        return new Action(data);
    }
    // Exploit: greedy action with respect to the current utility table.
    return GetBestAction(state, ws);
}
/// <summary>
/// Resets the per-episode transition memory so the first Step of a new
/// episode performs no Q-update (there is no previous state/action yet).
/// </summary>
public void EpisodeBegin()
{
    prevState = null;
    prevAction = null;
}