/// <summary>
/// Prepares the agent for a 1-dimensional action space and builds the
/// fix-flag array: one slot per state component plus one per action
/// component. State slots are fixed (true); action slots are free (false).
/// </summary>
/// <param name="ws">Workspace used once to probe the state layout.</param>
public void Initialize(Workspace ws)
{
    actionDimension = 1;

    // Probe a state only to learn how many components it has.
    State testState = CalcStateFromWorkspace(ws);

    // bool[] elements default to false, so only the leading state slots
    // need to be set explicitly (original set the rest to false by hand).
    fixArray = new bool[testState.States.Length + actionDimension];
    for (int i = 0; i < testState.States.Length; ++i)
    {
        fixArray[i] = true;
    }
}
/// <summary>
/// Performs one tabular Q-learning step: observes the current state and
/// reward, applies the temporal-difference update
/// Q(s,a) += alpha * (r + discount * max_a' Q(s',a') - Q(s,a))
/// to the previous state/action pair, then selects and returns the next action.
/// </summary>
/// <param name="ws">Current workspace; supplies position, reward and the utility table <c>uti</c>.</param>
/// <returns>The component array of the chosen action.</returns>
public int[] Step(Workspace ws)
{
    State state = CalcStateFromWorkspace(ws);
    float reward = GetRewardFromWorkspace(ws);
    Action action = EpsilonGreedy(state, epsilon, ws);

    // On the very first step there is no previous (s, a) pair to update.
    if ((prevState != null) && (prevAction != null))
    {
        // max over a' of Q(s', a') for the 4 possible actions.
        // NOTE(review): the action count 4 is hard-coded here and in
        // GetBestAction — consider a shared constant.
        // (Removed the unused local `maxp`, which was assigned but never read.)
        float Qtp1max = float.MinValue;
        for (int i = 0; i < 4; ++i)
        {
            if (Qtp1max < ws.uti[i, state.States[1], state.States[0]])
            {
                Qtp1max = ws.uti[i, state.States[1], state.States[0]];
            }
        }

        // TD update of the previously taken action's value.
        float Qt = ws.uti[prevAction.Actions[0], prevState.States[1], prevState.States[0]];
        float deltaQ = alpha * (reward + discount * Qtp1max - Qt);
        ws.uti[prevAction.Actions[0], prevState.States[1], prevState.States[0]] += deltaQ;
    }

    prevAction = action;
    prevState = state;
    return action.Actions;
}
/// <summary>
/// Reward signal: +10 when the agent stands on a goal cell (map value 3),
/// otherwise a small per-step penalty of -0.1.
/// </summary>
/// <param name="ws">Workspace providing the map grid and agent position.</param>
private float GetRewardFromWorkspace(Workspace ws)
{
    bool onGoal = ws.map[ws.pos.Y, ws.pos.X] == 3;
    return onGoal ? 10f : -0.1f;
}
/// <summary>
/// Encodes the agent's current grid position as a state vector [x, y].
/// </summary>
/// <param name="ws">Workspace providing the agent position.</param>
private State CalcStateFromWorkspace(Workspace ws)
{
    // Build the component array directly instead of going through a List.
    return new State(new[] { ws.pos.X, ws.pos.Y });
}
/// <summary>
/// Greedy policy: scans the 4 possible actions and returns the one with
/// the highest utility for the given state (first maximum wins on ties).
/// </summary>
/// <param name="state">State whose components index the utility table as [action, y, x].</param>
/// <param name="ws">Workspace providing the utility table <c>uti</c>.</param>
private Action GetBestAction(State state, Workspace ws)
{
    int bestIndex = 0;
    float bestValue = float.MinValue;
    for (int i = 0; i < 4; ++i)
    {
        float q = ws.uti[i, state.States[1], state.States[0]];
        if (q > bestValue)
        {
            bestValue = q;
            bestIndex = i;
        }
    }
    return ActionFromInput(new[] { bestIndex });
}
/// <summary>
/// Epsilon-greedy action selection: with probability <paramref name="epsilon"/>
/// explores by choosing a uniformly random action; otherwise exploits by
/// choosing the greedy (highest-utility) action.
/// </summary>
/// <remarks>
/// BUG FIX: the branches were inverted — the original exploited
/// (GetBestAction) with probability epsilon and explored the rest of the
/// time, which is the opposite of the standard epsilon-greedy convention
/// implied by the method and parameter names. If epsilon was tuned for the
/// old inverted behavior, callers should now pass (1 - epsilon).
/// </remarks>
/// <param name="state">Current state, used for greedy selection.</param>
/// <param name="epsilon">Exploration probability in [0, 1].</param>
/// <param name="ws">Workspace providing the utility table.</param>
private Action EpsilonGreedy(State state, float epsilon, Workspace ws)
{
    if (r.NextDouble() < epsilon)
    {
        // Explore: uniformly random component in [0, 4) per action dimension.
        int[] data = new int[actionDimension];
        for (int i = 0; i < actionDimension; ++i)
        {
            data[i] = r.Next(4);
        }
        return new Action(data);
    }

    // Exploit: greedy action from the utility table.
    return GetBestAction(state, ws);
}