// Probability of choosing action a in state p under the current policy:
// uniform over the actions stored for that cell, 0 if a is not allowed there.
// (Contains on an array is the LINQ extension, so the file needs "using System.Linq;".)
public double PolicityProb(AgentAction a, Position p)
{
    if (policy[p.y, p.x].Contains(a))
    {
        return (double)1 / policy[p.y, p.x].Length;
    }
    return 0;
}
// Deterministic transition: apply the action's offset and stay in place
// if the resulting position would leave the grid.
public Position NextState(Position currentPosition, AgentAction a)
{
    Position p = new Position();
    p.x = currentPosition.x + a.x;
    p.y = currentPosition.y + a.y;
    if (p.x >= 0 && p.x < length && p.y >= 0 && p.y < height)
    {
        return p;
    }
    return currentPosition;
}
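// Illustration only (not part of the original listing): a quick check of the
// boundary handling above. The method name is hypothetical, and it assumes the
// action factory methods below are static members of AgentAction.
public void BoundaryExample()
{
    Position edge = new Position();
    edge.x = length - 1;   // right-most column
    edge.y = 0;
    Position after = NextState(edge, AgentAction.moveRight());
    // Moving right would leave the grid, so NextState returns the starting
    // position unchanged: after.x == length - 1, after.y == 0.
}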
// Factory methods for the available actions; x and y are the offsets
// applied by NextState.
public static AgentAction moveUp()
{
    AgentAction action = new AgentAction();
    action.y = 1;
    return action;
}

public static AgentAction moveRight()
{
    AgentAction action = new AgentAction();
    action.x = 1;
    return action;
}

public static AgentAction moveLeft()
{
    AgentAction action = new AgentAction();
    action.x = -1;
    return action;
}

// A horizontal jump of four cells.
public static AgentAction moveJump()
{
    AgentAction action = new AgentAction();
    action.x = 4;
    return action;
}

// A vertical jump of three cells.
public static AgentAction moveDownJump()
{
    AgentAction action = new AgentAction();
    action.y = 3;
    return action;
}
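// Illustration only: one way the factory methods above could seed an
// equiprobable starting policy, so that PolicityProb returns 1/5 for every
// action in every cell. The method name and the exact policy field type
// (assumed here to be AgentAction[,][]) are assumptions, not part of the
// original listing.
public void InitEquiprobablePolicy()
{
    AgentAction[] allActions = new AgentAction[]
    {
        AgentAction.moveUp(),
        AgentAction.moveRight(),
        AgentAction.moveLeft(),
        AgentAction.moveJump(),
        AgentAction.moveDownJump()
    };

    policy = new AgentAction[grid.height, grid.length][];
    for (int j = 0; j < grid.height; j++)
    {
        for (int k = 0; k < grid.length; k++)
        {
            // Shallow copy: each cell gets its own array (PolicyImprovement
            // replaces it later) but shares the same action instances, which
            // is what Contains in PolicityProb relies on.
            policy[j, k] = (AgentAction[])allActions.Clone();
        }
    }
}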
// Greedy policy improvement: for every cell, score each currently allowed
// action with a one-step lookahead and keep only the best one.
public void PolicyImprovement()
{
    Position p = new Position();
    Position next;
    AgentAction bestAction;
    double prevActionValue = Double.MinValue;
    double currentValue;

    for (int j = 0; j < grid.height; j++)
    {
        for (int k = 0; k < grid.length; k++)
        {
            p.Set(k, j);
            bestAction = null;
            prevActionValue = Double.MinValue;

            foreach (AgentAction a in policy[j, k])
            {
                if (a == null) continue;

                // Cells with zero reward are treated as absorbing: the
                // successor is the cell itself.
                if (grid.Reward(p) == 0)
                {
                    next = p;
                }
                else
                {
                    next = grid.NextState(p, a);
                }

                if (PolicityProb(a, p) != 0)
                {
                    // One-step lookahead: action probability times
                    // (immediate reward + discounted value of the successor).
                    currentValue = PolicityProb(a, p) * (grid.Reward(next) + discount * grid.world[next.y, next.x]);
                    if (currentValue >= prevActionValue)
                    {
                        bestAction = a;
                        prevActionValue = currentValue;
                    }
                }
            }

            // Collapse the policy in this cell to the single best action found.
            if (bestAction != null)
            {
                policy[j, k] = new AgentAction[1];
                policy[j, k][0] = bestAction;
            }
        }
    }
}
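// Illustration only: how PolicyImprovement might be driven. PolicyEvaluation
// is hypothetical here (the listing only shows the improvement step); the idea
// is the usual alternation of evaluating grid.world under the current policy
// and then improving the policy greedily against it.
public void PolicyIterationExample(int sweeps)
{
    for (int i = 0; i < sweeps; i++)
    {
        PolicyEvaluation();   // hypothetical: refreshes grid.world for the current policy
        PolicyImprovement();  // greedy step defined above
    }
}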
// Policy probability of action a at the agent's current position.
public double PolicityProb(AgentAction a)
{
    return PolicityProb(a, currentPosition);
}
// Transition probability for the deterministic grid: 1 if next is the state
// that NextState maps (currentPosition, a) to, 0 otherwise. If Position is a
// class that does not overload ==, this comparison is by reference.
public double Posibility(Position next, AgentAction a)
{
    if (next == grid.NextState(currentPosition, a))
    {
        return 1;
    }
    return 0;
}
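// Illustration only: an expected one-step backup for the current state,
// combining the action probabilities from PolicityProb with the deterministic
// successor from NextState. The method name is hypothetical; the members it
// uses (policy, currentPosition, grid, discount) are the ones from the listing.
public double ExpectedValueExample()
{
    double v = 0;
    foreach (AgentAction a in policy[currentPosition.y, currentPosition.x])
    {
        if (a == null) continue;
        Position next = grid.NextState(currentPosition, a);
        v += PolicityProb(a) * (grid.Reward(next) + discount * grid.world[next.y, next.x]);
    }
    return v;
}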