Exemplo n.º 1
0
 /// <summary>
 /// Probability that the policy selects action <paramref name="a"/> at position <paramref name="p"/>.
 /// </summary>
 /// <param name="a">Action to look up.</param>
 /// <param name="p">Grid position; its <c>y</c> selects the first index of <c>policy</c> and its <c>x</c> the second.</param>
 /// <returns>
 /// <c>1 / N</c>, where <c>N</c> is the length of the action array stored for the cell,
 /// when that array contains <paramref name="a"/>; otherwise <c>0</c>.
 /// </returns>
 public double PolicityProb(AgentAction a, Position p)
 {
     var cellActions = policy[p.y, p.x];

     // Every stored action gets the same share of the cell's probability mass.
     return cellActions.Contains(a) ? 1.0 / cellActions.Length : 0.0;
 }
Exemplo n.º 2
0
 /// <summary>
 /// Applies an action's offsets to a position, rejecting moves that would leave the grid.
 /// </summary>
 /// <param name="currentPosition">Position the move starts from.</param>
 /// <param name="a">Action whose <c>x</c> and <c>y</c> members are added to the position.</param>
 /// <returns>
 /// A new position at the offset coordinates when they satisfy
 /// <c>0 &lt;= x &lt; length</c> and <c>0 &lt;= y &lt; height</c>;
 /// otherwise <paramref name="currentPosition"/> itself.
 /// </returns>
 public Position NextState(Position currentPosition, AgentAction a)
 {
     Position candidate = new Position
     {
         x = currentPosition.x + a.x,
         y = currentPosition.y + a.y
     };

     bool outsideGrid = candidate.x < 0 || candidate.x >= length
                     || candidate.y < 0 || candidate.y >= height;

     // An out-of-bounds move leaves the agent where it was.
     return outsideGrid ? currentPosition : candidate;
 }
Exemplo n.º 3
0
 /// <summary>
 /// Creates an action whose <c>y</c> offset is 1; all other members keep the values
 /// given by <c>new AgentAction()</c>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): the offset is +1 while <c>moveDownJump</c> uses +3 on the same axis,
 /// so "up" and "down" point the same way here; confirm the intended axis direction.
 /// </remarks>
 /// <returns>The newly created action.</returns>
 public static AgentAction moveUp()
 {
     return new AgentAction { y = 1 };
 }
Exemplo n.º 4
0
 /// <summary>
 /// Creates an action whose <c>x</c> offset is 1; all other members keep the values
 /// given by <c>new AgentAction()</c>.
 /// </summary>
 /// <returns>The newly created action.</returns>
 public static AgentAction moveRight()
 {
     return new AgentAction { x = 1 };
 }
Exemplo n.º 5
0
 /// <summary>
 /// Creates an action whose <c>x</c> offset is -1; all other members keep the values
 /// given by <c>new AgentAction()</c>.
 /// </summary>
 /// <returns>The newly created action.</returns>
 public static AgentAction moveLeft()
 {
     return new AgentAction { x = -1 };
 }
Exemplo n.º 6
0
 /// <summary>
 /// Creates an action whose <c>x</c> offset is 4; all other members keep the values
 /// given by <c>new AgentAction()</c>.
 /// </summary>
 /// <returns>The newly created action.</returns>
 public static AgentAction moveJump()
 {
     return new AgentAction { x = 4 };
 }
Exemplo n.º 7
0
 /// <summary>
 /// Creates an action whose <c>y</c> offset is 3; all other members keep the values
 /// given by <c>new AgentAction()</c>.
 /// </summary>
 /// <returns>The newly created action.</returns>
 public static AgentAction moveDownJump()
 {
     return new AgentAction { y = 3 };
 }
Exemplo n.º 8
0
 /// <summary>
 /// Walks every grid cell and, when at least one stored action scores, replaces the
 /// cell's action array with a single-element array holding the best-scoring action.
 /// </summary>
 /// <remarks>
 /// An action's score is
 /// <c>PolicityProb(a, p) * (grid.Reward(next) + discount * grid.world[next.y, next.x])</c>,
 /// where <c>next</c> is the cell itself when <c>grid.Reward(p) == 0</c> and
 /// <c>grid.NextState(p, a)</c> otherwise. Ties go to the action that appears later in
 /// the cell's array. Cells with no scoring action are left untouched.
 /// NOTE(review): <c>grid.world</c> presumably holds the current state-value estimates;
 /// confirm against the value-evaluation code.
 /// </remarks>
 public void PolicyImprovement()
 {
     // A single Position instance is reused for every cell, as before.
     Position p = new Position();

     for (int j = 0; j < grid.height; j++)
     {
         for (int k = 0; k < grid.length; k++)
         {
             p.Set(k, j);

             AgentAction bestAction = null;
             double bestValue = double.MinValue;

             foreach (AgentAction a in policy[j, k])
             {
                 // Action arrays may contain empty slots.
                 if (a == null)
                 {
                     continue;
                 }

                 // A cell whose own reward is 0 is scored as if the agent stays put.
                 Position next = grid.Reward(p) == 0 ? p : grid.NextState(p, a);

                 // Look the probability up once: each call scans the cell's action array.
                 double probability = PolicityProb(a, p);
                 if (probability == 0)
                 {
                     continue;
                 }

                 double value = probability * (grid.Reward(next) + discount * grid.world[next.y, next.x]);

                 // ">=" keeps the original tie-breaking: the later action wins.
                 if (value >= bestValue)
                 {
                     bestAction = a;
                     bestValue = value;
                 }
             }

             // The cell is rewritten only after enumeration of its old array has finished.
             if (bestAction != null)
             {
                 policy[j, k] = new AgentAction[1];
                 policy[j, k][0] = bestAction;
             }
         }
     }
 }
Exemplo n.º 9
0
 /// <summary>
 /// Probability that the policy selects action <paramref name="a"/> at <c>currentPosition</c>.
 /// </summary>
 /// <param name="a">Action to look up.</param>
 /// <returns>The result of the two-argument overload called with <c>currentPosition</c>.</returns>
 public double PolicityProb(AgentAction a) => PolicityProb(a, currentPosition);
Exemplo n.º 10
0
 /// <summary>
 /// Reports whether taking action <paramref name="a"/> from <c>currentPosition</c>
 /// leads to <paramref name="next"/>.
 /// </summary>
 /// <param name="next">Candidate successor position.</param>
 /// <param name="a">Action applied from <c>currentPosition</c>.</param>
 /// <returns>
 /// <c>1</c> when <paramref name="next"/> compares equal (<c>==</c>) to
 /// <c>grid.NextState(currentPosition, a)</c>; otherwise <c>0</c>.
 /// </returns>
 /// <remarks>
 /// NOTE(review): the result depends on how <c>==</c> is defined for <c>Position</c>;
 /// if it is reference equality, a freshly built successor never matches. Confirm.
 /// </remarks>
 public double Posibility(Position next, AgentAction a)
 {
     bool reached = next == grid.NextState(currentPosition, a);
     return reached ? 1.0 : 0.0;
 }