Example #1
    // Iterative policy evaluation: a fixed number of in-place sweeps of the
    // Bellman expectation backup over every cell of the grid.
    public void PolicyEvaluation(int steps)
    {
        Position p = new Position();
        Position next;
        double total;

        for (int i = 0; i < steps; i++)
        {
            for (int j = 0; j < grid.height; j++)
            {
                for (int k = 0; k < grid.length; k++)
                {
                    p.Set(k, j);
                    total = 0;
                    foreach (AgentAction a in actions)
                    {
                        // A reward of 0 identifies the terminal cell; the agent
                        // stays in place there, so a zero-initialised terminal
                        // value remains 0.
                        if (grid.Reward(p) == 0)
                        {
                            next = p;
                        }
                        else
                        {
                            next = grid.NextState(p, a);
                        }
                        // Weight each action's one-step backup by the policy's
                        // probability of choosing it in this state.
                        total += PolicityProb(a, p) * (grid.Reward(next) + discount * grid.world[next.y, next.x]);
                    }
                    grid.world[j, k] = total;
                }
            }
        }
    }
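
PolicyEvaluation performs the in-place expectation backup V(s) <- sum over a of PolicityProb(a, s) * (R(s') + discount * V(s')), but it leans on members the snippet does not show: grid, actions, discount, and PolicityProb. The sketch below is one plausible shape for that scaffolding, assuming a deterministic grid world with a -1 step reward and a single terminal cell; every declaration in it is an assumption for illustration, not part of the original source.

    using System;

    // Assumed scaffolding for the snippets on this page; none of these
    // declarations appear in the original source.
    public class AgentAction
    {
        public readonly int dx, dy;   // movement delta on the grid
        public AgentAction(int dx, int dy) { this.dx = dx; this.dy = dy; }
    }

    public class Position
    {
        public int x, y;
        public void Set(int x, int y) { this.x = x; this.y = y; }
    }

    public class Grid
    {
        public int length, height;
        public double[,] world;           // current value estimate per cell
        private readonly Position goal;   // the single terminal cell

        public Grid(int length, int height, Position goal)
        {
            this.length = length;
            this.height = height;
            this.goal = goal;
            world = new double[height, length];   // zero-initialised by default
        }

        // -1 per step, 0 at the terminal cell: the convention the snippets
        // appear to assume when they test grid.Reward(p) == 0.
        public double Reward(Position p)
            => (p.x == goal.x && p.y == goal.y) ? 0 : -1;

        // Deterministic move, clamped so the agent cannot leave the grid.
        public Position NextState(Position p, AgentAction a)
        {
            Position next = new Position();
            next.Set(Math.Min(length - 1, Math.Max(0, p.x + a.dx)),
                     Math.Min(height - 1, Math.Max(0, p.y + a.dy)));
            return next;
        }
    }

Under this reading, PolicityProb(a, p) plays the role of the policy probability pi(a|s); for an equiprobable random policy over the four moves it would simply return 0.25, and actions would hold the four unit-step AgentAction instances.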
Example #2
    // Greedy policy improvement: for each cell, keep only the admissible
    // action with the highest one-step backup under the current values.
    public void PolicyImprovement()
    {
        Position p = new Position();
        Position next;
        AgentAction bestAction;
        double prevActionValue;
        double currentValue;

        for (int j = 0; j < grid.height; j++)
        {
            for (int k = 0; k < grid.length; k++)
            {
                p.Set(k, j);
                bestAction = null;
                prevActionValue = Double.MinValue;
                foreach (AgentAction a in policy[j, k])
                {
                    if (a == null)
                        continue;
                    // A reward of 0 identifies the terminal cell; the agent
                    // stays in place there.
                    if (grid.Reward(p) == 0)
                    {
                        next = p;
                    }
                    else
                    {
                        next = grid.NextState(p, a);
                    }
                    // Only actions the current policy can actually choose
                    // are candidates.
                    if (PolicityProb(a, p) != 0)
                    {
                        currentValue = PolicityProb(a, p) * (grid.Reward(next) + discount * grid.world[next.y, next.x]);
                        if (currentValue >= prevActionValue)
                        {
                            bestAction = a;
                            prevActionValue = currentValue;
                        }
                    }
                }
                // Collapse the cell's action set to the single best action.
                if (bestAction != null)
                {
                    policy[j, k] = new AgentAction[1];
                    policy[j, k][0] = bestAction;
                }
            }
        }
    }
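
Together, PolicyEvaluation and PolicyImprovement form the classic policy-iteration loop: evaluate the current policy, then make it greedy with respect to the resulting values, and repeat. A minimal, hypothetical driver is sketched below; the method name, the fixed round count, and the assumption that both methods live on the same agent class are illustrations only, since the original source shows no such driver.

    // Hypothetical driver (not in the original source): one round of policy
    // iteration is an evaluation pass followed by a greedy improvement pass.
    // A real implementation would stop as soon as PolicyImprovement leaves
    // the policy unchanged instead of running a fixed number of rounds.
    public void PolicyIteration(int rounds, int evalSteps)
    {
        for (int r = 0; r < rounds; r++)
        {
            PolicyEvaluation(evalSteps);   // estimate V under the current policy
            PolicyImprovement();           // make the policy greedy w.r.t. V
        }
    }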
Example #3
    // Value iteration: sweep until the value function changes by less
    // than omega anywhere on the grid.
    public void ValueIteration()
    {
        Position p = new Position();
        Position next;
        double total;
        double tmp;
        // delta indicates how much the value function changed in one sweep
        double delta;
        // omega denotes the minimum change of the value function below
        // which the calculation terminates
        double omega = 0.001;

        do
        {
            delta = 0;
            for (int j = 0; j < grid.height; j++)
            {
                for (int k = 0; k < grid.length; k++)
                {
                    p.Set(k, j);
                    total = double.MinValue;
                    foreach (AgentAction a in actions)
                    {
                        // A reward of 0 identifies the terminal cell; the
                        // agent stays in place there.
                        if (grid.Reward(p) == 0)
                        {
                            next = p;
                        }
                        else
                        {
                            next = grid.NextState(p, a);
                        }
                        tmp = PolicityProb(a, p) * (grid.Reward(next) + discount * grid.world[next.y, next.x]);
                        // Keep the best action's backup (the max over actions).
                        if (tmp > total)
                        {
                            total = tmp;
                        }
                    }
                    // Track the largest per-cell change seen in this sweep.
                    delta = Math.Max(delta, Math.Abs(total - grid.world[j, k]));
                    grid.world[j, k] = total;
                }
            }
        } while (delta > omega);
    }
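
ValueIteration replaces the expectation over actions with a maximum: each cell keeps the largest of the policy-weighted backups PolicityProb(a, p) * (R(s') + discount * V(s')), and the sweeps stop once the biggest per-cell change falls below omega. Note that it updates grid.world only; the policy table from Example #2 is untouched. If the greedy policy is wanted afterwards, a helper along the lines sketched below could read it back out of the converged values. This is an assumption, not part of the original source, and it uses the plain one-step backup without the PolicityProb weighting that ValueIteration applies.

    // Hypothetical helper (not in the original source): mirrors the sweep
    // structure of ValueIteration, but remembers the argmax action for each
    // cell and stores it in the policy table used by PolicyImprovement.
    public void GreedyPolicy()
    {
        Position p = new Position();
        for (int j = 0; j < grid.height; j++)
        {
            for (int k = 0; k < grid.length; k++)
            {
                p.Set(k, j);
                AgentAction best = null;
                double bestValue = double.MinValue;
                foreach (AgentAction a in actions)
                {
                    // Terminal cells self-loop, as in the snippets above.
                    Position next = grid.Reward(p) == 0 ? p : grid.NextState(p, a);
                    double v = grid.Reward(next) + discount * grid.world[next.y, next.x];
                    if (v > bestValue)
                    {
                        best = a;
                        bestValue = v;
                    }
                }
                policy[j, k] = new AgentAction[] { best };
            }
        }
    }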