示例#1
0
        /// <summary>
        /// Estimates the average discounted reward (ADR) of a policy by running
        /// <paramref name="cTrials"/> independent calls to <c>CompOneExperimant</c>
        /// and averaging their results.
        /// </summary>
        /// <param name="p">Policy handed to every trial.</param>
        /// <param name="cTrials">Number of trials to run and to average over.</param>
        /// <param name="cStepsPerTrial">Step limit forwarded to each trial.</param>
        /// <returns>
        /// The sum of the per-trial results multiplied by <c>1.0 / cTrials</c>.
        /// When <paramref name="cTrials"/> is 0 no trial runs and the result is NaN.
        /// </returns>
        public double ComputeAverageDiscountedReward(Policy p, int cTrials, int cStepsPerTrial)
        {
            Debug.WriteLine("Started computing ADR");

            // Accumulate the discounted reward of every trial.
            double total = 0.0;
            int trial = 0;
            while (trial < cTrials)
            {
                total += CompOneExperimant(p, cStepsPerTrial);
                trial++;
            }

            // Scale the total by the reciprocal of the trial count to get the mean.
            double average = (1.0 / cTrials) * total;

            Debug.WriteLine("\nDone computing ADR");
            return average;
        }
示例#2
0
 /// <summary>
 /// Simulates a single trial of policy <paramref name="p"/> starting at <c>StartState</c>
 /// and returns the discounted sum of the rewards collected along the way.
 /// </summary>
 /// <param name="p">Policy queried for the action to take in each visited state.</param>
 /// <param name="cStepsPerTrial">Maximum number of steps to simulate.</param>
 /// <returns>
 /// The sum over the executed steps of <c>DiscountFactor^i * reward</c>, where <c>i</c> is
 /// the zero-based step index. The trial stops early when a goal state is reached or the
 /// policy returns no action.
 /// </returns>
 private double CompOneExperimant(Policy p, int cStepsPerTrial)
 {
     State s = StartState;
     double r = 0;

     // Strict bound: the previous "i <= cStepsPerTrial" executed one step more than requested.
     for (int i = 0; !IsGoalState(s) && i < cStepsPerTrial; i++)
     {
         Action a = p.GetAction(s);
         if (a == null)
         {
             // The policy offers no action here; stop the trial, as DrawRace does.
             break;
         }

         r += Math.Pow(DiscountFactor, i) * s.Reward(a);

         // Advance with the same transition call DrawRace uses. The previous scan over
         // States reassigned s in the middle of the loop, so the remaining probability
         // checks were evaluated against the new state and several transitions could be
         // chained within a single step.
         // NOTE(review): assumes Apply is declared on State (the cast in DrawRace
         // suggests it returns State) - confirm.
         s = s.Apply(a);
     }

     return r;
 }
 /// <summary>
 /// Replays policy <paramref name="p"/> on the viewer for as long as the form reports
 /// itself active. Every replay starts at <c>StartState</c> and ends when a goal state is
 /// reached, the policy returns no action, or the form becomes inactive.
 /// </summary>
 /// <param name="p">Policy that supplies the action for each displayed state.</param>
 /// <param name="form">Viewer that is started here and receives each state via <c>CarState</c>.</param>
 public void DrawRace(Policy p, RaceViewer form)
 {
     form.StateValues = null;
     form.Start();

     while (form.Active)
     {
         // Pause before (re)starting a replay from the start state.
         Thread.Sleep(100);

         RaceCarState current = (RaceCarState)StartState;
         while (true)
         {
             if (!form.Active || IsGoalState(current))
             {
                 break;
             }

             VelocityAction chosen = (VelocityAction)p.GetAction(current);
             if (chosen == null)
             {
                 break;
             }

             // Publish the state to the viewer, wait, then move on to the next state.
             form.CarState = current;
             Thread.Sleep(100);
             current = (RaceCarState)current.Apply(chosen);
         }
     }
 }