/// <summary>
/// Estimates the average discounted reward (ADR) of a policy by running
/// <paramref name="cTrials"/> experiments and averaging their returns.
/// </summary>
/// <param name="p">Policy handed to every experiment.</param>
/// <param name="cTrials">Number of experiments to run.</param>
/// <param name="cStepsPerTrial">Step budget forwarded to each experiment.</param>
/// <returns>
/// The sum of the experiment returns scaled by <c>1.0 / cTrials</c>.
/// With <paramref name="cTrials"/> equal to zero this evaluates to NaN (infinity times zero).
/// </returns>
public double ComputeAverageDiscountedReward(Policy p, int cTrials, int cStepsPerTrial)
{
    Debug.WriteLine("Started computing ADR");

    // Accumulate the return of each experiment.
    double total = 0.0;
    int trial = 0;
    while (trial < cTrials)
    {
        total += CompOneExperimant(p, cStepsPerTrial);
        trial++;
    }

    // Scale by the reciprocal of the trial count to obtain the mean.
    double average = (1.0 / cTrials) * total;

    Debug.WriteLine("\nDone computing ADR");
    return average;
}
/// <summary>
/// Runs one experiment: starting from <c>StartState</c>, follows the policy and
/// accumulates the discounted reward of each step until a goal state is reached,
/// the policy yields no action, or the step budget is used up.
/// </summary>
/// <param name="p">Policy that selects the action for the current state.</param>
/// <param name="cStepsPerTrial">Maximum number of steps to take in this experiment.</param>
/// <returns>
/// The sum over the executed steps of <c>DiscountFactor^step * Reward(state, action)</c>,
/// with <c>step</c> starting at zero.
/// </returns>
private double CompOneExperimant(Policy p, int cStepsPerTrial)
{
    State current = StartState;
    double discountedReturn = 0.0;

    // Running value of DiscountFactor^step; avoids recomputing the power every step.
    double discount = 1.0;

    // Strict '<' so that at most cStepsPerTrial steps are executed (the previous
    // '<=' bound allowed one extra step).
    for (int step = 0; !IsGoalState(current) && step < cStepsPerTrial; step++)
    {
        Action action = p.GetAction(current);
        if (action == null)
        {
            // No action available for this state: end the experiment, as DrawRace does.
            break;
        }

        discountedReturn += discount * current.Reward(action);
        discount *= DiscountFactor;

        // Take exactly one transition per step. The previous implementation scanned
        // States and reassigned the current state while still scanning, so later
        // probability checks were made from the already-updated state and the result
        // depended on enumeration order.
        // NOTE(review): Apply is the same transition call DrawRace uses; it is assumed
        // to be declared on State and to return the successor state - confirm.
        current = current.Apply(action);
    }

    return discountedReturn;
}
/// <summary>
/// Animates the policy on the given viewer: while the form is active, repeatedly
/// replays a run from <c>StartState</c>, publishing each visited state to the form.
/// </summary>
/// <param name="p">Policy that selects the action for each state.</param>
/// <param name="form">Viewer that is started and then fed the car state for each step.</param>
public void DrawRace(Policy p, RaceViewer form)
{
    // Pause, in milliseconds, between runs and between consecutive steps.
    const int delayMs = 100;

    form.StateValues = null;
    form.Start();

    while (form.Active)
    {
        Thread.Sleep(delayMs);

        // Every replay begins from the start state.
        RaceCarState current = (RaceCarState)StartState;

        while (true)
        {
            // Stop this run when the viewer is closed or the goal has been reached.
            if (!form.Active || IsGoalState(current))
            {
                break;
            }

            VelocityAction chosen = (VelocityAction)p.GetAction(current);
            if (chosen == null)
            {
                // The policy has no action for this state; abandon the current run.
                break;
            }

            // Publish the state to the viewer, wait, then advance by one transition.
            form.CarState = current;
            Thread.Sleep(delayMs);
            current = (RaceCarState)current.Apply(chosen);
        }
    }
}