/// <summary>
/// Seeds <c>ViBySActions</c> with one action per state taken from a
/// <c>RandomPolicy</c>, then repeatedly sweeps over every state of the domain,
/// calling <c>update</c> on each, until the largest value returned by
/// <c>update</c> during a full sweep is below <paramref name="dEpsilon"/>.
/// </summary>
/// <param name="dEpsilon">
/// Convergence threshold: sweeping stops once the maximum per-state delta of a
/// sweep is strictly less than this value.
/// </param>
/// <param name="cUpdates">Receives the total number of <c>update</c> calls performed.</param>
/// <param name="tsExecutionTime">
/// Receives the elapsed time measured from just after the start message is
/// logged until the sweeps have finished.
/// </param>
public void PolicyIteration(double dEpsilon, out int cUpdates, out TimeSpan tsExecutionTime)
{
    Debug.WriteLine("Starting policy iteration");

    // Stopwatch measures elapsed time independently of the wall clock, so the
    // result is not skewed by DST changes or system clock adjustments the way
    // a DateTime.Now subtraction can be.
    Stopwatch swElapsed = Stopwatch.StartNew();
    cUpdates = 0;

    // presumably resets the stored state values to their initial estimate - TODO confirm
    initV0();

    // Start from an arbitrary policy: one action per state from RandomPolicy.
    // NOTE(review): if ViBySActions is a Dictionary, Add throws when the key is
    // already present, so a second call on the same instance would fail - confirm.
    RandomPolicy rp = new RandomPolicy(m_dDomain);
    foreach (State s in m_dDomain.States)
    {
        ViBySActions.Add(s, rp.GetAction(s));
    }

    double maxDelta;
    do
    {
        // Double.MinValue is the most negative double, so any delta replaces it.
        // With an empty state set it stays below dEpsilon and the loop exits
        // after a single pass.
        maxDelta = Double.MinValue;
        foreach (State s in m_dDomain.States)
        {
            // presumably the magnitude of the change applied to s - TODO confirm
            double delta = update(s);
            cUpdates++;
            maxDelta = Math.Max(delta, maxDelta);
        }
    } while (maxDelta >= dEpsilon);

    tsExecutionTime = swElapsed.Elapsed;
    Debug.WriteLine("\nFinished policy iteration");
}
/// <summary>
/// Assigns every state an action from a <c>RandomPolicy</c> (stored in
/// <c>ViBySActions</c>) and then performs full sweeps over the domain's states,
/// invoking <c>update</c> per state, until a sweep's largest returned delta
/// falls below <paramref name="dEpsilon"/>.
/// </summary>
/// <param name="dEpsilon">
/// Stopping threshold; the sweep loop continues while the maximum delta of the
/// last sweep is greater than or equal to this value.
/// </param>
/// <param name="cUpdates">Receives the count of <c>update</c> invocations.</param>
/// <param name="tsExecutionTime">
/// Receives the time elapsed between the start message and the end of the
/// final sweep.
/// </param>
public void PolicyIteration(double dEpsilon, out int cUpdates, out TimeSpan tsExecutionTime)
{
    Debug.WriteLine("Starting policy iteration");

    // Use a Stopwatch rather than subtracting DateTime.Now values: wall-clock
    // time can jump (DST, clock sync), which would corrupt the measurement.
    Stopwatch timer = Stopwatch.StartNew();
    cUpdates = 0;

    // presumably initializes the per-state values before iterating - TODO confirm
    initV0();

    // Initial policy: a random action for each state.
    // NOTE(review): Add on a Dictionary throws for duplicate keys; verify that
    // ViBySActions is empty whenever this method is entered.
    RandomPolicy rp = new RandomPolicy(m_dDomain);
    foreach (State s in m_dDomain.States)
    {
        ViBySActions.Add(s, rp.GetAction(s));
    }

    double maxDelta;
    do
    {
        // Reset the per-sweep maximum to the lowest possible double.
        maxDelta = Double.MinValue;
        foreach (State s in m_dDomain.States)
        {
            // NOTE(review): update(s) looks like it returns how much the
            // state's value changed - confirm against its definition.
            double delta = update(s);
            cUpdates++;
            maxDelta = Math.Max(delta, maxDelta);
        }
    } while (maxDelta >= dEpsilon);

    tsExecutionTime = timer.Elapsed;
    Debug.WriteLine("\nFinished policy iteration");
}
/// <summary>
/// Program entry point: routes <c>Debug</c> output to the console and to
/// "Debug.txt", loads a race track, starts the viewer, runs value iteration on
/// one <c>ValueFunction</c> and then computes the average discounted reward
/// for three value functions.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // The using statement guarantees the log file is released even when
    // Debug.Close() does not run: Debug methods are compiled out of non-DEBUG
    // builds, so Debug.Close() alone cannot be relied on to close the stream.
    using (FileStream fs = new FileStream("Debug.txt", FileMode.Create))
    {
        Debug.Listeners.Add(new TextWriterTraceListener(Console.Out));
        Debug.Listeners.Add(new TextWriterTraceListener(fs));
        try
        {
            RaceTrack rc = new RaceTrack("RaceTrack1.bmp");//change here to a different race BMP: 2 is very small, 3 is very large
            RandomPolicy p = new RandomPolicy(rc);
            RaceViewer form = new RaceViewer(rc);
            form.Start();
            //rc.DrawRace(p, form);

            ValueFunction vi1 = new ValueFunction(rc);
            ValueFunction vi2 = new ValueFunction(rc);
            ValueFunction vi3 = new ValueFunction(rc);
            int cUpdates1 = 0, cUpdates2 = 0, cUpdates3 = 0;
            TimeSpan ts1, ts2, ts3;

            // Experiment toggles: only ValueIteration on vi1 is currently enabled.
            // NOTE(review): vi2 and vi3 are never solved here, yet they are drawn
            // and evaluated below - confirm that this is intentional.
            form.StateValues = vi3;
            // vi3.RealTimeDynamicProgramming(100, out cUpdates3, out ts3);
            form.StateValues = vi1;
            vi1.ValueIteration(0.5, out cUpdates1, out ts1);
            form.StateValues = vi2;
            //vi2.PrioritizedValueIteration(0.5, out cUpdates2, out ts2);

            rc.DrawRace(vi3, form);

            double dADR1 = rc.ComputeAverageDiscountedReward(vi1, 1000, 100);
            double dADR2 = rc.ComputeAverageDiscountedReward(vi2, 1000, 100);
            double dADR3 = rc.ComputeAverageDiscountedReward(vi3, 1000, 100);
        }
        finally
        {
            // Flush and close the trace listeners even if the run above throws,
            // so buffered debug output still reaches Debug.txt.
            Debug.Close();
        }
    }
}