示例#1
0
        /// <summary>
        /// Calls <c>initV0</c>, records in <c>ViBySActions</c> the action a fresh
        /// <see cref="RandomPolicy"/> returns for every state of <c>m_dDomain</c>, and then
        /// sweeps all states calling <c>update</c> until the largest delta reported within a
        /// single sweep is below <paramref name="dEpsilon"/>.
        /// </summary>
        /// <param name="dEpsilon">Stopping threshold: another sweep runs while the largest delta of the last sweep is greater than or equal to this value.</param>
        /// <param name="cUpdates">Receives the total number of <c>update</c> calls performed.</param>
        /// <param name="tsExecutionTime">Receives the elapsed time from just after the start message until the final sweep has finished.</param>
        public void PolicyIteration(double dEpsilon, out int cUpdates, out TimeSpan tsExecutionTime)
        {
            Debug.WriteLine("Starting policy iteration");

            // Stopwatch instead of DateTime.Now subtraction: it is meant for interval
            // measurement and is not skewed by system clock adjustments (DST, time sync).
            Stopwatch swTimer = Stopwatch.StartNew();

            cUpdates = 0;

            double maxDelta, delta;

            initV0();
            RandomPolicy rp = new RandomPolicy(m_dDomain);

            // NOTE(review): if ViBySActions is a Dictionary, Add throws when a state is
            // already present (e.g. on a second call to this method) - confirm that the
            // collection is reset elsewhere before re-entry.
            foreach (State s in m_dDomain.States)
            {
                ViBySActions.Add(s, rp.GetAction(s));
            }

            do
            {
                // Start below any possible delta so the first update always sets the maximum;
                // with no states at all the value stays at MinValue and the loop ends after one pass.
                maxDelta = Double.MinValue;
                foreach (State s in m_dDomain.States)
                {
                    delta = update(s);
                    cUpdates++;
                    maxDelta = Math.Max(delta, maxDelta);
                }
            } while (maxDelta >= dEpsilon);

            tsExecutionTime = swTimer.Elapsed;
            Debug.WriteLine("\nFinished policy iteration");
        }
        /// <summary>
        /// Runs <c>initV0</c>, stores in <c>ViBySActions</c> the action chosen by a new
        /// <see cref="RandomPolicy"/> for each state in <c>m_dDomain</c>, then keeps sweeping
        /// over all states with <c>update</c> until no state in a sweep reports a delta of at
        /// least <paramref name="dEpsilon"/>.
        /// </summary>
        /// <param name="dEpsilon">Convergence threshold compared against the largest delta of each sweep.</param>
        /// <param name="cUpdates">Set to the number of times <c>update</c> was invoked.</param>
        /// <param name="tsExecutionTime">Set to the time elapsed between the start message and the end of the last sweep.</param>
        public void PolicyIteration(double dEpsilon, out int cUpdates, out TimeSpan tsExecutionTime)
        {
            Debug.WriteLine("Starting policy iteration");

            // Measure with Stopwatch rather than two DateTime.Now readings: wall-clock
            // readings are coarse and shift when the system clock is adjusted.
            Stopwatch swElapsed = Stopwatch.StartNew();
            cUpdates = 0;

            double maxDelta, delta;
            initV0();
            RandomPolicy rp = new RandomPolicy(m_dDomain);

            // NOTE(review): Add (as opposed to an indexer assignment) would throw on a
            // duplicate key if ViBySActions is a Dictionary - verify it is empty on entry.
            foreach (State s in m_dDomain.States)
            {
                ViBySActions.Add(s, rp.GetAction(s));
            }

            do
            {
                // Reset per sweep; MinValue guarantees the first delta becomes the maximum.
                maxDelta = Double.MinValue;
                foreach (State s in m_dDomain.States)
                {
                    delta = update(s);
                    cUpdates++;
                    maxDelta = Math.Max(delta, maxDelta);
                }
            } while (maxDelta >= dEpsilon);

            tsExecutionTime = swElapsed.Elapsed;
            Debug.WriteLine("\nFinished policy iteration");
        }
示例#3
0
        /// <summary>
        /// Program entry point. Routes <see cref="Debug"/> output to the console and to
        /// "Debug.txt", loads a race track, opens the viewer, runs value iteration on one
        /// value function, draws a race and computes average discounted rewards.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // The using block guarantees the log file handle is released even when
            // Debug.Close() is compiled out (it is [Conditional("DEBUG")]) or when an
            // exception escapes. Disposing an already-closed FileStream is a no-op.
            using (FileStream fs = new FileStream("Debug.txt", FileMode.Create))
            {
                Debug.Listeners.Add(new TextWriterTraceListener(Console.Out));
                Debug.Listeners.Add(new TextWriterTraceListener(fs));

                try
                {
                    RaceTrack rc = new RaceTrack("RaceTrack1.bmp");//change here to a different race BMP: 2 is very small, 3 is very large
                    RandomPolicy p = new RandomPolicy(rc);
                    RaceViewer form = new RaceViewer(rc);

                    form.Start();
                    //rc.DrawRace(p, form);

                    ValueFunction vi1 = new ValueFunction(rc);
                    ValueFunction vi2 = new ValueFunction(rc);
                    ValueFunction vi3 = new ValueFunction(rc);
                    int cUpdates1 = 0, cUpdates2 = 0, cUpdates3 = 0;
                    TimeSpan ts1, ts2, ts3;

                    // Only vi1 has a solver run on it below; the solver calls for vi2 and
                    // vi3 are commented out, so those two are used exactly as constructed.
                    form.StateValues = vi3;
                    //  vi3.RealTimeDynamicProgramming(100, out cUpdates3, out ts3);

                    form.StateValues = vi1;
                    vi1.ValueIteration(0.5, out cUpdates1, out ts1);

                    form.StateValues = vi2;
                    //vi2.PrioritizedValueIteration(0.5, out cUpdates2, out ts2);
                    rc.DrawRace(vi3, form);

                    // NOTE(review): these results are computed but never printed or stored -
                    // presumably inspected in a debugger; confirm whether they should be reported.
                    double dADR1 = rc.ComputeAverageDiscountedReward(vi1, 1000, 100);
                    double dADR2 = rc.ComputeAverageDiscountedReward(vi2, 1000, 100);
                    double dADR3 = rc.ComputeAverageDiscountedReward(vi3, 1000, 100);
                }
                finally
                {
                    // Flush and close the listeners on every exit path so buffered log
                    // output reaches Debug.txt even if one of the calls above throws.
                    Debug.Close();
                }
            }
        }