Ejemplo n.º 1
0
        /// <summary>
        /// Program entry point: loads the maze, runs point-based value iteration on it,
        /// evaluates the resulting policy and then replays it in the maze viewer.
        /// Debug output is sent both to the console and to "Debug.txt" in the
        /// current working directory.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string path = System.IO.Directory.GetCurrentDirectory();

            // Path.Combine inserts the directory separator; plain concatenation
            // (path + "Debug.txt") produced "<dir>Debug.txt" next to the working
            // directory instead of a file inside it.
            string logFile  = System.IO.Path.Combine(path, "Debug.txt");
            string mazeFile = System.IO.Path.Combine(path, "Maze3.txt");

            using (FileStream fs = new FileStream(logFile, FileMode.Create))
            {
                Debug.Listeners.Add(new TextWriterTraceListener(Console.Out));
                Debug.Listeners.Add(new TextWriterTraceListener(fs));

                try
                {
                    MazeDomain maze = new MazeDomain(mazeFile);

                    PointBasedValueIteration pbvi = new PointBasedValueIteration(maze);

                    pbvi.PointBasedVI(100, 20);

                    // Alternative policies, kept for reference / experimentation:
                    //MDPValueFunction v = new MDPValueFunction(maze);
                    //v.ValueIteration(0.5);

                    //RandomPolicy p0 = new RandomPolicy(maze);
                    //MostLikelyStatePolicy p1 = new MostLikelyStatePolicy(v);
                    //VotingPolicy p2 = new VotingPolicy(v);
                    //QMDPPolicy p3 = new QMDPPolicy(v, maze);

                    //double dADR1 = maze.ComputeAverageDiscountedReward(p1, 100, 100);
                    //double dADR2 = maze.ComputeAverageDiscountedReward(p2, 100, 100);
                    //double dADR3 = maze.ComputeAverageDiscountedReward(p3, 100, 100);
                    double dADR4 = maze.ComputeAverageDiscountedReward(pbvi, 100, 100);

                    MazeViewer viewer = new MazeViewer(maze);

                    viewer.Start();
                    maze.SimulatePolicy(pbvi, 10, viewer);
                }
                finally
                {
                    // Flush and close the trace listeners even when the run fails,
                    // so buffered debug output reaches the log file before the
                    // underlying stream is disposed by the enclosing using.
                    Debug.Close();
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Runs <paramref name="cTrials"/> simulated trials of the given policy,
        /// one after another, each one displayed through the supplied viewer.
        /// </summary>
        /// <param name="p">Policy handed to every trial.</param>
        /// <param name="cTrials">Number of trials to run; zero or negative runs none.</param>
        /// <param name="viewer">Viewer handed to every trial.</param>
        public void SimulatePolicy(Policy p, int cTrials, MazeViewer viewer)
        {
            int remaining = cTrials;

            while (remaining > 0)
            {
                SimulateTrial(p, viewer);
                remaining--;
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Simulates a single trial: starting from a state sampled from the initial
        /// belief, repeatedly asks the policy for an action, applies it, draws an
        /// observation and updates the belief until a goal state is reached.
        /// The viewer is updated after every step, followed by a 500 ms pause.
        /// </summary>
        /// <param name="p">Policy queried for an action at each step.</param>
        /// <param name="viewer">Viewer that receives the current state, belief and observation.</param>
        private void SimulateTrial(Policy p, MazeViewer viewer)
        {
            BeliefState belief = InitialBelief;
            State       state  = belief.sampleState();

            // Show the starting situation before the first step is taken.
            viewer.CurrentState  = (MazeState)state;
            viewer.CurrentBelief = belief;

            for (;;)
            {
                if (IsGoalState(state))
                {
                    break;
                }

                // One simulation step: act, move, observe, update the belief.
                Action      action      = p.GetAction(belief);
                State       successor   = state.Apply(action);
                Observation observation = successor.RandomObservation(action);

                belief = belief.Next(action, observation);
                state  = successor;

                viewer.CurrentState       = (MazeState)state;
                viewer.CurrentBelief      = belief;
                viewer.CurrentObservation = (MazeObservation)observation;

                // Pause between steps (500 ms).
                Thread.Sleep(500);
            }
        }