/// <summary>
/// Entry point: trains a point-based value iteration (PBVI) policy on a maze POMDP,
/// evaluates its average discounted reward, and animates it in the maze viewer.
/// Trace output goes both to the console and to Debug.txt in the working directory.
/// </summary>
static void Main(string[] args)
{
    string path = System.IO.Directory.GetCurrentDirectory();

    // BUG FIX: the original used `path + "Debug.txt"` with no directory separator,
    // which creates a file like "...binDebug.txt" next to (not inside) the working
    // directory. Path.Combine inserts the separator correctly on every platform.
    // `using` also guarantees the stream is released even if setup throws.
    using (FileStream fs = new FileStream(System.IO.Path.Combine(path, "Debug.txt"), FileMode.Create))
    {
        Debug.Listeners.Add(new TextWriterTraceListener(Console.Out));
        Debug.Listeners.Add(new TextWriterTraceListener(fs));

        MazeDomain maze = new MazeDomain(System.IO.Path.Combine(path, "Maze3.txt"));

        // Train the PBVI policy: 100 belief points, 20 backup iterations.
        // NOTE(review): these are presumably (belief-set size, iteration count) —
        // confirm against PointBasedVI's signature.
        PointBasedValueIteration pbvi = new PointBasedValueIteration(maze);
        pbvi.PointBasedVI(100, 20);

        // Alternative baselines (RandomPolicy, MostLikelyStatePolicy, VotingPolicy,
        // QMDPPolicy over an MDPValueFunction) can be evaluated the same way via
        // ComputeAverageDiscountedReward for comparison.
        double dADR4 = maze.ComputeAverageDiscountedReward(pbvi, 100, 100);
        // Report the evaluation result; the original computed it but never logged it.
        Debug.WriteLine("PBVI average discounted reward: " + dADR4);

        MazeViewer viewer = new MazeViewer(maze);
        viewer.Start();
        maze.SimulatePolicy(pbvi, 10, viewer);

        // Flush and detach the listeners before the stream is disposed.
        Debug.Close();
    }
}
/// <summary>
/// Runs <paramref name="cTrials"/> independent simulation trials of policy
/// <paramref name="p"/>, animating each one on the supplied viewer.
/// </summary>
/// <param name="p">The policy whose behavior is being simulated.</param>
/// <param name="cTrials">Number of trials to run.</param>
/// <param name="viewer">Viewer that displays each trial as it progresses.</param>
public void SimulatePolicy(Policy p, int cTrials, MazeViewer viewer)
{
    for (int trial = 0; trial < cTrials; trial++)
    {
        SimulateTrial(p, viewer);
    }
}
/// <summary>
/// Simulates one trial of the given policy: starting from the initial belief,
/// repeatedly queries the policy for an action, samples the successor state and
/// an observation, updates the belief, and pushes each step to the viewer until
/// a goal state is reached.
/// </summary>
/// <param name="p">Policy that maps the current belief state to an action.</param>
/// <param name="viewer">Viewer updated with state, belief, and observation each step.</param>
private void SimulateTrial(Policy p, MazeViewer viewer)
{
    BeliefState belief = InitialBelief;
    State state = belief.sampleState();

    // Show the initial configuration before the first action is taken.
    viewer.CurrentState = (MazeState)state;
    viewer.CurrentBelief = belief;

    // NOTE(review): loops until the sampled trajectory hits a goal state —
    // a policy that never reaches the goal would run forever.
    while (!IsGoalState(state))
    {
        Action action = p.GetAction(belief);
        State successor = state.Apply(action);
        Observation obs = successor.RandomObservation(action);

        belief = belief.Next(action, obs);
        state = successor;

        viewer.CurrentState = (MazeState)state;
        viewer.CurrentBelief = belief;
        viewer.CurrentObservation = (MazeObservation)obs;

        // Pause so the animation is visible to a human observer.
        Thread.Sleep(500);
    }
}