Example #1
0
        /**
         * Builds a partially observable wrapper around the given fully
         * observable model {@code mdp}, with a 3x3 grid belief state and a
         * fresh {@code Sensor}. The fields {@code mdp}, {@code belief} and
         * {@code sensor} are declared on the enclosing class.
         *
         * @param mdp the underlying fully observable MDP this POMDP reasons over
         */
        public POMDP(MDP mdp)
        {
            this.mdp = mdp;
            // 3x3 belief grid; third ctor argument presumably a default/terminal
            // marker — TODO confirm against State's constructor.
            belief = new State<Double>(3, 3, 0);
            sensor = new Sensor();

            // Initial belief distribution over the 9 cells, row-major.
            // NOTE(review): these values sum to 1.088, not 1.0 — the leading
            // 0.311 looks like a typo for 0.111; confirm intended distribution.
            // NOTE(review): a primitive double[] is passed here, while the
            // sibling MDPPolicyIteration block passes Double[] to setContent —
            // verify State.setContent accepts both (overload or varargs?).
            belief.setContent(new double[] { 0.311, 0.111, 0.111, 0.111, 0.111, 0.111, 0.111, 0.111, 0 });
        }
        /**
         * Builds a 3x3 grid-world MDP and solves it with policy iteration
         * (modified policy evaluation, at most 1000 sweeps per evaluation,
         * convergence threshold 1e-5). The fields {@code S}, {@code mdp},
         * {@code pi} and {@code policy} are declared on the enclosing class.
         */
        public MDPPolicyIteration()
        {
            S = new State<Double>(3, 3, 0);
            // BUG FIX: the original initializer used int literals
            // ({ 1, 0, -1, ... }), which do not autobox to Double in Java
            // (int boxes to Integer only), so the array initializer fails to
            // compile. Use double literals instead; the values are unchanged.
            Double[] val = { 1.0, 0.0, -1.0, 2.0, -1.0, -2.0, 3.0, -2.0, -3.0 };

            S.setContent(val);

            // MDP over the grid cells, with the centre cell (1, 1) as the
            // initial state.
            mdp = new MDP(S.getCells(), S.getCellAt(1, 1), new ActionsFunction(S),
                    new TransitionProbabilityFunction(S),
                    new RewardFunction());

            // Convergence threshold for iterative policy evaluation.
            double epsilon = 0.00001;

            // Each candidate policy is evaluated with at most 1000 sweeps or
            // until the value change drops below epsilon.
            PolicyEvaluation pev = new PolicyEvaluation(1000, epsilon);

            pi = new PolicyIteration(pev);

            // Solve: policy maps each state to its optimal action.
            policy = pi.policyIteration(mdp);
        }