コード例 #1
0
        /// <summary>
        /// Builds the example state space and MDP, wires up a policy-iteration
        /// solver, and immediately computes a policy, storing each piece in the
        /// corresponding field (S, mdp, pi, policy).
        /// </summary>
        public MDPPolicyIteration()
        {
            // Nine values handed to the state container via setContent.
            // NOTE(review): presumably one value per cell of a 3x3 grid — confirm against State.
            Double[] cellValues = { 1, 0, -1, 2, -1, -2, 3, -2, -3 };

            S = new State<Double>(3, 3, 0);
            S.setContent(cellValues);

            // The cell at (1, 1) is passed as the second MDP argument
            // (presumably the initial state — confirm against the MDP constructor).
            mdp = new MDP(
                S.getCells(),
                S.getCellAt(1, 1),
                new ActionsFunction(S),
                new TransitionProbabilityFunction(S),
                new RewardFunction());

            // Evaluator arguments: 1000 and 0.00001
            // (presumably an iteration cap and a convergence tolerance — confirm against PolicyEvaluation).
            pi = new PolicyIteration(new PolicyEvaluation(1000, 0.00001));

            // Solve right away so a policy is available as soon as construction finishes.
            policy = pi.policyIteration(mdp);
        }
コード例 #2
0
 /// <summary>
 /// Runs policy iteration on the stored MDP and replaces the current
 /// <c>policy</c> field with the result.
 /// </summary>
 public void makePolicy()
 {
     this.policy = this.pi.policyIteration(this.mdp);
 }