/**
 * Builds a small 3x3 example problem and immediately solves it with policy iteration.
 *
 * <p>The constructor creates the grid {@code S}, fills it with nine values, wraps it in an
 * {@code MDP} together with its actions, transition-probability and reward functions, and
 * stores the result of {@code pi.policyIteration(mdp)} in {@code policy}.
 */
public MDPPolicyIteration() {
    // NOTE(review): if State's third constructor parameter is the generic cell type (Double here),
    // the int literal 0 will not box to Double and should be 0.0 — confirm against State.
    S = new State<Double>(3, 3, 0);

    // The elements must be double literals: an int literal can only be boxed to Integer, so
    // "{ 1, 0, -1, ... }" is rejected by the compiler as an initializer for a Double[].
    Double[] val = { 1.0, 0.0, -1.0, 2.0, -1.0, -2.0, 3.0, -2.0, -3.0 };
    S.setContent(val);

    // The cell at (1, 1) is passed as the MDP's second argument — presumably the initial
    // state; verify against the MDP constructor.
    mdp = new MDP(
            S.getCells(),
            S.getCellAt(1, 1),
            new ActionsFunction(S),
            new TransitionProbabilityFunction(S),
            new RewardFunction());

    // Presumably 1000 is an iteration limit and epsilon a convergence tolerance for the
    // policy-evaluation step — confirm against PolicyEvaluation.
    double epsilon = 0.00001;
    PolicyEvaluation pev = new PolicyEvaluation(1000, epsilon);

    pi = new PolicyIteration(pev);
    policy = pi.policyIteration(mdp);
}
/**
 * Creates a policy-iteration solver backed by the supplied policy-evaluation step.
 *
 * @param policyEvaluation the evaluation component stored in this instance's
 *                         {@code policyEvaluation} field; it is kept as given, without copying
 *                         or validation
 */
public PolicyIteration(PolicyEvaluation policyEvaluation) {
    this.policyEvaluation = policyEvaluation;
}