Example #1
0
        /// <summary>
        /// Builds a policy by alternating between evaluating the current policy
        /// and greedily improving it, stopping after the first pass over the
        /// non-final states in which no action was replaced.
        /// </summary>
        /// <param name="gamma">
        /// Forwarded unchanged to policyEvaluation (presumably the discount
        /// factor - confirm against that method).
        /// </param>
        /// <returns>The policy as it stands once an improvement pass changes nothing.</returns>
        public MDPPolicy <STATE_TYPE, ACTION_TYPE> policyIteration(double gamma)
        {
            // Starting points: the initial utility function and a random policy.
            MDPUtilityFunction <STATE_TYPE>     utilities = initialUtilityFunction();
            MDPPolicy <STATE_TYPE, ACTION_TYPE> policy    = randomPolicy();

            while (true)
            {
                bool improved = false;

                // Re-estimate utilities under the current policy. The literal 3 is
                // passed straight to policyEvaluation (presumably the number of
                // evaluation sweeps - TODO confirm).
                utilities = policyEvaluation(policy, utilities, gamma, 3);

                foreach (STATE_TYPE state in nonFinalstates)
                {
                    Pair <ACTION_TYPE, Double> bestOverall =
                        transitionModel.getTransitionWithMaximumExpectedUtility(state, utilities);
                    Pair <ACTION_TYPE, Double> bestUnderPolicy =
                        transitionModel.getTransitionWithMaximumExpectedUtilityUsingPolicy(policy, state, utilities);

                    // Switch only on a strict improvement; ties keep the current action.
                    if (bestOverall.getSecond() > bestUnderPolicy.getSecond())
                    {
                        policy.setAction(state, bestOverall.getFirst());
                        improved = true;
                    }
                }

                // A full pass without any replacement ends the iteration.
                if (!improved)
                {
                    break;
                }
            }

            return policy;
        }
Example #2
0
        /// <summary>
        /// Creates a new policy and, for every non-final state, assigns it the
        /// action returned by the transition model's randomActionFor.
        /// </summary>
        /// <returns>The newly created policy with one action set per non-final state.</returns>
        public MDPPolicy <STATE_TYPE, ACTION_TYPE> randomPolicy()
        {
            MDPPolicy <STATE_TYPE, ACTION_TYPE> result = new MDPPolicy <STATE_TYPE, ACTION_TYPE>();

            foreach (STATE_TYPE state in nonFinalstates)
            {
                // Ask the transition model for an action for this state, then record it.
                var chosen = transitionModel.randomActionFor(state);
                result.setAction(state, chosen);
            }

            return result;
        }