Пример #1
0
        /// <summary>
        /// Repeatedly applies <c>ValueIterateOnceWith</c> under the policy
        /// <paramref name="pi"/>, starting from a copy of <paramref name="u"/>.
        /// </summary>
        /// <param name="pi">Policy forwarded to every update pass.</param>
        /// <param name="u">Starting utility function; only its copy is passed on to the update passes.</param>
        /// <param name="gamma">Factor forwarded to every update pass.</param>
        /// <param name="iterations">
        /// Number of update passes to run. When zero or negative, no pass runs
        /// and the copy of <paramref name="u"/> is returned as-is.
        /// </param>
        /// <returns>The utility function produced by the last pass.</returns>
        public MDPUtilityFunction <TState> PolicyEvaluation(
            MDPPolicy <TState, TAction> pi,
            MDPUtilityFunction <TState> u, double gamma, int iterations)
        {
            MDPUtilityFunction <TState> current = u.Copy();

            // Count down instead of up: the pass index itself is never used.
            int remaining = iterations;
            while (remaining > 0)
            {
                current = this.ValueIterateOnceWith(gamma, pi, current);
                remaining--;
            }

            return current;
        }
Пример #2
0
        /// <summary>
        /// Performs one update pass over every state in <c>nonFinalstates</c>.
        /// Each state's new utility is its reward plus <paramref name="gamma"/>
        /// times the second component of the transition chosen by the
        /// transition model for <paramref name="pi"/>.
        /// </summary>
        /// <param name="gamma">Multiplier applied to the transition's second component.</param>
        /// <param name="pi">Policy handed to the transition model when it selects a transition.</param>
        /// <param name="U">
        /// Utilities read during the pass. All lookups use this instance;
        /// writes go to a copy (assumes <c>Copy()</c> returns an independent
        /// instance — TODO confirm).
        /// </param>
        /// <returns>A copy of <paramref name="U"/> with the visited states' utilities replaced.</returns>
        private MDPUtilityFunction <TState> ValueIterateOnceWith(double gamma,
                                                                 MDPPolicy <TState, TAction> pi,
                                                                 MDPUtilityFunction <TState> U)
        {
            var updated = U.Copy();

            foreach (var state in this.nonFinalstates)
            {
                // The transition is always selected against the incoming U, not
                // against the partially updated copy.
                var chosenTransition = this.transitionModel
                    .GetTransitionWithMaximumExpectedUtilityUsingPolicy(pi, state, U);

                // Reward is fetched before the transition's value is read, in
                // the same order as the original single expression.
                double immediateReward = rewardFunction.GetRewardFor(state);
                double discountedValue = gamma * chosenTransition.GetSecond();

                updated.SetUtility(state, immediateReward + discountedValue);
            }

            return updated;
        }