/// <summary>
/// Repeatedly applies a single policy-driven update step to a utility function.
/// </summary>
/// <param name="pi">Policy handed to every update step.</param>
/// <param name="u">Starting utility function; a copy is taken up front, and this method never assigns to it.</param>
/// <param name="gamma">Multiplier forwarded unchanged to every update step.</param>
/// <param name="iterations">Number of update steps to run; zero or a negative value runs none.</param>
/// <returns>
/// The utility function produced by the last update step, or a copy of
/// <paramref name="u"/> when no step is executed.
/// </returns>
public MDPUtilityFunction<TState> PolicyEvaluation(
    MDPPolicy<TState, TAction> pi,
    MDPUtilityFunction<TState> u,
    double gamma,
    int iterations)
{
    // Work on a copy so that a fresh object is returned even when no step runs.
    var current = u.Copy();

    int remaining = iterations;
    while (remaining > 0)
    {
        current = this.ValueIterateOnceWith(gamma, pi, current);
        remaining--;
    }

    return current;
}
/// <summary>
/// Performs one update sweep over every state in <c>nonFinalstates</c>.
/// </summary>
/// <remarks>
/// All look-ups during the sweep read from <paramref name="U"/>; the new values are
/// written to a separate copy, so updates made earlier in the sweep do not influence
/// later ones.
/// </remarks>
/// <param name="gamma">Factor multiplied with the value taken from the selected transition.</param>
/// <param name="pi">Policy passed to the transition model when selecting a transition.</param>
/// <param name="U">Utility function the sweep reads from; this method only calls <c>Copy</c> on it and passes it to the transition model.</param>
/// <returns>A copy of <paramref name="U"/> with the utility of each non-final state replaced.</returns>
private MDPUtilityFunction<TState> ValueIterateOnceWith(
    double gamma,
    MDPPolicy<TState, TAction> pi,
    MDPUtilityFunction<TState> U)
{
    MDPUtilityFunction<TState> updated = U.Copy();

    foreach (var state in this.nonFinalstates)
    {
        var policyTransition =
            this.transitionModel.GetTransitionWithMaximumExpectedUtilityUsingPolicy(pi, state, U);

        // GetSecond() is presumably the expected utility of the chosen transition -
        // TODO confirm against the transition model.
        double reward = rewardFunction.GetRewardFor(state);
        double discounted = gamma * policyTransition.GetSecond();

        updated.SetUtility(state, reward + discounted);
    }

    return updated;
}