/// <summary>
/// Performs one value-iteration sweep: computes a fresh utility for every
/// state in <c>nonFinalstates</c> and carries the utilities of every state
/// in <c>terminalStates</c> over unchanged.
/// </summary>
/// <param name="gamma">
/// Value forwarded unchanged to <c>ValueIterateOnceForGivenState</c>
/// (presumably the discount factor - confirm against that method).
/// </param>
/// <param name="presentUtilityFunction">
/// Utility function of the previous sweep; it is only read through
/// <c>GetUtility</c> here.
/// </param>
/// <returns>
/// A pair whose first element is the newly built utility function and whose
/// second element is the largest absolute difference between a non-final
/// state's new and previous utility (0.0 when no difference exceeds zero).
/// </returns>
public Pair<MDPUtilityFunction<TState>, Double> ValueIterateOnce(
    double gamma, MDPUtilityFunction<TState> presentUtilityFunction)
{
    var maxUtilityGrowth = 0.0;
    var newUtilityFunction = new MDPUtilityFunction<TState>();

    foreach (TState s in nonFinalstates)
    {
        var utility = this.ValueIterateOnceForGivenState(gamma, presentUtilityFunction, s);
        var differenceInUtility = Math.Abs(utility - presentUtilityFunction.GetUtility(s));
        if (differenceInUtility > maxUtilityGrowth)
        {
            maxUtilityGrowth = differenceInUtility;
        }

        newUtilityFunction.SetUtility(s, utility);
    }

    // Terminal utilities do not depend on the sweep above, so copy them once
    // afterwards instead of once per non-final state. Doing it after the loop
    // keeps terminal values as the last write and also covers the case where
    // there are no non-final states at all.
    foreach (var state in terminalStates)
    {
        newUtilityFunction.SetUtility(state, presentUtilityFunction.GetUtility(state));
    }

    return new Pair<MDPUtilityFunction<TState>, Double>(newUtilityFunction, maxUtilityGrowth);
}
/// <summary>
/// Builds a new <c>MDPUtilityFunction</c> that holds the same utility for
/// every state currently stored in <c>hash</c>.
/// </summary>
/// <returns>The newly created utility function.</returns>
public MDPUtilityFunction<TState> Copy()
{
    var duplicate = new MDPUtilityFunction<TState>();

    foreach (TState key in hash.Keys)
    {
        // Go through SetUtility rather than touching the duplicate's storage directly.
        var storedUtility = hash[key];
        duplicate.SetUtility(key, storedUtility);
    }

    return duplicate;
}
/// <summary>
/// Builds an <c>MDPUtilityFunction</c> in which each state that is a key of
/// <c>stateToReward</c> is assigned the value <c>GetRewardFor</c> returns
/// for it.
/// </summary>
/// <returns>The newly created utility function.</returns>
public MDPUtilityFunction<TState> AsUtilityFunction()
{
    var utilities = new MDPUtilityFunction<TState>();

    foreach (TState rewardedState in stateToReward.Keys)
    {
        var reward = this.GetRewardFor(rewardedState);
        utilities.SetUtility(rewardedState, reward);
    }

    return utilities;
}
/// <summary>
/// Produces an updated copy of <paramref name="U"/> in which every state in
/// <c>nonFinalstates</c> is re-evaluated under the policy
/// <paramref name="pi"/>. <paramref name="U"/> itself is only read and
/// copied; all writes go to the copy.
/// </summary>
/// <param name="gamma">
/// Multiplier applied to the second element of the transition returned by
/// the transition model before it is added to the state's reward.
/// </param>
/// <param name="pi">Policy handed to the transition model for each state.</param>
/// <param name="U">Utility function the update is computed from.</param>
/// <returns>The copy of <paramref name="U"/> with the new utilities set.</returns>
private MDPUtilityFunction<TState> ValueIterateOnceWith(
    double gamma, MDPPolicy<TState, TAction> pi, MDPUtilityFunction<TState> U)
{
    MDPUtilityFunction<TState> updated = U.Copy();

    foreach (var s in this.nonFinalstates)
    {
        // Every lookup uses the untouched U, never the partially updated copy.
        var policyTransition =
            this.transitionModel.GetTransitionWithMaximumExpectedUtilityUsingPolicy(pi, s, U);

        var immediateReward = rewardFunction.GetRewardFor(s);
        var scaledTransitionValue = gamma * policyTransition.GetSecond();
        double utility = immediateReward + scaledTransitionValue;

        updated.SetUtility(s, utility);
    }

    return updated;
}