/// <summary>
/// Performs one sweep of value iteration over every non-final state and
/// reports how much the utilities moved.
/// </summary>
/// <param name="gamma">Discount factor forwarded to valueIterateOnceForGivenState.</param>
/// <param name="presentUtilityFunction">Utilities from the previous sweep; only read, never modified here.</param>
/// <returns>
/// A pair holding the freshly built utility function and the largest absolute
/// change in utility observed for any non-final state during this sweep.
/// </returns>
public Pair<MDPUtilityFunction<STATE_TYPE>, Double> valueIterateOnce(
    double gamma, MDPUtilityFunction<STATE_TYPE> presentUtilityFunction)
{
    double maxUtilityGrowth = 0.0;
    MDPUtilityFunction<STATE_TYPE> newUtilityFunction = new MDPUtilityFunction<STATE_TYPE>();

    foreach (STATE_TYPE s in nonFinalstates)
    {
        double utility = valueIterateOnceForGivenState(gamma, presentUtilityFunction, s);

        double differenceInUtility = Math.Abs(utility - presentUtilityFunction.getUtility(s));
        if (differenceInUtility > maxUtilityGrowth)
        {
            maxUtilityGrowth = differenceInUtility;
        }
        newUtilityFunction.setUtility(s, utility);
    }

    // Terminal states keep the utility they already had. This copy does not
    // depend on the state being swept, so it runs once after the sweep instead
    // of once per non-final state. Running it last preserves the previous
    // outcome that a terminal state's carried-over value is the one that
    // sticks, and it now also happens when there are no non-final states.
    foreach (STATE_TYPE state in terminalStates)
    {
        newUtilityFunction.setUtility(state, presentUtilityFunction.getUtility(state));
    }

    return new Pair<MDPUtilityFunction<STATE_TYPE>, Double>(newUtilityFunction, maxUtilityGrowth);
}
/// <summary>
/// Runs value iteration until the largest per-state utility change in a sweep
/// is no greater than the bound error * (1 - gamma) / gamma.
/// </summary>
/// <param name="gamma">
/// Discount factor. NOTE(review): the bound assumes 0 &lt; gamma &lt;= 1; with
/// gamma == 1 the bound is 0, so the loop ends only once a sweep changes nothing.
/// </param>
/// <param name="error">Maximum error tolerated in the utility of any state.</param>
/// <param name="delta">
/// Ignored on entry: it is reset to 0 at the start of every sweep and used as
/// the running maximum change. Kept only so the signature stays compatible.
/// </param>
/// <returns>The utility function as it stood at the start of the final sweep.</returns>
public MDPUtilityFunction<STATE_TYPE> valueIteration(double gamma, double error, double delta)
{
    MDPUtilityFunction<STATE_TYPE> U = initialUtilityFunction();
    MDPUtilityFunction<STATE_TYPE> U_dash = initialUtilityFunction();

    // Convergence bound from the standard VALUE-ITERATION algorithm:
    // stop once the maximum change drops to epsilon * (1 - gamma) / gamma.
    double convergenceThreshold = (error * (1 - gamma)) / gamma;

    do
    {
        U = U_dash.copy();
        delta = 0.0;

        foreach (STATE_TYPE s in nonFinalstates)
        {
            Pair<ACTION_TYPE, Double> highestUtilityTransition =
                transitionModel.getTransitionWithMaximumExpectedUtility(s, U);
            double utility = rewardFunction.getRewardFor(s)
                + (gamma * highestUtilityTransition.getSecond());
            U_dash.setUtility(s, utility);

            // Track the largest change any state saw in this sweep.
            double change = Math.Abs(U_dash.getUtility(s) - U.getUtility(s));
            if (change > delta)
            {
                delta = change;
            }
        }

        // Keep sweeping while utilities are still moving by more than the
        // bound. The earlier test (delta < bound) was inverted: it looped
        // forever once converged and quit after one sweep otherwise.
    } while (delta > convergenceThreshold);

    return U;
}
/// <summary>
/// Builds a utility function in which every state known to stateToReward is
/// assigned the value returned by getRewardFor for that state.
/// </summary>
/// <returns>A new utility function populated from the rewards.</returns>
public MDPUtilityFunction<STATE_TYPE> asUtilityFunction()
{
    MDPUtilityFunction<STATE_TYPE> rewardsAsUtilities = new MDPUtilityFunction<STATE_TYPE>();

    foreach (STATE_TYPE rewardedState in stateToReward.Keys)
    {
        rewardsAsUtilities.setUtility(rewardedState, getRewardFor(rewardedState));
    }

    return rewardsAsUtilities;
}
/// <summary>
/// Creates a new utility function holding the same state-to-utility entries
/// as this one, so the result can be updated without touching this instance's
/// table.
/// </summary>
/// <returns>A new utility function with every entry of hash copied over.</returns>
public MDPUtilityFunction<STATE_TYPE> copy()
{
    MDPUtilityFunction<STATE_TYPE> duplicate = new MDPUtilityFunction<STATE_TYPE>();

    foreach (STATE_TYPE key in hash.Keys)
    {
        duplicate.setUtility(key, hash[key]);
    }

    return duplicate;
}
/// <summary>
/// Performs one utility-update sweep over the non-final states using the
/// transition chosen via the supplied policy, reading from U and writing the
/// results into a copy of it.
/// </summary>
/// <param name="gamma">Discount factor applied to the transition's expected utility.</param>
/// <param name="pi">Policy passed to the transition model when choosing the transition.</param>
/// <param name="U">Utilities to read from; not written by this method (updates go to a copy).</param>
/// <returns>A copy of U with the non-final states' utilities replaced.</returns>
private MDPUtilityFunction<STATE_TYPE> valueIterateOnceWith(
    double gamma, MDPPolicy<STATE_TYPE, ACTION_TYPE> pi, MDPUtilityFunction<STATE_TYPE> U)
{
    MDPUtilityFunction<STATE_TYPE> updated = U.copy();

    foreach (STATE_TYPE current in nonFinalstates)
    {
        Pair<ACTION_TYPE, Double> policyTransition =
            transitionModel.getTransitionWithMaximumExpectedUtilityUsingPolicy(pi, current, U);

        // Reward for the state plus the discounted utility of the transition.
        double refreshed = rewardFunction.getRewardFor(current)
            + (gamma * policyTransition.getSecond());

        updated.setUtility(current, refreshed);
    }

    return updated;
}