/// <summary>
/// Performs one sweep of value iteration over every state in
/// <c>nonFinalstates</c> and returns the resulting utility function together
/// with the largest absolute change in utility seen during the sweep.
/// </summary>
/// <param name="gamma">Value forwarded unchanged to
/// <c>valueIterateOnceForGivenState</c> for every non-final state.</param>
/// <param name="presentUtilityFunction">Utilities from the previous sweep;
/// read but not modified by this method.</param>
/// <returns>A pair whose first element is the freshly built utility function
/// and whose second element is the maximum absolute difference between the
/// new and the previous utility over the non-final states (0.0 when there
/// are none).</returns>
public Pair <MDPUtilityFunction <STATE_TYPE>, Double> valueIterateOnce(
    double gamma, MDPUtilityFunction <STATE_TYPE> presentUtilityFunction)
{
    double maxUtilityGrowth = 0.0;
    MDPUtilityFunction <STATE_TYPE> newUtilityFunction = new MDPUtilityFunction <STATE_TYPE>();

    foreach (STATE_TYPE s in nonFinalstates)
    {
        double utility = valueIterateOnceForGivenState(gamma, presentUtilityFunction, s);
        double differenceInUtility = Math.Abs(utility - presentUtilityFunction.getUtility(s));
        if (differenceInUtility > maxUtilityGrowth)
        {
            maxUtilityGrowth = differenceInUtility;
        }
        newUtilityFunction.setUtility(s, utility);
    }

    // Terminal states keep the utility they already had. This copy does not
    // depend on the state being updated above, so it is done once, after the
    // sweep, instead of once per non-final state. Running it last also means
    // the terminal value wins if a state appears in both collections, and the
    // copy still happens when there are no non-final states at all.
    foreach (STATE_TYPE state in terminalStates)
    {
        newUtilityFunction.setUtility(state, presentUtilityFunction.getUtility(state));
    }

    return(new Pair <MDPUtilityFunction <STATE_TYPE>, Double>(
               newUtilityFunction, maxUtilityGrowth));
}
/// <summary>
/// Runs value iteration over the states in <c>nonFinalstates</c> until the
/// largest utility change in a sweep no longer exceeds the convergence
/// threshold <c>error * (1 - gamma) / gamma</c>.
/// </summary>
/// <param name="gamma">Discount factor multiplied with the expected utility
/// of the best transition in each update.</param>
/// <param name="error">Error tolerance from which the convergence threshold
/// is derived.</param>
/// <param name="delta">Ignored on entry: the value is overwritten at the
/// start of every sweep and is used only as the running maximum change.
/// Kept so existing callers continue to compile.</param>
/// <returns>The utility function as copied at the start of the final sweep
/// (i.e. <c>U</c>, not the subsequently updated <c>U_dash</c>).</returns>
public MDPUtilityFunction <STATE_TYPE> valueIteration(double gamma, double error, double delta)
{
    MDPUtilityFunction <STATE_TYPE> U = initialUtilityFunction();
    MDPUtilityFunction <STATE_TYPE> U_dash = initialUtilityFunction();

    // Termination bound: iterate until delta drops to error * (1 - gamma) / gamma
    // or below. The earlier form (error * gamma / (1 - gamma)) was the
    // reciprocal factor and divided by zero for gamma == 1.
    // For gamma == 0 this evaluates to +Infinity (or NaN when error is 0);
    // either way the loop condition below is false and one sweep is performed.
    double delta_max = (error * (1 - gamma)) / gamma;

    do
    {
        U = U_dash.copy();
        delta = 0.0;
        foreach (STATE_TYPE s in nonFinalstates)
        {
            Pair <ACTION_TYPE, Double> highestUtilityTransition =
                transitionModel.getTransitionWithMaximumExpectedUtility(s, U);
            double utility = rewardFunction.getRewardFor(s)
                             + (gamma * highestUtilityTransition.getSecond());
            U_dash.setUtility(s, utility);

            // Track the largest change produced by this sweep.
            double change = Math.Abs(U_dash.getUtility(s) - U.getUtility(s));
            if (change > delta)
            {
                delta = change;
            }
        }
        // Keep sweeping only while utilities are still moving by more than
        // the threshold. The previous test (delta < delta_max) was inverted:
        // it stopped while changes were still large and never terminated
        // once the utilities had converged or when there were no non-final
        // states.
    } while (delta > delta_max);

    return(U);
}
/// <summary>
/// Sums, per action, the probability-weighted utility of the destination
/// state over the supplied transitions.
/// </summary>
/// <param name="transitions">Transitions to aggregate; each one contributes
/// <c>getTransitionProbability(initial, action, destination)</c> multiplied
/// by the utility of its destination state.</param>
/// <param name="uf">Utility function used to look up destination-state
/// utilities.</param>
/// <returns>A dictionary mapping each action that occurs in
/// <paramref name="transitions"/> to its accumulated expected utility; empty
/// when no transitions are supplied.</returns>
private Dictionary <ACTION_TYPE, Double> getExpectedUtilityForSelectedTransitions(
    List <MDPTransition <STATE_TYPE, ACTION_TYPE> > transitions,
    MDPUtilityFunction <STATE_TYPE> uf)
{
    Dictionary <ACTION_TYPE, Double> actionsToUtilities = new Dictionary <ACTION_TYPE, Double>();

    foreach (MDPTransition <STATE_TYPE, ACTION_TYPE> triplet in transitions)
    {
        STATE_TYPE s = triplet.getInitialState();
        ACTION_TYPE action = triplet.getAction();
        STATE_TYPE destinationState = triplet.getDestinationState();

        double probabilityOfTransition = getTransitionProbability(s, action, destinationState);
        double expectedUtility = probabilityOfTransition * uf.getUtility(destinationState);

        // Use TryGetValue rather than a Double.MinValue sentinel: the sentinel
        // misclassified a stored value equal to Double.MinValue as "absent"
        // and then threw on the duplicate Add. This also avoids a second
        // dictionary lookup for actions that are already present.
        double accumulated;
        if (actionsToUtilities.TryGetValue(action, out accumulated))
        {
            actionsToUtilities[action] = accumulated + expectedUtility;
        }
        else
        {
            actionsToUtilities.Add(action, expectedUtility);
        }
    }

    return(actionsToUtilities);
}