/// <summary>
/// Performs one value-iteration sweep: computes a fresh utility for every
/// state in <c>nonFinalstates</c> and carries the utilities of every state in
/// <c>terminalStates</c> over unchanged from the supplied utility function.
/// </summary>
/// <param name="gamma">
/// Forwarded unchanged to <c>ValueIterateOnceForGivenState</c>
/// (presumably the discount factor; confirm against that method).
/// </param>
/// <param name="presentUtilityFunction">
/// Utility function from the previous sweep. It is only read through
/// <c>GetUtility</c> here.
/// </param>
/// <returns>
/// A pair holding the newly built utility function and the largest absolute
/// change in utility observed across the non-final states (0.0 when there
/// are none).
/// </returns>
public Pair<MDPUtilityFunction<TState>, Double> ValueIterateOnce(
    double gamma,
    MDPUtilityFunction<TState> presentUtilityFunction)
{
    var maxUtilityGrowth = 0.0;
    var newUtilityFunction = new MDPUtilityFunction<TState>();

    foreach (TState s in nonFinalstates)
    {
        var utility = this.ValueIterateOnceForGivenState(gamma, presentUtilityFunction, s);
        var differenceInUtility = Math.Abs(utility - presentUtilityFunction.GetUtility(s));

        // Explicit comparison (rather than Math.Max) so a NaN difference
        // never replaces the running maximum.
        if (differenceInUtility > maxUtilityGrowth)
        {
            maxUtilityGrowth = differenceInUtility;
        }

        newUtilityFunction.SetUtility(s, utility);
    }

    // Copy terminal-state utilities exactly once, after the sweep. Doing it
    // here (instead of inside the loop above) avoids repeating identical work
    // per non-final state and still copies them when there are no non-final
    // states at all. Running last keeps terminal values taking precedence if
    // a state were ever listed in both collections.
    foreach (var state in terminalStates)
    {
        newUtilityFunction.SetUtility(state, presentUtilityFunction.GetUtility(state));
    }

    return new Pair<MDPUtilityFunction<TState>, Double>(newUtilityFunction, maxUtilityGrowth);
}
/// <summary>
/// Accumulates, per action, the sum of
/// (transition probability × utility of the destination state) over the
/// supplied transitions.
/// </summary>
/// <param name="transitions">
/// Transitions to aggregate. Each one supplies the initial state, action and
/// destination state used to look up its probability.
/// </param>
/// <param name="uf">Utility function queried for each destination state.</param>
/// <returns>
/// A dictionary keyed by every action that occurs in
/// <paramref name="transitions"/>, mapped to its accumulated expected
/// utility. Empty when no transitions are supplied.
/// </returns>
private Dictionary<TAction, Double> GetExpectedUtilityForSelectedTransitions(
    IList<MDPTransition<TState, TAction>> transitions,
    MDPUtilityFunction<TState> uf)
{
    var actionsToUtilities = new Dictionary<TAction, double>();

    foreach (var transition in transitions)
    {
        var s = transition.GetInitialState();
        var action = transition.GetAction();
        var destinationState = transition.GetDestinationState();

        var probabilityOfTransition = this.GetTransitionProbability(s, action, destinationState);
        var expectedUtility = probabilityOfTransition * uf.GetUtility(destinationState);

        // Single TryGetValue instead of ContainsKey followed by an indexer
        // read. The first contribution for an action is stored as-is, later
        // ones are added to the running total.
        double runningTotal;
        if (actionsToUtilities.TryGetValue(action, out runningTotal))
        {
            actionsToUtilities[action] = runningTotal + expectedUtility;
        }
        else
        {
            actionsToUtilities[action] = expectedUtility;
        }
    }

    return actionsToUtilities;
}