/// <summary>
/// Performs one sweep of value iteration: computes a new utility for every
/// state in <c>nonFinalstates</c> and tracks the largest absolute change
/// relative to <paramref name="presentUtilityFunction"/>.
/// </summary>
/// <param name="gamma">Discount factor forwarded to the per-state update.</param>
/// <param name="presentUtilityFunction">Utilities from the previous sweep, used as the baseline.</param>
/// <returns>
/// A pair whose first element is the newly built utility function and whose
/// second element is the maximum absolute utility change seen in this sweep
/// (0.0 when no non-final state changed or none exist).
/// </returns>
public Pair<MDPUtilityFunction<TState>, Double> ValueIterateOnce(
    double gamma, MDPUtilityFunction<TState> presentUtilityFunction)
{
    var maxUtilityGrowth = 0.0;
    var newUtilityFunction = new MDPUtilityFunction<TState>();

    foreach (TState s in nonFinalstates)
    {
        var utility = this.ValueIterateOnceForGivenState(gamma, presentUtilityFunction, s);
        var differenceInUtility = Math.Abs(utility - presentUtilityFunction.GetUtility(s));
        if (differenceInUtility > maxUtilityGrowth)
        {
            maxUtilityGrowth = differenceInUtility;
        }

        newUtilityFunction.SetUtility(s, utility);
    }

    // Terminal states keep their previous utility. This copy runs once, after
    // the sweep, rather than once per non-final state: the result is the same
    // whenever at least one non-final state exists, and terminal utilities are
    // no longer lost when there are none.
    foreach (var state in terminalStates)
    {
        newUtilityFunction.SetUtility(state, presentUtilityFunction.GetUtility(state));
    }

    return new Pair<MDPUtilityFunction<TState>, Double>(newUtilityFunction, maxUtilityGrowth);
}
/// <summary>
/// Accumulates, per action, the probability-weighted utility of the
/// destination states of the supplied transitions.
/// </summary>
/// <param name="transitions">Transitions to aggregate; each supplies an initial state, an action and a destination state.</param>
/// <param name="uf">Utility function queried for each destination state.</param>
/// <returns>
/// A dictionary mapping every action that appears in <paramref name="transitions"/>
/// to the sum of (transition probability * destination utility) over its transitions.
/// </returns>
private Dictionary<TAction, Double> GetExpectedUtilityForSelectedTransitions(
    IList<MDPTransition<TState, TAction>> transitions, MDPUtilityFunction<TState> uf)
{
    var actionsToUtilities = new Dictionary<TAction, double>();

    foreach (var triplet in transitions)
    {
        var s = triplet.GetInitialState();
        var action = triplet.GetAction();
        var destinationState = triplet.GetDestinationState();
        var probabilityOfTransition = this.GetTransitionProbability(s, action, destinationState);
        var expectedUtility = probabilityOfTransition * uf.GetUtility(destinationState);

        // A single TryGetValue replaces ContainsKey followed by an indexer read.
        double accumulated;
        if (actionsToUtilities.TryGetValue(action, out accumulated))
        {
            actionsToUtilities[action] = accumulated + expectedUtility;
        }
        else
        {
            actionsToUtilities[action] = expectedUtility;
        }
    }

    return actionsToUtilities;
}
/// <summary>
/// Builds a new utility function containing the same state-to-utility
/// entries as this one.
/// </summary>
/// <returns>A separate <c>MDPUtilityFunction</c> populated from this instance's entries.</returns>
public MDPUtilityFunction<TState> Copy()
{
    var clone = new MDPUtilityFunction<TState>();

    foreach (TState key in hash.Keys)
    {
        var storedUtility = hash[key];
        clone.SetUtility(key, storedUtility);
    }

    return clone;
}
/// <summary>
/// Converts this reward mapping into a utility function in which each known
/// state's utility is the value returned by <c>GetRewardFor</c> for that state.
/// </summary>
/// <returns>A new <c>MDPUtilityFunction</c> with one entry per key of <c>stateToReward</c>.</returns>
public MDPUtilityFunction<TState> AsUtilityFunction()
{
    var result = new MDPUtilityFunction<TState>();

    foreach (TState rewardedState in stateToReward.Keys)
    {
        var reward = this.GetRewardFor(rewardedState);
        result.SetUtility(rewardedState, reward);
    }

    return result;
}
/// <summary>
/// Computes the updated utility of a single state: its reward plus the
/// discounted value of the best transition reported by the transition model.
/// </summary>
/// <param name="gamma">Discount factor applied to the best transition's value.</param>
/// <param name="presentUtilityFunction">Utilities handed to the transition model when ranking transitions.</param>
/// <param name="state">State whose utility is being recomputed.</param>
/// <returns>reward(state) + gamma * (second element of the best transition pair).</returns>
private double ValueIterateOnceForGivenState(double gamma,
    MDPUtilityFunction<TState> presentUtilityFunction, TState state)
{
    var bestTransition =
        transitionModel.GetTransitionWithMaximumExpectedUtility(state, presentUtilityFunction);

    return rewardFunction.GetRewardFor(state) + (gamma * bestTransition.GetSecond());
}
/// <summary>
/// Repeatedly applies the policy-constrained update to a copy of
/// <paramref name="u"/> and returns the result.
/// </summary>
/// <param name="pi">Policy forwarded to each update step.</param>
/// <param name="u">Starting utilities; a copy is taken before any update is applied.</param>
/// <param name="gamma">Discount factor forwarded to each update step.</param>
/// <param name="iterations">Number of update steps; zero or negative values perform none.</param>
/// <returns>The utility function obtained after the requested number of update steps.</returns>
public MDPUtilityFunction<TState> PolicyEvaluation(
    MDPPolicy<TState, TAction> pi, MDPUtilityFunction<TState> u, double gamma, int iterations)
{
    var current = u.Copy();

    for (var remaining = iterations; remaining > 0; remaining--)
    {
        current = this.ValueIterateOnceWith(gamma, pi, current);
    }

    return current;
}
/// <summary>
/// Finds the action leaving <paramref name="s"/> with the greatest expected
/// utility under <paramref name="uf"/>.
/// </summary>
/// <param name="s">State whose outgoing transitions are examined.</param>
/// <param name="uf">Utility function used to weigh destination states.</param>
/// <returns>
/// The pair produced by <c>GetActionWithMaximumUtility</c> for the transitions
/// starting at <paramref name="s"/>, or a (null, 0.0) pair when
/// <paramref name="s"/> is terminal.
/// </returns>
public Pair<TAction, double> GetTransitionWithMaximumExpectedUtility(
    TState s, MDPUtilityFunction<TState> uf)
{
    // Terminal states have nothing to choose from.
    if (IsTerminal(s))
    {
        return new Pair<TAction, double>(null, 0.0);
    }

    var outgoing = this.GetTransitionsStartingWith(s);
    Dictionary<TAction, double> utilityByAction =
        GetExpectedUtilityForSelectedTransitions(outgoing, uf);

    return GetActionWithMaximumUtility(utilityByAction);
}
/// <summary>
/// Evaluates the expected utility of the action that <paramref name="policy"/>
/// selects for <paramref name="s"/>, using <paramref name="uf"/> for destination utilities.
/// </summary>
/// <param name="policy">Policy queried for the action to take in <paramref name="s"/>.</param>
/// <param name="s">State being evaluated.</param>
/// <param name="uf">Utility function used to weigh destination states.</param>
/// <returns>
/// The pair produced by <c>GetActionWithMaximumUtility</c> over the transitions
/// for the policy's action, or a (null, 0.0) pair when <paramref name="s"/> is terminal.
/// </returns>
public Pair<TAction, double> GetTransitionWithMaximumExpectedUtilityUsingPolicy(
    MDPPolicy<TState, TAction> policy, TState s, MDPUtilityFunction<TState> uf)
{
    // Terminal states are short-circuited before the policy is consulted.
    if (IsTerminal(s))
    {
        return new Pair<TAction, double>(null, 0.0);
    }

    var policyTransitions =
        this.GetTransitionsWithStartingStateAndAction(s, policy.GetAction(s));
    Dictionary<TAction, double> utilityByAction =
        GetExpectedUtilityForSelectedTransitions(policyTransitions, uf);

    return this.GetActionWithMaximumUtility(utilityByAction);
}
/// <summary>
/// Performs one policy-constrained update sweep: for each state in
/// <c>nonFinalstates</c>, sets its utility to reward + gamma * (value of the
/// policy's transition evaluated against <paramref name="U"/>).
/// </summary>
/// <param name="gamma">Discount factor applied to the policy transition's value.</param>
/// <param name="pi">Policy that determines which action is evaluated in each state.</param>
/// <param name="U">Utilities used to evaluate every state; updates go to a copy instead.</param>
/// <returns>A copy of <paramref name="U"/> with the non-final states' utilities replaced.</returns>
private MDPUtilityFunction<TState> ValueIterateOnceWith(double gamma,
    MDPPolicy<TState, TAction> pi, MDPUtilityFunction<TState> U)
{
    var updated = U.Copy();

    foreach (var current in this.nonFinalstates)
    {
        // Every state is evaluated against U, never against the partially updated copy.
        var policyTransition =
            this.transitionModel.GetTransitionWithMaximumExpectedUtilityUsingPolicy(pi, current, U);

        updated.SetUtility(
            current,
            rewardFunction.GetRewardFor(current) + (gamma * policyTransition.GetSecond()));
    }

    return updated;
}