/// <summary>
/// Creates a fresh policy in which each state from <c>nonFinalstates</c> is mapped to
/// whatever action <c>transitionModel.RandomActionFor</c> returns for that state.
/// </summary>
/// <returns>The newly built policy, holding one entry per state in <c>nonFinalstates</c>.</returns>
public MDPPolicy<TState, TAction> RandomPolicy()
{
    var randomPolicy = new MDPPolicy<TState, TAction>();

    foreach (var state in nonFinalstates)
    {
        var chosenAction = transitionModel.RandomActionFor(state);
        randomPolicy.SetAction(state, chosenAction);
    }

    return randomPolicy;
}
/// <summary>
/// Evaluates the policy <paramref name="pi"/> by repeatedly applying
/// <c>ValueIterateOnceWith</c>, starting from a copy of <paramref name="u"/>.
/// </summary>
/// <param name="pi">Policy whose actions drive each update pass.</param>
/// <param name="u">Starting utility function; the passes start from <c>u.Copy()</c>.</param>
/// <param name="gamma">Factor forwarded unchanged to every update pass.</param>
/// <param name="iterations">Number of update passes; zero or a negative value performs none.</param>
/// <returns>
/// The utility function produced by the last pass, or the copy of <paramref name="u"/>
/// when no pass runs.
/// </returns>
public MDPUtilityFunction<TState> PolicyEvaluation(
    MDPPolicy<TState, TAction> pi,
    MDPUtilityFunction<TState> u,
    double gamma,
    int iterations)
{
    var current = u.Copy();

    // Count down instead of up: the number of passes is the same.
    var remaining = iterations;
    while (remaining > 0)
    {
        current = ValueIterateOnceWith(gamma, pi, current);
        remaining--;
    }

    return current;
}
/// <summary>
/// Takes the action that <paramref name="policy"/> assigns to state <paramref name="s"/>,
/// computes expected utilities for the transitions that start in <paramref name="s"/> with
/// that action (using <paramref name="uf"/>), and returns the action/utility pair selected
/// by <c>GetActionWithMaximumUtility</c>.
/// </summary>
/// <param name="policy">Policy queried for the action to take in <paramref name="s"/>.</param>
/// <param name="s">State whose outgoing transitions are examined.</param>
/// <param name="uf">Utility function passed to <c>GetExpectedUtilityForSelectedTransitions</c>.</param>
/// <returns>
/// For a state reported terminal by <c>IsTerminal</c>: a pair of <c>default(TAction)</c> and
/// <c>0.0</c>. Otherwise: the pair returned by <c>GetActionWithMaximumUtility</c>.
/// </returns>
public Pair<TAction, double> GetTransitionWithMaximumExpectedUtilityUsingPolicy(
    MDPPolicy<TState, TAction> policy,
    TState s,
    MDPUtilityFunction<TState> uf)
{
    if (IsTerminal(s))
    {
        // default(TAction) equals null for reference-type actions (the previous behavior)
        // and, unlike the null literal, is also valid when TAction is a value type.
        return new Pair<TAction, double>(default(TAction), 0.0);
    }

    var policyAction = policy.GetAction(s);
    var transitionsForPolicyAction = this.GetTransitionsWithStartingStateAndAction(s, policyAction);

    Dictionary<TAction, double> actionsToUtilities =
        GetExpectedUtilityForSelectedTransitions(transitionsForPolicyAction, uf);

    return this.GetActionWithMaximumUtility(actionsToUtilities);
}
/// <summary>
/// Performs one update pass over every state in <c>nonFinalstates</c> under the policy
/// <paramref name="pi"/>: each state's new utility is its reward plus
/// <paramref name="gamma"/> times the utility component of the pair returned by
/// <c>transitionModel.GetTransitionWithMaximumExpectedUtilityUsingPolicy</c>.
/// </summary>
/// <param name="gamma">Multiplier applied to the policy transition's utility.</param>
/// <param name="pi">Policy supplying the action considered for each state.</param>
/// <param name="U">Utility function the pass reads from; new values are set on <c>U.Copy()</c>.</param>
/// <returns>The object obtained from <c>U.Copy()</c>, with updated utilities for the visited states.</returns>
private MDPUtilityFunction<TState> ValueIterateOnceWith(
    double gamma,
    MDPPolicy<TState, TAction> pi,
    MDPUtilityFunction<TState> U)
{
    // New values go into the copy; every lookup inside the loop still reads from U.
    MDPUtilityFunction<TState> uDash = U.Copy();

    foreach (var s in this.nonFinalstates)
    {
        var highestPolicyTransition =
            this.transitionModel.GetTransitionWithMaximumExpectedUtilityUsingPolicy(pi, s, U);
        double utility = rewardFunction.GetRewardFor(s) + (gamma * highestPolicyTransition.GetSecond());
        uDash.SetUtility(s, utility);
    }

    // The return statement sits on its own line so that no line comment can swallow it.
    return uDash;
}