/// <summary>
/// Builds an updated copy of the current utility function in which the
/// utility of one state is recomputed from the supplied transitions.
/// </summary>
/// <param name="validTransitions">
/// Transitions to evaluate. Only the first element is asked for its initial
/// state; presumably every transition starts from that same state (confirm
/// against the caller).
/// </param>
/// <param name="gamma">Factor applied to the probability-weighted utility sum.</param>
/// <returns>
/// A copy of <c>utilityFunction</c>. When <paramref name="validTransitions"/>
/// is empty the copy is returned unmodified; otherwise the entry for the first
/// transition's initial state is set to <c>reward + gamma * weightedSum</c>.
/// </returns>
private MDPUtilityFunction<TState> ValueDetermination(
    IList<MDPTransition<TState, TAction>> validTransitions,
    double gamma)
{
    MDPUtilityFunction<TState> updated = utilityFunction.Copy();

    // Nothing to evaluate: hand back the untouched copy.
    if (validTransitions.Count <= 0)
    {
        return updated;
    }

    TState sourceState = validTransitions[0].GetInitialState();
    double stateReward = MDP.GetRewardFor(sourceState);

    // Destination utilities are read from the current utility function,
    // not from the copy being built.
    double weightedUtility = validTransitions.Sum(
        t => MDP.GetTransitionProbability(t)
             * this.utilityFunction.GetUtility(t.GetDestinationState()));

    updated.SetUtility(sourceState, stateReward + (gamma * weightedUtility));
    return updated;
}
/// <summary>
/// Creates an agent for the given MDP. The agent's current state is set to
/// the MDP's initial state and its current reward to the reward the MDP
/// reports for that state.
/// </summary>
/// <param name="mdp">The decision process the agent operates on; must not be null.</param>
/// <exception cref="System.ArgumentNullException">
/// Thrown when <paramref name="mdp"/> is null.
/// </exception>
public MDPAgent(MDP<TState, TAction> mdp)
{
    // Fail fast with a descriptive exception; without this guard a null
    // argument is stored and then dereferenced, producing an opaque
    // NullReferenceException.
    if (mdp == null)
    {
        throw new System.ArgumentNullException("mdp");
    }

    this.MDP = mdp;
    this.CurrentState = mdp.GetInitialState();
    this.CurrentReward = mdp.GetRewardFor(this.CurrentState);
}