/// <summary>
/// Produces a copy of <c>utilityFunction</c> in which the utility of
/// <c>PreviousState</c> has been adjusted by a weighted correction term.
/// </summary>
/// <remarks>
/// The new value is
/// <c>U(prev) + w * (previousReward + gamma * U(curr) - U(prev))</c>,
/// where <c>w</c> is <c>stateCount.ProbabilityOf(PreviousState)</c> and all
/// utilities are read from the current (un-copied) <c>utilityFunction</c>.
/// </remarks>
/// <param name="gamma">Multiplier applied to the utility of <c>CurrentState</c>.</param>
/// <returns>
/// The object returned by <c>utilityFunction.Copy()</c> after its entry for
/// <c>PreviousState</c> has been set to the adjusted value.
/// </returns>
private MDPUtilityFunction<TState> UpdateUtilityFunction(double gamma)
{
    MDPUtilityFunction<TState> updated = utilityFunction.Copy();

    double previousUtility = utilityFunction.GetUtility(PreviousState);

    // Difference between the scaled utility of the current state and the
    // utility of the previous state (looked up again at this point).
    double scaledCurrentUtility = gamma * utilityFunction.GetUtility(CurrentState);
    double utilityDifference = scaledCurrentUtility - utilityFunction.GetUtility(PreviousState);

    // The correction is weighted by the value stateCount reports for PreviousState.
    double weight = stateCount.ProbabilityOf(PreviousState);
    double correction = weight * (previousReward + utilityDifference);

    updated.SetUtility(PreviousState, previousUtility + correction);
    return updated;
}
/// <summary>
/// Produces a copy of <c>utilityFunction</c> in which the utility of the initial
/// state of the supplied transitions has been recomputed from the reward of that
/// state and the probability-weighted utilities of the destination states.
/// </summary>
/// <remarks>
/// The initial state is taken from the first transition only. The new value is
/// <c>reward + gamma * sum(P(t) * U(destination(t)))</c> over every transition
/// <c>t</c> in <paramref name="validTransitions"/>, with utilities read from the
/// current (un-copied) <c>utilityFunction</c>.
/// </remarks>
/// <param name="validTransitions">
/// Transitions to aggregate; when empty, the copy is returned without changes.
/// </param>
/// <param name="gamma">Multiplier applied to the weighted sum of destination utilities.</param>
/// <returns>The object returned by <c>utilityFunction.Copy()</c>, possibly with one entry reassigned.</returns>
private MDPUtilityFunction<TState> ValueDetermination(
    IList<MDPTransition<TState, TAction>> validTransitions,
    double gamma)
{
    MDPUtilityFunction<TState> updated = utilityFunction.Copy();

    // Nothing to recompute without at least one transition.
    if (validTransitions.Count <= 0)
    {
        return updated;
    }

    TState origin = validTransitions[0].GetInitialState();
    double originReward = MDP.GetRewardFor(origin);

    // Accumulate probability * utility for each transition, in list order.
    double weightedUtilitySum = 0.0;
    foreach (MDPTransition<TState, TAction> transition in validTransitions)
    {
        weightedUtilitySum += MDP.GetTransitionProbability(transition)
            * this.utilityFunction.GetUtility(transition.GetDestinationState());
    }

    updated.SetUtility(origin, originReward + (gamma * weightedUtilitySum));
    return updated;
}