/// <summary>
/// Produces an updated copy of <c>utilityFunction</c> in which only the utility of
/// <c>PreviousState</c> is changed. The new value is the old utility plus a weighted
/// correction built from <c>previousReward</c>, the utility of <c>CurrentState</c>
/// scaled by <paramref name="gamma"/>, and the old utility of <c>PreviousState</c>.
/// </summary>
/// <param name="gamma">
/// Multiplier applied to the utility of <c>CurrentState</c> before the utility of
/// <c>PreviousState</c> is subtracted from it.
/// </param>
/// <returns>
/// The object obtained from <c>utilityFunction.Copy()</c>, after the recomputed
/// utility for <c>PreviousState</c> has been stored in it.
/// </returns>
/// <remarks>
/// NOTE(review): the expression has the shape of a temporal-difference update with
/// <c>stateCount.ProbabilityOf(PreviousState)</c> acting as the step size - confirm
/// against the enclosing agent's intent.
/// </remarks>
private MDPUtilityFunction<TState> UpdateUtilityFunction(double gamma)
{
    // All reads below go through the existing utilityFunction; only the copy is written to.
    MDPUtilityFunction<TState> updated = utilityFunction.Copy();

    double previousUtility = utilityFunction.GetUtility(PreviousState);

    // gamma * U(current) - U(previous)
    // NOTE(review): the utility of PreviousState is fetched a second time here; it could
    // presumably reuse previousUtility if GetUtility is free of side effects - confirm.
    double scaledCurrentUtility = gamma * utilityFunction.GetUtility(CurrentState);
    double scaledDifference = scaledCurrentUtility - utilityFunction.GetUtility(PreviousState);

    // Weight taken from the state counter for the state being updated.
    double weight = stateCount.ProbabilityOf(PreviousState);
    double correction = weight * (previousReward + scaledDifference);

    updated.SetUtility(PreviousState, previousUtility + correction);
    return updated;
}