/// <summary>
        /// Builds an updated utility function in which only the utility of
        /// <c>PreviousState</c> is changed. The new value is
        /// <c>U(prev) + p * (previousReward + gamma * U(current) - U(prev))</c>,
        /// where <c>U</c> is read from <c>utilityFunction</c> and <c>p</c> is
        /// <c>stateCount.ProbabilityOf(PreviousState)</c>.
        /// </summary>
        /// <param name="gamma">Factor multiplied with the utility of <c>CurrentState</c>.</param>
        /// <returns>
        /// The object obtained from <c>utilityFunction.Copy()</c> after the new
        /// utility for <c>PreviousState</c> has been set on it.
        /// </returns>
        private MDPUtilityFunction <TState> UpdateUtilityFunction(double gamma)
        {
            // The update is written to the copy; this method never calls
            // SetUtility on utilityFunction itself.
            MDPUtilityFunction<TState> updated = utilityFunction.Copy();

            double previousUtility = utilityFunction.GetUtility(PreviousState);

            // gamma * U(current) - U(prev); U(prev) is looked up again here,
            // exactly as before, rather than reusing previousUtility.
            double discountedCurrent = gamma * utilityFunction.GetUtility(CurrentState);
            double discountedDelta   = discountedCurrent - utilityFunction.GetUtility(PreviousState);

            // Step size taken from the state counter for the previous state.
            double previousStateWeight = stateCount.ProbabilityOf(PreviousState);
            double correction          = previousStateWeight * (previousReward + discountedDelta);

            updated.SetUtility(PreviousState, previousUtility + correction);
            return updated;
        }