예제 #1
0
        //
        // PRIVATE METHODS
        //

        /**
         * Builds an updated utility function for the most recent transition.
         *
         * <p>Starting from {@code utilityFunction.copy()}, the utility stored for
         * {@code previousState} is set to
         * {@code U(prev) + w * (previousReward + (gamma * U(curr) - U(prev)))},
         * where {@code U} is read from the current {@code utilityFunction} and
         * {@code w} is the value of {@code stateCount.probabilityOf(previousState)}.
         *
         * @param gamma factor multiplied with the utility of {@code currentState}
         * @return the copy carrying the adjusted utility for {@code previousState}
         */
        private MDPUtilityFunction <STATE_TYPE> updateUtilityFunction(double gamma)
        {
            final MDPUtilityFunction <STATE_TYPE> updated = utilityFunction.copy();
            final double oldUtility = utilityFunction.getUtility(previousState);

            final double scaledCurrent = gamma * utilityFunction.getUtility(currentState);
            // Looked up again (instead of reusing oldUtility) so the sequence of
            // getUtility calls is exactly the same as before.
            final double difference = scaledCurrent - utilityFunction.getUtility(previousState);

            // Weight of the correction, taken from the state counter.
            final double weight     = stateCount.probabilityOf(previousState);
            final double correction = weight * (previousReward + difference);

            updated.setUtility(previousState, oldUtility + correction);
            return updated;
        }