Пример #1
0
        /// <summary>
        /// Builds a copy of the current utility function in which the utility of
        /// <c>PreviousState</c> has been adjusted using the previous reward and the
        /// discounted utility of <c>CurrentState</c>.
        /// </summary>
        /// <remarks>
        /// The new value is
        /// <c>U(prev) + w * (previousReward + gamma * U(cur) - U(prev))</c>,
        /// where <c>w</c> is <c>stateCount.ProbabilityOf(PreviousState)</c>.
        /// The field <c>utilityFunction</c> itself is not assigned here; only the
        /// copy receives the new value.
        /// </remarks>
        /// <param name="gamma">Factor multiplied with the utility of <c>CurrentState</c>.</param>
        /// <returns>The copied utility function carrying the adjusted entry.</returns>
        private MDPUtilityFunction <TState> UpdateUtilityFunction(double gamma)
        {
            MDPUtilityFunction<TState> updated = utilityFunction.Copy();

            // All reads go against the existing function, not the copy.
            double previousUtility = utilityFunction.GetUtility(PreviousState);
            double utilityDelta =
                (gamma * utilityFunction.GetUtility(CurrentState))
                - utilityFunction.GetUtility(PreviousState);

            // Weight applied to the correction term, taken from the state counter.
            double weight = stateCount.ProbabilityOf(PreviousState);
            double correction = weight * (previousReward + utilityDelta);

            updated.SetUtility(PreviousState, previousUtility + correction);
            return updated;
        }
Пример #2
0
        /// <summary>
        /// Builds a copy of the current utility function and, when at least one
        /// transition is supplied, overwrites the utility of the first transition's
        /// initial state with
        /// <c>reward + gamma * sum(P(transition) * U(destination))</c>.
        /// </summary>
        /// <remarks>
        /// Only the initial state of <c>validTransitions[0]</c> is used as the state
        /// to update; presumably every supplied transition starts from that same
        /// state (confirm against the caller). Destination utilities are read from
        /// the existing <c>utilityFunction</c>, not from the copy.
        /// </remarks>
        /// <param name="validTransitions">Transitions whose weighted destination utilities are summed.</param>
        /// <param name="gamma">Factor multiplied with the summed, weighted utilities.</param>
        /// <returns>
        /// The copied utility function; it is returned without any change when
        /// <paramref name="validTransitions"/> is empty.
        /// </returns>
        private MDPUtilityFunction <TState> ValueDetermination(
            IList <MDPTransition <TState, TAction> > validTransitions,
            double gamma)
        {
            MDPUtilityFunction<TState> result = utilityFunction.Copy();

            // Nothing to evaluate: hand back the plain copy.
            if (validTransitions.Count <= 0)
            {
                return result;
            }

            TState source = validTransitions[0].GetInitialState();
            double sourceReward = MDP.GetRewardFor(source);

            // Accumulate probability-weighted utilities of the destination states.
            double expectedUtility = 0.0;
            foreach (MDPTransition<TState, TAction> step in validTransitions)
            {
                expectedUtility +=
                    MDP.GetTransitionProbability(step)
                    * utilityFunction.GetUtility(step.GetDestinationState());
            }

            result.SetUtility(source, sourceReward + (gamma * expectedUtility));
            return result;
        }