Beispiel #1
0
        /// <summary>
        /// Performs one sweep over all non-final states, computing a new utility for
        /// each via <c>ValueIterateOnceForGivenState</c>, and carries the utilities of
        /// the terminal states over unchanged from <paramref name="presentUtilityFunction"/>.
        /// </summary>
        /// <param name="gamma">
        /// Passed through unchanged to <c>ValueIterateOnceForGivenState</c>
        /// (presumably the discount factor; confirm against that method).
        /// </param>
        /// <param name="presentUtilityFunction">
        /// Utilities from before this sweep; only queried through <c>GetUtility</c> here.
        /// </param>
        /// <returns>
        /// A pair holding the freshly built utility function and the largest absolute
        /// difference between a non-final state's new and previous utility
        /// (0.0 when there are no non-final states).
        /// </returns>
        public Pair <MDPUtilityFunction <TState>, Double> ValueIterateOnce(
            double gamma, MDPUtilityFunction <TState> presentUtilityFunction)
        {
            var maxUtilityGrowth   = 0.0;
            var newUtilityFunction = new MDPUtilityFunction <TState>();

            foreach (TState s in nonFinalstates)
            {
                var utility = this.ValueIterateOnceForGivenState(gamma,
                                                                 presentUtilityFunction, s);

                var differenceInUtility = Math.Abs(utility
                                                   - presentUtilityFunction.GetUtility(s));
                if (differenceInUtility > maxUtilityGrowth)
                {
                    maxUtilityGrowth = differenceInUtility;
                }
                newUtilityFunction.SetUtility(s, utility);
            }

            // Terminal utilities do not depend on the sweep above, so copy them exactly
            // once. Doing this after the loop (rather than once per non-final state)
            // avoids redundant writes and also covers the case of no non-final states.
            // Running it last keeps the original precedence: a state present in both
            // collections ends up with its carried-over utility.
            foreach (var state in terminalStates)
            {
                newUtilityFunction.SetUtility(state, presentUtilityFunction
                                              .GetUtility(state));
            }

            return(new Pair <MDPUtilityFunction <TState>, Double>(
                       newUtilityFunction, maxUtilityGrowth));
        }
Beispiel #2
0
        /// <summary>
        /// Sums, per action, the probability-weighted utility of the destination state
        /// over all supplied transitions.
        /// </summary>
        /// <param name="transitions">
        /// Transitions to aggregate. Each contributes
        /// <c>GetTransitionProbability(initial, action, destination) * uf.GetUtility(destination)</c>
        /// to the total of its action.
        /// </param>
        /// <param name="uf">Utility function queried for each destination state.</param>
        /// <returns>
        /// A dictionary mapping every action that occurs in <paramref name="transitions"/>
        /// to its accumulated expected utility; empty when no transitions are given.
        /// </returns>
        private Dictionary <TAction, Double> GetExpectedUtilityForSelectedTransitions(
            IList <MDPTransition <TState, TAction> > transitions,
            MDPUtilityFunction <TState> uf)
        {
            var actionsToUtilities = new Dictionary <TAction, double>();

            foreach (var triplet in transitions)
            {
                var s                       = triplet.GetInitialState();
                var action                  = triplet.GetAction();
                var destinationState        = triplet.GetDestinationState();
                var probabilityOfTransition = this.GetTransitionProbability(s,
                                                                            action, destinationState);
                var expectedUtility = (probabilityOfTransition * uf
                                       .GetUtility(destinationState));

                // Single lookup via TryGetValue instead of ContainsKey + indexer read.
                double accumulatedUtility;
                if (actionsToUtilities.TryGetValue(action, out accumulatedUtility))
                {
                    actionsToUtilities[action] = accumulatedUtility + expectedUtility;
                }
                else
                {
                    // First contribution for this action: store it as-is.
                    actionsToUtilities[action] = expectedUtility;
                }
            }
            return(actionsToUtilities);
        }