Beispiel #1
0
        /// <summary>
        /// Performs a single value-iteration sweep: computes a new utility for every
        /// non-final state from <paramref name="presentUtilityFunction"/> and carries the
        /// utilities of terminal states over unchanged.
        /// </summary>
        /// <param name="gamma">Factor passed through to the per-state update.</param>
        /// <param name="presentUtilityFunction">Utilities from which the new ones are computed.</param>
        /// <returns>
        /// A pair holding the new utility function and the largest absolute difference
        /// between old and new utility observed over the non-final states.
        /// </returns>
        public Pair <MDPUtilityFunction <TState>, Double> ValueIterateOnce(
            double gamma, MDPUtilityFunction <TState> presentUtilityFunction)
        {
            var maxUtilityGrowth   = 0.0;
            var newUtilityFunction = new MDPUtilityFunction <TState>();

            foreach (TState s in nonFinalstates)
            {
                var utility = this.ValueIterateOnceForGivenState(gamma,
                                                                 presentUtilityFunction, s);

                var differenceInUtility = Math.Abs(utility
                                                   - presentUtilityFunction.GetUtility(s));
                if (differenceInUtility > maxUtilityGrowth)
                {
                    maxUtilityGrowth = differenceInUtility;
                }
                newUtilityFunction.SetUtility(s, utility);
            }

            // Terminal utilities do not depend on the loop above, so copy them once
            // instead of once per non-final state. Doing it after the loop keeps the
            // terminal values as the last ones written, and also populates them when
            // there are no non-final states at all.
            foreach (var state in terminalStates)
            {
                newUtilityFunction.SetUtility(state, presentUtilityFunction
                                              .GetUtility(state));
            }

            return(new Pair <MDPUtilityFunction <TState>, Double>(
                       newUtilityFunction, maxUtilityGrowth));
        }
Beispiel #2
0
        /// <summary>
        /// Sums, per action, the probability-weighted utility of the destination state
        /// over all of the given transitions.
        /// </summary>
        /// <param name="transitions">Transitions to aggregate.</param>
        /// <param name="uf">Utility function used to look up each destination state's utility.</param>
        /// <returns>
        /// A dictionary mapping each action that occurs in <paramref name="transitions"/>
        /// to its accumulated expected utility.
        /// </returns>
        private Dictionary <TAction, Double> GetExpectedUtilityForSelectedTransitions(
            IList <MDPTransition <TState, TAction> > transitions,
            MDPUtilityFunction <TState> uf)
        {
            var actionsToUtilities = new Dictionary <TAction, double>();

            foreach (var triplet in transitions)
            {
                var s                       = triplet.GetInitialState();
                var action                  = triplet.GetAction();
                var destinationState        = triplet.GetDestinationState();
                var probabilityOfTransition = this.GetTransitionProbability(s,
                                                                            action, destinationState);
                var expectedUtility = (probabilityOfTransition * uf
                                       .GetUtility(destinationState));

                // A single TryGetValue replaces the ContainsKey + indexer read,
                // avoiding repeated hashing of the same key.
                double accumulatedUtility;
                if (actionsToUtilities.TryGetValue(action, out accumulatedUtility))
                {
                    actionsToUtilities[action] = accumulatedUtility + expectedUtility;
                }
                else
                {
                    actionsToUtilities[action] = expectedUtility;
                }
            }
            return(actionsToUtilities);
        }
Beispiel #3
0
        /// <summary>
        /// Builds a new utility function containing every state/utility entry
        /// currently stored in <c>hash</c>.
        /// </summary>
        /// <returns>A separate instance populated through <c>SetUtility</c>.</returns>
        public MDPUtilityFunction <TState> Copy()
        {
            var duplicate = new MDPUtilityFunction <TState>();

            foreach (TState key in hash.Keys)
            {
                var storedUtility = hash[key];
                duplicate.SetUtility(key, storedUtility);
            }

            return duplicate;
        }
        /// <summary>
        /// Converts the rewards into a utility function: every state present in
        /// <c>stateToReward</c> gets its reward (as reported by <c>GetRewardFor</c>)
        /// as its utility.
        /// </summary>
        /// <returns>The newly created utility function.</returns>
        public MDPUtilityFunction <TState> AsUtilityFunction()
        {
            var utilities = new MDPUtilityFunction <TState>();

            foreach (TState rewardedState in stateToReward.Keys)
            {
                var reward = this.GetRewardFor(rewardedState);
                utilities.SetUtility(rewardedState, reward);
            }

            return utilities;
        }
Beispiel #5
0
        /// <summary>
        /// Computes the updated utility of one state: its reward plus
        /// <paramref name="gamma"/> times the value of the transition with maximum
        /// expected utility under <paramref name="presentUtilityFunction"/>.
        /// </summary>
        /// <param name="gamma">Multiplier applied to the best transition's utility.</param>
        /// <param name="presentUtilityFunction">Utilities used to rank the transitions.</param>
        /// <param name="state">State whose utility is being updated.</param>
        /// <returns>The new utility for <paramref name="state"/>.</returns>
        private double ValueIterateOnceForGivenState(double gamma,
                                                     MDPUtilityFunction <TState> presentUtilityFunction,
                                                     TState state)
        {
            var bestTransition = transitionModel.GetTransitionWithMaximumExpectedUtility(
                state, presentUtilityFunction);

            var immediateReward    = rewardFunction.GetRewardFor(state);
            var discountedLookahead = gamma * bestTransition.GetSecond();

            return immediateReward + discountedLookahead;
        }
Beispiel #6
0
        /// <summary>
        /// Applies the policy-based update <c>ValueIterateOnceWith</c> repeatedly,
        /// starting from a copy of <paramref name="u"/>.
        /// </summary>
        /// <param name="pi">Policy handed to each update step.</param>
        /// <param name="u">Starting utilities; a copy is taken before any update runs.</param>
        /// <param name="gamma">Factor handed to each update step.</param>
        /// <param name="iterations">Number of update steps; zero or negative performs none.</param>
        /// <returns>The utility function produced by the last step (or the copy if no step ran).</returns>
        public MDPUtilityFunction <TState> PolicyEvaluation(
            MDPPolicy <TState, TAction> pi,
            MDPUtilityFunction <TState> u, double gamma, int iterations)
        {
            var evaluated = u.Copy();
            var remaining = iterations;

            while (remaining > 0)
            {
                evaluated = this.ValueIterateOnceWith(gamma, pi, evaluated);
                remaining--;
            }

            return evaluated;
        }
Beispiel #7
0
        /// <summary>
        /// Picks, among the transitions starting in <paramref name="s"/>, the action
        /// with the highest accumulated expected utility under <paramref name="uf"/>.
        /// </summary>
        /// <param name="s">State the transitions start from.</param>
        /// <param name="uf">Utility function used to score destination states.</param>
        /// <returns>
        /// The result of <c>GetActionWithMaximumUtility</c> over the per-action expected
        /// utilities; for a terminal state, a pair of a null action and 0.0.
        /// </returns>
        public Pair <TAction, double> GetTransitionWithMaximumExpectedUtility(
            TState s, MDPUtilityFunction <TState> uf)
        {
            if (IsTerminal(s))
            {
                // Terminal states have nothing to choose from.
                return new Pair <TAction, Double>(null, 0.0);
            }

            var outgoing = this.GetTransitionsStartingWith(s);
            var utilityPerAction = GetExpectedUtilityForSelectedTransitions(outgoing, uf);

            return GetActionWithMaximumUtility(utilityPerAction);
        }
Beispiel #8
0
        /// <summary>
        /// Same selection as <c>GetTransitionWithMaximumExpectedUtility</c>, but only the
        /// transitions from <paramref name="s"/> that use the action chosen by
        /// <paramref name="policy"/> are considered.
        /// </summary>
        /// <param name="policy">Policy queried for the action to take in <paramref name="s"/>.</param>
        /// <param name="s">State the transitions start from.</param>
        /// <param name="uf">Utility function used to score destination states.</param>
        /// <returns>
        /// The result of <c>GetActionWithMaximumUtility</c> over the per-action expected
        /// utilities; for a terminal state, a pair of a null action and 0.0.
        /// </returns>
        public Pair <TAction, double> GetTransitionWithMaximumExpectedUtilityUsingPolicy(
            MDPPolicy <TState, TAction> policy, TState s,
            MDPUtilityFunction <TState> uf)
        {
            if (IsTerminal(s))
            {
                // Terminal states have nothing to choose from.
                return new Pair <TAction, Double>(null, 0.0);
            }

            var policyAction = policy.GetAction(s);
            var policyTransitions = this.GetTransitionsWithStartingStateAndAction(s, policyAction);
            var utilityPerAction = GetExpectedUtilityForSelectedTransitions(policyTransitions, uf);

            return this.GetActionWithMaximumUtility(utilityPerAction);
        }
Beispiel #9
0
        /// <summary>
        /// Runs one policy-driven update over all non-final states: each state's new
        /// utility is its reward plus <paramref name="gamma"/> times the value of the
        /// transition selected under <paramref name="pi"/>.
        /// </summary>
        /// <param name="gamma">Multiplier applied to the selected transition's utility.</param>
        /// <param name="pi">Policy that determines which action's transitions are scored.</param>
        /// <param name="U">Utilities read during the sweep; this instance is not written to here.</param>
        /// <returns>A copy of <paramref name="U"/> with the non-final states' utilities replaced.</returns>
        private MDPUtilityFunction <TState> ValueIterateOnceWith(double gamma,
                                                                 MDPPolicy <TState, TAction> pi,
                                                                 MDPUtilityFunction <TState> U)
        {
            var updated = U.Copy();

            foreach (var current in this.nonFinalstates)
            {
                // Scores are always taken from U, never from the copy being filled in,
                // so updates made earlier in this sweep do not feed into later ones.
                var policyTransition =
                    this.transitionModel.GetTransitionWithMaximumExpectedUtilityUsingPolicy(pi, current, U);

                updated.SetUtility(
                    current,
                    rewardFunction.GetRewardFor(current) + (gamma * policyTransition.GetSecond()));
            }

            return updated;
        }