Example #1
0
        /// <summary>
        /// Performs one value-iteration sweep: computes a new utility for every
        /// non-final state and carries the utility of every terminal state over
        /// unchanged from <paramref name="presentUtilityFunction"/>.
        /// </summary>
        /// <param name="gamma">
        /// Value forwarded to <c>ValueIterateOnceForGivenState</c>
        /// (presumably the discount factor - confirm against that method).
        /// </param>
        /// <param name="presentUtilityFunction">
        /// Utility function the new utilities are computed from and compared against.
        /// </param>
        /// <returns>
        /// A pair of the newly built utility function and the largest absolute
        /// change in utility observed over the non-final states (0.0 if there are none).
        /// </returns>
        public Pair <MDPUtilityFunction <TState>, Double> ValueIterateOnce(
            double gamma, MDPUtilityFunction <TState> presentUtilityFunction)
        {
            var maxUtilityGrowth   = 0.0;
            var newUtilityFunction = new MDPUtilityFunction <TState>();

            foreach (TState s in nonFinalstates)
            {
                // The per-state update is delegated to ValueIterateOnceForGivenState.
                var utility = this.ValueIterateOnceForGivenState(gamma,
                                                                 presentUtilityFunction, s);

                var differenceInUtility = Math.Abs(utility
                                                   - presentUtilityFunction.GetUtility(s));
                if (differenceInUtility > maxUtilityGrowth)
                {
                    maxUtilityGrowth = differenceInUtility;
                }
                newUtilityFunction.SetUtility(s, utility);
            }

            // Terminal utilities do not depend on the loop above, so copy them once.
            // Doing it after the loop keeps the previous "terminal value wins"
            // outcome and also covers the case where there are no non-final states.
            foreach (var state in terminalStates)
            {
                newUtilityFunction.SetUtility(state, presentUtilityFunction
                                              .GetUtility(state));
            }

            return new Pair <MDPUtilityFunction <TState>, Double>(
                       newUtilityFunction, maxUtilityGrowth);
        }
Example #2
0
        /// <summary>
        /// Builds a new utility function containing the same state-to-utility
        /// entries as this one.
        /// </summary>
        /// <returns>
        /// A separate <c>MDPUtilityFunction</c> instance populated via
        /// <c>SetUtility</c> with every entry currently held in <c>hash</c>.
        /// </returns>
        public MDPUtilityFunction <TState> Copy()
        {
            var duplicate = new MDPUtilityFunction <TState>();

            // Replay every stored entry into the fresh instance.
            foreach (TState key in hash.Keys)
            {
                var storedUtility = hash[key];
                duplicate.SetUtility(key, storedUtility);
            }

            return duplicate;
        }
        /// <summary>
        /// Converts this reward function into a utility function in which each
        /// state's utility is the value returned by <c>GetRewardFor</c> for it.
        /// </summary>
        /// <returns>
        /// A new <c>MDPUtilityFunction</c> with one entry per key of
        /// <c>stateToReward</c>.
        /// </returns>
        public MDPUtilityFunction <TState> AsUtilityFunction()
        {
            var result = new MDPUtilityFunction <TState>();

            foreach (TState rewardedState in stateToReward.Keys)
            {
                // Go through GetRewardFor rather than reading the map directly.
                var reward = this.GetRewardFor(rewardedState);
                result.SetUtility(rewardedState, reward);
            }

            return result;
        }
Example #4
0
        /// <summary>
        /// Performs one utility update for a fixed policy. Starting from a copy of
        /// <paramref name="U"/>, the utility of each non-final state is replaced by
        /// its reward plus <paramref name="gamma"/> times the second component of
        /// the transition selected for that state under <paramref name="pi"/>.
        /// </summary>
        /// <param name="gamma">Multiplier applied to the transition's second component.</param>
        /// <param name="pi">Policy passed to the transition model when selecting the transition.</param>
        /// <param name="U">
        /// Utilities used as input for every state; it is read but not written
        /// to by this method.
        /// </param>
        /// <returns>
        /// The updated copy; states outside <c>nonFinalstates</c> keep the values
        /// copied from <paramref name="U"/>.
        /// </returns>
        private MDPUtilityFunction <TState> ValueIterateOnceWith(double gamma,
                                                                 MDPPolicy <TState, TAction> pi,
                                                                 MDPUtilityFunction <TState> U)
        {
            var updated = U.Copy();

            foreach (var current in this.nonFinalstates)
            {
                // Always evaluate against the original U, never the partially
                // updated copy.
                var policyTransition = this.transitionModel
                                       .GetTransitionWithMaximumExpectedUtilityUsingPolicy(pi, current, U);

                var reward = rewardFunction.GetRewardFor(current);
                double refreshed = reward + (gamma * policyTransition.GetSecond());

                updated.SetUtility(current, refreshed);
            }

            return updated;
        }