Beispiel #1
0
        /// <summary>
        /// Performs one value-iteration sweep: computes a new utility for every
        /// non-final state and carries the utility of every terminal state over
        /// unchanged from <paramref name="presentUtilityFunction"/>.
        /// </summary>
        /// <param name="gamma">Value forwarded to <c>valueIterateOnceForGivenState</c> (presumably the discount factor).</param>
        /// <param name="presentUtilityFunction">
        /// Utilities from the previous sweep. This method only reads it through
        /// <c>getUtility</c> and passes it to <c>valueIterateOnceForGivenState</c>.
        /// </param>
        /// <returns>
        /// A pair whose first element is the newly built utility function and
        /// whose second element is the largest absolute difference between the
        /// new and the previous utility over the non-final states (0.0 when
        /// there are none).
        /// </returns>
        public Pair <MDPUtilityFunction <STATE_TYPE>, Double> valueIterateOnce(
            double gamma, MDPUtilityFunction <STATE_TYPE> presentUtilityFunction)
        {
            double maxUtilityGrowth = 0.0;
            MDPUtilityFunction <STATE_TYPE> newUtilityFunction = new MDPUtilityFunction <STATE_TYPE>();

            foreach (STATE_TYPE s in nonFinalstates)
            {
                double utility = valueIterateOnceForGivenState(gamma,
                                                               presentUtilityFunction, s);

                // Track the largest change so callers can judge convergence.
                double differenceInUtility = Math.Abs(utility
                                                      - presentUtilityFunction.getUtility(s));
                if (differenceInUtility > maxUtilityGrowth)
                {
                    maxUtilityGrowth = differenceInUtility;
                }
                newUtilityFunction.setUtility(s, utility);
            }

            // Terminal utilities do not depend on the state being updated, so
            // copy them once after the sweep instead of once per non-final
            // state. Doing it here also guarantees they are present in the
            // result when there are no non-final states at all.
            foreach (STATE_TYPE state in terminalStates)
            {
                newUtilityFunction.setUtility(state, presentUtilityFunction
                                              .getUtility(state));
            }

            return(new Pair <MDPUtilityFunction <STATE_TYPE>, Double>(
                       newUtilityFunction, maxUtilityGrowth));
        }
Beispiel #2
0
        /// <summary>
        /// Repeatedly updates the utility of every non-final state until the
        /// largest change produced by a sweep is no greater than a threshold
        /// derived from <paramref name="error"/> and <paramref name="gamma"/>.
        /// </summary>
        /// <param name="gamma">Factor applied to the best expected utility of each state's transitions.</param>
        /// <param name="error">Error bound used to derive the stopping threshold.</param>
        /// <param name="delta">
        /// Kept for interface compatibility. The incoming value is never read:
        /// it is reset to 0.0 at the start of every sweep and then used as the
        /// running maximum change.
        /// </param>
        /// <returns>
        /// The utility function as it was at the start of the final sweep
        /// (<c>U</c>, not the freshly updated <c>U_dash</c>).
        /// </returns>
        public MDPUtilityFunction <STATE_TYPE> valueIteration(double gamma,
                                                              double error, double delta)
        {
            MDPUtilityFunction <STATE_TYPE> U      = initialUtilityFunction();
            MDPUtilityFunction <STATE_TYPE> U_dash = initialUtilityFunction();
            // NOTE(review): the textbook stopping bound is error * (1 - gamma) / gamma;
            // this expression is the reciprocal form. Left unchanged to avoid
            // altering convergence behaviour for existing callers - confirm intent.
            double delta_max = (error * gamma) / (1 - gamma);

            do
            {
                U = U_dash.copy();
                delta = 0.0;
                foreach (STATE_TYPE s in nonFinalstates)
                {
                    Pair <ACTION_TYPE, Double> highestUtilityTransition = transitionModel
                                                                          .getTransitionWithMaximumExpectedUtility(s, U);
                    double utility = rewardFunction.getRewardFor(s)
                                     + (gamma * highestUtilityTransition.getSecond());
                    U_dash.setUtility(s, utility);

                    // Largest absolute change seen in this sweep.
                    double change = Math.Abs(U_dash.getUtility(s) - U.getUtility(s));
                    if (change > delta)
                    {
                        delta = change;
                    }
                }
                // Keep iterating while the utilities are still moving by more
                // than the threshold. The previous condition (delta < delta_max)
                // was inverted: it stopped while unconverged and looped forever
                // once converged.
            } while (delta > delta_max);
            return(U);
        }
        /// <summary>
        /// Builds a utility function that assigns to every state listed in
        /// <c>stateToReward</c> the value returned by <c>getRewardFor</c>.
        /// </summary>
        /// <returns>A newly created utility function; existing state is not modified here.</returns>
        public MDPUtilityFunction <STATE_TYPE> asUtilityFunction()
        {
            MDPUtilityFunction <STATE_TYPE> utilities = new MDPUtilityFunction <STATE_TYPE>();

            foreach (STATE_TYPE s in stateToReward.Keys)
            {
                // Go through getRewardFor rather than reading the map directly,
                // so any lookup logic it applies is honoured.
                utilities.setUtility(s, getRewardFor(s));
            }

            return utilities;
        }
        /// <summary>
        /// Creates a new utility function holding the same state-to-utility
        /// entries as this one.
        /// </summary>
        /// <returns>
        /// A separate <c>MDPUtilityFunction</c> instance populated through
        /// <c>setUtility</c> with every entry of <c>hash</c>.
        /// </returns>
        public MDPUtilityFunction <STATE_TYPE> copy()
        {
            MDPUtilityFunction <STATE_TYPE> duplicate = new MDPUtilityFunction <STATE_TYPE>();

            foreach (STATE_TYPE key in hash.Keys)
            {
                duplicate.setUtility(key, hash[key]);
            }

            return duplicate;
        }
Beispiel #5
0
        /// <summary>
        /// Runs one utility-update sweep over the non-final states, using the
        /// transition selected by
        /// <c>getTransitionWithMaximumExpectedUtilityUsingPolicy</c> for the
        /// given policy.
        /// </summary>
        /// <param name="gamma">Factor applied to the second element of the selected transition pair.</param>
        /// <param name="pi">Policy passed to the transition model for each state.</param>
        /// <param name="U">Utilities read during the sweep; updates go to a copy, not to this instance.</param>
        /// <returns>A copy of <paramref name="U"/> with the non-final states' utilities replaced.</returns>
        private MDPUtilityFunction <STATE_TYPE> valueIterateOnceWith(double gamma,
                                                                     MDPPolicy <STATE_TYPE, ACTION_TYPE> pi,
                                                                     MDPUtilityFunction <STATE_TYPE> U)
        {
            MDPUtilityFunction <STATE_TYPE> updated = U.copy();

            foreach (STATE_TYPE state in nonFinalstates)
            {
                // All lookups use the incoming U, so updates made earlier in
                // this sweep do not influence later states.
                Pair <ACTION_TYPE, Double> policyTransition =
                    transitionModel.getTransitionWithMaximumExpectedUtilityUsingPolicy(pi, state, U);

                double reward     = rewardFunction.getRewardFor(state);
                double discounted = gamma * policyTransition.getSecond();

                updated.setUtility(state, reward + discounted);
            }

            return updated;
        }