Exemplo n.º 1
0
        /// <summary>
        /// Performs one value-iteration sweep. A new utility is computed (via
        /// <c>valueIterateOnceForGivenState</c>) for every state in
        /// <c>nonFinalstates</c>; the utility of every state in
        /// <c>terminalStates</c> is copied unchanged from
        /// <paramref name="presentUtilityFunction"/>.
        /// </summary>
        /// <param name="gamma">Value forwarded to <c>valueIterateOnceForGivenState</c>
        /// (presumably the discount factor).</param>
        /// <param name="presentUtilityFunction">Utility function of the previous sweep,
        /// queried through <c>getUtility</c>.</param>
        /// <returns>
        /// A pair whose first element is the newly built utility function and whose
        /// second element is the largest absolute utility change observed over
        /// <c>nonFinalstates</c> (0.0 when there are none).
        /// </returns>
        public Pair <MDPUtilityFunction <STATE_TYPE>, Double> valueIterateOnce(
            double gamma, MDPUtilityFunction <STATE_TYPE> presentUtilityFunction)
        {
            double maxUtilityGrowth = 0.0;
            MDPUtilityFunction <STATE_TYPE> newUtilityFunction = new MDPUtilityFunction <STATE_TYPE>();

            foreach (STATE_TYPE s in nonFinalstates)
            {
                double utility = valueIterateOnceForGivenState(gamma,
                                                               presentUtilityFunction, s);

                // Track the largest change so callers can test for convergence.
                double differenceInUtility = Math.Abs(utility
                                                      - presentUtilityFunction.getUtility(s));
                if (differenceInUtility > maxUtilityGrowth)
                {
                    maxUtilityGrowth = differenceInUtility;
                }
                newUtilityFunction.setUtility(s, utility);
            }

            // Terminal utilities do not depend on the non-final state being
            // processed, so copy them once, after the sweep. Doing it last keeps
            // them as the final write (as before) and also covers the case where
            // nonFinalstates is empty.
            foreach (STATE_TYPE state in terminalStates)
            {
                newUtilityFunction.setUtility(state, presentUtilityFunction
                                              .getUtility(state));
            }

            return(new Pair <MDPUtilityFunction <STATE_TYPE>, Double>(
                       newUtilityFunction, maxUtilityGrowth));
        }
Exemplo n.º 2
0
        /// <summary>
        /// Runs value iteration until the largest per-state utility change of a
        /// sweep no longer exceeds the termination threshold
        /// <c>error * (1 - gamma) / gamma</c>.
        /// </summary>
        /// <param name="gamma">Discount factor applied to the best expected utility
        /// of the successor states.</param>
        /// <param name="error">Error bound used to derive the termination threshold.</param>
        /// <param name="delta">Ignored on entry: it is reset to 0.0 at the start of
        /// every sweep and used only as the running maximum change. Kept for
        /// signature compatibility.</param>
        /// <returns>
        /// The utility function as it was at the start of the final sweep
        /// (the copy taken from <c>U_dash</c> before that sweep's updates).
        /// </returns>
        public MDPUtilityFunction <STATE_TYPE> valueIteration(double gamma,
                                                              double error, double delta)
        {
            MDPUtilityFunction <STATE_TYPE> U      = initialUtilityFunction();
            MDPUtilityFunction <STATE_TYPE> U_dash = initialUtilityFunction();

            // Termination threshold. With gamma == 0 this is +Infinity (or NaN when
            // error is also 0); either way the comparison below is false and the
            // loop stops after a single sweep.
            double threshold = (error * (1 - gamma)) / gamma;

            do
            {
                U = U_dash.copy();
                delta = 0.0;
                foreach (STATE_TYPE s in nonFinalstates)
                {
                    Pair <ACTION_TYPE, Double> highestUtilityTransition = transitionModel
                                                                          .getTransitionWithMaximumExpectedUtility(s, U);
                    double utility = rewardFunction.getRewardFor(s)
                                     + (gamma * highestUtilityTransition.getSecond());
                    U_dash.setUtility(s, utility);

                    double change = Math.Abs(U_dash.getUtility(s) - U.getUtility(s));
                    if (change > delta)
                    {
                        delta = change;
                    }
                }
                // Keep sweeping only while the utilities are still changing by more
                // than the threshold; the previous "delta < delta_max" test looped
                // forever once the utilities had converged.
            } while (delta > threshold);
            return(U);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Sums, per action, the probability-weighted utility of the destination
        /// state of each supplied transition.
        /// </summary>
        /// <param name="transitions">Transitions to aggregate; each contributes
        /// <c>getTransitionProbability(initial, action, destination) *
        /// uf.getUtility(destination)</c> to its action's total.</param>
        /// <param name="uf">Utility function used to look up destination-state utilities.</param>
        /// <returns>
        /// A dictionary mapping every action that occurs in
        /// <paramref name="transitions"/> to its accumulated expected utility;
        /// empty when no transitions are supplied.
        /// </returns>
        private Dictionary <ACTION_TYPE, Double> getExpectedUtilityForSelectedTransitions(

            List <MDPTransition <STATE_TYPE, ACTION_TYPE> > transitions,
            MDPUtilityFunction <STATE_TYPE> uf)
        {
            Dictionary <ACTION_TYPE, Double> actionsToUtilities = new Dictionary <ACTION_TYPE, Double>();

            foreach (MDPTransition <STATE_TYPE, ACTION_TYPE> triplet in transitions)
            {
                STATE_TYPE  s                       = triplet.getInitialState();
                ACTION_TYPE action                  = triplet.getAction();
                STATE_TYPE  destinationState        = triplet.getDestinationState();
                double      probabilityOfTransition = getTransitionProbability(s,
                                                                               action, destinationState);
                double expectedUtility = (probabilityOfTransition * uf
                                          .getUtility(destinationState));

                // TryGetValue distinguishes "absent" from any stored value, so no
                // in-band sentinel (previously Double.MinValue) is needed.
                double accumulated;
                if (actionsToUtilities.TryGetValue(action, out accumulated))
                {
                    actionsToUtilities[action] = accumulated + expectedUtility;
                }
                else
                {
                    actionsToUtilities[action] = expectedUtility;
                }
            }
            return(actionsToUtilities);
        }