Ejemplo n.º 1
0
        /// <summary>
        /// Builds a policy by alternating between evaluating the current policy and
        /// greedily improving it, stopping after the first complete pass over
        /// <c>nonFinalstates</c> in which no action is replaced.
        /// </summary>
        /// <param name="gamma">Discount factor forwarded to <c>policyEvaluation</c>.</param>
        /// <returns>The policy as it stands once a full pass leaves it unchanged.</returns>
        public MDPPolicy <STATE_TYPE, ACTION_TYPE> policyIteration(double gamma)
        {
            MDPUtilityFunction <STATE_TYPE>     utilities = initialUtilityFunction();
            MDPPolicy <STATE_TYPE, ACTION_TYPE> policy    = randomPolicy();
            bool policyChanged;

            do
            {
                policyChanged = false;

                // Refresh the utility estimate for the current policy (three evaluation sweeps).
                utilities = policyEvaluation(policy, utilities, gamma, 3);

                foreach (STATE_TYPE state in nonFinalstates)
                {
                    Pair <ACTION_TYPE, Double> bestOverall =
                        transitionModel.getTransitionWithMaximumExpectedUtility(state, utilities);
                    Pair <ACTION_TYPE, Double> bestUnderPolicy =
                        transitionModel.getTransitionWithMaximumExpectedUtilityUsingPolicy(policy, state, utilities);

                    bool strictlyBetter = bestOverall.getSecond() > bestUnderPolicy.getSecond();
                    if (!strictlyBetter)
                    {
                        continue;
                    }

                    // Only a strictly better action replaces the one the policy currently holds.
                    pi_setAction(policy, state, bestOverall);
                    policyChanged = true;
                }
            } while (policyChanged);

            return policy;
        }

        // Stores the action carried by the given transition as the policy's choice for the state.
        private static void pi_setAction(MDPPolicy <STATE_TYPE, ACTION_TYPE> policy, STATE_TYPE state,
                                         Pair <ACTION_TYPE, Double> transition)
        {
            policy.setAction(state, transition.getFirst());
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Runs value iteration: sweeps over <c>nonFinalstates</c>, replacing each state's
        /// utility with its reward plus the discounted best expected utility, until the
        /// largest change seen in a sweep no longer exceeds the termination threshold
        /// <c>error * (1 - gamma) / gamma</c>.
        /// </summary>
        /// <param name="gamma">Discount factor.</param>
        /// <param name="error">Error tolerance used to derive the termination threshold.</param>
        /// <param name="delta">
        /// Not read as an input: it is reset to 0 at the start of every sweep and reused as the
        /// running maximum change. Retained so existing callers keep compiling.
        /// </param>
        /// <returns>
        /// The utility function that was used as input to the final sweep (the copy taken at
        /// the start of that sweep).
        /// </returns>
        public MDPUtilityFunction <STATE_TYPE> valueIteration(double gamma,
                                                              double error, double delta)
        {
            MDPUtilityFunction <STATE_TYPE> U      = initialUtilityFunction();
            MDPUtilityFunction <STATE_TYPE> U_dash = initialUtilityFunction();

            // Termination bound from the textbook algorithm: epsilon * (1 - gamma) / gamma.
            // (The previous expression had gamma and (1 - gamma) swapped.)
            double delta_max = (error * (1 - gamma)) / gamma;

            do
            {
                // Each sweep reads from a snapshot so updates within the sweep do not interact.
                U     = U_dash.copy();
                delta = 0.0;

                foreach (STATE_TYPE s in nonFinalstates)
                {
                    Pair <ACTION_TYPE, Double> highestUtilityTransition = transitionModel
                                                                          .getTransitionWithMaximumExpectedUtility(s, U);
                    double utility = rewardFunction.getRewardFor(s)
                                     + (gamma * highestUtilityTransition.getSecond());
                    U_dash.setUtility(s, utility);

                    double change = Math.Abs(U_dash.getUtility(s) - U.getUtility(s));
                    if (change > delta)
                    {
                        delta = change;
                    }
                }

                // Keep sweeping while the largest change is still above the bound. The earlier
                // "delta < delta_max" test was inverted: it stopped while far from convergence
                // and looped forever once nothing changed.
            } while (delta > delta_max);

            return(U);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Performs one value-iteration sweep and returns a fresh utility function together
        /// with the largest absolute utility change observed among <c>nonFinalstates</c>.
        /// </summary>
        /// <param name="gamma">Discount factor forwarded to <c>valueIterateOnceForGivenState</c>.</param>
        /// <param name="presentUtilityFunction">Utilities read during the sweep; it is not modified here.</param>
        /// <returns>
        /// A pair of (new utility function, maximum absolute change). Entries for
        /// <c>terminalStates</c> are copied unchanged from <paramref name="presentUtilityFunction"/>.
        /// </returns>
        public Pair <MDPUtilityFunction <STATE_TYPE>, Double> valueIterateOnce(
            double gamma, MDPUtilityFunction <STATE_TYPE> presentUtilityFunction)
        {
            double maxUtilityGrowth = 0.0;
            MDPUtilityFunction <STATE_TYPE> newUtilityFunction = new MDPUtilityFunction <STATE_TYPE>();

            foreach (STATE_TYPE s in nonFinalstates)
            {
                double utility = valueIterateOnceForGivenState(gamma,
                                                               presentUtilityFunction, s);

                double differenceInUtility = Math.Abs(utility
                                                      - presentUtilityFunction.getUtility(s));
                if (differenceInUtility > maxUtilityGrowth)
                {
                    maxUtilityGrowth = differenceInUtility;
                }
                newUtilityFunction.setUtility(s, utility);
            }

            // Terminal utilities are carried over once, after the sweep. This used to sit inside
            // the loop above, which repeated the copy for every non-final state and skipped it
            // altogether when there were no non-final states.
            foreach (STATE_TYPE state in terminalStates)
            {
                newUtilityFunction.setUtility(state, presentUtilityFunction
                                              .getUtility(state));
            }

            return(new Pair <MDPUtilityFunction <STATE_TYPE>, Double>(
                       newUtilityFunction, maxUtilityGrowth));
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Sums, per action, the probability-weighted utility of the destination state over
        /// the supplied transitions.
        /// </summary>
        /// <param name="transitions">Transitions to aggregate; each contributes one term to its action's total.</param>
        /// <param name="uf">Utility function queried for each destination state.</param>
        /// <returns>
        /// A dictionary from action to the accumulated expected utility; actions that do not
        /// appear in <paramref name="transitions"/> have no entry.
        /// </returns>
        private Dictionary <ACTION_TYPE, Double> getExpectedUtilityForSelectedTransitions(

            List <MDPTransition <STATE_TYPE, ACTION_TYPE> > transitions,
            MDPUtilityFunction <STATE_TYPE> uf)
        {
            Dictionary <ACTION_TYPE, Double> actionsToUtilities = new Dictionary <ACTION_TYPE, Double>();

            foreach (MDPTransition <STATE_TYPE, ACTION_TYPE> triplet in transitions)
            {
                STATE_TYPE  s                       = triplet.getInitialState();
                ACTION_TYPE action                  = triplet.getAction();
                STATE_TYPE  destinationState        = triplet.getDestinationState();
                double      probabilityOfTransition = getTransitionProbability(s,
                                                                               action, destinationState);
                double expectedUtility = (probabilityOfTransition * uf
                                          .getUtility(destinationState));

                // A single TryGetValue distinguishes "absent" from any stored value. The old
                // Double.MinValue sentinel misread a legitimately stored MinValue as "absent"
                // and then threw on the duplicate Add.
                double accumulated;
                if (actionsToUtilities.TryGetValue(action, out accumulated))
                {
                    actionsToUtilities[action] = accumulated + expectedUtility;
                }
                else
                {
                    actionsToUtilities.Add(action, expectedUtility);
                }
            }
            return(actionsToUtilities);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Creates a utility function whose value for every key of <c>stateToReward</c> is
        /// the result of <c>getRewardFor</c> for that key.
        /// </summary>
        /// <returns>A newly allocated utility function populated from the rewards.</returns>
        public MDPUtilityFunction <STATE_TYPE> asUtilityFunction()
        {
            MDPUtilityFunction <STATE_TYPE> rewardsAsUtilities = new MDPUtilityFunction <STATE_TYPE>();

            foreach (STATE_TYPE rewardedState in stateToReward.Keys)
            {
                double reward = getRewardFor(rewardedState);
                rewardsAsUtilities.setUtility(rewardedState, reward);
            }

            return rewardsAsUtilities;
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Produces a new utility function holding the same state-to-utility entries as
        /// this instance's <c>hash</c>.
        /// </summary>
        /// <returns>A separate utility function object with every entry of <c>hash</c> set on it.</returns>
        public MDPUtilityFunction <STATE_TYPE> copy()
        {
            MDPUtilityFunction <STATE_TYPE> duplicate = new MDPUtilityFunction <STATE_TYPE>();

            foreach (STATE_TYPE key in hash.Keys)
            {
                duplicate.setUtility(key, hash[key]);
            }

            return duplicate;
        }
Ejemplo n.º 7
0
        //
        // PRIVATE METHODS
        //

        /// <summary>
        /// Computes the updated utility of a single state: its reward plus <paramref name="gamma"/>
        /// times the value returned by the transition model's
        /// <c>getTransitionWithMaximumExpectedUtility</c> for that state.
        /// </summary>
        /// <param name="gamma">Discount factor applied to the transition's value.</param>
        /// <param name="presentUtilityFunction">Utilities passed to the transition model.</param>
        /// <param name="state">State whose utility is being recomputed.</param>
        /// <returns>The recomputed utility for <paramref name="state"/>.</returns>
        private double valueIterateOnceForGivenState(double gamma,
                                                     MDPUtilityFunction <STATE_TYPE> presentUtilityFunction,
                                                     STATE_TYPE state)
        {
            // The transition model is queried first, then the reward, as before.
            Pair <ACTION_TYPE, Double> best =
                transitionModel.getTransitionWithMaximumExpectedUtility(state, presentUtilityFunction);

            double reward     = rewardFunction.getRewardFor(state);
            double discounted = gamma * best.getSecond();

            return reward + discounted;
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Starting from a copy of <paramref name="U"/>, applies <c>valueIterateOnceWith</c>
        /// under the given policy <paramref name="iterations"/> times.
        /// </summary>
        /// <param name="pi">Policy passed to every sweep.</param>
        /// <param name="U">Starting utilities; copied before the first sweep.</param>
        /// <param name="gamma">Discount factor passed to every sweep.</param>
        /// <param name="iterations">Number of sweeps; zero or a negative value performs none.</param>
        /// <returns>The utility function produced by the last sweep, or the initial copy if none ran.</returns>
        public MDPUtilityFunction <STATE_TYPE> policyEvaluation(
            MDPPolicy <STATE_TYPE, ACTION_TYPE> pi,
            MDPUtilityFunction <STATE_TYPE> U, double gamma, int iterations)
        {
            MDPUtilityFunction <STATE_TYPE> estimate = U.copy();
            int sweepsLeft = iterations;

            while (sweepsLeft > 0)
            {
                estimate = valueIterateOnceWith(gamma, pi, estimate);
                sweepsLeft--;
            }

            return estimate;
        }
Ejemplo n.º 9
0
        /// <summary>
        /// For a non-terminal state, aggregates the expected utility of every transition
        /// starting at <paramref name="s"/> per action and hands the result to
        /// <c>getActionWithMaximumUtility</c>.
        /// </summary>
        /// <param name="s">State whose outgoing transitions are considered.</param>
        /// <param name="uf">Utility function used to value destination states.</param>
        /// <returns>
        /// The pair chosen by <c>getActionWithMaximumUtility</c>, or
        /// (<c>default(ACTION_TYPE)</c>, 0.0) when <c>isTerminal(s)</c> is true.
        /// </returns>
        public Pair <ACTION_TYPE, Double> getTransitionWithMaximumExpectedUtility(
            STATE_TYPE s, MDPUtilityFunction <STATE_TYPE> uf)
        {
            if (!isTerminal(s))
            {
                Dictionary <ACTION_TYPE, Double> utilityByAction =
                    getExpectedUtilityForSelectedTransitions(getTransitionsStartingWith(s), uf);

                return getActionWithMaximumUtility(utilityByAction);
            }

            // Terminal states yield the default action with zero value.
            return new Pair <ACTION_TYPE, Double>(default(ACTION_TYPE), 0.0);
        }
Ejemplo n.º 10
0
        /// <summary>
        /// For a non-terminal state, aggregates the expected utility of the transitions that
        /// start at <paramref name="s"/> with the action <paramref name="policy"/> assigns to
        /// it, and hands the result to <c>getActionWithMaximumUtility</c>.
        /// </summary>
        /// <param name="policy">Policy supplying the action for <paramref name="s"/>.</param>
        /// <param name="s">State being evaluated.</param>
        /// <param name="uf">Utility function used to value destination states.</param>
        /// <returns>
        /// The pair chosen by <c>getActionWithMaximumUtility</c>, or
        /// (<c>default(ACTION_TYPE)</c>, 0.0) when <c>isTerminal(s)</c> is true.
        /// </returns>
        public Pair <ACTION_TYPE, Double> getTransitionWithMaximumExpectedUtilityUsingPolicy(
            MDPPolicy <STATE_TYPE, ACTION_TYPE> policy, STATE_TYPE s,
            MDPUtilityFunction <STATE_TYPE> uf)
        {
            if (!isTerminal(s))
            {
                List <MDPTransition <STATE_TYPE, ACTION_TYPE> > policyTransitions =
                    getTransitionsWithStartingStateAndAction(s, policy.getAction(s));

                return getActionWithMaximumUtility(
                    getExpectedUtilityForSelectedTransitions(policyTransitions, uf));
            }

            // Terminal states yield the default action with zero value.
            return new Pair <ACTION_TYPE, Double>(default(ACTION_TYPE), 0.0);
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Applies <c>valueIterateOnce</c> a fixed number of times, beginning with
        /// <c>initialUtilityFunction()</c>, and ignores the reported per-sweep change.
        /// </summary>
        /// <param name="numberOfIterations">Number of sweeps; zero or a negative value performs none.</param>
        /// <param name="gamma">Discount factor passed to every sweep.</param>
        /// <returns>The utility function after the last sweep, or the initial one if none ran.</returns>
        public MDPUtilityFunction <STATE_TYPE> valueIterationForFixedIterations(
            int numberOfIterations, double gamma)
        {
            MDPUtilityFunction <STATE_TYPE> current = initialUtilityFunction();
            int sweepsLeft = numberOfIterations;

            while (sweepsLeft > 0)
            {
                // Only the utility function half of the result is kept.
                current = valueIterateOnce(gamma, current).getFirst();
                sweepsLeft--;
            }

            return current;
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Performs one sweep under a fixed policy: for every state in <c>nonFinalstates</c>
        /// the utility becomes its reward plus <paramref name="gamma"/> times the value the
        /// transition model reports for the policy's action.
        /// </summary>
        /// <param name="gamma">Discount factor.</param>
        /// <param name="pi">Policy passed to the transition model.</param>
        /// <param name="U">Utilities read during the sweep; updates go to a copy, not to this object.</param>
        /// <returns>A copy of <paramref name="U"/> with the entries for <c>nonFinalstates</c> overwritten.</returns>
        private MDPUtilityFunction <STATE_TYPE> valueIterateOnceWith(double gamma,
                                                                     MDPPolicy <STATE_TYPE, ACTION_TYPE> pi,
                                                                     MDPUtilityFunction <STATE_TYPE> U)
        {
            MDPUtilityFunction <STATE_TYPE> updated = U.copy();

            foreach (STATE_TYPE state in nonFinalstates)
            {
                // Values are always read from U, so updates made earlier in this sweep are not seen.
                Pair <ACTION_TYPE, Double> policyTransition =
                    transitionModel.getTransitionWithMaximumExpectedUtilityUsingPolicy(pi, state, U);

                double reward     = rewardFunction.getRewardFor(state);
                double discounted = gamma * policyTransition.getSecond();

                updated.setUtility(state, reward + discounted);
            }

            return updated;
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Repeats <c>valueIterateOnce</c>, beginning with <c>initialUtilityFunction()</c>,
        /// until the maximum change reported by a sweep is no longer greater than
        /// <paramref name="errorMargin"/>. At least one sweep is always performed.
        /// </summary>
        /// <param name="gamma">Discount factor passed to every sweep.</param>
        /// <param name="errorMargin">Sweeping stops once the reported change does not exceed this value.</param>
        /// <returns>The utility function produced by the final sweep.</returns>
        public MDPUtilityFunction <STATE_TYPE> valueIterationTillMAximumUtilityGrowthFallsBelowErrorMargin(
            double gamma, double errorMargin)
        {
            MDPUtilityFunction <STATE_TYPE> current = initialUtilityFunction();
            double largestChange;

            do
            {
                Pair <MDPUtilityFunction <STATE_TYPE>, Double> sweep = valueIterateOnce(gamma, current);

                current       = sweep.getFirst();
                largestChange = sweep.getSecond();
            } while (largestChange > errorMargin);

            return current;
        }