Ejemplo n.º 1
0
        /// <summary>
        /// Builds a new policy by visiting every state in <c>nonFinalstates</c> and
        /// assigning it the action returned by <c>transitionModel.RandomActionFor</c>.
        /// </summary>
        /// <returns>
        /// The freshly created policy, after <c>SetAction</c> has been called once for
        /// each enumerated state.
        /// </returns>
        public MDPPolicy<TState, TAction> RandomPolicy()
        {
            var randomPolicy = new MDPPolicy<TState, TAction>();

            foreach (var state in nonFinalstates)
            {
                // Ask the transition model for an action for this state, then record it.
                var chosenAction = transitionModel.RandomActionFor(state);
                randomPolicy.SetAction(state, chosenAction);
            }

            return randomPolicy;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Repeatedly applies <c>ValueIterateOnceWith</c> under the policy
        /// <paramref name="pi"/>, starting from <c>u.Copy()</c>.
        /// </summary>
        /// <param name="pi">Policy passed to every update step.</param>
        /// <param name="u">Starting utility function; the work is done on the result of its <c>Copy()</c>.</param>
        /// <param name="gamma">Factor forwarded unchanged to every update step.</param>
        /// <param name="iterations">
        /// Number of update steps to run. When zero or negative no step is run and the
        /// copy of <paramref name="u"/> is returned as is.
        /// </param>
        /// <returns>The utility function produced by the last update step.</returns>
        public MDPUtilityFunction<TState> PolicyEvaluation(
            MDPPolicy<TState, TAction> pi,
            MDPUtilityFunction<TState> u,
            double gamma,
            int iterations)
        {
            MDPUtilityFunction<TState> current = u.Copy();

            // Count down instead of up; the number of passes is the same.
            int remaining = iterations;
            while (remaining > 0)
            {
                current = ValueIterateOnceWith(gamma, pi, current);
                remaining--;
            }

            return current;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Looks up the action that <paramref name="policy"/> gives for state
        /// <paramref name="s"/>, evaluates the transitions that start in
        /// <paramref name="s"/> with that action against <paramref name="uf"/>, and
        /// returns the result of <c>GetActionWithMaximumUtility</c> over them.
        /// </summary>
        /// <param name="policy">Policy queried via <c>GetAction</c> for the action to follow.</param>
        /// <param name="s">State the transitions start from.</param>
        /// <param name="uf">Utility function handed to <c>GetExpectedUtilityForSelectedTransitions</c>.</param>
        /// <returns>
        /// A pair of action and utility. When <c>IsTerminal(s)</c> is true the pair is
        /// (<c>null</c>, <c>0.0</c>) and no lookup is performed.
        /// </returns>
        public Pair<TAction, double> GetTransitionWithMaximumExpectedUtilityUsingPolicy(
            MDPPolicy<TState, TAction> policy,
            TState s,
            MDPUtilityFunction<TState> uf)
        {
            // Terminal states short-circuit with a "no action, zero utility" pair.
            if (IsTerminal(s))
            {
                return new Pair<TAction, double>(null, 0.0);
            }

            var policyAction = policy.GetAction(s);
            var policyTransitions = GetTransitionsWithStartingStateAndAction(s, policyAction);

            Dictionary<TAction, double> utilityByAction =
                GetExpectedUtilityForSelectedTransitions(policyTransitions, uf);

            return GetActionWithMaximumUtility(utilityByAction);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Performs one update pass over every state in <c>nonFinalstates</c>: each
        /// state's utility is set to its reward plus <paramref name="gamma"/> times the
        /// second element of the pair returned by
        /// <c>transitionModel.GetTransitionWithMaximumExpectedUtilityUsingPolicy</c>.
        /// </summary>
        /// <param name="gamma">Multiplier applied to the value taken from the policy transition.</param>
        /// <param name="pi">Policy forwarded to the transition model lookup.</param>
        /// <param name="U">
        /// Utility function used as input for every lookup in this pass; results are
        /// written to the object returned by <c>U.Copy()</c>.
        /// </param>
        /// <returns>The copy of <paramref name="U"/> carrying the utilities set during this pass.</returns>
        private MDPUtilityFunction<TState> ValueIterateOnceWith(
            double gamma,
            MDPPolicy<TState, TAction> pi,
            MDPUtilityFunction<TState> U)
        {
            MDPUtilityFunction<TState> updated = U.Copy();

            foreach (var state in nonFinalstates)
            {
                // Lookups always use U (the input), never the copy being written,
                // so every state in this pass sees the same input values.
                var policyTransition =
                    transitionModel.GetTransitionWithMaximumExpectedUtilityUsingPolicy(pi, state, U);

                var reward = rewardFunction.GetRewardFor(state);
                var discountedFuture = gamma * policyTransition.GetSecond();
                double utility = reward + discountedFuture;

                updated.SetUtility(state, utility);
            }

            return updated;
        }