// Ejemplo n.º 1
// 0
        /// <summary>
        /// Modified policy evaluation: performs k simplified Bellman backups of the
        /// utility function under the fixed policy pi_i (no maximization over actions).
        /// </summary>
        /// <param name="pi_i">Current policy: maps each state to the action it prescribes.</param>
        /// <param name="U">Utility estimates used to seed the iteration.</param>
        /// <param name="mdp">The MDP supplying rewards and transition probabilities.</param>
        /// <returns>The utility estimates after k backup sweeps.</returns>
        public Dictionary<Cell<Double>, Double> evaluate(Dictionary<Cell<Double>, CS8803AGA.PsychSim.State.Action> pi_i, Dictionary<Cell<Double>, Double> U,
            MDP mdp)
        {
            var current = new Dictionary<Cell<Double>, Double>(U);
            var next = new Dictionary<Cell<Double>, Double>(U);

            for (int sweep = 0; sweep < k; sweep++)
            {
                foreach (var state in U.Keys)
                {
                    // Expected utility of the policy's action; stays 0 when the
                    // policy has no entry for this state.
                    double expectedUtility = 0;
                    CS8803AGA.PsychSim.State.Action prescribed;
                    if (pi_i.TryGetValue(state, out prescribed))
                    {
                        foreach (var successor in U.Keys)
                        {
                            expectedUtility += mdp.transitionProbability(successor, state, prescribed)
                                               * current[successor];
                        }
                    }
                    next[state] = mdp.reward(state) + gamma * expectedUtility;
                }

                // Promote this sweep's results so the next sweep reads them.
                Util.Merge(current, next);
            }
            return next;
        }
        /// <summary>
        /// Policy iteration: alternates policy evaluation and greedy one-step
        /// policy improvement until no state's action changes.
        /// </summary>
        /// <param name="mdp">Primary MDP (states, actions, rewards, transitions).</param>
        /// <param name="mdp1">Additional MDP forwarded to the evaluator.</param>
        /// <param name="mdp2">Additional MDP forwarded to the evaluator.</param>
        /// <returns>A lookup policy wrapping the converged state-to-action map.</returns>
        public LookupPolicy policyIteration(MDP mdp, MDP mdp1, MDP mdp2)
        {
            Dictionary<Cell<Double>, Double> U = Util.create(mdp.GetStates(), new Double());
            Dictionary<Cell<Double>, CS8803AGA.PsychSim.State.Action> pi = initialPolicyVector(mdp);

            bool improved;
            do
            {
                U = policyEvaluation.evaluate(pi, U, mdp, mdp1, mdp2);

                improved = false;

                foreach (var state in mdp.GetStates())
                {
                    if (!pi.ContainsKey(state))
                    {
                        continue;
                    }

                    // pi[state] is only rewritten after the scan, so caching it is safe.
                    var currentAction = pi[state];
                    var bestAction = currentAction;
                    double bestValue = Double.MinValue;
                    double currentValue = 0;

                    foreach (var candidate in mdp.actions(state))
                    {
                        // One-step expected utility of taking 'candidate' in 'state'.
                        double expected = 0;
                        foreach (var successor in mdp.GetStates())
                        {
                            expected += mdp.transitionProbability(successor, state, candidate)
                                        * U[successor];
                        }

                        if (expected > bestValue)
                        {
                            bestValue = expected;
                            bestAction = candidate;
                        }

                        if (candidate.Equals(currentAction))
                        {
                            currentValue = expected;
                        }
                    }

                    // Switch only on a strict improvement over the current action.
                    if (bestValue > currentValue)
                    {
                        pi[state] = bestAction;
                        improved = true;
                    }
                }
            } while (improved);

            return new LookupPolicy(pi);
        }
// Ejemplo n.º 3
// 0
        /// <summary>
        /// Value iteration: repeats Bellman backups until the largest utility
        /// change falls to epsilon * (1 - gamma) / gamma, which bounds the
        /// policy loss of the result by epsilon.
        /// </summary>
        /// <param name="mdp">The MDP supplying states, actions, rewards, and transitions.</param>
        /// <param name="epsilon">Maximum allowed error in the returned utilities.</param>
        /// <returns>Converged utility estimates for every state.</returns>
        public Dictionary<Cell<Double>, Double> valueIteration(MDP mdp, double epsilon)
        {
            Dictionary<Cell<Double>, Double> U = Util.create(mdp.GetStates(), 0.0);
            Dictionary<Cell<Double>, Double> Udelta = Util.create(mdp.GetStates(), 0.0);

            double threshold = epsilon * (1 - gamma) / gamma;
            double maxChange;

            do
            {
                // Promote last sweep's utilities before computing the next sweep.
                Util.Merge(U, Udelta);

                maxChange = 0;

                foreach (Cell<Double> state in mdp.GetStates())
                {
                    HashSet<CS8803AGA.PsychSim.State.Action> available = mdp.actions(state);

                    // States with no actions contribute 0 future utility; otherwise
                    // start at the lowest value so any real action overwrites it.
                    double best = available.Count > 0 ? Double.MinValue : 0;

                    foreach (CS8803AGA.PsychSim.State.Action action in available)
                    {
                        double expected = 0;
                        foreach (Cell<Double> successor in mdp.GetStates())
                        {
                            expected += mdp.transitionProbability(successor, state, action)
                                        * U[successor];
                        }
                        if (expected > best)
                        {
                            best = expected;
                        }
                    }

                    Udelta[state] = mdp.reward(state) + gamma * best;

                    double change = Math.Abs(Udelta[state] - U[state]);
                    if (change > maxChange)
                    {
                        maxChange = change;
                    }
                }
            } while (maxChange > threshold);

            return U;
        }