        public Dictionary<Cell<Double>, Double> evaluate(Dictionary<Cell<Double>, CS8803AGA.PsychSim.State.Action> pi_i,
                Dictionary<Cell<Double>, Double> U, MDP mdp)
        {
            Dictionary<Cell<Double>, Double> U_i = new Dictionary<Cell<Double>, Double>(U);
            Dictionary<Cell<Double>, Double> U_ip1 = new Dictionary<Cell<Double>, Double>(U);

            // Perform k simplified Bellman update sweeps under the fixed policy pi_i
            for (int i = 0; i < k; i++)
            {
                foreach (var s in U.Keys)
                {
                    // Expected utility of the successor states when taking pi_i(s) in s
                    double aSum = 0;
                    if (pi_i.ContainsKey(s))
                    {
                        CS8803AGA.PsychSim.State.Action ap_i = pi_i[s];
                        foreach (var sDelta in U.Keys)
                        {
                            aSum += mdp.transitionProbability(sDelta, s, ap_i)
                                    * U_i[sDelta];
                        }
                    }
                    U_ip1[s] = mdp.reward(s) + gamma * aSum;
                }

                // Carry the updated utilities over for the next sweep
                Util.Merge(U_i, U_ip1);
            }
            return U_ip1;
        }
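Note: each pass of the loop above is one sweep of the simplified Bellman update used for policy evaluation, repeated k times for the fixed policy pi_i rather than solved exactly:

U_{i+1}(s) = R(s) + \gamma \sum_{s'} P(s' \mid s, \pi_i(s)) \, U_i(s')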
Example #2
        public POMDP(MDP mdp)
        {
            this.mdp = mdp;
            belief = new State<Double>(3, 3, 0);
            sensor = new Sensor();

            // Initial belief distribution over the nine grid cells
            belief.setContent(new double[] { 0.311, 0.111, 0.111, 0.111, 0.111, 0.111, 0.111, 0.111, 0 });
        }
        public LookupPolicy policyIteration(MDP mdp, MDP mdp1, MDP mdp2)
        {
            Dictionary<Cell<Double>, Double> U = Util.create(mdp.GetStates(), 0.0);

            Dictionary<Cell<Double>, CS8803AGA.PsychSim.State.Action> pi = initialPolicyVector(mdp);
            bool unchanged;

            do
            {
                // Policy evaluation: estimate the utilities of the current policy pi
                U = policyEvaluation.evaluate(pi, U, mdp, mdp1, mdp2);

                unchanged = true;

                // Policy improvement: for each state, switch to an action with a
                // strictly higher one-step lookahead value than the current one
                foreach (var s in mdp.GetStates())
                {
                    if (pi.ContainsKey(s))
                    {
                        double aMax = Double.MinValue, piVal = 0;

                        CS8803AGA.PsychSim.State.Action aArgmax = pi[s];
                        foreach (var a in mdp.actions(s))
                        {
                            double aSum = 0;
                            foreach (var sDelta in mdp.GetStates())
                            {
                                aSum += mdp.transitionProbability(sDelta, s, a)
                                        * U[sDelta];
                            }
                            if (aSum > aMax)
                            {
                                aMax = aSum;
                                aArgmax = a;
                            }

                            // Remember the value of the action the current policy prescribes
                            if (a.Equals(pi[s]))
                            {
                                piVal = aSum;
                            }
                        }

                        // Switch to the better action and keep iterating
                        if (aMax > piVal)
                        {
                            pi[s] = aArgmax;
                            unchanged = false;
                        }
                    }
                }
            } while (!unchanged);

            return new LookupPolicy(pi);
        }
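The improvement step inside the loop is the standard policy iteration rule

\pi(s) \leftarrow \arg\max_{a} \sum_{s'} P(s' \mid s, a) \, U(s'),

where the current action is replaced only when some action's one-step lookahead value strictly exceeds that of \pi(s); the evaluate/improve cycle repeats until no state changes its action.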
Example #4
        public Dictionary<Cell<Double>, Double> valueIteration(MDP mdp, double epsilon)
        {
            Dictionary<Cell<Double>, Double> U = Util.create(mdp.GetStates(), 0.0);
            Dictionary<Cell<Double>, Double> Udelta = Util.create(mdp.GetStates(), 0.0);

            double delta = 0;

            // Convergence threshold: stop once the largest per-sweep change in
            // utility falls below epsilon * (1 - gamma) / gamma
            double minDelta = epsilon * (1 - gamma) / gamma;

            do
            {

                // Carry the utilities computed in the previous sweep into U
                Util.Merge(U, Udelta);

                delta = 0;

                foreach (Cell<Double> s in mdp.GetStates())
                {
                    HashSet<CS8803AGA.PsychSim.State.Action> actions = mdp.actions(s);

                    // Terminal states have no applicable actions, so their future utility stays 0
                    double aMax = 0;
                    if (actions.Count > 0)
                    {
                        aMax = Double.MinValue;
                    }
                    foreach (CS8803AGA.PsychSim.State.Action a in actions)
                    {

                        double aSum = 0;
                        foreach (Cell<Double> sDelta in mdp.GetStates())
                        {
                            aSum += mdp.transitionProbability(sDelta, s, a)
                                    * U[sDelta];
                        }
                        if (aSum > aMax)
                        {
                            aMax = aSum;
                        }
                    }
                    // Bellman update for state s
                    var val = mdp.reward(s) + gamma * aMax;
                    Udelta[s] = val;

                    // Track the largest utility change seen in this sweep
                    double aDiff = Math.Abs(Udelta[s] - U[s]);
                    if (aDiff > delta)
                    {
                        delta = aDiff;
                    }
                }

            } while (delta > minDelta);

            return U;
        }
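Each sweep above applies the Bellman update

U_{i+1}(s) = R(s) + \gamma \max_{a} \sum_{s'} P(s' \mid s, a) \, U_i(s')

and stops once \max_s |U_{i+1}(s) - U_i(s)| < \epsilon (1 - \gamma) / \gamma, the standard criterion ensuring (for \gamma < 1) that the estimated utilities are within \epsilon of the true values.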
        public static Dictionary<Cell<Double>, CS8803AGA.PsychSim.State.Action> initialPolicyVector(MDP mdp)
        {
            Dictionary<Cell<Double>, CS8803AGA.PsychSim.State.Action> pi = new Dictionary<Cell<Double>, CS8803AGA.PsychSim.State.Action>();
            List<CS8803AGA.PsychSim.State.Action> actions = new List<CS8803AGA.PsychSim.State.Action>();
            foreach (var s in mdp.GetStates())
            {
                // Collect the actions applicable in state s
                actions.Clear();
                foreach (var x in mdp.actions(s))
                {
                    actions.Add(x);
                }

                // Non-terminal states start with a randomly chosen applicable action
                if (actions.Count > 0)
                {
                    pi[s] = Util.selectRandomlyFromList(actions);
                }
            }
            return pi;
        }
        public MDPPolicyIteration()
        {
            // 3x3 state grid; val holds the contents for its nine cells
            S = new State<Double>(3, 3, 0);
            Double[] val = { 1, 0, -1, 2, -1, -2, 3, -2, -3 };

            S.setContent(val);

            mdp = new MDP(S.getCells(), S.getCellAt(1, 1), new ActionsFunction(S),
                   new TransitionProbabilityFunction(S),
                   new RewardFunction());

            double epsilon = 0.00001;

            PolicyEvaluation pev = new PolicyEvaluation(1000, epsilon);

            pi = new PolicyIteration(pev);

            policy = pi.policyIteration(mdp);
        }
        public static void values()
        {
            // 3x3 grid with a default cell value of -1.0,
            // a -100 penalty cell at (1, 3) and a +10 goal cell at (3, 3)
            State<Double> S = new State<Double>(3, 3, -1.0);
            double r = -100;
            double epsilon = 0.00001;
            S.getCellAt(1, 3).setContent(r);
            S.getCellAt(3, 3).setContent(10.0);

            MDP mdp = new MDP(S.getCells(), S.getCellAt(1, 3), new ActionsFunction(S),
                    new TransitionProbabilityFunction(S),
                    new RewardFunction());

            ValueIteration vi = new ValueIteration(0.99);
            Dictionary<Cell<Double>, Double> map = vi.valueIteration(mdp, epsilon);
            foreach (var c in map)
            {
                Console.Write(c.Key.getX() + " " + c.Key.getY() + ": ");
                Console.WriteLine(c.Value);
                Console.WriteLine();
            }
        }
        public static void values()
        {
            LookupPolicy policy = null;
            State<Double> S = new State<Double>(3, 3, 0);

            MDP mdp1 = new MDP(S.getCells(), S.getCellAt(1, 1), new ActionsFunction(S),
                               new TransitionProbabilityFunction(S),
                               new RewardFunction());

            State<Double> S1 = new State<Double>(3, 3, 0);
            S1.getCellAt(1, 1).setContent(-1);
            S1.getCellAt(1, 2).setContent(0);
            S1.getCellAt(1, 3).setContent(-1);

            S1.getCellAt(2, 1).setContent(-2);
            S1.getCellAt(2, 2).setContent(1);
            S1.getCellAt(2, 3).setContent(-2);

            S1.getCellAt(3, 1).setContent(-3);
            S1.getCellAt(3, 2).setContent(2);
            S1.getCellAt(3, 3).setContent(-3);

            MDP mdp2 = new MDP(S1.getCells(), S1.getCellAt(1, 1), new ActionsFunction(S1),
                                    new TransitionProbabilityFunction(S1),
                                    new RewardFunction());
            State<Double> S2 = new State<Double>(3, 3, 0);

            // double r = -100;
            double epsilon = 0.00001;
            S2.getCellAt(1, 1).setContent(1);
            S2.getCellAt(1, 2).setContent(0);
            S2.getCellAt(1, 3).setContent(-1);

            S2.getCellAt(2, 1).setContent(2);
            S2.getCellAt(2, 2).setContent(-1);
            S2.getCellAt(2, 3).setContent(-2);

            S2.getCellAt(3, 1).setContent(3);
            S2.getCellAt(3, 2).setContent(-2);
            S2.getCellAt(3, 3).setContent(-3);

            MDP mdp = new MDP(S2.getCells(), S2.getCellAt(1, 1), new ActionsFunction(S2),
                   new TransitionProbabilityFunction(S2),
                   new RewardFunction());

            PolicyEvaluationRecursive pev = new PolicyEvaluationRecursive(1000, epsilon);

            PolicyIterationRecursive pi = new PolicyIterationRecursive(pev);

            policy = pi.policyIteration(mdp, mdp1, mdp2);

            foreach (var s in S.getCells())
            {

                try
                {
                    CS8803AGA.PsychSim.State.Action a = policy.action(s);

                    Console.Write(s.getX() + " " + s.getY() + ": ");
                    Console.WriteLine(a.i);
                    Console.WriteLine();
                }
                catch (Exception)
                {
                    // No action defined for this state in the policy; skip it
                }
            }
        }