public LookupPolicy policyIteration(MDP mdp, MDP mdp1, MDP mdp2)
        {
            // Modified policy iteration: alternate policy evaluation and policy
            // improvement until the policy stops changing.
            // mdp1/mdp2 are forwarded to the evaluator (multi-MDP evaluation);
            // action selection itself is driven by mdp only.
            Dictionary<Cell<Double>, Double> U = Util.create(mdp.GetStates(), 0.0);

            Dictionary<Cell<Double>, CS8803AGA.PsychSim.State.Action> pi = initialPolicyVector(mdp);
            bool unchanged;

            do {

            // Estimate utilities of the current policy.
            U = policyEvaluation.evaluate(pi, U, mdp, mdp1, mdp2);

            unchanged = true;

            foreach (var s in mdp.GetStates())
            {
                // States with no available actions never enter pi
                // (see initialPolicyVector), so skip them here.
                CS8803AGA.PsychSim.State.Action currentAction;
                if (!pi.TryGetValue(s, out currentAction))
                {
                    continue;
                }

                double aMax = Double.MinValue, piVal = 0;

                CS8803AGA.PsychSim.State.Action aArgmax = currentAction;
                foreach (var a in mdp.actions(s))
                {
                    // Expected utility of taking action a in state s.
                    double aSum = 0;
                    foreach (var sDelta in mdp.GetStates())
                    {
                        aSum += mdp.transitionProbability(sDelta, s, a)
                                * U[sDelta];
                    }
                    if (aSum > aMax)
                    {
                        aMax = aSum;
                        aArgmax = a;
                    }

                    // Track the value of the currently-assigned action so we
                    // only switch when a strictly better one exists.
                    if (a.Equals(currentAction))
                    {
                        piVal = aSum;
                    }
                }

                if (aMax > piVal)
                {
                    pi[s] = aArgmax;

                    unchanged = false;
                }
            }

            } while (!unchanged);

            return new LookupPolicy(pi);
        }
        /// <summary>
        /// Value iteration: repeatedly applies the Bellman update until the
        /// largest per-state utility change drops below the convergence
        /// threshold epsilon * (1 - gamma) / gamma.
        /// </summary>
        /// <param name="mdp">The MDP providing states, actions, transitions and rewards.</param>
        /// <param name="epsilon">Maximum allowed error in the returned utilities.</param>
        /// <returns>A utility estimate for every state of the MDP.</returns>
        public Dictionary<Cell<Double>, Double> valueIteration(MDP mdp, double epsilon)
        {
            Dictionary<Cell<Double>, Double> utilities = Util.create(mdp.GetStates(), 0.0);
            Dictionary<Cell<Double>, Double> updated = Util.create(mdp.GetStates(), 0.0);

            double maxChange = 0;
            double threshold = epsilon * (1 - gamma) / gamma;

            do
            {
                // Adopt the previous sweep's estimates before recomputing.
                Util.Merge(utilities, updated);
                maxChange = 0;

                foreach (Cell<Double> state in mdp.GetStates())
                {
                    HashSet<CS8803AGA.PsychSim.State.Action> available = mdp.actions(state);

                    // A state with no actions contributes 0 future utility;
                    // otherwise start the max below any reachable value.
                    double best = available.Count > 0 ? Double.MinValue : 0.0;

                    foreach (CS8803AGA.PsychSim.State.Action action in available)
                    {
                        // Expected utility of this action over all successors.
                        double expected = 0;
                        foreach (Cell<Double> successor in mdp.GetStates())
                        {
                            expected += mdp.transitionProbability(successor, state, action)
                                        * utilities[successor];
                        }
                        if (expected > best)
                        {
                            best = expected;
                        }
                    }

                    // Bellman update for this state.
                    updated[state] = mdp.reward(state) + gamma * best;

                    double change = Math.Abs(updated[state] - utilities[state]);
                    if (change > maxChange)
                    {
                        maxChange = change;
                    }
                }

            } while (maxChange > threshold);

            return utilities;
        }
        /// <summary>
        /// Builds an initial policy mapping every state that has at least one
        /// available action to a uniformly random choice among those actions.
        /// </summary>
        /// <param name="mdp">The MDP whose states and actions seed the policy.</param>
        /// <returns>
        /// A state-to-action dictionary; states with no actions (terminals)
        /// are omitted, so callers must check membership before indexing.
        /// </returns>
        public static Dictionary<Cell<Double>, CS8803AGA.PsychSim.State.Action> initialPolicyVector(MDP mdp)
        {
            Dictionary<Cell<Double>, CS8803AGA.PsychSim.State.Action> pi = new Dictionary<Cell<Double>, CS8803AGA.PsychSim.State.Action>();
            // Scratch list reused across states (selectRandomlyFromList needs
            // indexed access, which the action set does not provide).
            List<CS8803AGA.PsychSim.State.Action> actions = new List<CS8803AGA.PsychSim.State.Action>();

            foreach (var s in mdp.GetStates())
            {
                actions.Clear();
                actions.AddRange(mdp.actions(s));

                if (actions.Count > 0)
                {
                    pi[s] = Util.selectRandomlyFromList(actions);
                }
            }
            return pi;
        }