/// <summary>
        /// Estimates the utility of every state under a fixed policy by running
        /// <c>k</c> sweeps of the update
        /// <c>U'(s) = reward(s) + gamma * sum over s' of P(s' | s, pi(s)) * U(s')</c>.
        /// <c>k</c> and <c>gamma</c> are members declared outside this method.
        /// </summary>
        /// <param name="pi_i">
        /// Policy to evaluate, mapping a state to the action chosen in it. A state
        /// with no entry gets an expected future utility of zero, so its new
        /// value is just its reward.
        /// </param>
        /// <param name="U">
        /// Starting utility estimates. Its keys define the set of states swept;
        /// this method only reads it and works on copies.
        /// </param>
        /// <param name="mdp">Provides <c>reward</c> and <c>transitionProbability</c>.</param>
        /// <returns>
        /// A new dictionary with the utilities written by the last sweep, or a plain
        /// copy of <paramref name="U"/> when no sweep runs.
        /// </returns>
        public Dictionary<Cell<Double>, Double> evaluate(Dictionary<Cell<Double>, CS8803AGA.PsychSim.State.Action> pi_i, Dictionary<Cell<Double>, Double> U,
            MDP mdp)
        {
            // U_i holds the values read during a sweep, U_ip1 the values written,
            // so every state in a sweep is computed from the same snapshot.
            Dictionary<Cell<Double>, Double> U_i = new Dictionary<Cell<Double>, Double>(U);
            Dictionary<Cell<Double>, Double> U_ip1 = new Dictionary<Cell<Double>, Double>(U);

            for (int i = 0; i < k; i++)
            {
                foreach (var s in U.Keys)
                {
                    double aSum = 0;

                    // One TryGetValue replaces the ContainsKey + indexer pair,
                    // avoiding a second hash lookup for every state.
                    CS8803AGA.PsychSim.State.Action ap_i;
                    if (pi_i.TryGetValue(s, out ap_i))
                    {
                        // Expected utility of the successor states when following the policy from s.
                        foreach (var sDelta in U.Keys)
                        {
                            aSum += mdp.transitionProbability(sDelta, s, ap_i)
                                    * U_i[sDelta];
                        }
                    }

                    U_ip1[s] = (mdp.reward(s) + gamma * aSum);
                }

                // NOTE(review): presumably copies the entries of U_ip1 into U_i so the
                // next sweep reads the fresh values - confirm against Util.Merge.
                Util.Merge(U_i, U_ip1);
            }

            return U_ip1;
        }
        /// <summary>
        /// Runs value iteration on <paramref name="mdp"/>. Each sweep recomputes every
        /// state's utility as <c>reward(s) + gamma * (best expected utility over the
        /// actions available in s)</c>, and sweeps repeat while the largest per-state
        /// change exceeds <c>epsilon * (1 - gamma) / gamma</c>.
        /// <c>gamma</c> is a member declared outside this method.
        /// </summary>
        /// <param name="mdp">
        /// Provides the states, the actions per state, the rewards and the
        /// transition probabilities.
        /// </param>
        /// <param name="epsilon">Error bound used to derive the stopping threshold.</param>
        /// <returns>
        /// The dictionary the sweeps read from. Values written during the final sweep
        /// stay in the scratch dictionary and are not merged into the result.
        /// </returns>
        public Dictionary<Cell<Double>, Double> valueIteration(MDP mdp, double epsilon)
        {
            // "current" is read during a sweep; "pending" collects that sweep's results.
            Dictionary<Cell<Double>, Double> current = Util.create(mdp.GetStates(), 0.0);
            Dictionary<Cell<Double>, Double> pending = Util.create(mdp.GetStates(), 0.0);

            double threshold = epsilon * (1 - gamma) / gamma;
            double largestChange;

            do
            {
                // NOTE(review): presumably copies the entries of "pending" into
                // "current" - confirm against Util.Merge.
                Util.Merge(current, pending);
                largestChange = 0;

                foreach (Cell<Double> state in mdp.GetStates())
                {
                    HashSet<CS8803AGA.PsychSim.State.Action> available = mdp.actions(state);

                    // A state with no actions contributes zero future utility; otherwise
                    // start from the lowest finite double so any real sum replaces it.
                    double best = available.Count > 0 ? Double.MinValue : 0;

                    foreach (CS8803AGA.PsychSim.State.Action action in available)
                    {
                        double expected = 0;
                        foreach (Cell<Double> successor in mdp.GetStates())
                        {
                            expected += mdp.transitionProbability(successor, state, action)
                                        * current[successor];
                        }

                        best = expected > best ? expected : best;
                    }

                    var updated = mdp.reward(state) + gamma * best;
                    pending[state] = updated;

                    double change = Math.Abs(pending[state] - current[state]);
                    largestChange = change > largestChange ? change : largestChange;
                }
            }
            while (largestChange > threshold);

            return current;
        }