/// <summary>
/// Policy iteration: repeatedly evaluates the current policy and then greedily
/// improves it, stopping once a full sweep over the states changes no action.
/// </summary>
/// <param name="mdp">Supplies the states, the actions available in each state and the
/// transition probabilities used by the improvement step.</param>
/// <param name="mdp1">Not read here; only forwarded to <c>policyEvaluation.evaluate</c>.</param>
/// <param name="mdp2">Not read here; only forwarded to <c>policyEvaluation.evaluate</c>.</param>
/// <returns>A <see cref="LookupPolicy"/> built from the final state-to-action table.</returns>
public LookupPolicy policyIteration(MDP mdp, MDP mdp1, MDP mdp2)
{
    Dictionary<Cell<Double>, Double> U = Util.create(mdp.GetStates(), new Double());
    Dictionary<Cell<Double>, CS8803AGA.PsychSim.State.Action> pi = initialPolicyVector(mdp);
    bool unchanged;
    do
    {
        // Refresh the utility estimates for the policy as it currently stands.
        U = policyEvaluation.evaluate(pi, U, mdp, mdp1, mdp2);
        unchanged = true;
        foreach (var s in mdp.GetStates())
        {
            // States without a policy entry are skipped. A single lookup yields the
            // current action, which stays fixed while the candidates are scored.
            CS8803AGA.PsychSim.State.Action currentAction;
            if (!pi.TryGetValue(s, out currentAction))
            {
                continue;
            }

            // Seed with negative infinity so the seed can never outrank a computed
            // value; a finite seed could beat an all -Infinity state and report a
            // policy change that did not happen, keeping the outer loop alive.
            double aMax = Double.NegativeInfinity, piVal = 0;
            CS8803AGA.PsychSim.State.Action aArgmax = currentAction;
            foreach (var a in mdp.actions(s))
            {
                // Expected utility of taking a in s: sum over s' of P(s'|s,a) * U[s'].
                double aSum = 0;
                foreach (var sDelta in mdp.GetStates())
                {
                    aSum += mdp.transitionProbability(sDelta, s, a) * U[sDelta];
                }
                if (aSum > aMax)
                {
                    aMax = aSum;
                    aArgmax = a;
                }
                if (a.Equals(currentAction))
                {
                    piVal = aSum;
                }
            }

            // Switch only on a strict improvement over the current action's value.
            if (aMax > piVal)
            {
                pi[s] = aArgmax;
                unchanged = false;
            }
        }
    } while (!unchanged);
    return new LookupPolicy(pi);
}
/// <summary>
/// Value iteration: sweeps over all states applying the update
/// reward(s) + gamma * max over actions of the expected utility, until the largest
/// per-state change in a sweep is no greater than epsilon * (1 - gamma) / gamma.
/// </summary>
/// <param name="mdp">Supplies states, actions, rewards and transition probabilities.</param>
/// <param name="epsilon">Error bound used to derive the stopping threshold.</param>
/// <returns>The table <c>U</c> as last refreshed by <c>Util.Merge</c>, i.e. the values
/// the final sweep started from (not the values that sweep produced).</returns>
public Dictionary<Cell<Double>, Double> valueIteration(MDP mdp, double epsilon)
{
    Dictionary<Cell<Double>, Double> current = Util.create(mdp.GetStates(), 0.0);
    Dictionary<Cell<Double>, Double> next = Util.create(mdp.GetStates(), 0.0);
    double largestChange = 0;
    // gamma is a member defined outside this method.
    double threshold = epsilon * (1 - gamma) / gamma;

    do
    {
        // NOTE(review): presumably copies the entries of next into current - confirm in Util.
        Util.Merge(current, next);
        largestChange = 0;

        foreach (Cell<Double> state in mdp.GetStates())
        {
            HashSet<CS8803AGA.PsychSim.State.Action> available = mdp.actions(state);

            // A state with no actions contributes 0; otherwise start below any candidate.
            double best = available.Count > 0 ? Double.MinValue : 0.0;
            foreach (CS8803AGA.PsychSim.State.Action action in available)
            {
                double expected = 0;
                foreach (Cell<Double> successor in mdp.GetStates())
                {
                    expected += mdp.transitionProbability(successor, state, action) * current[successor];
                }
                if (expected > best)
                {
                    best = expected;
                }
            }

            double updated = mdp.reward(state) + gamma * best;
            next[state] = updated;

            // Track the largest absolute change seen in this sweep.
            double change = Math.Abs(updated - current[state]);
            if (change > largestChange)
            {
                largestChange = change;
            }
        }
    } while (largestChange > threshold);

    return current;
}
/// <summary>
/// Builds a starting policy by picking, for every state that has at least one
/// action, one of its actions via <c>Util.selectRandomlyFromList</c>.
/// </summary>
/// <param name="mdp">Supplies the states and the actions available in each state.</param>
/// <returns>A state-to-action table; states with no actions get no entry.</returns>
public static Dictionary<Cell<Double>, CS8803AGA.PsychSim.State.Action> initialPolicyVector(MDP mdp)
{
    Dictionary<Cell<Double>, CS8803AGA.PsychSim.State.Action> pi =
        new Dictionary<Cell<Double>, CS8803AGA.PsychSim.State.Action>();
    // Scratch list reused across states; the selection helper takes a list.
    List<CS8803AGA.PsychSim.State.Action> actions = new List<CS8803AGA.PsychSim.State.Action>();
    foreach (var s in mdp.GetStates())
    {
        actions.Clear();
        actions.AddRange(mdp.actions(s));
        if (actions.Count > 0)
        {
            pi[s] = Util.selectRandomlyFromList(actions);
        }
    }
    return pi;
}