// Simplified policy evaluation: performs k Bellman expectation backups,
// U_{i+1}(s) = R(s) + gamma * sum_{s'} P(s' | s, pi_i(s)) * U_i(s')
public Dictionary<Cell<Double>, Double> evaluate(
        Dictionary<Cell<Double>, CS8803AGA.PsychSim.State.Action> pi_i,
        Dictionary<Cell<Double>, Double> U,
        MDP mdp)
{
    Dictionary<Cell<Double>, Double> U_i = new Dictionary<Cell<Double>, Double>(U);
    Dictionary<Cell<Double>, Double> U_ip1 = new Dictionary<Cell<Double>, Double>(U);

    for (int i = 0; i < k; i++)
    {
        foreach (var s in U.Keys)
        {
            double aSum = 0;
            if (pi_i.ContainsKey(s))
            {
                // Expected utility of following the current policy's action from s
                CS8803AGA.PsychSim.State.Action ap_i = pi_i[s];
                foreach (var sDelta in U.Keys)
                {
                    aSum += mdp.transitionProbability(sDelta, s, ap_i) * U_i[sDelta];
                }
            }
            U_ip1[s] = mdp.reward(s) + gamma * aSum;
        }
        // Copy the freshly computed utilities into U_i for the next sweep
        Util.Merge(U_i, U_ip1);
    }
    return U_ip1;
}
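// Util.Merge is not part of this listing. The evaluation loop above (and valueIteration
// below) only needs it to copy every entry of the second dictionary into the first.
// A minimal sketch under that assumption; this is a hypothetical stand-in, not the
// project's actual Util class:
public static void Merge(Dictionary<Cell<Double>, Double> target,
                         Dictionary<Cell<Double>, Double> source)
{
    // Overwrite (or add) each key in target with the value from source
    foreach (var kv in source)
    {
        target[kv.Key] = kv.Value;
    }
}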
public POMDP(MDP mdp)
{
    this.mdp = mdp;
    belief = new State<Double>(3, 3, 0);
    sensor = new Sensor();
    // Initial belief over the nine grid cells
    belief.setContent(new double[] { 0.311, 0.111, 0.111, 0.111, 0.111, 0.111, 0.111, 0.111, 0 });
}
// Policy iteration: alternates policy evaluation with greedy policy improvement
// and stops once no state's action changes.
public LookupPolicy policyIteration(MDP mdp, MDP mdp1, MDP mdp2)
{
    Dictionary<Cell<Double>, Double> U = Util.create(mdp.GetStates(), 0.0);
    Dictionary<Cell<Double>, CS8803AGA.PsychSim.State.Action> pi = initialPolicyVector(mdp);
    bool unchanged;

    do
    {
        U = policyEvaluation.evaluate(pi, U, mdp, mdp1, mdp2);
        unchanged = true;

        foreach (var s in mdp.GetStates())
        {
            if (!pi.ContainsKey(s))
            {
                continue;
            }

            double aMax = Double.MinValue, piVal = 0;
            CS8803AGA.PsychSim.State.Action aArgmax = pi[s];

            foreach (var a in mdp.actions(s))
            {
                // Expected utility of taking action a in state s under U
                double aSum = 0;
                foreach (var sDelta in mdp.GetStates())
                {
                    aSum += mdp.transitionProbability(sDelta, s, a) * U[sDelta];
                }
                if (aSum > aMax)
                {
                    aMax = aSum;
                    aArgmax = a;
                }
                if (a.Equals(pi[s]))
                {
                    piVal = aSum;
                }
            }

            // Switch to the better action if one exists
            if (aMax > piVal)
            {
                pi[s] = aArgmax;
                unchanged = false;
            }
        }
    } while (!unchanged);

    return new LookupPolicy(pi);
}
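// Util.create is another helper that is assumed rather than shown. The iteration code
// only relies on it to build a utility table with every state mapped to the same starting
// value. A minimal sketch under that assumption (hypothetical, illustrative only):
public static Dictionary<Cell<Double>, Double> create(
        IEnumerable<Cell<Double>> states, Double initialValue)
{
    var map = new Dictionary<Cell<Double>, Double>();
    foreach (var s in states)
    {
        map[s] = initialValue;   // every state starts with the same utility
    }
    return map;
}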
// Value iteration: repeats Bellman optimality backups until the largest utility
// change in a sweep, delta, drops below epsilon * (1 - gamma) / gamma.
public Dictionary<Cell<Double>, Double> valueIteration(MDP mdp, double epsilon)
{
    Dictionary<Cell<Double>, Double> U = Util.create(mdp.GetStates(), 0.0);
    Dictionary<Cell<Double>, Double> Udelta = Util.create(mdp.GetStates(), 0.0);
    double delta = 0;
    double minDelta = epsilon * (1 - gamma) / gamma;

    do
    {
        // Promote the utilities computed in the previous sweep
        Util.Merge(U, Udelta);
        delta = 0;

        foreach (Cell<Double> s in mdp.GetStates())
        {
            HashSet<CS8803AGA.PsychSim.State.Action> actions = mdp.actions(s);

            // States with no actions (terminals) contribute only their reward
            double aMax = 0;
            if (actions.Count > 0)
            {
                aMax = Double.MinValue;
            }

            foreach (CS8803AGA.PsychSim.State.Action a in actions)
            {
                double aSum = 0;
                foreach (Cell<Double> sDelta in mdp.GetStates())
                {
                    aSum += mdp.transitionProbability(sDelta, s, a) * U[sDelta];
                }
                if (aSum > aMax)
                {
                    aMax = aSum;
                }
            }

            Udelta[s] = mdp.reward(s) + gamma * aMax;

            // Track the largest utility change in this sweep
            double aDiff = Math.Abs(Udelta[s] - U[s]);
            if (aDiff > delta)
            {
                delta = aDiff;
            }
        }
    } while (delta > minDelta);

    return U;
}
// Builds a starting policy by picking a random available action for every state
public static Dictionary<Cell<Double>, CS8803AGA.PsychSim.State.Action> initialPolicyVector(MDP mdp)
{
    Dictionary<Cell<Double>, CS8803AGA.PsychSim.State.Action> pi =
        new Dictionary<Cell<Double>, CS8803AGA.PsychSim.State.Action>();
    List<CS8803AGA.PsychSim.State.Action> actions = new List<CS8803AGA.PsychSim.State.Action>();

    foreach (var s in mdp.GetStates())
    {
        actions.Clear();
        foreach (var x in mdp.actions(s))
        {
            actions.Add(x);
        }
        // States with no available actions (terminals) are left out of the policy
        if (actions.Count > 0)
        {
            pi[s] = Util.selectRandomlyFromList(actions);
        }
    }
    return pi;
}
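// Util.selectRandomlyFromList is likewise assumed rather than shown; the policy
// initialiser only needs it to return one uniformly random element of a non-empty list.
// A minimal sketch under that assumption (hypothetical helper, names are illustrative):
private static readonly Random rng = new Random();

public static T selectRandomlyFromList<T>(List<T> items)
{
    // Uniform choice; callers guarantee the list is non-empty
    return items[rng.Next(items.Count)];
}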
public MDPPolicyIteration()
{
    // 3x3 grid with hand-set cell values
    S = new State<Double>(3, 3, 0);
    Double[] val = { 1, 0, -1, 2, -1, -2, 3, -2, -3 };
    S.setContent(val);

    mdp = new MDP(S.getCells(), S.getCellAt(1, 1),
                  new ActionsFunction(S),
                  new TransitionProbabilityFunction(S),
                  new RewardFunction());

    double epsilon = 0.00001;
    PolicyEvaluation pev = new PolicyEvaluation(1000, epsilon);
    pi = new PolicyIteration(pev);
    policy = pi.policyIteration(mdp);
}
public static void values()
{
    // 3x3 grid: default cell value -1.0, with a -100 cell at (1,3) and a +10 cell at (3,3)
    State<Double> S = new State<Double>(3, 3, -1.0);
    double r = -100;
    double epsilon = 0.00001;
    S.getCellAt(1, 3).setContent(r);
    S.getCellAt(3, 3).setContent(10.0);

    MDP mdp = new MDP(S.getCells(), S.getCellAt(1, 3),
                      new ActionsFunction(S),
                      new TransitionProbabilityFunction(S),
                      new RewardFunction());

    ValueIteration vi = new ValueIteration(0.99);
    Dictionary<Cell<Double>, Double> map = vi.valueIteration(mdp, epsilon);

    // Print the converged utility of every cell
    foreach (var c in map)
    {
        Console.Write(c.Key.getX() + " " + c.Key.getY() + ": ");
        Console.WriteLine(c.Value);
        Console.WriteLine();
    }
}
public static void values()
{
    LookupPolicy policy = null;

    // Base 3x3 grid, all cells 0
    State<Double> S = new State<Double>(3, 3, 0);
    MDP mdp1 = new MDP(S.getCells(), S.getCellAt(1, 1),
                       new ActionsFunction(S),
                       new TransitionProbabilityFunction(S),
                       new RewardFunction());

    // Second grid with hand-set cell values
    State<Double> S1 = new State<Double>(3, 3, 0);
    S1.getCellAt(1, 1).setContent(-1);
    S1.getCellAt(1, 2).setContent(0);
    S1.getCellAt(1, 3).setContent(-1);
    S1.getCellAt(2, 1).setContent(-2);
    S1.getCellAt(2, 2).setContent(1);
    S1.getCellAt(2, 3).setContent(-2);
    S1.getCellAt(3, 1).setContent(-3);
    S1.getCellAt(3, 2).setContent(2);
    S1.getCellAt(3, 3).setContent(-3);
    MDP mdp2 = new MDP(S1.getCells(), S1.getCellAt(1, 1),
                       new ActionsFunction(S1),
                       new TransitionProbabilityFunction(S1),
                       new RewardFunction());

    // Third grid with hand-set cell values
    State<Double> S2 = new State<Double>(3, 3, 0);
    // double r = -100;
    double epsilon = 0.00001;
    S2.getCellAt(1, 1).setContent(1);
    S2.getCellAt(1, 2).setContent(0);
    S2.getCellAt(1, 3).setContent(-1);
    S2.getCellAt(2, 1).setContent(2);
    S2.getCellAt(2, 2).setContent(-1);
    S2.getCellAt(2, 3).setContent(-2);
    S2.getCellAt(3, 1).setContent(3);
    S2.getCellAt(3, 2).setContent(-2);
    S2.getCellAt(3, 3).setContent(-3);
    MDP mdp = new MDP(S2.getCells(), S2.getCellAt(1, 1),
                      new ActionsFunction(S2),
                      new TransitionProbabilityFunction(S2),
                      new RewardFunction());

    PolicyEvaluationRecursive pev = new PolicyEvaluationRecursive(1000, epsilon);
    PolicyIterationRecursive pi = new PolicyIterationRecursive(pev);
    policy = pi.policyIteration(mdp, mdp1, mdp2);

    // Print the chosen action for every cell
    foreach (var s in S.getCells())
    {
        try
        {
            CS8803AGA.PsychSim.State.Action a = policy.action(s);
            Console.Write(s.getX() + " " + s.getY() + ": ");
            Console.WriteLine(a.i);
            Console.WriteLine();
        }
        catch (Exception e)
        {
            // No action defined for this cell; skip it
        }
    }
}