Example #1
        public State<double> updateBelief(CS8803AGA.PsychSim.State.Action a, double evidence)
        {
            setBelief();

            // Prediction step: temp(s') = sum over s of P(s' | s, a) * b(s),
            // assuming transitionProbability(sDelta, s, a) returns P(sDelta | s, a).
            State<double> temp = new State<double>(3, 3, 0);

            foreach (var s in mdp.GetStates())
            {
                foreach (var sDelta in mdp.GetStates())
                {
                    var t = temp.getCellAt(sDelta.getX(), sDelta.getY());
                    double x = t.getContent();
                    x += mdp.transitionProbability(sDelta, s, a) * belief.getCellAt(s.getX(), s.getY()).getContent();
                    t.setContent(x);
                }
            }

            // Normalization constant; hard-coded here rather than recomputed so that
            // the updated belief sums to one.
            double alpha = 1.56;

            // Correction step: b'(s') = alpha * P(e | s') * temp(s').
            foreach (var s in mdp.GetStates())
            {
                var b = belief.getCellAt(s.getX(), s.getY());
                double x = alpha * sensor.senses(evidence, s) * temp.getCellAt(s.getX(), s.getY()).getContent();
                b.setContent(x);
            }

            return belief;
        }
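For reference, the update above follows the standard POMDP belief-filtering step,

    b'(s') = \alpha \, P(e \mid s') \sum_{s} P(s' \mid s, a) \, b(s),

where the first loop accumulates the prediction term via mdp.transitionProbability, sensor.senses supplies the observation likelihood P(e | s'), and \alpha is the normalizing constant that should make the updated belief sum to one (here it is a hard-coded value rather than being recomputed from the unnormalized beliefs).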
Example #2
        public static MDP createMDP(State<Double> state)
        {
            return new MDP(state.getCells(),
                    state.getCellAt(1, 1), new ActionsFunction(state),
                    new TransitionProbabilityFunction(state),
                    new RewardFunction());
        }
        public static void values()
        {
            // 3x3 grid whose cells all start with content -1.0.
            State<Double> S = new State<Double>(3, 3, -1.0);
            double r = -100;
            double epsilon = 0.00001;

            // Reward of -100 at cell (1, 3) and +10 at cell (3, 3).
            S.getCellAt(1, 3).setContent(r);
            S.getCellAt(3, 3).setContent(10.0);

            // MDP over the grid (the second argument is presumably the initial state cell).
            MDP mdp = new MDP(S.getCells(), S.getCellAt(1, 3), new ActionsFunction(S),
                    new TransitionProbabilityFunction(S),
                    new RewardFunction());

            // Run value iteration (0.99 is presumably the discount factor) to tolerance epsilon
            // and print the resulting utility of every cell.
            ValueIteration vi = new ValueIteration(0.99);
            Dictionary<Cell<Double>, Double> map = vi.valueIteration(mdp, epsilon);
            foreach (var c in map)
            {
                Console.Write(c.Key.getX() + " " + c.Key.getY() + ": ");
                Console.WriteLine(c.Value);
                Console.WriteLine();
            }
        }
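The values() method above relies on the library's ValueIteration class. As a rough, standalone illustration of the computation it performs, here is a minimal value-iteration sketch in plain C#: the Bellman backup V(s) <- max_a sum_{s'} P(s'|s,a) (R(s,a,s') + gamma V(s')) is repeated until the values stop changing. The 2-state, 2-action MDP, its probabilities, and its rewards are invented purely for illustration; none of the library types (MDP, State, Cell) are used.

using System;

// A minimal, self-contained value-iteration sketch (not the library's ValueIteration class).
class ValueIterationSketch
{
    static void Main()
    {
        const int nStates = 2, nActions = 2;
        double gamma = 0.99, epsilon = 0.00001;

        // P[a, s, s2] = transition probability, R[a, s, s2] = reward (invented numbers).
        double[,,] P =
        {
            { { 0.9, 0.1 }, { 0.2, 0.8 } },   // action 0
            { { 0.5, 0.5 }, { 0.0, 1.0 } }    // action 1
        };
        double[,,] R =
        {
            { { 0.0, 1.0 }, { 0.0, 1.0 } },   // action 0
            { { 0.0, 2.0 }, { 0.0, 0.0 } }    // action 1
        };

        var V = new double[nStates];
        double delta;
        do
        {
            delta = 0.0;
            for (int s = 0; s < nStates; s++)
            {
                // Bellman backup: best expected return over all actions from state s.
                double best = double.NegativeInfinity;
                for (int a = 0; a < nActions; a++)
                {
                    double q = 0.0;
                    for (int s2 = 0; s2 < nStates; s2++)
                        q += P[a, s, s2] * (R[a, s, s2] + gamma * V[s2]);
                    best = Math.Max(best, q);
                }
                delta = Math.Max(delta, Math.Abs(best - V[s]));
                V[s] = best;   // in-place update within the sweep
            }
        } while (delta > epsilon);

        for (int s = 0; s < nStates; s++)
            Console.WriteLine($"V({s}) = {V[s]}");
    }
}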
        public MDPPolicyIteration()
        {
            S = new State<Double>(3, 3, 0);

            // Contents for the nine cells of the 3x3 grid.
            Double[] val = { 1, 0, -1, 2, -1, -2, 3, -2, -3 };

            S.setContent(val);

            mdp = new MDP(S.getCells(), S.getCellAt(1, 1), new ActionsFunction(S),
                   new TransitionProbabilityFunction(S),
                   new RewardFunction());

            double epsilon = 0.00001;

            // Policy evaluation (presumably capped at 1000 sweeps, with tolerance epsilon),
            // followed by policy iteration to convergence.
            PolicyEvaluation pev = new PolicyEvaluation(1000, epsilon);

            pi = new PolicyIteration(pev);

            policy = pi.policyIteration(mdp);
        }
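The constructor above wires together standard policy iteration: policy evaluation solves the fixed-policy Bellman equation

    V^{\pi}(s) = \sum_{s'} P(s' \mid s, \pi(s)) \, [\, R(s') + \gamma \, V^{\pi}(s') \,],

and policy improvement then makes the policy greedy with respect to the resulting values,

    \pi(s) \leftarrow \arg\max_{a} \sum_{s'} P(s' \mid s, a) \, V^{\pi}(s'),

repeating until the policy no longer changes. The exact reward and discount arguments depend on how the library's RewardFunction and PolicyEvaluation are defined; the formulas here are only the textbook version of the algorithm.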
        public static void values()
        {
            LookupPolicy policy = null;
            // First 3x3 grid (all zeros); mdp1 is built over it below.
            State<Double> S = new State<Double>(3, 3, 0);

            MDP mdp1 = new MDP(S.getCells(), S.getCellAt(1, 1), new ActionsFunction(S),
                               new TransitionProbabilityFunction(S),
                               new RewardFunction());

            // Second grid with a different reward layout, filled cell by cell.
            State<Double> S1 = new State<Double>(3, 3, 0);
            S1.getCellAt(1, 1).setContent(-1);
            S1.getCellAt(1, 2).setContent(0);
            S1.getCellAt(1, 3).setContent(-1);

            S1.getCellAt(2, 1).setContent(-2);
            S1.getCellAt(2, 2).setContent(1);
            S1.getCellAt(2, 3).setContent(-2);

            S1.getCellAt(3, 1).setContent(-3);
            S1.getCellAt(3, 2).setContent(2);
            S1.getCellAt(3, 3).setContent(-3);

            MDP mdp2 = new MDP(S1.getCells(), S1.getCellAt(1, 1), new ActionsFunction(S1),
                                    new TransitionProbabilityFunction(S1),
                                    new RewardFunction());
            // Third grid with another reward layout, plus the convergence tolerance
            // used by the recursive policy evaluation below.
            State<Double> S2 = new State<Double>(3, 3, 0);
            double epsilon = 0.00001;

            S2.getCellAt(1, 1).setContent(1);
            S2.getCellAt(1, 2).setContent(0);
            S2.getCellAt(1, 3).setContent(-1);

            S2.getCellAt(2, 1).setContent(2);
            S2.getCellAt(2, 2).setContent(-1);
            S2.getCellAt(2, 3).setContent(-2);

            S2.getCellAt(3, 1).setContent(3);
            S2.getCellAt(3, 2).setContent(-2);
            S2.getCellAt(3, 3).setContent(-3);

            MDP mdp = new MDP(S2.getCells(), S2.getCellAt(1, 1), new ActionsFunction(S2),
                   new TransitionProbabilityFunction(S2),
                   new RewardFunction());

            // Run recursive policy iteration across the three MDPs.
            PolicyEvaluationRecursive pev = new PolicyEvaluationRecursive(1000, epsilon);

            PolicyIterationRecursive pi = new PolicyIterationRecursive(pev);

            policy = pi.policyIteration(mdp, mdp1, mdp2);

            // Print the chosen action for each cell of the first grid.
            foreach (var s in S.getCells())
            {

                try
                {
                    CS8803AGA.PsychSim.State.Action a = policy.action(s);

                    Console.Write(s.getX() + " " + s.getY() + ": ");
                    Console.WriteLine(a.i);
                    Console.WriteLine();
                }
                catch (Exception)
                {
                    // No action is defined for this cell (e.g., a terminal state); skip it.
                }
            }
        }
        public ActionsFunction(State<Double> state)
        {
            // Mark cell (3, 3) as a terminal state (added to the terminals set).
            Cell<Double> c = state.getCellAt(3, 3);
            terminals.Add(c);
        }