Exemplo n.º 1
0
        /// <summary>
        /// Runs <c>policyIteration(1)</c> on <c>fourByThreeMDP</c> and verifies the
        /// action the resulting policy returns for nine cell positions.
        /// </summary>
        /// <remarks>
        /// The expected actions are taken from AIMA2e Figure 17.2 (a). Positions
        /// (2, 2), (2, 4) and (3, 4) are not asserted by this test.
        /// </remarks>
        public void testPolicyIteration()
        {
            MDPPolicy<CellWorldPosition, string> computedPolicy = fourByThreeMDP.policyIteration(1);

            // AIMA2e check with Figure 17.2 (a)

            // Positions whose second coordinate is 1.
            Assert.AreEqual("up", computedPolicy.getAction(new CellWorldPosition(1, 1)));
            Assert.AreEqual("up", computedPolicy.getAction(new CellWorldPosition(2, 1)));
            Assert.AreEqual("right", computedPolicy.getAction(new CellWorldPosition(3, 1)));

            // Positions whose second coordinate is 2.
            Assert.AreEqual("left", computedPolicy.getAction(new CellWorldPosition(1, 2)));
            Assert.AreEqual("right", computedPolicy.getAction(new CellWorldPosition(3, 2)));

            // Positions whose second coordinate is 3.
            Assert.AreEqual("left", computedPolicy.getAction(new CellWorldPosition(1, 3)));
            Assert.AreEqual("up", computedPolicy.getAction(new CellWorldPosition(2, 3)));
            Assert.AreEqual("right", computedPolicy.getAction(new CellWorldPosition(3, 3)));

            // Position whose second coordinate is 4.
            Assert.AreEqual("left", computedPolicy.getAction(new CellWorldPosition(1, 4)));
        }
Exemplo n.º 2
0
 /// <summary>
 /// Creates the agent: hands <c>mdp.emptyMdp()</c> to the base constructor,
 /// stores the supplied policy, and starts with a newly constructed utility
 /// function and state frequency counter.
 /// </summary>
 /// <param name="mdp">MDP whose <c>emptyMdp()</c> result is passed to the base class.</param>
 /// <param name="policy">Policy stored on the agent.</param>
 public PassiveTDAgent(
     MDP<STATE_TYPE, ACTION_TYPE> mdp,
     MDPPolicy<STATE_TYPE, ACTION_TYPE> policy)
     : base(mdp.emptyMdp())
 {
     // The parameter shadows the member, so the qualifier is required here.
     this.policy = policy;

     utilityFunction = new MDPUtilityFunction<STATE_TYPE>();
     stateCount = new FrequencyCounter<STATE_TYPE>();
 }
Exemplo n.º 3
0
 /// <summary>
 /// Creates the agent: hands <c>mdp.EmptyMdp()</c> to the base constructor,
 /// stores the supplied policy, and starts with a newly constructed utility
 /// function and state frequency counter.
 /// </summary>
 /// <param name="mdp">MDP whose <c>EmptyMdp()</c> result is passed to the base class.</param>
 /// <param name="policy">Policy stored on the agent.</param>
 public PassiveTDAgent(
     MDP<TState, TAction> mdp,
     MDPPolicy<TState, TAction> policy)
     : base(mdp.EmptyMdp())
 {
     // The parameter shadows the member, so the qualifier is required here.
     this.policy = policy;

     utilityFunction = new MDPUtilityFunction<TState>();
     stateCount = new FrequencyCounter<TState>();
 }
Exemplo n.º 4
0
 /// <summary>
 /// Creates the agent: hands <c>mdp.emptyMdp()</c> to the base constructor,
 /// stores the supplied policy, and starts with a newly constructed utility
 /// function and two empty dictionaries (<c>nsa</c> and <c>nsasdash</c>).
 /// </summary>
 /// <param name="mdp">MDP whose <c>emptyMdp()</c> result is passed to the base class.</param>
 /// <param name="policy">Policy stored on the agent.</param>
 public PassiveADPAgent(
     MDP<STATE_TYPE, ACTION_TYPE> mdp,
     MDPPolicy<STATE_TYPE, ACTION_TYPE> policy)
     : base(mdp.emptyMdp())
 {
     // The parameter shadows the member, so the qualifier is required here.
     this.policy = policy;

     utilityFunction = new MDPUtilityFunction<STATE_TYPE>();

     // nsa is keyed by (state, action) pairs, nsasdash by MDP transitions.
     // NOTE(review): presumably these hold observation counts - confirm against their users.
     nsa = new Dictionary<Pair<STATE_TYPE, ACTION_TYPE>, double>();
     nsasdash = new Dictionary<MDPTransition<STATE_TYPE, ACTION_TYPE>, double>();
 }
Exemplo n.º 5
0
 /// <summary>
 /// Creates the agent: hands <c>mdp.EmptyMdp()</c> to the base constructor,
 /// stores the supplied policy, and starts with a newly constructed utility
 /// function and two empty dictionaries (<c>nsa</c> and <c>nsasdash</c>).
 /// </summary>
 /// <param name="mdp">MDP whose <c>EmptyMdp()</c> result is passed to the base class.</param>
 /// <param name="policy">Policy stored on the agent.</param>
 public PassiveADPAgent(
     MDP<TState, TAction> mdp,
     MDPPolicy<TState, TAction> policy)
     : base(mdp.EmptyMdp())
 {
     // The parameter shadows the member, so the qualifier is required here.
     this.policy = policy;

     utilityFunction = new MDPUtilityFunction<TState>();

     // nsa is keyed by (state, action) pairs, nsasdash by MDP transitions.
     // NOTE(review): presumably these hold observation counts - confirm against their users.
     nsa = new Dictionary<Pair<TState, TAction>, double>();
     nsasdash = new Dictionary<MDPTransition<TState, TAction>, double>();
 }
Exemplo n.º 6
0
        /// <summary>
        /// Builds a policy by pairing each state yielded by
        /// <c>GetAllStartingStates</c> with the action that
        /// <c>GetRecordedActionWithMaximumQValue</c> returns for it.
        /// </summary>
        /// <returns>
        /// A newly created policy on which <c>SetAction</c> has been called once
        /// for every state yielded.
        /// </returns>
        public MDPPolicy<TState, TAction> GetPolicy()
        {
            MDPPolicy<TState, TAction> result = new MDPPolicy<TState, TAction>();

            // The collection expression of a foreach is evaluated exactly once.
            foreach (TState startState in this.GetAllStartingStates())
            {
                TAction bestAction = this.GetRecordedActionWithMaximumQValue(startState);
                result.SetAction(startState, bestAction);
            }

            return result;
        }
Exemplo n.º 7
0
        /// <summary>
        /// Evaluates a random policy against the initial utility function of
        /// <c>fourByThreeMDP</c> and checks that the utility function returned by
        /// <c>policyEvaluation</c> does not equal the initial one.
        /// </summary>
        public void testPolicyEvaluation()
        {
            MDPPolicy<CellWorldPosition, string> randomPolicy = fourByThreeMDP.randomPolicy();
            MDPUtilityFunction<CellWorldPosition> initialUtilities = fourByThreeMDP.initialUtilityFunction();

            // NOTE(review): the meaning of the trailing arguments (1, 3) is not visible
            // here - confirm against policyEvaluation's signature.
            MDPUtilityFunction<CellWorldPosition> evaluatedUtilities =
                fourByThreeMDP.policyEvaluation(randomPolicy, initialUtilities, 1, 3);

            Assert.IsFalse(initialUtilities.Equals(evaluatedUtilities));
        }
Exemplo n.º 8
0
        /// <summary>
        /// Builds a policy by pairing each state returned by
        /// <c>getAllStartingStates</c> with the action that
        /// <c>getRecordedActionWithMaximumQValue</c> returns for it.
        /// </summary>
        /// <returns>
        /// A newly created policy on which <c>setAction</c> has been called once
        /// for every state in the returned list.
        /// </returns>
        public MDPPolicy<STATE_TYPE, ACTION_TYPE> getPolicy()
        {
            MDPPolicy<STATE_TYPE, ACTION_TYPE> result = new MDPPolicy<STATE_TYPE, ACTION_TYPE>();
            List<STATE_TYPE> recordedStarts = getAllStartingStates();

            foreach (STATE_TYPE start in recordedStarts)
            {
                result.setAction(start, getRecordedActionWithMaximumQValue(start));
            }

            return result;
        }
        /// <summary>
        /// Prepares the fixture: creates the four-by-three MDP through
        /// <c>MDPFactory</c> and fills <c>policy</c> with a fixed action for nine
        /// cell positions.
        /// </summary>
        /// <remarks>
        /// No action is set for positions (2, 2), (2, 4) and (3, 4).
        /// </remarks>
        public void setUp()
        {
            fourByThree = MDPFactory.createFourByThreeMDP();
            policy = new MDPPolicy<CellWorldPosition, string>();

            // First coordinate 1: UP for (1, 1), LEFT for the rest.
            policy.setAction(new CellWorldPosition(1, 1), CellWorld.UP);
            policy.setAction(new CellWorldPosition(1, 2), CellWorld.LEFT);
            policy.setAction(new CellWorldPosition(1, 3), CellWorld.LEFT);
            policy.setAction(new CellWorldPosition(1, 4), CellWorld.LEFT);

            // First coordinate 2: UP for both positions that get an action.
            policy.setAction(new CellWorldPosition(2, 1), CellWorld.UP);
            policy.setAction(new CellWorldPosition(2, 3), CellWorld.UP);

            // First coordinate 3: RIGHT throughout.
            policy.setAction(new CellWorldPosition(3, 1), CellWorld.RIGHT);
            policy.setAction(new CellWorldPosition(3, 2), CellWorld.RIGHT);
            policy.setAction(new CellWorldPosition(3, 3), CellWorld.RIGHT);
        }
Exemplo n.º 10
0
        /// <summary>
        /// Prepares the fixture: creates the four-by-three MDP through
        /// <c>MDPFactory</c> and fills <c>policy</c> with a fixed action for nine
        /// cell positions.
        /// </summary>
        /// <remarks>
        /// No action is set for positions (2, 2), (2, 4) and (3, 4).
        /// </remarks>
        public void setUp()
        {
            fourByThree = MDPFactory.createFourByThreeMDP();
            policy = new MDPPolicy<CellWorldPosition, string>();

            // First coordinate 1: UP for (1, 1), LEFT for the rest.
            policy.setAction(new CellWorldPosition(1, 1), CellWorld.UP);
            policy.setAction(new CellWorldPosition(1, 2), CellWorld.LEFT);
            policy.setAction(new CellWorldPosition(1, 3), CellWorld.LEFT);
            policy.setAction(new CellWorldPosition(1, 4), CellWorld.LEFT);

            // First coordinate 2: UP for both positions that get an action.
            policy.setAction(new CellWorldPosition(2, 1), CellWorld.UP);
            policy.setAction(new CellWorldPosition(2, 3), CellWorld.UP);

            // First coordinate 3: RIGHT throughout.
            policy.setAction(new CellWorldPosition(3, 1), CellWorld.RIGHT);
            policy.setAction(new CellWorldPosition(3, 2), CellWorld.RIGHT);
            policy.setAction(new CellWorldPosition(3, 3), CellWorld.RIGHT);
        }