/// <summary>
/// Runs policy iteration on the four-by-three MDP fixture and checks the
/// action chosen for nine cells against the expected policy.
/// </summary>
public void testPolicyIteration()
{
    // NOTE(review): the meaning of the argument 1 is not visible here
    // (presumably the discount factor) - confirm against policyIteration.
    MDPPolicy<CellWorldPosition, String> policy = fourByThreeMDP.policyIteration(1);

    // AIMA2e check with Figure 17.2 (a).
    // This comment must stay on its own line: a trailing "//" comment placed
    // before the assertions on the same line turns all of them (and the
    // closing brace) into comment text.
    Assert.AreEqual("up", policy.getAction(new CellWorldPosition(1, 1)));
    Assert.AreEqual("up", policy.getAction(new CellWorldPosition(2, 1)));
    Assert.AreEqual("right", policy.getAction(new CellWorldPosition(3, 1)));

    Assert.AreEqual("left", policy.getAction(new CellWorldPosition(1, 2)));
    Assert.AreEqual("right", policy.getAction(new CellWorldPosition(3, 2)));

    Assert.AreEqual("left", policy.getAction(new CellWorldPosition(1, 3)));
    Assert.AreEqual("up", policy.getAction(new CellWorldPosition(2, 3)));
    Assert.AreEqual("right", policy.getAction(new CellWorldPosition(3, 3)));

    Assert.AreEqual("left", policy.getAction(new CellWorldPosition(1, 4)));
}
/// <summary>
/// Builds the agent on top of whatever <c>mdp.emptyMdp()</c> returns and
/// remembers the policy it was given.
/// </summary>
/// <param name="mdp">MDP whose <c>emptyMdp()</c> result is handed to the base constructor.</param>
/// <param name="policy">Policy stored on the agent.</param>
public PassiveTDAgent(
    MDP<STATE_TYPE, ACTION_TYPE> mdp,
    MDPPolicy<STATE_TYPE, ACTION_TYPE> policy)
    : base(mdp.emptyMdp())
{
    // Fresh, empty bookkeeping structures for this agent.
    utilityFunction = new MDPUtilityFunction<STATE_TYPE>();
    stateCount = new FrequencyCounter<STATE_TYPE>();

    // "this." is required here because the parameter shadows the field.
    this.policy = policy;
}
/// <summary>
/// Builds the agent on top of whatever <c>mdp.EmptyMdp()</c> returns and
/// remembers the policy it was given.
/// </summary>
/// <param name="mdp">MDP whose <c>EmptyMdp()</c> result is handed to the base constructor.</param>
/// <param name="policy">Policy stored on the agent.</param>
public PassiveTDAgent(
    MDP<TState, TAction> mdp,
    MDPPolicy<TState, TAction> policy)
    : base(mdp.EmptyMdp())
{
    // Fresh, empty bookkeeping structures for this agent.
    utilityFunction = new MDPUtilityFunction<TState>();
    stateCount = new FrequencyCounter<TState>();

    // "this." is required here because the parameter shadows the field.
    this.policy = policy;
}
/// <summary>
/// Builds the agent on top of whatever <c>mdp.emptyMdp()</c> returns, stores
/// the given policy, and starts with an empty utility function and two
/// empty count tables.
/// </summary>
/// <param name="mdp">MDP whose <c>emptyMdp()</c> result is handed to the base constructor.</param>
/// <param name="policy">Policy stored on the agent.</param>
public PassiveADPAgent(
    MDP<STATE_TYPE, ACTION_TYPE> mdp,
    MDPPolicy<STATE_TYPE, ACTION_TYPE> policy)
    : base(mdp.emptyMdp())
{
    // Table keyed by (state, action) pair.
    nsa = new Dictionary<Pair<STATE_TYPE, ACTION_TYPE>, Double>();

    // Table keyed by full transition.
    nsasdash = new Dictionary<MDPTransition<STATE_TYPE, ACTION_TYPE>, Double>();

    utilityFunction = new MDPUtilityFunction<STATE_TYPE>();

    // "this." is required here because the parameter shadows the field.
    this.policy = policy;
}
/// <summary>
/// Builds the agent on top of whatever <c>mdp.EmptyMdp()</c> returns, stores
/// the given policy, and starts with an empty utility function and two
/// empty count tables.
/// </summary>
/// <param name="mdp">MDP whose <c>EmptyMdp()</c> result is handed to the base constructor.</param>
/// <param name="policy">Policy stored on the agent.</param>
public PassiveADPAgent(
    MDP<TState, TAction> mdp,
    MDPPolicy<TState, TAction> policy)
    : base(mdp.EmptyMdp())
{
    // Table keyed by (state, action) pair.
    nsa = new Dictionary<Pair<TState, TAction>, double>();

    // Table keyed by full transition.
    nsasdash = new Dictionary<MDPTransition<TState, TAction>, double>();

    utilityFunction = new MDPUtilityFunction<TState>();

    // "this." is required here because the parameter shadows the field.
    this.policy = policy;
}
/// <summary>
/// Assembles a policy that maps every recorded starting state to the action
/// returned by <c>GetRecordedActionWithMaximumQValue</c> for that state.
/// </summary>
/// <returns>A newly created policy with one entry per recorded starting state.</returns>
public MDPPolicy<TState, TAction> GetPolicy()
{
    MDPPolicy<TState, TAction> result = new MDPPolicy<TState, TAction>();

    foreach (TState start in this.GetAllStartingStates())
    {
        result.SetAction(start, this.GetRecordedActionWithMaximumQValue(start));
    }

    return result;
}
/// <summary>
/// Evaluates a random policy on the four-by-three MDP fixture and checks
/// that the returned utility function is not equal to the initial one.
/// </summary>
public void testPolicyEvaluation()
{
    MDPPolicy<CellWorldPosition, String> randomPolicy =
        fourByThreeMDP.randomPolicy();

    MDPUtilityFunction<CellWorldPosition> initial =
        fourByThreeMDP.initialUtilityFunction();

    // NOTE(review): the meaning of the trailing arguments (1, 3) is not
    // visible here - confirm against policyEvaluation's signature.
    MDPUtilityFunction<CellWorldPosition> evaluated =
        fourByThreeMDP.policyEvaluation(randomPolicy, initial, 1, 3);

    Assert.IsFalse(initial.Equals(evaluated));
}
/// <summary>
/// Assembles a policy that maps every recorded starting state to the action
/// returned by <c>getRecordedActionWithMaximumQValue</c> for that state.
/// </summary>
/// <returns>A newly created policy with one entry per recorded starting state.</returns>
public MDPPolicy<STATE_TYPE, ACTION_TYPE> getPolicy()
{
    MDPPolicy<STATE_TYPE, ACTION_TYPE> result =
        new MDPPolicy<STATE_TYPE, ACTION_TYPE>();

    foreach (STATE_TYPE start in getAllStartingStates())
    {
        result.setAction(start, getRecordedActionWithMaximumQValue(start));
    }

    return result;
}
/// <summary>
/// Test fixture setup: creates the four-by-three MDP and a hand-written
/// policy covering nine cells. No action is set for (2, 2), (2, 4) or (3, 4).
/// </summary>
public void setUp()
{
    fourByThree = MDPFactory.createFourByThreeMDP();
    policy = new MDPPolicy<CellWorldPosition, String>();

    // Cells whose first coordinate is 1.
    policy.setAction(new CellWorldPosition(1, 1), CellWorld.UP);
    policy.setAction(new CellWorldPosition(1, 2), CellWorld.LEFT);
    policy.setAction(new CellWorldPosition(1, 3), CellWorld.LEFT);
    policy.setAction(new CellWorldPosition(1, 4), CellWorld.LEFT);

    // Cells whose first coordinate is 2.
    policy.setAction(new CellWorldPosition(2, 1), CellWorld.UP);
    policy.setAction(new CellWorldPosition(2, 3), CellWorld.UP);

    // Cells whose first coordinate is 3.
    policy.setAction(new CellWorldPosition(3, 1), CellWorld.RIGHT);
    policy.setAction(new CellWorldPosition(3, 2), CellWorld.RIGHT);
    policy.setAction(new CellWorldPosition(3, 3), CellWorld.RIGHT);
}
/// <summary>
/// Test fixture setup: creates the four-by-three MDP and a hand-written
/// policy covering nine cells. No action is set for (2, 2), (2, 4) or (3, 4).
/// </summary>
public void setUp()
{
    fourByThree = MDPFactory.createFourByThreeMDP();
    policy = new MDPPolicy<CellWorldPosition, String>();

    // First coordinate 1: UP, then LEFT for the remaining three cells.
    policy.setAction(new CellWorldPosition(1, 1), CellWorld.UP);
    policy.setAction(new CellWorldPosition(1, 2), CellWorld.LEFT);
    policy.setAction(new CellWorldPosition(1, 3), CellWorld.LEFT);
    policy.setAction(new CellWorldPosition(1, 4), CellWorld.LEFT);

    // First coordinate 2: UP for both configured cells.
    policy.setAction(new CellWorldPosition(2, 1), CellWorld.UP);
    policy.setAction(new CellWorldPosition(2, 3), CellWorld.UP);

    // First coordinate 3: RIGHT for all three configured cells.
    policy.setAction(new CellWorldPosition(3, 1), CellWorld.RIGHT);
    policy.setAction(new CellWorldPosition(3, 2), CellWorld.RIGHT);
    policy.setAction(new CellWorldPosition(3, 3), CellWorld.RIGHT);
}