//
// PRIVATE METHODS
//

// Produces an updated utility function after the step from previousState
// to currentState.
//
// The change is written to the object returned by utilityFunction.copy();
// only the entry for previousState is set:
//
//   U(prev) <- U(prev) + step * (previousReward + gamma * U(curr) - U(prev))
//
// where step is stateCount.probabilityOf(previousState) and every U(.) on
// the right-hand side is read from the current utilityFunction.
//
// gamma: multiplier applied to the utility of currentState.
// Returns: the copy carrying the new value for previousState.
private MDPUtilityFunction<STATE_TYPE> updateUtilityFunction(double gamma)
{
    MDPUtilityFunction<STATE_TYPE> updated = utilityFunction.copy();

    double previousUtility = utilityFunction.getUtility(previousState);

    // Difference between the gamma-scaled utility of the current state and
    // the utility of the state we just left.
    double scaledCurrentUtility = gamma * utilityFunction.getUtility(currentState);
    double utilityDifference =
        scaledCurrentUtility - utilityFunction.getUtility(previousState);

    // The step size comes from the state counter rather than a constant.
    double stepSize = stateCount.probabilityOf(previousState);
    double correction = stepSize * (previousReward + utilityDifference);

    updated.setUtility(previousState, previousUtility + correction);
    return updated;
}
// Runs a PassiveTDAgent built from the fourByThree / policy fixtures for
// 200 trials and compares the resulting utilities with fixed reference
// values, each to within 0.001.
public void testPassiveTDAgent()
{
    PassiveTDAgent<CellWorldPosition, String> tdAgent =
        new PassiveTDAgent<CellWorldPosition, String>(fourByThree, policy);

    // A MockRandomizer fed with a fixed number sequence is used here in
    // place of a JavaRandomizer.
    // NOTE(review): presumably this keeps the trials repeatable so the
    // reference values below stay valid -- confirm against MockRandomizer.
    Randomizer randomizer = new MockRandomizer(
        new double[] { 0.1, 0.9, 0.2, 0.8, 0.3, 0.7, 0.4, 0.6, 0.5 });

    MDPUtilityFunction<CellWorldPosition> learned = null;
    for (int trial = 0; trial < 200; trial++)
    {
        tdAgent.executeTrial(randomizer);
        learned = tdAgent.getUtilityFunction();
    }

    // Row 1.
    Assert.AreEqual(0.662, learned.getUtility(new CellWorldPosition(1, 1)), 0.001);
    Assert.AreEqual(0.610, learned.getUtility(new CellWorldPosition(1, 2)), 0.001);
    Assert.AreEqual(0.553, learned.getUtility(new CellWorldPosition(1, 3)), 0.001);
    Assert.AreEqual(0.496, learned.getUtility(new CellWorldPosition(1, 4)), 0.001);

    // Row 2. Cell (2,4) (expected -1.0) is deliberately not asserted: the
    // pseudo-random generator never gets to this square.
    Assert.AreEqual(0.735, learned.getUtility(new CellWorldPosition(2, 1)), 0.001);
    Assert.AreEqual(0.835, learned.getUtility(new CellWorldPosition(2, 3)), 0.001);

    // Row 3.
    Assert.AreEqual(0.789, learned.getUtility(new CellWorldPosition(3, 1)), 0.001);
    Assert.AreEqual(0.889, learned.getUtility(new CellWorldPosition(3, 3)), 0.001);
    Assert.AreEqual(1.0, learned.getUtility(new CellWorldPosition(3, 4)), 0.001);
}
// Runs a PassiveADPAgent built from the fourByThree / policy fixtures for
// 100 trials and compares the resulting utilities with fixed reference
// values, each to within 0.001.
public void testPassiveADPAgent()
{
    PassiveADPAgent<CellWorldPosition, String> adpAgent =
        new PassiveADPAgent<CellWorldPosition, String>(fourByThree, policy);

    // A MockRandomizer fed with a fixed number sequence is used here in
    // place of a JavaRandomizer.
    // NOTE(review): presumably this keeps the trials repeatable so the
    // reference values below stay valid -- confirm against MockRandomizer.
    Randomizer randomizer = new MockRandomizer(
        new double[] { 0.1, 0.9, 0.2, 0.8, 0.3, 0.7, 0.4, 0.6, 0.5 });

    MDPUtilityFunction<CellWorldPosition> learned = null;
    for (int trial = 0; trial < 100; trial++)
    {
        adpAgent.executeTrial(randomizer);
        learned = adpAgent.getUtilityFunction();
    }

    // Row 1.
    Assert.AreEqual(0.676, learned.getUtility(new CellWorldPosition(1, 1)), 0.001);
    Assert.AreEqual(0.626, learned.getUtility(new CellWorldPosition(1, 2)), 0.001);
    Assert.AreEqual(0.573, learned.getUtility(new CellWorldPosition(1, 3)), 0.001);
    Assert.AreEqual(0.519, learned.getUtility(new CellWorldPosition(1, 4)), 0.001);

    // Row 2. Cell (2,4) (expected -1.0) is deliberately not asserted: the
    // pseudo-random generator never gets to this square.
    Assert.AreEqual(0.746, learned.getUtility(new CellWorldPosition(2, 1)), 0.001);
    Assert.AreEqual(0.865, learned.getUtility(new CellWorldPosition(2, 3)), 0.001);

    // Row 3.
    Assert.AreEqual(0.796, learned.getUtility(new CellWorldPosition(3, 1)), 0.001);
    Assert.AreEqual(0.906, learned.getUtility(new CellWorldPosition(3, 3)), 0.001);
    Assert.AreEqual(1.0, learned.getUtility(new CellWorldPosition(3, 4)), 0.001);
}
//
// PRIVATE METHODS
//

// Recomputes the utility of the state the given transitions start from.
//
// The result is written to the object returned by utilityFunction.copy():
//
//   U(init) <- reward(init) + gamma * sum( P(t) * U(destination of t) )
//
// with reward and P taken from mdp and every U(.) on the right-hand side
// read from the current utilityFunction. When validTransitions is empty
// the copy is returned without any entry being set.
//
// validTransitions: transitions to sum over; the initial state is taken
//     from the first element only.
//     NOTE(review): assumes all transitions share that initial state --
//     confirm against the caller.
// gamma: multiplier applied to the summed term.
// Returns: the copy, updated as described above.
private MDPUtilityFunction<STATE_TYPE> valueDetermination(
    List<MDPTransition<STATE_TYPE, ACTION_TYPE>> validTransitions,
    double gamma)
{
    MDPUtilityFunction<STATE_TYPE> updated = utilityFunction.copy();

    // Nothing to evaluate: hand back the copy as-is.
    if (validTransitions.Count <= 0)
    {
        return updated;
    }

    STATE_TYPE sourceState = validTransitions[0].getInitialState();
    double reward = mdp.getRewardFor(sourceState);

    // Probability-weighted sum of the destination utilities.
    double weightedUtility = 0.0;
    foreach (MDPTransition<STATE_TYPE, ACTION_TYPE> transition in validTransitions)
    {
        double probability = mdp.getTransitionProbability(transition);
        double destinationUtility =
            utilityFunction.getUtility(transition.getDestinationState());
        weightedUtility += probability * destinationUtility;
    }

    updated.setUtility(sourceState, reward + (gamma * weightedUtility));
    return updated;
}
// Runs value iteration on the fourByThreeMDP fixture and compares the
// resulting utilities with fixed reference values, each to within 0.001.
//
// NOTE(review): the arguments (1, 0.00001) are presumably the discount
// factor and the error margin named in the method -- confirm against the
// MDP class.
public void testValueIterationInCellWorld()
{
    MDPUtilityFunction<CellWorldPosition> uf = fourByThreeMDP
        .valueIterationTillMAximumUtilityGrowthFallsBelowErrorMargin(1, 0.00001);

    // AIMA2e check against Fig 17.3

    // Row 1.
    Assert.AreEqual(0.705, uf.getUtility(new CellWorldPosition(1, 1)), 0.001);
    Assert.AreEqual(0.655, uf.getUtility(new CellWorldPosition(1, 2)), 0.001);
    Assert.AreEqual(0.611, uf.getUtility(new CellWorldPosition(1, 3)), 0.001);
    Assert.AreEqual(0.388, uf.getUtility(new CellWorldPosition(1, 4)), 0.001);

    // Row 2 (no assertion is made for cell (2,2)).
    Assert.AreEqual(0.762, uf.getUtility(new CellWorldPosition(2, 1)), 0.001);
    Assert.AreEqual(0.660, uf.getUtility(new CellWorldPosition(2, 3)), 0.001);
    Assert.AreEqual(-1.0, uf.getUtility(new CellWorldPosition(2, 4)), 0.001);

    // Row 3. Cell (3,2) is asserted once here; the earlier version of this
    // test repeated the identical assertion at the end of the method.
    Assert.AreEqual(0.812, uf.getUtility(new CellWorldPosition(3, 1)), 0.001);
    Assert.AreEqual(0.868, uf.getUtility(new CellWorldPosition(3, 2)), 0.001);
    Assert.AreEqual(0.918, uf.getUtility(new CellWorldPosition(3, 3)), 0.001);
    Assert.AreEqual(1.0, uf.getUtility(new CellWorldPosition(3, 4)), 0.001);
}