/// <summary>
/// Demo: places a <c>PassiveTDAgent</c> that follows a hand-written fixed
/// policy into the cell world built by
/// <c>CellWorldFactory.CreateCellWorldForFig17_1</c>, then hands the agent to
/// <c>output_utility_learning_rates</c>.
/// </summary>
static void passiveTDAgentDemo()
{
    CellWorld<double> world = CellWorldFactory.CreateCellWorldForFig17_1();

    // Environment inputs, evaluated in the same order as the constructor arguments.
    var startCell = world.GetCellAt(1, 1);
    var allCells = world.GetCells();
    var transitionModel = MDPFactory.createTransitionProbabilityFunctionForFigure17_1(world);
    var random = CommonFactory.CreateRandom();
    CellWorldEnvironment environment =
        new CellWorldEnvironment(startCell, allCells, transitionModel, random);

    // Fixed policy: row i of policyCells is the (x, y) pair passed to GetCellAt,
    // and policyMoves[i] is the action stored for that cell.
    int[,] policyCells =
    {
        { 1, 1 }, { 1, 2 }, { 1, 3 },
        { 2, 1 }, { 2, 3 },
        { 3, 1 }, { 3, 2 }, { 3, 3 },
        { 4, 1 }
    };
    CellWorldAction[] policyMoves =
    {
        CellWorldAction.Up, CellWorldAction.Up, CellWorldAction.Right,
        CellWorldAction.Left, CellWorldAction.Right,
        CellWorldAction.Left, CellWorldAction.Up, CellWorldAction.Right,
        CellWorldAction.Left
    };

    IMap<Cell<double>, CellWorldAction> policy =
        CollectionFactory.CreateMap<Cell<double>, CellWorldAction>();
    for (int entry = 0; entry < policyMoves.Length; entry++)
    {
        policy.Put(
            world.GetCellAt(policyCells[entry, 0], policyCells[entry, 1]),
            policyMoves[entry]);
    }

    // NOTE(review): 0.2 and 1.0 are presumably the learning rate and discount
    // factor -- confirm against the PassiveTDAgent constructor.
    PassiveTDAgent<Cell<double>, CellWorldAction> agent =
        new PassiveTDAgent<Cell<double>, CellWorldAction>(policy, 0.2, 1.0);
    environment.AddAgent(agent);

    output_utility_learning_rates(agent, 20, 500, 100, 1);
}
/// <summary>
/// Executes 200 trials of a <c>PassiveTDAgent</c> built from the
/// <c>fourByThree</c> and <c>policy</c> fixtures, driven by a scripted
/// <c>MockRandomizer</c>, and checks the resulting utility of each visited
/// position to within 0.001.
/// </summary>
public void testPassiveTDAgent()
{
    const int trialCount = 200;
    const double tolerance = 0.001;

    PassiveTDAgent<CellWorldPosition, String> learner =
        new PassiveTDAgent<CellWorldPosition, String>(fourByThree, policy);

    // A MockRandomizer with a fixed value sequence is used here; a
    // JavaRandomizer is the alternative source.
    double[] scriptedValues = { 0.1, 0.9, 0.2, 0.8, 0.3, 0.7, 0.4, 0.6, 0.5 };
    Randomizer randomizer = new MockRandomizer(scriptedValues);

    // The utility function is re-read after every trial; only the value
    // obtained after the final trial is asserted on.
    MDPUtilityFunction<CellWorldPosition> utilities = null;
    int completed = 0;
    while (completed < trialCount)
    {
        learner.executeTrial(randomizer);
        utilities = learner.getUtilityFunction();
        completed++;
    }

    Assert.AreEqual(0.662, utilities.getUtility(new CellWorldPosition(1, 1)), tolerance);
    Assert.AreEqual(0.610, utilities.getUtility(new CellWorldPosition(1, 2)), tolerance);
    Assert.AreEqual(0.553, utilities.getUtility(new CellWorldPosition(1, 3)), tolerance);
    Assert.AreEqual(0.496, utilities.getUtility(new CellWorldPosition(1, 4)), tolerance);

    Assert.AreEqual(0.735, utilities.getUtility(new CellWorldPosition(2, 1)), tolerance);
    Assert.AreEqual(0.835, utilities.getUtility(new CellWorldPosition(2, 3)), tolerance);
    // Position (2, 4), expected utility -1.0, is not asserted: the pseudo
    // random generator never gets to this square.

    Assert.AreEqual(0.789, utilities.getUtility(new CellWorldPosition(3, 1)), tolerance);
    Assert.AreEqual(0.889, utilities.getUtility(new CellWorldPosition(3, 3)), tolerance);
    Assert.AreEqual(1.0, utilities.getUtility(new CellWorldPosition(3, 4)), tolerance);
}
/// <summary>
/// Trains a <c>PassiveTDAgent</c> (constructed from the <c>fourByThree</c>
/// and <c>policy</c> fixtures) for 200 trials using a scripted
/// <c>MockRandomizer</c>, then compares the utility reported for each listed
/// position against its expected value with a 0.001 tolerance.
/// </summary>
public void testPassiveTDAgent()
{
    PassiveTDAgent<CellWorldPosition, String> tdAgent =
        new PassiveTDAgent<CellWorldPosition, String>(fourByThree, policy);

    // Scripted values handed to MockRandomizer (used instead of a JavaRandomizer).
    Randomizer scripted = new MockRandomizer(
        new double[] { 0.1, 0.9, 0.2, 0.8, 0.3, 0.7, 0.4, 0.6, 0.5 });

    MDPUtilityFunction<CellWorldPosition> learned = null;
    for (int trial = 0; trial < 200; trial++)
    {
        tdAgent.executeTrial(scripted);
        learned = tdAgent.getUtilityFunction();
    }

    // Row i of checkedPositions holds the two CellWorldPosition constructor
    // arguments whose utility must match expectedUtilities[i].
    // Position (2, 4), expected utility -1.0, is deliberately absent: the
    // pseudo random generator never gets to this square.
    int[,] checkedPositions =
    {
        { 1, 1 }, { 1, 2 }, { 1, 3 }, { 1, 4 },
        { 2, 1 }, { 2, 3 },
        { 3, 1 }, { 3, 3 }, { 3, 4 }
    };
    double[] expectedUtilities =
    {
        0.662, 0.610, 0.553, 0.496,
        0.735, 0.835,
        0.789, 0.889, 1.0
    };

    for (int index = 0; index < expectedUtilities.Length; index++)
    {
        CellWorldPosition position = new CellWorldPosition(
            checkedPositions[index, 0], checkedPositions[index, 1]);
        Assert.AreEqual(expectedUtilities[index], learned.getUtility(position), 0.001);
    }
}