/// <summary>
/// First step of the Q-learning agent when the stochastic outcome lands it
/// directly in a terminal cell: the terminal reward (-1.0) must be backed up
/// into the Q entry for the initial (state, action) pair.
/// </summary>
public void testFirstStepsOfQLAAgentWhenFirstStepTerminates()
{
    QLearningAgent<CellWorldPosition, String> agent =
        new QLearningAgent<CellWorldPosition, String>(fourByThree);
    CellWorldPosition start = new CellWorldPosition(1, 4);

    String firstAction = agent.decideAction(
        new MDPPerception<CellWorldPosition>(start, -0.04));
    Assert.AreEqual(CellWorld.LEFT, firstAction);

    // A roll of 0.85 forces the intended "left" to become an "up" move.
    Randomizer betweenEightyANdNinetyPercent = new MockRandomizer(
        new double[] { 0.85 });
    agent.execute(firstAction, betweenEightyANdNinetyPercent);

    Assert.AreEqual(new CellWorldPosition(2, 4), agent.getCurrentState());
    Assert.AreEqual(-1.0, agent.getCurrentReward(), 0.001);
    // Q value is not updated until the next perception arrives.
    Assert.AreEqual(0.0,
        agent.getQTable().getQValue(start, firstAction), 0.001);

    // Perceiving the terminal state yields no further action, and the
    // terminal reward is written back into the first step's Q entry.
    String secondAction = agent.decideAction(
        new MDPPerception<CellWorldPosition>(new CellWorldPosition(2, 4), -1));
    Assert.IsNull(secondAction);
    Assert.AreEqual(-1.0,
        agent.getQTable().getQValue(start, firstAction), 0.001);
}
/// <summary>
/// First steps of the Q-learning agent when every stochastic roll resolves to
/// the intended direction (roll below 0.8): the agent moves left, and the
/// step reward (-0.04) is backed up into the Q table only after the next
/// perception is processed.
/// </summary>
public void testFirstStepsOfQLAAgentUnderNormalProbability()
{
    QLearningAgent<CellWorldPosition, String> qla =
        new QLearningAgent<CellWorldPosition, String>(fourByThree);
    Randomizer alwaysLessThanEightyPercent = new MockRandomizer(
        new double[] { 0.7 });
    CellWorldPosition startingPosition = new CellWorldPosition(1, 4);

    String action = qla.decideAction(
        new MDPPerception<CellWorldPosition>(startingPosition, -0.04));
    Assert.AreEqual(CellWorld.LEFT, action);
    Assert.AreEqual(0.0,
        qla.getQTable().getQValue(startingPosition, action), 0.001);

    qla.execute(action, alwaysLessThanEightyPercent);
    Assert.AreEqual(new CellWorldPosition(1, 3), qla.getCurrentState());
    Assert.AreEqual(-0.04, qla.getCurrentReward(), 0.001);
    // Still unchanged: the update happens on the following perception.
    Assert.AreEqual(0.0,
        qla.getQTable().getQValue(startingPosition, action), 0.001);

    // Fix: the returned action was previously stored in an unused local
    // ("action2"); only decideAction's side effect — updating the Q entry
    // for the first (state, action) pair — matters here.
    qla.decideAction(new MDPPerception<CellWorldPosition>(
        new CellWorldPosition(1, 3), -0.04));
    Assert.AreEqual(-0.04,
        qla.getQTable().getQValue(startingPosition, action), 0.001);
}
/// <summary>
/// Smoke test: runs 100 Q-learning trials over the 4x3 cell world with a
/// fixed mock randomizer and verifies nothing throws. The learned table can
/// be inspected via the commented-out debug output below.
/// </summary>
public void testQLearningAgent()
{
    // Fix: type arguments were inconsistent ("String" vs "string") within
    // the same expression; normalized to String to match the rest of the file.
    QLearningAgent<CellWorldPosition, String> qla =
        new QLearningAgent<CellWorldPosition, String>(fourByThree);
    Randomizer r = new MockRandomizer(new double[] { 0.1, 0.9, 0.2, 0.8,
            0.3, 0.7, 0.4, 0.6, 0.5 });
    // Randomizer r = new JavaRandomizer();

    // Fix: removed the unused local "q" (Dictionary<Pair<...>, Double>) —
    // it was assigned every iteration but never read, not even by the
    // commented-out debug code.
    QTable<CellWorldPosition, String> qTable = null;
    for (int i = 0; i < 100; i++)
    {
        qla.executeTrial(r);
        qTable = qla.getQTable();
    }

    // qTable.normalize();
    // System.Console.WriteLine(qTable);
    // System.Console.WriteLine(qTable.getPolicy());
}
/// <summary>
/// Verifies the Q-learning agent's behavior when its very first move lands in
/// a terminal cell: no action is chosen on the terminal perception, and the
/// terminal reward (-1.0) propagates into the starting (state, action) entry.
/// </summary>
public void testFirstStepsOfQLAAgentWhenFirstStepTerminates()
{
    QLearningAgent<CellWorldPosition, String> learner =
        new QLearningAgent<CellWorldPosition, String>(fourByThree);
    CellWorldPosition origin = new CellWorldPosition(1, 4);

    String chosen = learner.decideAction(
        new MDPPerception<CellWorldPosition>(origin, -0.04));
    Assert.AreEqual(CellWorld.LEFT, chosen);

    // to force left to become an "up": 0.85 falls in the slip range.
    Randomizer slipRoll = new MockRandomizer(new double[] { 0.85 });
    learner.execute(chosen, slipRoll);

    CellWorldPosition terminal = new CellWorldPosition(2, 4);
    Assert.AreEqual(terminal, learner.getCurrentState());
    Assert.AreEqual(-1.0, learner.getCurrentReward(), 0.001);
    // No Q update yet — that only happens on the next perception.
    Assert.AreEqual(0.0, learner.getQTable().getQValue(origin, chosen), 0.001);

    String followUp = learner.decideAction(
        new MDPPerception<CellWorldPosition>(terminal, -1));
    Assert.IsNull(followUp);
    Assert.AreEqual(-1.0, learner.getQTable().getQValue(origin, chosen), 0.001);
}
/// <summary>
/// First steps of the Q-learning agent when the stochastic roll (0.7) stays
/// below the 0.8 slip threshold, so the intended "left" move succeeds; the
/// step reward (-0.04) reaches the Q table only after the next perception.
/// </summary>
public void testFirstStepsOfQLAAgentUnderNormalProbability()
{
    QLearningAgent<CellWorldPosition, String> qla =
        new QLearningAgent<CellWorldPosition, String>(fourByThree);
    Randomizer alwaysLessThanEightyPercent = new MockRandomizer(
        new double[] { 0.7 });
    CellWorldPosition startingPosition = new CellWorldPosition(1, 4);

    String action = qla.decideAction(
        new MDPPerception<CellWorldPosition>(startingPosition, -0.04));
    Assert.AreEqual(CellWorld.LEFT, action);
    Assert.AreEqual(0.0,
        qla.getQTable().getQValue(startingPosition, action), 0.001);

    qla.execute(action, alwaysLessThanEightyPercent);
    Assert.AreEqual(new CellWorldPosition(1, 3), qla.getCurrentState());
    Assert.AreEqual(-0.04, qla.getCurrentReward(), 0.001);
    // The Q entry is still untouched until the next perception arrives.
    Assert.AreEqual(0.0,
        qla.getQTable().getQValue(startingPosition, action), 0.001);

    // Fix: dropped the unused local "action2" — only decideAction's side
    // effect (the Q update for the first step) is asserted below.
    qla.decideAction(new MDPPerception<CellWorldPosition>(
        new CellWorldPosition(1, 3), -0.04));
    Assert.AreEqual(-0.04,
        qla.getQTable().getQValue(startingPosition, action), 0.001);
}
/// <summary>
/// Smoke test: executes 100 learning trials against the 4x3 world with a
/// deterministic mock randomizer and checks that the run completes. Uncomment
/// the debug lines at the bottom to inspect the resulting Q table/policy.
/// </summary>
public void testQLearningAgent()
{
    // Fix: mixed "String"/"string" type arguments normalized to String,
    // matching the declaration side and the rest of the file.
    QLearningAgent<CellWorldPosition, String> qla =
        new QLearningAgent<CellWorldPosition, String>(fourByThree);
    Randomizer r = new MockRandomizer(new double[] { 0.1, 0.9, 0.2, 0.8,
            0.3, 0.7, 0.4, 0.6, 0.5 });
    // Randomizer r = new JavaRandomizer();

    // Fix: removed the unused local "q" (Dictionary<Pair<...>, Double>);
    // it was reassigned each iteration and never read anywhere.
    QTable<CellWorldPosition, String> qTable = null;
    for (int i = 0; i < 100; i++)
    {
        qla.executeTrial(r);
        qTable = qla.getQTable();
    }

    // qTable.normalize();
    // System.Console.WriteLine(qTable);
    // System.Console.WriteLine(qTable.getPolicy());
}