// NOTE(review): an identical definition of this method appears again later in this
// file — two same-named methods in one class will not compile; one copy should be
// removed. TODO confirm which copy is canonical.
//
// First step of a Q-learning agent where the (slipped) move lands in a -1 cell:
// the Q value for the starting (state, action) pair is only updated once the
// next perception arrives.
public void testFirstStepsOfQLAAgentWhenFirstStepTerminates() {
    QLearningAgent<CellWorldPosition, String> agent =
            new QLearningAgent<CellWorldPosition, String>(fourByThree);
    CellWorldPosition start = new CellWorldPosition(1, 4);

    // From (1,4) the agent's first decision is LEFT.
    String firstAction = agent.decideAction(
            new MDPPerception<CellWorldPosition>(start, -0.04));
    Assert.AreEqual(CellWorld.LEFT, firstAction);

    // A roll in [0.8, 0.9) forces the attempted "left" to resolve as an "up",
    // moving the agent to (2,4) where it receives reward -1.
    Randomizer slipIntoUp = new MockRandomizer(new double[] { 0.85 });
    agent.execute(firstAction, slipIntoUp);
    Assert.AreEqual(new CellWorldPosition(2, 4), agent.getCurrentState());
    Assert.AreEqual(-1.0, agent.getCurrentReward(), 0.001);

    // Q(start, LEFT) is still 0.0 — no update has happened yet.
    Assert.AreEqual(0.0, agent.getQTable().getQValue(start, firstAction), 0.001);

    // Feeding in the next perception yields no further action (state appears
    // terminal) and propagates the -1 reward into the Q table.
    String nextAction = agent.decideAction(
            new MDPPerception<CellWorldPosition>(new CellWorldPosition(2, 4), -1));
    Assert.IsNull(nextAction);
    Assert.AreEqual(-1.0, agent.getQTable().getQValue(start, firstAction), 0.001);
}
// NOTE(review): an identical definition of this method appears again later in this
// file — duplicate same-named methods in one class will not compile; one copy
// should be removed. TODO confirm which copy is canonical.
//
// First step of a Q-learning agent when the move resolves as intended: the
// step reward (-0.04) reaches the Q table only with the following perception.
public void testFirstStepsOfQLAAgentUnderNormalProbability() {
    QLearningAgent<CellWorldPosition, String> agent =
            new QLearningAgent<CellWorldPosition, String>(fourByThree);
    // Rolls below 0.8 make every action resolve to its intended direction.
    Randomizer noSlip = new MockRandomizer(new double[] { 0.7 });
    CellWorldPosition start = new CellWorldPosition(1, 4);

    // From (1,4) the agent's first decision is LEFT; Q(start, LEFT) starts at 0.
    String firstAction = agent.decideAction(
            new MDPPerception<CellWorldPosition>(start, -0.04));
    Assert.AreEqual(CellWorld.LEFT, firstAction);
    Assert.AreEqual(0.0, agent.getQTable().getQValue(start, firstAction), 0.001);

    // The move succeeds: agent lands in (1,3) with the standard -0.04 reward,
    // but the Q value remains untouched until the next perception.
    agent.execute(firstAction, noSlip);
    Assert.AreEqual(new CellWorldPosition(1, 3), agent.getCurrentState());
    Assert.AreEqual(-0.04, agent.getCurrentReward(), 0.001);
    Assert.AreEqual(0.0, agent.getQTable().getQValue(start, firstAction), 0.001);

    // The follow-up perception triggers the update: Q(start, LEFT) == -0.04.
    agent.decideAction(
            new MDPPerception<CellWorldPosition>(new CellWorldPosition(1, 3), -0.04));
    Assert.AreEqual(-0.04, agent.getQTable().getQValue(start, firstAction), 0.001);
}
// NOTE(review): this method is a duplicate of an earlier definition in this file
// (the two differ only in whitespace) — duplicate same-named methods in one class
// will not compile; one copy should be removed. TODO confirm which to keep.
//
// Verifies the Q update sequence when the agent's first (slipped) step ends in
// the -1 cell: the reward is folded into the Q table one perception later.
public void testFirstStepsOfQLAAgentWhenFirstStepTerminates() {
    QLearningAgent<CellWorldPosition, String> qla =
            new QLearningAgent<CellWorldPosition, String>(fourByThree);
    CellWorldPosition origin = new CellWorldPosition(1, 4);
    MDPPerception<CellWorldPosition> initialPerception =
            new MDPPerception<CellWorldPosition>(origin, -0.04);

    String chosen = qla.decideAction(initialPerception);
    Assert.AreEqual(CellWorld.LEFT, chosen);

    // 0.85 falls in the 10% band that turns the chosen "left" into an "up".
    Randomizer betweenEightyAndNinetyPercent =
            new MockRandomizer(new double[] { 0.85 });
    qla.execute(chosen, betweenEightyAndNinetyPercent);

    // The agent ends up in (2,4) and observes reward -1; the Q table has not
    // yet absorbed that reward.
    Assert.AreEqual(new CellWorldPosition(2, 4), qla.getCurrentState());
    Assert.AreEqual(-1.0, qla.getCurrentReward(), 0.001);
    Assert.AreEqual(0.0, qla.getQTable().getQValue(origin, chosen), 0.001);

    // Presenting the new state produces no action (apparently terminal) and
    // writes -1 into Q(origin, LEFT).
    MDPPerception<CellWorldPosition> terminalPerception =
            new MDPPerception<CellWorldPosition>(new CellWorldPosition(2, 4), -1);
    Assert.IsNull(qla.decideAction(terminalPerception));
    Assert.AreEqual(-1.0, qla.getQTable().getQValue(origin, chosen), 0.001);
}
// NOTE(review): this method is a duplicate of an earlier definition in this file
// (the two differ only in whitespace) — duplicate same-named methods in one class
// will not compile; one copy should be removed. TODO confirm which to keep.
//
// Verifies the Q update sequence for an un-slipped first step: the -0.04 step
// reward reaches Q(start, LEFT) only after the subsequent perception.
public void testFirstStepsOfQLAAgentUnderNormalProbability() {
    QLearningAgent<CellWorldPosition, String> qla =
            new QLearningAgent<CellWorldPosition, String>(fourByThree);
    Randomizer alwaysLessThanEightyPercent =
            new MockRandomizer(new double[] { 0.7 });
    CellWorldPosition origin = new CellWorldPosition(1, 4);

    MDPPerception<CellWorldPosition> initialPerception =
            new MDPPerception<CellWorldPosition>(origin, -0.04);
    String chosen = qla.decideAction(initialPerception);
    Assert.AreEqual(CellWorld.LEFT, chosen);
    Assert.AreEqual(0.0, qla.getQTable().getQValue(origin, chosen), 0.001);

    // With a 0.7 roll the move resolves as intended: agent reaches (1,3),
    // collects -0.04, and the Q value is still its initial 0.0.
    qla.execute(chosen, alwaysLessThanEightyPercent);
    Assert.AreEqual(new CellWorldPosition(1, 3), qla.getCurrentState());
    Assert.AreEqual(-0.04, qla.getCurrentReward(), 0.001);
    Assert.AreEqual(0.0, qla.getQTable().getQValue(origin, chosen), 0.001);

    // The next perception drives the update, pushing -0.04 into the Q table.
    MDPPerception<CellWorldPosition> followUp =
            new MDPPerception<CellWorldPosition>(new CellWorldPosition(1, 3), -0.04);
    qla.decideAction(followUp);
    Assert.AreEqual(-0.04, qla.getQTable().getQValue(origin, chosen), 0.001);
}