public void testFirstStepsOfQLAAgentWhenFirstStepTerminates() { QLearningAgent <CellWorldPosition, String> qla = new QLearningAgent <CellWorldPosition, String>( fourByThree); CellWorldPosition startingPosition = new CellWorldPosition(1, 4); String action = qla.decideAction(new MDPPerception <CellWorldPosition>( startingPosition, -0.04)); Assert.AreEqual(CellWorld.LEFT, action); Randomizer betweenEightyANdNinetyPercent = new MockRandomizer( new double[] { 0.85 }); // to force left to become an "up" qla.execute(action, betweenEightyANdNinetyPercent); Assert.AreEqual(new CellWorldPosition(2, 4), qla.getCurrentState()); Assert.AreEqual(-1.0, qla.getCurrentReward(), 0.001); Assert.AreEqual(0.0, qla.getQTable().getQValue(startingPosition, action), 0.001); String action2 = qla.decideAction(new MDPPerception <CellWorldPosition>( new CellWorldPosition(2, 4), -1)); Assert.IsNull(action2); Assert.AreEqual(-1.0, qla.getQTable().getQValue(startingPosition, action), 0.001); }
/**
 * Checks the first two decisions of a Q-learning agent when the chosen action is executed with
 * a mock randomizer value of 0.7.
 *
 * <p>Starting from (1, 4) the agent is expected to pick {@code CellWorld.LEFT} and, after
 * executing it, to be at (1, 3) with a reward of -0.04. The Q-value of the starting
 * state/action pair is asserted to be 0.0 before and right after the move, and -0.04 once
 * {@code decideAction} has been called for the new position.
 */
public void testFirstStepsOfQLAAgentUnderNormalProbability() {
    QLearningAgent<CellWorldPosition, String> qla =
            new QLearningAgent<CellWorldPosition, String>(fourByThree);
    Randomizer alwaysLessThanEightyPercent = new MockRandomizer(new double[] { 0.7 });
    CellWorldPosition startingPosition = new CellWorldPosition(1, 4);

    String action = qla.decideAction(
            new MDPPerception<CellWorldPosition>(startingPosition, -0.04));
    Assert.assertEquals(CellWorld.LEFT, action);
    Assert.assertEquals(0.0, qla.getQTable().getQValue(startingPosition, action), 0.001);

    qla.execute(action, alwaysLessThanEightyPercent);

    Assert.assertEquals(new CellWorldPosition(1, 3), qla.getCurrentState());
    Assert.assertEquals(-0.04, qla.getCurrentReward(), 0.001);
    Assert.assertEquals(0.0, qla.getQTable().getQValue(startingPosition, action), 0.001);

    // The returned action is not inspected; the call is made because the Q-value asserted
    // below differs from the one asserted just before it.
    qla.decideAction(
            new MDPPerception<CellWorldPosition>(new CellWorldPosition(1, 3), -0.04));

    Assert.assertEquals(-0.04, qla.getQTable().getQValue(startingPosition, action), 0.001);
}
/**
 * Asserts that the transition model reports probability 0.0 for moving between the two
 * positions this test treats as final states, (2, 4) and (3, 4), in either direction.
 */
public void testCannotTransitionFromFinalState() {
    final double tolerance = 0.001;
    MDPTransitionModel<CellWorldPosition, String> model = cw.getTransitionModel();
    CellWorldPosition lowerFinal = new CellWorldPosition(2, 4);
    CellWorldPosition upperFinal = new CellWorldPosition(3, 4);

    // UP from (2, 4) towards (3, 4).
    double upwardProbability =
            model.getTransitionProbability(lowerFinal, CellWorld.UP, upperFinal);
    Assert.assertEquals(0.0, upwardProbability, tolerance);

    // DOWN from (3, 4) towards (2, 4).
    double downwardProbability =
            model.getTransitionProbability(upperFinal, CellWorld.DOWN, lowerFinal);
    Assert.assertEquals(0.0, downwardProbability, tolerance);
}
/**
 * Asserts the probabilities the transition model assigns to action {@code CellWorld.UP} taken
 * from (1, 1): 0.8 for reaching (2, 1), 0.1 for remaining at (1, 1) and 0.1 for reaching
 * (1, 2).
 */
public void testTransitionModelCreation() {
    final double tolerance = 0.001;
    MDPTransitionModel<CellWorldPosition, String> model = cw.getTransitionModel();
    CellWorldPosition origin = new CellWorldPosition(1, 1);

    CellWorldPosition intendedTarget = new CellWorldPosition(2, 1);
    double intendedProbability =
            model.getTransitionProbability(origin, CellWorld.UP, intendedTarget);
    Assert.assertEquals(0.8, intendedProbability, tolerance);

    // Same coordinates as the origin.
    CellWorldPosition unchangedTarget = new CellWorldPosition(1, 1);
    double unchangedProbability =
            model.getTransitionProbability(origin, CellWorld.UP, unchangedTarget);
    Assert.assertEquals(0.1, unchangedProbability, tolerance);

    CellWorldPosition sidewaysTarget = new CellWorldPosition(1, 2);
    double sidewaysProbability =
            model.getTransitionProbability(origin, CellWorld.UP, sidewaysTarget);
    Assert.assertEquals(0.1, sidewaysProbability, tolerance);
}
/**
 * Asserts that {@code cw} reports probability 0.2 for ending up at (2, 1) when
 * {@code CellWorld.UP} is taken from (2, 1), i.e. for staying in place.
 */
public void testTransitionProbabilityCalculationWhenBothRightAngledActiosnLeadToStartingPosition() {
    final double expected = 0.2;
    final double tolerance = 0.001;
    final CellWorldPosition from = new CellWorldPosition(2, 1);
    final CellWorldPosition to = new CellWorldPosition(2, 1);

    final double actual = cw.getTransitionProbability(from, CellWorld.UP, to);

    Assert.assertEquals(expected, actual, tolerance);
}
/**
 * Asserts that {@code cw} reports probability 0.8 for reaching (2, 1) when
 * {@code CellWorld.UP} is taken from (1, 1).
 */
public void testTransitionProbabilityCalculationWhenEndingPositionReachebleByExecutingSuggestedAction() {
    final double expected = 0.8;
    final double tolerance = 0.001;
    final CellWorldPosition from = new CellWorldPosition(1, 1);
    final CellWorldPosition to = new CellWorldPosition(2, 1);

    final double actual = cw.getTransitionProbability(from, CellWorld.UP, to);

    Assert.assertEquals(expected, actual, tolerance);
}
/**
 * Asserts that {@code cw} reports probability 0.0 for reaching (3, 3) when
 * {@code CellWorld.UP} is taken from (1, 3).
 */
public void testTransitionProbabilityCalculationWhenEndingPositionCannotBeReachedUsingDesiredActionOrRightAngledSteps() {
    final double expected = 0.0;
    final double tolerance = 0.001;
    final CellWorldPosition from = new CellWorldPosition(1, 3);
    final CellWorldPosition to = new CellWorldPosition(3, 3);

    final double actual = cw.getTransitionProbability(from, CellWorld.UP, to);

    Assert.assertEquals(expected, actual, tolerance);
}
public void testTransitionProbabilityCalculationWhenEndingPositionIsNextToStartingPositionButIsBlocked() { CellWorldPosition startingPosition = new CellWorldPosition(2, 1); // to the left of blocked cell CellWorldPosition endingPosition = new CellWorldPosition(2, 2); // blocked // cell double transitionProb = cw.getTransitionProbability(startingPosition, CellWorld.RIGHT, endingPosition); Assert.assertEquals(0.0, transitionProb, 0.001); }