// Scenario: the agent's first move ends on (2, 4) with reward -1.0. The test
// checks the actions the agent picks and the Q-value stored for
// (start, first action) before and after the agent perceives that outcome.
public void testFirstStepsOfQLAAgentWhenFirstStepTerminates()
        {
            QLearningAgent <CellWorldPosition, String> agent =
                new QLearningAgent <CellWorldPosition, String>(fourByThree);
            CellWorldPosition origin = new CellWorldPosition(1, 4);

            // First perception: the start cell together with a reward of -0.04.
            MDPPerception <CellWorldPosition> initialPercept =
                new MDPPerception <CellWorldPosition>(origin, -0.04);
            String firstMove = agent.decideAction(initialPercept);

            Assert.AreEqual(CellWorld.LEFT, firstMove);

            // to force left to become an "up"
            double[]   scriptedDraws   = { 0.85 };
            Randomizer sidewaysOutcome = new MockRandomizer(scriptedDraws);

            agent.execute(firstMove, sidewaysOutcome);

            Assert.AreEqual(new CellWorldPosition(2, 4), agent.getCurrentState());
            Assert.AreEqual(-1.0, agent.getCurrentReward(), 0.001);
            // The Q-table entry is expected to still be 0.0 right after execute().
            Assert.AreEqual(0.0, agent.getQTable().getQValue(origin, firstMove), 0.001);

            // Second perception: the cell reached above, now carrying reward -1.
            MDPPerception <CellWorldPosition> outcomePercept =
                new MDPPerception <CellWorldPosition>(new CellWorldPosition(2, 4), -1);
            String secondMove = agent.decideAction(outcomePercept);

            // No further action is expected, and the Q-value for the first
            // move is expected to have become -1.0.
            Assert.IsNull(secondMove);
            Assert.AreEqual(-1.0, agent.getQTable().getQValue(origin, firstMove), 0.001);
        }
        // Scenario: the randomizer yields 0.7, and the LEFT action chosen at (1, 4)
        // is expected to take the agent to (1, 3) with reward -0.04. The test
        // checks that the Q-value for (start, LEFT) stays 0.0 until the agent is
        // asked to decide again, and is -0.04 afterwards.
        public void testFirstStepsOfQLAAgentUnderNormalProbability()
        {
            QLearningAgent <CellWorldPosition, String> qla = new QLearningAgent <CellWorldPosition, String>(
                fourByThree);

            // Presumably 0.7 falls in the range where the requested action is
            // carried out as-is (the name suggests an 80% threshold) — confirm
            // against the randomizer handling in execute().
            Randomizer alwaysLessThanEightyPercent = new MockRandomizer(
                new double[] { 0.7 });
            CellWorldPosition startingPosition = new CellWorldPosition(1, 4);
            String            action           = qla.decideAction(new MDPPerception <CellWorldPosition>(
                                                                      startingPosition, -0.04));

            Assert.AreEqual(CellWorld.LEFT, action);
            Assert.AreEqual(0.0, qla.getQTable().getQValue(startingPosition,
                                                           action), 0.001);

            qla.execute(action, alwaysLessThanEightyPercent);
            Assert.AreEqual(new CellWorldPosition(1, 3), qla.getCurrentState());
            Assert.AreEqual(-0.04, qla.getCurrentReward(), 0.001);
            Assert.AreEqual(0.0, qla.getQTable().getQValue(startingPosition,
                                                           action), 0.001);

            // The returned action is not inspected by this test; the call is made
            // because the Q-value asserted below is expected to change only after
            // the agent has processed this second perception.
            qla.decideAction(new MDPPerception <CellWorldPosition>(
                                 new CellWorldPosition(1, 3), -0.04));

            Assert.AreEqual(-0.04, qla.getQTable().getQValue(startingPosition,
                                                             action), 0.001);
        }
        // Scenario: the first move made from (1, 4) ends on (2, 4) with reward
        // -1.0. Verifies the chosen actions and the Q-value recorded for
        // (start, first action) before and after that outcome is perceived.
        public void testFirstStepsOfQLAAgentWhenFirstStepTerminates()
        {
            QLearningAgent<CellWorldPosition, String> learner =
                    new QLearningAgent<CellWorldPosition, String>(fourByThree);
            CellWorldPosition startCell = new CellWorldPosition(1, 4);

            // Initial perception: the start cell with a reward of -0.04.
            MDPPerception<CellWorldPosition> startPercept =
                    new MDPPerception<CellWorldPosition>(startCell, -0.04);
            String chosen = learner.decideAction(startPercept);
            Assert.AreEqual(CellWorld.LEFT, chosen);

            // to force left to become an "up"
            double[] fixedDraws = { 0.85 };
            Randomizer slipRandomizer = new MockRandomizer(fixedDraws);
            learner.execute(chosen, slipRandomizer);

            Assert.AreEqual(new CellWorldPosition(2, 4), learner.getCurrentState());
            Assert.AreEqual(-1.0, learner.getCurrentReward(), 0.001);
            // Right after execute() the Q-table entry is expected to be unchanged.
            Assert.AreEqual(0.0, learner.getQTable().getQValue(startCell, chosen), 0.001);

            // Follow-up perception: the reached cell, now with reward -1.
            MDPPerception<CellWorldPosition> resultPercept =
                    new MDPPerception<CellWorldPosition>(new CellWorldPosition(2, 4), -1);
            String followUp = learner.decideAction(resultPercept);

            // No next action is expected; the Q-value should now be -1.0.
            Assert.IsNull(followUp);
            Assert.AreEqual(-1.0, learner.getQTable().getQValue(startCell, chosen), 0.001);
        }
        // Scenario: the randomizer yields 0.7, and the LEFT action chosen at (1, 4)
        // is expected to take the agent to (1, 3) with reward -0.04. The test
        // checks that the Q-value for (start, LEFT) stays 0.0 until the agent is
        // asked to decide again, and is -0.04 afterwards.
        public void testFirstStepsOfQLAAgentUnderNormalProbability()
        {
            QLearningAgent<CellWorldPosition, String> qla = new QLearningAgent<CellWorldPosition, String>(
                    fourByThree);

            // Presumably 0.7 falls in the range where the requested action is
            // carried out as-is (the name suggests an 80% threshold) — confirm
            // against the randomizer handling in execute().
            Randomizer alwaysLessThanEightyPercent = new MockRandomizer(
                    new double[] { 0.7 });
            CellWorldPosition startingPosition = new CellWorldPosition(1, 4);
            String action = qla.decideAction(new MDPPerception<CellWorldPosition>(
                    startingPosition, -0.04));
            Assert.AreEqual(CellWorld.LEFT, action);
            Assert.AreEqual(0.0, qla.getQTable().getQValue(startingPosition,
                    action), 0.001);

            qla.execute(action, alwaysLessThanEightyPercent);
            Assert.AreEqual(new CellWorldPosition(1, 3), qla.getCurrentState());
            Assert.AreEqual(-0.04, qla.getCurrentReward(), 0.001);
            Assert.AreEqual(0.0, qla.getQTable().getQValue(startingPosition,
                    action), 0.001);

            // The returned action is not inspected by this test; the call is made
            // because the Q-value asserted below is expected to change only after
            // the agent has processed this second perception.
            qla.decideAction(new MDPPerception<CellWorldPosition>(
                    new CellWorldPosition(1, 3), -0.04));

            Assert.AreEqual(-0.04, qla.getQTable().getQValue(startingPosition,
                    action), 0.001);
        }