        public void testFirstStepsOfQLAAgentWhenFirstStepTerminates()
        {
            QLearningAgent<CellWorldPosition, String> qla = new QLearningAgent<CellWorldPosition, String>(
                fourByThree);

            CellWorldPosition startingPosition = new CellWorldPosition(1, 4);
            String action = qla.decideAction(new MDPPerception<CellWorldPosition>(
                startingPosition, -0.04));

            Assert.AreEqual(CellWorld.LEFT, action);

            // 0.85 falls in the 80-90% band of the transition model, forcing the
            // intended "left" to become an "up" that lands in the terminal cell (2, 4).
            Randomizer betweenEightyAndNinetyPercent = new MockRandomizer(
                new double[] { 0.85 });

            qla.execute(action, betweenEightyAndNinetyPercent);
            Assert.AreEqual(new CellWorldPosition(2, 4), qla.getCurrentState());
            Assert.AreEqual(-1.0, qla.getCurrentReward(), 0.001);

            // The Q entry for the first step is not updated until the agent
            // perceives the resulting state on the next decideAction() call.
            Assert.AreEqual(0.0, qla.getQTable().getQValue(startingPosition, action), 0.001);

            String action2 = qla.decideAction(new MDPPerception<CellWorldPosition>(
                new CellWorldPosition(2, 4), -1));

            // The terminal state yields no further action, and the terminal reward
            // is propagated back into the Q-value of the first (state, action) pair.
            Assert.IsNull(action2);
            Assert.AreEqual(-1.0, qla.getQTable().getQValue(startingPosition, action), 0.001);
        }
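
        // Note: in both "first step" tests, the Q-value for (startingPosition, LEFT)
        // stays at 0.0 immediately after execute() and only changes on the next
        // decideAction() call, at which point it matches the reward perceived in the
        // successor state (-1.0 for the terminal cell, -0.04 for an ordinary cell).
        // This is exactly what the assertions above and below verify.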
        public void testFirstStepsOfQLAAgentUnderNormalProbability()
        {
            QLearningAgent<CellWorldPosition, String> qla = new QLearningAgent<CellWorldPosition, String>(
                fourByThree);

            // 0.7 is below the 80% success probability, so the intended move succeeds.
            Randomizer alwaysLessThanEightyPercent = new MockRandomizer(
                new double[] { 0.7 });
            CellWorldPosition startingPosition = new CellWorldPosition(1, 4);
            String action = qla.decideAction(new MDPPerception<CellWorldPosition>(
                startingPosition, -0.04));

            Assert.AreEqual(CellWorld.LEFT, action);
            Assert.AreEqual(0.0, qla.getQTable().getQValue(startingPosition, action), 0.001);

            qla.execute(action, alwaysLessThanEightyPercent);
            Assert.AreEqual(new CellWorldPosition(1, 3), qla.getCurrentState());
            Assert.AreEqual(-0.04, qla.getCurrentReward(), 0.001);
            Assert.AreEqual(0.0, qla.getQTable().getQValue(startingPosition, action), 0.001);

            // decideAction() is called here only to trigger the Q update for the
            // first step; the returned action itself is not asserted.
            String action2 = qla.decideAction(new MDPPerception<CellWorldPosition>(
                new CellWorldPosition(1, 3), -0.04));

            Assert.AreEqual(-0.04, qla.getQTable().getQValue(startingPosition, action), 0.001);
        }
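
        // Runs 100 full trials against the 4x3 world with a fixed sequence of
        // "random" values. It asserts nothing and mainly serves as a smoke test
        // for executeTrial(), getQ() and getQTable().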
        public void testQLearningAgent()
        {
            QLearningAgent<CellWorldPosition, String> qla = new QLearningAgent<CellWorldPosition, String>(
                fourByThree);

            // MockRandomizer replays this fixed sequence, so the 100 trials below
            // exercise a repeatable mix of intended and slipped moves.
            Randomizer r = new MockRandomizer(new double[] { 0.1, 0.9, 0.2, 0.8,
                                                             0.3, 0.7, 0.4, 0.6, 0.5 });

            // Randomizer r = new JavaRandomizer();
            Dictionary<Pair<CellWorldPosition, String>, Double> q = null;
            QTable<CellWorldPosition, String> qTable = null;

            for (int i = 0; i < 100; i++)
            {
                qla.executeTrial(r);
                q = qla.getQ();
                qTable = qla.getQTable();
            }
            // qTable.normalize();
            // System.Console.WriteLine(qTable);
            // System.Console.WriteLine(qTable.getPolicy());
        }
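
        // The sketch below is not part of the original test fixture. It assumes the
        // JavaRandomizer and QTable.normalize()/getPolicy() calls referenced in the
        // commented-out lines of testQLearningAgent() work as shown there, and simply
        // wires them together to print the policy learned after 100 trials. The method
        // name and local variable names are illustrative only.
        public void learnAndPrintPolicySketch()
        {
            QLearningAgent<CellWorldPosition, String> qla = new QLearningAgent<CellWorldPosition, String>(
                fourByThree);
            Randomizer r = new JavaRandomizer();

            // Train for the same number of trials as testQLearningAgent().
            for (int i = 0; i < 100; i++)
            {
                qla.executeTrial(r);
            }

            // Inspect the learned table and the policy derived from it.
            QTable<CellWorldPosition, String> qTable = qla.getQTable();
            qTable.normalize();
            System.Console.WriteLine(qTable);
            System.Console.WriteLine(qTable.getPolicy());
        }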