Exemple #1
0
        //
        // PRIVATE METHODS
        //

        /// <summary>
        /// Returns a copy of the current utility function in which the utility of
        /// <c>previousState</c> has been moved towards
        /// <c>previousReward + gamma * U(currentState)</c>.
        /// The step is scaled by <c>stateCount.probabilityOf(previousState)</c>.
        /// The stored <c>utilityFunction</c> itself is not written to here; only the
        /// copy is modified.
        /// </summary>
        /// <param name="gamma">Factor applied to the utility of <c>currentState</c>.</param>
        /// <returns>The adjusted copy of the utility function.</returns>
        private MDPUtilityFunction<STATE_TYPE> updateUtilityFunction(double gamma)
        {
            MDPUtilityFunction<STATE_TYPE> updated = utilityFunction.copy();

            double previousUtility   = utilityFunction.getUtility(previousState);
            double discountedCurrent = gamma * utilityFunction.getUtility(currentState);
            double utilityGap        = discountedCurrent - utilityFunction.getUtility(previousState);

            // Weight of the correction (the original code calls the product the "alpha term").
            double alpha      = stateCount.probabilityOf(previousState);
            double correction = alpha * (previousReward + utilityGap);

            updated.setUtility(previousState, previousUtility + correction);
            return updated;
        }
        /// <summary>
        /// Runs 200 trials of a <c>PassiveTDAgent</c> built from <c>fourByThree</c> and
        /// <c>policy</c>, then compares the utilities it reports for individual cells
        /// against fixed expected values (tolerance 0.001).
        /// </summary>
        public void testPassiveTDAgent()
        {
            const int    trialCount = 200;
            const double tolerance  = 0.001;

            PassiveTDAgent<CellWorldPosition, String> tdAgent =
                new PassiveTDAgent<CellWorldPosition, String>(fourByThree, policy);

            // A MockRandomizer fed a fixed list of values is used instead of a real
            // random source (presumably so that every run is reproducible).
            double[]   scriptedValues = { 0.1, 0.9, 0.2, 0.8, 0.3, 0.7, 0.4, 0.6, 0.5 };
            Randomizer randomizer     = new MockRandomizer(scriptedValues);

            MDPUtilityFunction<CellWorldPosition> learned = null;
            for (int trial = 0; trial < trialCount; trial++)
            {
                tdAgent.executeTrial(randomizer);
                learned = tdAgent.getUtilityFunction();
            }

            // First coordinate 1
            Assert.AreEqual(0.662, learned.getUtility(new CellWorldPosition(1, 1)), tolerance);
            Assert.AreEqual(0.610, learned.getUtility(new CellWorldPosition(1, 2)), tolerance);
            Assert.AreEqual(0.553, learned.getUtility(new CellWorldPosition(1, 3)), tolerance);
            Assert.AreEqual(0.496, learned.getUtility(new CellWorldPosition(1, 4)), tolerance);

            // First coordinate 2. Cell (2, 4) with expected utility -1.0 is deliberately
            // not checked: the pseudo-random generator never gets to that square.
            Assert.AreEqual(0.735, learned.getUtility(new CellWorldPosition(2, 1)), tolerance);
            Assert.AreEqual(0.835, learned.getUtility(new CellWorldPosition(2, 3)), tolerance);

            // First coordinate 3
            Assert.AreEqual(0.789, learned.getUtility(new CellWorldPosition(3, 1)), tolerance);
            Assert.AreEqual(0.889, learned.getUtility(new CellWorldPosition(3, 3)), tolerance);
            Assert.AreEqual(1.0, learned.getUtility(new CellWorldPosition(3, 4)), tolerance);
        }
        /// <summary>
        /// Runs 100 trials of a <c>PassiveADPAgent</c> built from <c>fourByThree</c> and
        /// <c>policy</c>, then compares the utilities it reports for individual cells
        /// against fixed expected values (tolerance 0.001).
        /// </summary>
        public void testPassiveADPAgent()
        {
            const int    trialCount = 100;
            const double tolerance  = 0.001;

            PassiveADPAgent<CellWorldPosition, String> adpAgent =
                new PassiveADPAgent<CellWorldPosition, String>(fourByThree, policy);

            // A MockRandomizer fed a fixed list of values is used instead of a real
            // random source (presumably so that every run is reproducible).
            double[]   scriptedValues = { 0.1, 0.9, 0.2, 0.8, 0.3, 0.7, 0.4, 0.6, 0.5 };
            Randomizer randomizer     = new MockRandomizer(scriptedValues);

            MDPUtilityFunction<CellWorldPosition> learned = null;
            for (int trial = 0; trial < trialCount; trial++)
            {
                adpAgent.executeTrial(randomizer);
                learned = adpAgent.getUtilityFunction();
            }

            // First coordinate 1
            Assert.AreEqual(0.676, learned.getUtility(new CellWorldPosition(1, 1)), tolerance);
            Assert.AreEqual(0.626, learned.getUtility(new CellWorldPosition(1, 2)), tolerance);
            Assert.AreEqual(0.573, learned.getUtility(new CellWorldPosition(1, 3)), tolerance);
            Assert.AreEqual(0.519, learned.getUtility(new CellWorldPosition(1, 4)), tolerance);

            // First coordinate 2. Cell (2, 4) with expected utility -1.0 is deliberately
            // not checked: the pseudo-random generator never gets to that square.
            Assert.AreEqual(0.746, learned.getUtility(new CellWorldPosition(2, 1)), tolerance);
            Assert.AreEqual(0.865, learned.getUtility(new CellWorldPosition(2, 3)), tolerance);

            // First coordinate 3
            Assert.AreEqual(0.796, learned.getUtility(new CellWorldPosition(3, 1)), tolerance);
            Assert.AreEqual(0.906, learned.getUtility(new CellWorldPosition(3, 3)), tolerance);
            Assert.AreEqual(1.0, learned.getUtility(new CellWorldPosition(3, 4)), tolerance);
        }
        //
        // PRIVATE METHODS
        //
        /// <summary>
        /// Produces a copy of the current utility function in which the utility of one
        /// state has been recomputed from the given transitions.
        /// The state updated is the initial state of the first transition in the list.
        /// Its new utility is its reward plus <paramref name="gamma"/> times the
        /// probability-weighted sum of the current utilities of every transition's
        /// destination state.
        /// </summary>
        /// <param name="validTransitions">
        /// Transitions to sum over; when the list is empty the copy is returned unchanged.
        /// </param>
        /// <param name="gamma">Factor applied to the weighted sum of destination utilities.</param>
        /// <returns>The (possibly adjusted) copy of the utility function.</returns>
        private MDPUtilityFunction<STATE_TYPE> valueDetermination(
            List<MDPTransition<STATE_TYPE, ACTION_TYPE>> validTransitions,
            double gamma)
        {
            MDPUtilityFunction<STATE_TYPE> updated = utilityFunction.copy();

            // Nothing to recompute without at least one transition.
            if (validTransitions.Count <= 0)
            {
                return updated;
            }

            // Only the first transition's initial state is consulted for the state to update.
            STATE_TYPE sourceState  = validTransitions[0].getInitialState();
            double     sourceReward = mdp.getRewardFor(sourceState);

            double weightedUtility = 0.0;
            foreach (MDPTransition<STATE_TYPE, ACTION_TYPE> step in validTransitions)
            {
                double probability        = mdp.getTransitionProbability(step);
                double destinationUtility = utilityFunction.getUtility(step.getDestinationState());
                weightedUtility += probability * destinationUtility;
            }

            updated.setUtility(sourceState, sourceReward + (gamma * weightedUtility));
            return updated;
        }
        /// <summary>
        /// Runs value iteration on <c>fourByThreeMDP</c> and compares the resulting
        /// utilities with the reference values of AIMA2e Fig 17.3 (tolerance 0.001).
        /// Each cell is asserted exactly once.
        /// </summary>
        public void testValueIterationInCellWorld()
        {
            const double tolerance = 0.001;

            // Arguments 1 and 0.00001 are presumably the discount factor and the error
            // margin at which iteration stops -- confirm against the method's signature.
            MDPUtilityFunction<CellWorldPosition> uf = fourByThreeMDP
                .valueIterationTillMAximumUtilityGrowthFallsBelowErrorMargin(1, 0.00001);

            // AIMA2e check against Fig 17.3
            Assert.AreEqual(0.705, uf.getUtility(new CellWorldPosition(1, 1)), tolerance);
            Assert.AreEqual(0.655, uf.getUtility(new CellWorldPosition(1, 2)), tolerance);
            Assert.AreEqual(0.611, uf.getUtility(new CellWorldPosition(1, 3)), tolerance);
            Assert.AreEqual(0.388, uf.getUtility(new CellWorldPosition(1, 4)), tolerance);

            // No assertion is made for cell (2, 2).
            Assert.AreEqual(0.762, uf.getUtility(new CellWorldPosition(2, 1)), tolerance);
            Assert.AreEqual(0.660, uf.getUtility(new CellWorldPosition(2, 3)), tolerance);
            Assert.AreEqual(-1.0, uf.getUtility(new CellWorldPosition(2, 4)), tolerance);

            Assert.AreEqual(0.812, uf.getUtility(new CellWorldPosition(3, 1)), tolerance);
            Assert.AreEqual(0.868, uf.getUtility(new CellWorldPosition(3, 2)), tolerance);
            Assert.AreEqual(0.918, uf.getUtility(new CellWorldPosition(3, 3)), tolerance);
            Assert.AreEqual(1.0, uf.getUtility(new CellWorldPosition(3, 4)), tolerance);
        }