示例#1
0
        /// <summary>
        /// Demo: builds the cell world returned by
        /// <c>CellWorldFactory.CreateCellWorldForFig17_1()</c>, wraps it in a
        /// <c>CellWorldEnvironment</c> starting at cell (1, 1), registers a
        /// <c>PassiveTDAgent</c> that follows a hard-coded policy, and hands the
        /// agent to <c>output_utility_learning_rates</c>.
        /// </summary>
        static void passiveTDAgentDemo()
        {
            CellWorld<double> world = CellWorldFactory.CreateCellWorldForFig17_1();

            // Environment ingredients, evaluated in the same order the
            // constructor arguments were evaluated originally.
            var startCell = world.GetCellAt(1, 1);
            var allCells = world.GetCells();
            var transitionModel = MDPFactory.createTransitionProbabilityFunctionForFigure17_1(world);
            var random = CommonFactory.CreateRandom();
            CellWorldEnvironment environment =
                new CellWorldEnvironment(startCell, allCells, transitionModel, random);

            // Fixed policy: one action per listed cell. Cells not listed here
            // (e.g. (2, 2), (4, 2), (4, 3)) receive no entry.
            IMap<Cell<double>, CellWorldAction> policy =
                CollectionFactory.CreateMap<Cell<double>, CellWorldAction>();
            policy.Put(world.GetCellAt(1, 1), CellWorldAction.Up);
            policy.Put(world.GetCellAt(1, 2), CellWorldAction.Up);
            policy.Put(world.GetCellAt(1, 3), CellWorldAction.Right);
            policy.Put(world.GetCellAt(2, 1), CellWorldAction.Left);
            policy.Put(world.GetCellAt(2, 3), CellWorldAction.Right);
            policy.Put(world.GetCellAt(3, 1), CellWorldAction.Left);
            policy.Put(world.GetCellAt(3, 2), CellWorldAction.Up);
            policy.Put(world.GetCellAt(3, 3), CellWorldAction.Right);
            policy.Put(world.GetCellAt(4, 1), CellWorldAction.Left);

            // NOTE(review): 0.2 and 1.0 are presumably the learning rate and the
            // discount factor -- confirm against the PassiveTDAgent constructor.
            var agent = new PassiveTDAgent<Cell<double>, CellWorldAction>(policy, 0.2, 1.0);
            environment.AddAgent(agent);

            // NOTE(review): the meaning of 20, 500, 100, 1 is defined by
            // output_utility_learning_rates, which is not visible here.
            output_utility_learning_rates(agent, 20, 500, 100, 1);
        }
        /// <summary>
        /// Executes 200 trials of a <c>PassiveTDAgent</c> built from the
        /// <c>fourByThree</c> and <c>policy</c> fixtures, driven by a
        /// <c>MockRandomizer</c> seeded with a fixed array of values, and then
        /// checks the resulting utility of selected positions to within 0.001.
        /// </summary>
        public void testPassiveTDAgent()
        {
            const double tolerance = 0.001;

            PassiveTDAgent<CellWorldPosition, String> learner =
                new PassiveTDAgent<CellWorldPosition, String>(fourByThree, policy);

            // A real randomizer could be substituted here; the mock is given a
            // fixed array of values (presumably replayed in order -- confirm
            // against MockRandomizer).
            double[] scriptedValues = { 0.1, 0.9, 0.2, 0.8, 0.3, 0.7, 0.4, 0.6, 0.5 };
            Randomizer randomizer = new MockRandomizer(scriptedValues);

            // Re-read the utility function after every trial; only the value
            // obtained after the final trial is asserted on below.
            MDPUtilityFunction<CellWorldPosition> utilities = null;
            for (int trial = 1; trial <= 200; ++trial)
            {
                learner.executeTrial(randomizer);
                utilities = learner.getUtilityFunction();
            }

            // Positions (1, y)
            Assert.AreEqual(0.662, utilities.getUtility(new CellWorldPosition(1, 1)), tolerance);
            Assert.AreEqual(0.610, utilities.getUtility(new CellWorldPosition(1, 2)), tolerance);
            Assert.AreEqual(0.553, utilities.getUtility(new CellWorldPosition(1, 3)), tolerance);
            Assert.AreEqual(0.496, utilities.getUtility(new CellWorldPosition(1, 4)), tolerance);

            // Positions (2, y). Position (2, 4) (expected -1.0) is deliberately
            // not asserted: the pseudo-random generator never gets to that square.
            Assert.AreEqual(0.735, utilities.getUtility(new CellWorldPosition(2, 1)), tolerance);
            Assert.AreEqual(0.835, utilities.getUtility(new CellWorldPosition(2, 3)), tolerance);

            // Positions (3, y)
            Assert.AreEqual(0.789, utilities.getUtility(new CellWorldPosition(3, 1)), tolerance);
            Assert.AreEqual(0.889, utilities.getUtility(new CellWorldPosition(3, 3)), tolerance);
            Assert.AreEqual(1.0, utilities.getUtility(new CellWorldPosition(3, 4)), tolerance);
        }
        /// <summary>
        /// Runs a <c>PassiveTDAgent</c> (constructed from the <c>fourByThree</c>
        /// and <c>policy</c> fixtures) for 200 trials using a
        /// <c>MockRandomizer</c> built from a fixed value array, then verifies
        /// the utility reported for each checked position within a 0.001 delta.
        /// </summary>
        public void testPassiveTDAgent()
        {
            PassiveTDAgent<CellWorldPosition, String> tdAgent =
                new PassiveTDAgent<CellWorldPosition, String>(fourByThree, policy);

            // Fixed value array handed to the mock in place of a real randomizer.
            Randomizer mock = new MockRandomizer(
                new double[] { 0.1, 0.9, 0.2, 0.8, 0.3, 0.7, 0.4, 0.6, 0.5 });

            // The utility function is fetched after each trial; the assertions
            // use whatever the last fetch returned.
            MDPUtilityFunction<CellWorldPosition> learned = null;
            int remaining = 200;
            while (remaining > 0)
            {
                tdAgent.executeTrial(mock);
                learned = tdAgent.getUtilityFunction();
                remaining--;
            }

            // Checked positions and, at the same index, their expected utility.
            // (2, 4) with expected -1.0 is intentionally absent: the pseudo-random
            // generator never gets to that square.
            int[][] positions =
            {
                new int[] { 1, 1 }, new int[] { 1, 2 }, new int[] { 1, 3 }, new int[] { 1, 4 },
                new int[] { 2, 1 }, new int[] { 2, 3 },
                new int[] { 3, 1 }, new int[] { 3, 3 }, new int[] { 3, 4 }
            };
            double[] expectedUtilities =
            {
                0.662, 0.610, 0.553, 0.496,
                0.735, 0.835,
                0.789, 0.889, 1.0
            };

            for (int k = 0; k < positions.Length; k++)
            {
                CellWorldPosition position = new CellWorldPosition(positions[k][0], positions[k][1]);
                Assert.AreEqual(expectedUtilities[k], learned.getUtility(position), 0.001);
            }
        }