Ejemplo n.º 1
0
        /// <summary>
        /// Demo of the passive TD agent on the Figure 17.1 cell world: builds the
        /// environment, registers an agent that follows a fixed policy covering nine
        /// cells, and prints the utility learning-rate tables.
        /// </summary>
        static void passiveTDAgentDemo()
        {
            CellWorld<double> world = CellWorldFactory.CreateCellWorldForFig17_1();
            CellWorldEnvironment environment = new CellWorldEnvironment(
                world.GetCellAt(1, 1),
                world.GetCells(),
                MDPFactory.createTransitionProbabilityFunctionForFigure17_1(world),
                CommonFactory.CreateRandom());

            IMap<Cell<double>, CellWorldAction> fixedPolicy = CollectionFactory.CreateMap<Cell<double>, CellWorldAction>();

            // Row i of policyCells is the { x, y } of the cell whose action is policyActions[i].
            int[,] policyCells =
            {
                { 1, 1 }, { 1, 2 }, { 1, 3 },
                { 2, 1 }, { 2, 3 },
                { 3, 1 }, { 3, 2 }, { 3, 3 },
                { 4, 1 }
            };
            CellWorldAction[] policyActions =
            {
                CellWorldAction.Up, CellWorldAction.Up, CellWorldAction.Right,
                CellWorldAction.Left, CellWorldAction.Right,
                CellWorldAction.Left, CellWorldAction.Up, CellWorldAction.Right,
                CellWorldAction.Left
            };
            for (int i = 0; i < policyActions.Length; i++)
            {
                fixedPolicy.Put(world.GetCellAt(policyCells[i, 0], policyCells[i, 1]), policyActions[i]);
            }

            PassiveTDAgent<Cell<double>, CellWorldAction> agent
                = new PassiveTDAgent<Cell<double>, CellWorldAction>(fixedPolicy, 0.2, 1.0);
            environment.AddAgent(agent);

            output_utility_learning_rates(agent, 20, 500, 100, 1);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Builds the cell world defined in Figure 17.1 (a) of AIMA3e: a simple
        /// 4 x 3 environment that presents the agent with a sequential decision
        /// problem.
        /// </summary>
        /// <remarks>
        /// Cell (2, 2) is removed, and the contents of cells (4, 3) and (4, 2) are
        /// set to 1.0 and -1.0 respectively.
        /// </remarks>
        /// <returns>a cell world representation of Fig 17.1 in AIMA3e.</returns>
        public static CellWorld<double> CreateCellWorldForFig17_1()
        {
            // Third constructor argument; presumably the initial content of every cell (confirm against CellWorld).
            const double initialContent = -0.04;

            CellWorld<double> world = new CellWorld<double>(4, 3, initialContent);
            world.RemoveCell(2, 2);

            world.GetCellAt(4, 3).setContent(1.0);
            world.GetCellAt(4, 2).setContent(-1.0);

            return world;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Returns the actions function for the cell world described in Fig 17.1.
        /// The cells at (4, 3) and (4, 2) are collected as the terminal cells and
        /// handed to the actions function.
        /// </summary>
        /// <param name="cw">the cell world from figure 17.1.</param>
        /// <returns>
        /// the actions function giving the set of actions allowed at a particular
        /// cell; this set is documented to be empty at a terminal state.
        /// </returns>
        public static IActionsFunction<Cell<double>, CellWorldAction> createActionsFunctionForFigure17_1(CellWorld<double> cw)
        {
            ISet<Cell<double>> terminalCells = CollectionFactory.CreateSet<Cell<double>>();

            // { x, y } of each terminal cell, added in this order.
            int[,] terminalCoordinates = { { 4, 3 }, { 4, 2 } };
            for (int i = 0; i < terminalCoordinates.GetLength(0); i++)
            {
                terminalCells.Add(cw.GetCellAt(terminalCoordinates[i, 0], terminalCoordinates[i, 1]));
            }

            return new createActionsFunctionForFigure17_1ActionsFunction(terminalCells);
        }
Ejemplo n.º 4
0
 /// <summary>
 /// Builds the MDP that can be used to generate the utility values detailed in Fig 17.3.
 /// </summary>
 /// <remarks>
 /// The MDP is assembled from the world's cells, the cell at (1, 1), and the
 /// actions, transition probability and reward functions for Figure 17.1.
 /// </remarks>
 /// <param name="cw">the cell world from figure 17.1.</param>
 /// <returns>an MDP that can be used to generate the utility values detailed in Fig 17.3.</returns>
 public static IMarkovDecisionProcess<Cell<double>, CellWorldAction> createMDPForFigure17_3(CellWorld<double> cw)
 {
     // Built in the same order as the constructor arguments below.
     var states = cw.GetCells();
     var initialState = cw.GetCellAt(1, 1);
     var actionsFunction = createActionsFunctionForFigure17_1(cw);
     var transitionFunction = createTransitionProbabilityFunctionForFigure17_1(cw);
     var rewardFunction = createRewardFunctionForFigure17_1();

     return new MDP<Cell<double>, CellWorldAction>(
         states, initialState, actionsFunction, transitionFunction, rewardFunction);
 }
Ejemplo n.º 5
0
        /// <summary>
        /// Demo of the Q-learning agent on the Figure 17.1 cell world: builds the
        /// environment, registers the agent, and prints the utility learning-rate
        /// tables.
        /// </summary>
        static void qLearningAgentDemo()
        {
            CellWorld<double> world = CellWorldFactory.CreateCellWorldForFig17_1();

            // Environment pieces, created in the same order as the constructor arguments.
            var startCell = world.GetCellAt(1, 1);
            var allCells = world.GetCells();
            var transitionFunction = MDPFactory.createTransitionProbabilityFunctionForFigure17_1(world);
            var random = CommonFactory.CreateRandom();
            CellWorldEnvironment environment = new CellWorldEnvironment(startCell, allCells, transitionFunction, random);

            QLearningAgent<Cell<double>, CellWorldAction> agent =
                new QLearningAgent<Cell<double>, CellWorldAction>(
                    MDPFactory.createActionsFunctionForFigure17_1(world),
                    CellWorldAction.None, 0.2, 1.0, 5, 2.0);
            environment.AddAgent(agent);

            output_utility_learning_rates(agent, 20, 10000, 500, 20);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Runs value iteration on the fixture MDP and checks the utility of each of
        /// the eleven cells against its expected value, within DELTA_THRESHOLD.
        /// </summary>
        public void testValueIterationForFig17_3()
        {
            IMap<Cell<double>, double> utilities = vi.valueIteration(mdp, 0.0001);

            // Row i of cells is the { x, y } whose expected utility is expected[i].
            int[,] cells =
            {
                { 1, 1 }, { 1, 2 }, { 1, 3 },
                { 2, 1 }, { 2, 3 },
                { 3, 1 }, { 3, 2 }, { 3, 3 },
                { 4, 1 }, { 4, 2 }, { 4, 3 }
            };
            double[] expected =
            {
                0.705, 0.762, 0.812,
                0.655, 0.868,
                0.611, 0.660, 0.918,
                0.388, -1.0, 1.0
            };

            for (int i = 0; i < expected.Length; i++)
            {
                Assert.AreEqual(expected[i], utilities.Get(cw.GetCellAt(cells[i, 0], cells[i, 1])), DELTA_THRESHOLD);
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Applying the Up action to cell (1, 1) must yield the cell at x = 1, y = 2.
        /// </summary>
        public void testMoveUpIntoAdjacentCellChangesPositionCorrectly()
        {
            Cell<double> origin = cw.GetCellAt(1, 1);
            Cell<double> destination = cw.Result(origin, CellWorldAction.Up);

            Assert.AreEqual(1, destination.getX());
            Assert.AreEqual(2, destination.getY());
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Checks the transition probabilities reported by the MDP for the Up, Down,
        /// Left and Right actions. Cell (1, 1) is always passed as the second
        /// argument (presumably the state the action is taken in) and the first
        /// argument is the candidate resulting cell.
        /// </summary>
        public void testMDPTransitionModel()
        {
            // The probabilities are computed doubles, so they are compared within a
            // small tolerance instead of for bit-exact equality.
            const double tolerance = 1e-9;

            Cell<double> start = cw.GetCellAt(1, 1);

            // Up
            Assert.AreEqual(0.8, mdp.transitionProbability(cw.GetCellAt(1, 2), start, CellWorldAction.Up), tolerance);
            Assert.AreEqual(0.1, mdp.transitionProbability(start, start, CellWorldAction.Up), tolerance);
            Assert.AreEqual(0.1, mdp.transitionProbability(cw.GetCellAt(2, 1), start, CellWorldAction.Up), tolerance);
            Assert.AreEqual(0.0, mdp.transitionProbability(cw.GetCellAt(1, 3), start, CellWorldAction.Up), tolerance);

            // Down
            Assert.AreEqual(0.9, mdp.transitionProbability(start, start, CellWorldAction.Down), tolerance);
            Assert.AreEqual(0.1, mdp.transitionProbability(cw.GetCellAt(2, 1), start, CellWorldAction.Down), tolerance);
            Assert.AreEqual(0.0, mdp.transitionProbability(cw.GetCellAt(3, 1), start, CellWorldAction.Down), tolerance);
            Assert.AreEqual(0.0, mdp.transitionProbability(cw.GetCellAt(1, 2), start, CellWorldAction.Down), tolerance);

            // Left
            Assert.AreEqual(0.9, mdp.transitionProbability(start, start, CellWorldAction.Left), tolerance);
            Assert.AreEqual(0.0, mdp.transitionProbability(cw.GetCellAt(2, 1), start, CellWorldAction.Left), tolerance);
            Assert.AreEqual(0.0, mdp.transitionProbability(cw.GetCellAt(3, 1), start, CellWorldAction.Left), tolerance);
            Assert.AreEqual(0.1, mdp.transitionProbability(cw.GetCellAt(1, 2), start, CellWorldAction.Left), tolerance);

            // Right
            Assert.AreEqual(0.8, mdp.transitionProbability(cw.GetCellAt(2, 1), start, CellWorldAction.Right), tolerance);
            Assert.AreEqual(0.1, mdp.transitionProbability(start, start, CellWorldAction.Right), tolerance);
            Assert.AreEqual(0.1, mdp.transitionProbability(cw.GetCellAt(1, 2), start, CellWorldAction.Right), tolerance);
            Assert.AreEqual(0.0, mdp.transitionProbability(cw.GetCellAt(1, 3), start, CellWorldAction.Right), tolerance);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Runs policy iteration on the fixture MDP and checks the chosen action for
        /// nine cells, then checks that the policy yields null for cells (4, 2) and
        /// (4, 3).
        /// </summary>
        public void testPolicyIterationForFig17_2()
        {
            // AIMA3e check with Figure 17.2 (a)
            IPolicy<Cell<double>, CellWorldAction> computedPolicy = pi.policyIteration(mdp);

            // Row i of cells is the { x, y } whose expected action is expectedActions[i].
            int[,] cells =
            {
                { 1, 1 }, { 1, 2 }, { 1, 3 },
                { 2, 1 }, { 2, 3 },
                { 3, 1 }, { 3, 2 }, { 3, 3 },
                { 4, 1 }
            };
            CellWorldAction[] expectedActions =
            {
                CellWorldAction.Up, CellWorldAction.Up, CellWorldAction.Right,
                CellWorldAction.Left, CellWorldAction.Right,
                CellWorldAction.Left, CellWorldAction.Up, CellWorldAction.Right,
                CellWorldAction.Left
            };

            for (int i = 0; i < expectedActions.Length; i++)
            {
                Assert.AreEqual(expectedActions[i],
                                computedPolicy.action(cw.GetCellAt(cells[i, 0], cells[i, 1])));
            }

            Assert.IsNull(computedPolicy.action(cw.GetCellAt(4, 2)));
            Assert.IsNull(computedPolicy.action(cw.GetCellAt(4, 3)));
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Runs the given reinforcement agent in a newly created Figure 17.1 cell
        /// world environment and prints tab-separated lines to the console: the
        /// utility estimates of six cells over the last run, and the root mean
        /// square error of the utility of cell (1, 1) across all runs.
        /// </summary>
        /// <param name="reinforcementAgent">
        /// the agent to add to the environment; it is reset at the start of every run.
        /// </param>
        /// <param name="numRuns">the number of runs to execute; must be greater than zero.</param>
        /// <param name="numTrialsPerRun">the number of trials executed in each run.</param>
        /// <param name="rmseTrialsToReport">
        /// the number of recorded trials for which an RMS error is reported; may not
        /// exceed <c>numTrialsPerRun / reportEveryN</c>.
        /// </param>
        /// <param name="reportEveryN">
        /// the agent's utilities are recorded on every trial whose index is a
        /// multiple of this value; must be greater than zero.
        /// </param>
        /// <exception cref="IllegalArgumentException">
        /// if <paramref name="reportEveryN"/> or <paramref name="numRuns"/> is not
        /// positive, or if more RMSE trials are requested than are reportable.
        /// </exception>
        protected static void output_utility_learning_rates(
            ReinforcementAgent<Cell<double>, CellWorldAction> reinforcementAgent,
            int numRuns, int numTrialsPerRun, int rmseTrialsToReport,
            int reportEveryN)
        {
            // Reject values that would otherwise surface as a division by zero or
            // as a lookup of run index -1 further down.
            if (reportEveryN <= 0)
            {
                throw new IllegalArgumentException("reportEveryN must be greater than zero, was " + reportEveryN);
            }
            if (numRuns <= 0)
            {
                throw new IllegalArgumentException("numRuns must be greater than zero, was " + numRuns);
            }

            int reportableTrials = numTrialsPerRun / reportEveryN;
            if (rmseTrialsToReport > reportableTrials)
            {
                throw new IllegalArgumentException("Requesting to report too many RMSE trials, max allowed for args is "
                                                   + reportableTrials);
            }

            CellWorld<double> cw = CellWorldFactory.CreateCellWorldForFig17_1();
            CellWorldEnvironment cwe = new CellWorldEnvironment(
                cw.GetCellAt(1, 1),
                cw.GetCells(),
                MDPFactory.createTransitionProbabilityFunctionForFigure17_1(cw),
                CommonFactory.CreateRandom());

            cwe.AddAgent(reinforcementAgent);

            // Run index -> utilities recorded during that run, in recording order.
            IMap<int, ICollection<IMap<Cell<double>, double>>> runs =
                CollectionFactory.CreateInsertionOrderedMap<int, ICollection<IMap<Cell<double>, double>>>();

            for (int r = 0; r < numRuns; r++)
            {
                reinforcementAgent.reset();
                ICollection<IMap<Cell<double>, double>> trials = CollectionFactory.CreateQueue<IMap<Cell<double>, double>>();
                for (int t = 0; t < numTrialsPerRun; t++)
                {
                    cwe.executeTrial();
                    if (0 == t % reportEveryN)
                    {
                        trials.Add(reinforcementAgent.getUtility());
                    }
                }
                runs.Put(r, trials);
            }

            // The cells whose utility estimates are printed; looked up once.
            Cell<double> cell4_3 = cw.GetCellAt(4, 3);
            Cell<double> cell3_3 = cw.GetCellAt(3, 3);
            Cell<double> cell1_3 = cw.GetCellAt(1, 3);
            Cell<double> cell1_1 = cw.GetCellAt(1, 1);
            Cell<double> cell3_2 = cw.GetCellAt(3, 2);
            Cell<double> cell2_1 = cw.GetCellAt(2, 1);

            IStringBuilder v4_3 = TextFactory.CreateStringBuilder();
            IStringBuilder v3_3 = TextFactory.CreateStringBuilder();
            IStringBuilder v1_3 = TextFactory.CreateStringBuilder();
            IStringBuilder v1_1 = TextFactory.CreateStringBuilder();
            IStringBuilder v3_2 = TextFactory.CreateStringBuilder();
            IStringBuilder v2_1 = TextFactory.CreateStringBuilder();

            // Use the last run
            ICollection<IMap<Cell<double>, double>> lastRun = runs.Get(numRuns - 1);
            for (int t = 0; t < reportableTrials; t++)
            {
                IMap<Cell<double>, double> u = lastRun.Get(t);
                v4_3.Append(utilityOrZero(u, cell4_3) + "\t");
                v3_3.Append(utilityOrZero(u, cell3_3) + "\t");
                v1_3.Append(utilityOrZero(u, cell1_3) + "\t");
                v1_1.Append(utilityOrZero(u, cell1_1) + "\t");
                v3_2.Append(utilityOrZero(u, cell3_2) + "\t");
                v2_1.Append(utilityOrZero(u, cell2_1) + "\t");
            }

            // Reference utility of cell (1, 1) against which the error is measured.
            const double referenceUtility1_1 = 0.705;

            IStringBuilder rmseValues = TextFactory.CreateStringBuilder();

            for (int t = 0; t < rmseTrialsToReport; t++)
            {
                // Calculate the Root Mean Square Error for utility of 1,1
                // for this trial# across all runs
                double sumOfSquaredErrors = 0;
                for (int r = 0; r < numRuns; r++)
                {
                    IMap<Cell<double>, double> u = runs.Get(r).Get(t);
                    // The value is a plain double, so a missing entry cannot be
                    // detected through a null check here.
                    double val1_1 = u.Get(cell1_1);
                    sumOfSquaredErrors += System.Math.Pow(referenceUtility1_1 - val1_1, 2);
                }
                double rmse = System.Math.Sqrt(sumOfSquaredErrors / runs.Size());
                rmseValues.Append(rmse);
                rmseValues.Append("\t");
            }

            System.Console
            .WriteLine("Note: You may copy and paste the following lines into a spreadsheet to generate graphs of learning rate and RMS error in utility:");
            System.Console.WriteLine("(4,3)" + "\t" + v4_3);
            System.Console.WriteLine("(3,3)" + "\t" + v3_3);
            System.Console.WriteLine("(1,3)" + "\t" + v1_3);
            System.Console.WriteLine("(1,1)" + "\t" + v1_1);
            System.Console.WriteLine("(3,2)" + "\t" + v3_2);
            System.Console.WriteLine("(2,1)" + "\t" + v2_1);
            System.Console.WriteLine("RMSeiu" + "\t" + rmseValues);
        }

        /// <summary>
        /// Returns the utility stored for the given cell, or 0.0 when the map holds
        /// no entry for it.
        /// </summary>
        private static double utilityOrZero(IMap<Cell<double>, double> utilities, Cell<double> cell)
        {
            return utilities.ContainsKey(cell) ? utilities.Get(cell) : 0.0;
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Demo of the passive ADP agent on the Figure 17.1 cell world: prints a
        /// banner, builds the environment, registers an agent that follows a fixed
        /// policy covering nine cells, and prints the utility learning-rate tables.
        /// </summary>
        static void passiveADPAgentDemo()
        {
            System.Console.WriteLine("=======================");
            System.Console.WriteLine("DEMO: Passive-ADP-Agent");
            System.Console.WriteLine("=======================");
            System.Console.WriteLine("Figure 21.3");
            System.Console.WriteLine("-----------");

            CellWorld<double> world = CellWorldFactory.CreateCellWorldForFig17_1();
            CellWorldEnvironment environment = new CellWorldEnvironment(
                world.GetCellAt(1, 1),
                world.GetCells(),
                MDPFactory.createTransitionProbabilityFunctionForFigure17_1(world),
                CommonFactory.CreateRandom());

            IMap<Cell<double>, CellWorldAction> fixedPolicy =
                CollectionFactory.CreateInsertionOrderedMap<Cell<double>, CellWorldAction>();

            // Row i of policyCells is the { x, y } of the cell whose action is policyActions[i].
            int[,] policyCells =
            {
                { 1, 1 }, { 1, 2 }, { 1, 3 },
                { 2, 1 }, { 2, 3 },
                { 3, 1 }, { 3, 2 }, { 3, 3 },
                { 4, 1 }
            };
            CellWorldAction[] policyActions =
            {
                CellWorldAction.Up, CellWorldAction.Up, CellWorldAction.Right,
                CellWorldAction.Left, CellWorldAction.Right,
                CellWorldAction.Left, CellWorldAction.Up, CellWorldAction.Right,
                CellWorldAction.Left
            };
            for (int i = 0; i < policyActions.Length; i++)
            {
                fixedPolicy.Put(world.GetCellAt(policyCells[i, 0], policyCells[i, 1]), policyActions[i]);
            }

            PassiveADPAgent<Cell<double>, CellWorldAction> agent =
                new PassiveADPAgent<Cell<double>, CellWorldAction>(
                    fixedPolicy, world.GetCells(), world.GetCellAt(1, 1),
                    MDPFactory.createActionsFunctionForFigure17_1(world),
                    new ModifiedPolicyEvaluation<Cell<double>, CellWorldAction>(10, 1.0));
            environment.AddAgent(agent);

            output_utility_learning_rates(agent, 20, 100, 100, 1);

            System.Console.WriteLine("=========================");
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Demo of policy iteration on the Figure 17.1 cell world: computes a policy
        /// for the Figure 17.3 MDP and prints the action it selects for each of the
        /// eleven cells, one "(x,y) = action" line per cell.
        /// </summary>
        static void policyIterationDemo()
        {
            System.Console.WriteLine("DEMO: Policy Iteration");
            System.Console.WriteLine("======================");
            System.Console.WriteLine("Figure 17.3");
            System.Console.WriteLine("-----------");

            CellWorld<double> world = CellWorldFactory.CreateCellWorldForFig17_1();
            IMarkovDecisionProcess<Cell<double>, CellWorldAction> mdp = MDPFactory.createMDPForFigure17_3(world);

            ModifiedPolicyEvaluation<Cell<double>, CellWorldAction> evaluation =
                new ModifiedPolicyEvaluation<Cell<double>, CellWorldAction>(50, 1.0);
            PolicyIteration<Cell<double>, CellWorldAction> iteration =
                new PolicyIteration<Cell<double>, CellWorldAction>(evaluation);

            IPolicy<Cell<double>, CellWorldAction> policy = iteration.policyIteration(mdp);

            // { x, y } of every cell to report, in output order.
            int[,] cells =
            {
                { 1, 1 }, { 1, 2 }, { 1, 3 },
                { 2, 1 }, { 2, 3 },
                { 3, 1 }, { 3, 2 }, { 3, 3 },
                { 4, 1 }, { 4, 2 }, { 4, 3 }
            };
            for (int i = 0; i < cells.GetLength(0); i++)
            {
                int x = cells[i, 0];
                int y = cells[i, 1];
                System.Console.WriteLine("(" + x + "," + y + ") = " + policy.action(world.GetCellAt(x, y)));
            }

            System.Console.WriteLine("=========================");
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Demo of value iteration on the Figure 17.1 cell world: computes utilities
        /// for the Figure 17.3 MDP and prints the utility of each of the eleven
        /// cells, one "(x,y) = utility" line per cell.
        /// </summary>
        static void valueIterationDemo()
        {
            System.Console.WriteLine("DEMO: Value Iteration");
            System.Console.WriteLine("=====================");
            System.Console.WriteLine("Figure 17.3");
            System.Console.WriteLine("-----------");

            CellWorld<double> world = CellWorldFactory.CreateCellWorldForFig17_1();
            IMarkovDecisionProcess<Cell<double>, CellWorldAction> mdp = MDPFactory.createMDPForFigure17_3(world);
            ValueIteration<Cell<double>, CellWorldAction> iteration =
                new ValueIteration<Cell<double>, CellWorldAction>(1.0);

            IMap<Cell<double>, double> utilities = iteration.valueIteration(mdp, 0.0001);

            // { x, y } of every cell to report, in output order.
            int[,] cells =
            {
                { 1, 1 }, { 1, 2 }, { 1, 3 },
                { 2, 1 }, { 2, 3 },
                { 3, 1 }, { 3, 2 }, { 3, 3 },
                { 4, 1 }, { 4, 2 }, { 4, 3 }
            };
            for (int i = 0; i < cells.GetLength(0); i++)
            {
                int x = cells[i, 0];
                int y = cells[i, 1];
                System.Console.WriteLine("(" + x + "," + y + ") = " + utilities.Get(world.GetCellAt(x, y)));
            }

            System.Console.WriteLine("=========================");
        }