static void valueIterationDemo()
        {
            System.Console.WriteLine("DEMO: Value Iteration");
            System.Console.WriteLine("=====================");
            System.Console.WriteLine("Figure 17.3");
            System.Console.WriteLine("-----------");

            CellWorld <double> cw = CellWorldFactory.CreateCellWorldForFig17_1();
            IMarkovDecisionProcess <Cell <double>, CellWorldAction> mdp = MDPFactory.createMDPForFigure17_3(cw);
            ValueIteration <Cell <double>, CellWorldAction>
            vi = new ValueIteration <Cell <double>, CellWorldAction>(1.0);

            IMap <Cell <double>, double> U = vi.valueIteration(mdp, 0.0001);

            System.Console.WriteLine("(1,1) = " + U.Get(cw.GetCellAt(1, 1)));
            System.Console.WriteLine("(1,2) = " + U.Get(cw.GetCellAt(1, 2)));
            System.Console.WriteLine("(1,3) = " + U.Get(cw.GetCellAt(1, 3)));

            System.Console.WriteLine("(2,1) = " + U.Get(cw.GetCellAt(2, 1)));
            System.Console.WriteLine("(2,3) = " + U.Get(cw.GetCellAt(2, 3)));

            System.Console.WriteLine("(3,1) = " + U.Get(cw.GetCellAt(3, 1)));
            System.Console.WriteLine("(3,2) = " + U.Get(cw.GetCellAt(3, 2)));
            System.Console.WriteLine("(3,3) = " + U.Get(cw.GetCellAt(3, 3)));

            System.Console.WriteLine("(4,1) = " + U.Get(cw.GetCellAt(4, 1)));
            System.Console.WriteLine("(4,2) = " + U.Get(cw.GetCellAt(4, 2)));
            System.Console.WriteLine("(4,3) = " + U.Get(cw.GetCellAt(4, 3)));

            System.Console.WriteLine("=========================");
        }
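For reference, the ValueIteration solver used above repeatedly applies the Bellman update U(s) <- R(s) + gamma * max over a of sum over s' of P(s'|s,a) * U(s') until no utility changes by more than the error argument (0.0001 here). Below is a minimal sketch of one such sweep over plain dictionaries; the delegate shapes and type names are assumptions for illustration, not the AIMA library's API.

        using System;
        using System.Collections.Generic;
        using System.Linq;

        static class ValueIterationSketch
        {
            // One Bellman sweep: U'(s) = R(s) + gamma * max_a sum_s' P(s'|s,a) * U(s').
            // 'u' must hold an entry (e.g. 0.0) for every state before the first sweep.
            public static Dictionary<TState, double> Sweep<TState, TAction>(
                IEnumerable<TState> states,
                Func<TState, IEnumerable<TAction>> actions,
                Func<TState, TAction, IEnumerable<(TState next, double prob)>> transition,
                Func<TState, double> reward,
                double gamma,
                Dictionary<TState, double> u)
            {
                var updated = new Dictionary<TState, double>();
                foreach (var s in states)
                {
                    var acts = actions(s).ToList();
                    // Terminal states have no actions, so they keep just their reward.
                    double best = acts.Count == 0
                        ? 0.0
                        : acts.Max(a => transition(s, a).Sum(t => t.prob * u[t.next]));
                    updated[s] = reward(s) + gamma * best;
                }
                return updated;
            }
        }

Iterating Sweep until the largest per-state change falls below the threshold yields utilities comparable to those printed for each cell above.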
 public void setUp()
 {
     cw  = CellWorldFactory.CreateCellWorldForFig17_1();
     mdp = MDPFactory.createMDPForFigure17_3(cw);
     pi  = new PolicyIteration <Cell <double>, CellWorldAction>(
         new ModifiedPolicyEvaluation <Cell <double>, CellWorldAction>(50, 1.0));
 }
Example #3
        public void Add()
        {
            var addMap   = new List <Cell>();
            var addWorld = new CellWorld();

            var r = new Random();

            for (var i = 0; i < 1000; i++)
            {
                var x = r.Next(1000);
                var y = r.Next(1000);
                var n = new Cell {
                    X = x, Y = y
                };
                if (addMap.Contains(n) || _world[x, y])
                {
                    continue;
                }
                addMap.Add(n);
                addWorld[x, y] = true;
            }

            _world = _world.Add(addWorld);

            foreach (var position in _testMap)
            {
                Assert.IsTrue(_world[position.X, position.Y]);
            }
            foreach (var position in addMap)
            {
                Assert.IsTrue(_world[position.X, position.Y]);
            }
        }
        static void passiveTDAgentDemo()
        {
            CellWorld <double>   cw  = CellWorldFactory.CreateCellWorldForFig17_1();
            CellWorldEnvironment cwe = new CellWorldEnvironment(
                cw.GetCellAt(1, 1),
                cw.GetCells(),
                MDPFactory.createTransitionProbabilityFunctionForFigure17_1(cw),
                CommonFactory.CreateRandom());

            IMap <Cell <double>, CellWorldAction> fixedPolicy = CollectionFactory.CreateMap <Cell <double>, CellWorldAction>();

            fixedPolicy.Put(cw.GetCellAt(1, 1), CellWorldAction.Up);
            fixedPolicy.Put(cw.GetCellAt(1, 2), CellWorldAction.Up);
            fixedPolicy.Put(cw.GetCellAt(1, 3), CellWorldAction.Right);
            fixedPolicy.Put(cw.GetCellAt(2, 1), CellWorldAction.Left);
            fixedPolicy.Put(cw.GetCellAt(2, 3), CellWorldAction.Right);
            fixedPolicy.Put(cw.GetCellAt(3, 1), CellWorldAction.Left);
            fixedPolicy.Put(cw.GetCellAt(3, 2), CellWorldAction.Up);
            fixedPolicy.Put(cw.GetCellAt(3, 3), CellWorldAction.Right);
            fixedPolicy.Put(cw.GetCellAt(4, 1), CellWorldAction.Left);

            PassiveTDAgent <Cell <double>, CellWorldAction> ptda
                = new PassiveTDAgent <Cell <double>, CellWorldAction>(fixedPolicy, 0.2, 1.0);

            cwe.AddAgent(ptda);

            output_utility_learning_rates(ptda, 20, 500, 100, 1);
        }
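For context, PassiveTDAgent follows the fixed policy above and, after each observed step, nudges the current state's utility toward the sampled target with the TD(0) rule U(s) <- U(s) + alpha * (R(s) + gamma * U(s') - U(s)), where alpha is the learning rate (0.2 here) and gamma the discount (1.0). A minimal sketch of that single update, using a plain dictionary rather than the library's types:

        using System.Collections.Generic;

        static class TdUpdateSketch
        {
            // Passive TD(0): after observing s -> sNext with reward r for s,
            // move U(s) a fraction alpha toward the target r + gamma * U(sNext).
            public static void Update<TState>(
                Dictionary<TState, double> u, TState s, double r, TState sNext,
                double alpha, double gamma)
            {
                double us = u.TryGetValue(s, out var v) ? v : 0.0;
                double usNext = u.TryGetValue(sNext, out var vNext) ? vNext : 0.0;
                u[s] = us + alpha * (r + gamma * usNext - us);
            }
        }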
Example #5
        public void Remove()
        {
            var removeMap   = new List <Cell>();
            var removeWorld = new CellWorld();

            var r = new Random();

            for (var i = 0; i < 1000; i++)
            {
                var index = r.Next(_testMap.Count);
                removeWorld[_testMap[index].X, _testMap[index].Y] = true;
                removeMap.Add(_testMap[index]);
                _testMap.RemoveAt(index);
            }

            _world = _world.Remove(removeWorld);

            foreach (var position in _testMap)
            {
                Assert.IsTrue(_world[position.X, position.Y]);
            }
            foreach (var position in removeMap)
            {
                Assert.IsFalse(_world[position.X, position.Y]);
            }
        }
        static void policyIterationDemo()
        {
            System.Console.WriteLine("DEMO: Policy Iteration");
            System.Console.WriteLine("======================");
            System.Console.WriteLine("Figure 17.3");
            System.Console.WriteLine("-----------");

            CellWorld <double> cw = CellWorldFactory.CreateCellWorldForFig17_1();
            IMarkovDecisionProcess <Cell <double>, CellWorldAction> mdp = MDPFactory.createMDPForFigure17_3(cw);
            PolicyIteration <Cell <double>, CellWorldAction>
            pi = new PolicyIteration <Cell <double>, CellWorldAction>(
                new ModifiedPolicyEvaluation <Cell <double>, CellWorldAction>(50, 1.0));

            IPolicy <Cell <double>, CellWorldAction> policy = pi.policyIteration(mdp);

            System.Console.WriteLine("(1,1) = " + policy.action(cw.GetCellAt(1, 1)));
            System.Console.WriteLine("(1,2) = " + policy.action(cw.GetCellAt(1, 2)));
            System.Console.WriteLine("(1,3) = " + policy.action(cw.GetCellAt(1, 3)));

            System.Console.WriteLine("(2,1) = " + policy.action(cw.GetCellAt(2, 1)));
            System.Console.WriteLine("(2,3) = " + policy.action(cw.GetCellAt(2, 3)));

            System.Console.WriteLine("(3,1) = " + policy.action(cw.GetCellAt(3, 1)));
            System.Console.WriteLine("(3,2) = " + policy.action(cw.GetCellAt(3, 2)));
            System.Console.WriteLine("(3,3) = " + policy.action(cw.GetCellAt(3, 3)));

            System.Console.WriteLine("(4,1) = " + policy.action(cw.GetCellAt(4, 1)));
            System.Console.WriteLine("(4,2) = " + policy.action(cw.GetCellAt(4, 2)));
            System.Console.WriteLine("(4,3) = " + policy.action(cw.GetCellAt(4, 3)));

            System.Console.WriteLine("=========================");
        }
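Policy iteration, as run above, alternates two steps: evaluate the current policy (here via ModifiedPolicyEvaluation with 50 sweeps and gamma 1.0), then improve it greedily against the resulting utilities, stopping once no action changes. A rough outline of that loop; the evaluate and improve delegates are stand-ins for the library's internals, not its API:

        using System;
        using System.Collections.Generic;

        static class PolicyIterationSketch
        {
            // Alternate evaluation and greedy improvement until the policy is stable.
            // Assumes 'evaluate' and 'improve' return maps over the same state set.
            public static Dictionary<TState, TAction> Iterate<TState, TAction>(
                Dictionary<TState, TAction> policy,
                Func<Dictionary<TState, TAction>, Dictionary<TState, double>> evaluate,
                Func<Dictionary<TState, double>, Dictionary<TState, TAction>> improve)
            {
                while (true)
                {
                    var utilities = evaluate(policy);   // approximate U under the current policy
                    var improved = improve(utilities);  // greedy one-step-lookahead policy
                    bool unchanged = true;
                    foreach (var kv in improved)
                    {
                        if (!EqualityComparer<TAction>.Default.Equals(kv.Value, policy[kv.Key]))
                        {
                            unchanged = false;
                            break;
                        }
                    }
                    policy = improved;
                    if (unchanged)
                    {
                        return policy;
                    }
                }
            }
        }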
Example #7
        /**
         * Constructs an MDP that can be used to generate the utility values
         * detailed in Fig 17.3.
         *
         * @param cw
         *            the cell world from figure 17.1.
         * @return an MDP that can be used to generate the utility values detailed
         *         in Fig 17.3.
         */

        public static MarkovDecisionProcess <Cell <Double>, CellWorldAction> createMDPForFigure17_3(
            CellWorld <Double> cw)
        {
            return(new MDP <Cell <Double>, CellWorldAction>(cw.getCells(),
                                                            cw.getCellAt(1, 1), createActionsFunctionForFigure17_1(cw),
                                                            createTransitionProbabilityFunctionForFigure17_1(cw),
                                                            createRewardFunctionForFigure17_1()));
        }
Example #8
        /// <summary>
        /// Create the cell world as defined in Figure 17.1 in AIMA3e. (a) A simple 4
        /// x 3 environment that presents the agent with a sequential decision
        /// problem.
        /// </summary>
        /// <returns>a cell world representation of Fig 17.1 in AIMA3e.</returns>
        public static CellWorld <double> CreateCellWorldForFig17_1()
        {
            CellWorld <double> cw = new CellWorld <double>(4, 3, -0.04);

            cw.RemoveCell(2, 2);

            cw.GetCellAt(4, 3).setContent(1.0);
            cw.GetCellAt(4, 2).setContent(-1.0);

            return(cw);
        }
        /**
         * Create the cell world as defined in Figure 17.1 in AIMA3e. (a) A simple 4
         * x 3 environment that presents the agent with a sequential decision
         * problem.
         *
         * @return a cell world representation of Fig 17.1 in AIMA3e.
         */
        public static CellWorld<Double> createCellWorldForFig17_1()
        {
            CellWorld<Double> cw = new CellWorld<Double>(4, 3, -0.04);

            cw.removeCell(2, 2);

            cw.getCellAt(4, 3).setContent(1.0);
            cw.getCellAt(4, 2).setContent(-1.0);

            return cw;
        }
Example #10
            public static TransitionProbabilityFunction <Cell <Double>, CellWorldAction>
            createTransitionProbabilityFunctionForFigure17_1(
                CellWorld <Double> cw)
            {
                TransitionProbabilityFunction <Cell <Double>, CellWorldAction> tf =
                    new TransitionProbabilityFunctionImpl(cw);

                return(tf);
            }
Example #11
        public static MDP <CellWorldPosition, String> CreateFourByThreeMDP()
        {
            CellWorld cw = new CellWorld(3, 4, -0.04);

            cw.MarkBlocked(2, 2);

            cw.SetTerminalState(2, 4);
            cw.SetReward(2, 4, -1);

            cw.SetTerminalState(3, 4);
            cw.SetReward(3, 4, 1);
            return(cw.AsMdp());
        }
Example #12
        public static MDP<CellWorldPosition, String> createFourByThreeMDP()
        {

            CellWorld cw = new CellWorld(3, 4, -0.04);

            cw.markBlocked(2, 2);

            cw.setTerminalState(2, 4);
            cw.setReward(2, 4, -1);

            cw.setTerminalState(3, 4);
            cw.setReward(3, 4, 1);
            return cw.asMdp();
        }
Example #13
	public void setUp() {
		cw = new CellWorld(3, 4, -0.04);

		cw.markBlocked(2, 2);

		cw.setTerminalState(2, 4);
		cw.setReward(2, 4, -1);

		cw.setTerminalState(3, 4);
		cw.setReward(3, 4, 1);

		alwaysLessThanEightyPercent = new MockRandomizer(new double[] { 0.7 });
		betweenEightyAndNinetyPercent = new MockRandomizer(
				new double[] { 0.85 });
		greaterThanNinetyPercent = new MockRandomizer(new double[] { 0.95 });
	}
Example #14
            /**
             * Returns the allowed actions from a specified cell within the cell world
             * described in Fig 17.1.
             *
             * @param cw
             *            the cell world from figure 17.1.
             * @return the set of actions allowed at a particular cell. This set will be
             *         empty if at a terminal state.
             */

            public static ActionsFunction <Cell <Double>, CellWorldAction> createActionsFunctionForFigure17_1(
                CellWorld <Double> cw)
            {
                Set <Cell <Double> > terminals = new Set <Cell <Double> >();

                terminals.add(cw.getCellAt(4, 3));
                terminals.add(cw.getCellAt(4, 2));

                // TODO: supply a concrete ActionsFunction here. It should return the
                // four CellWorldActions for any non-terminal cell and the empty set for
                // the two terminal cells collected above (the named implementation in
                // Example #20 below fills this role); until then the factory returns null.
                ActionsFunction <Cell <Double>, CellWorldAction> af = null;

                return(af);
            }
Example #15
        static void qLearningAgentDemo()
        {
            CellWorld <double>   cw  = CellWorldFactory.CreateCellWorldForFig17_1();
            CellWorldEnvironment cwe = new CellWorldEnvironment(
                cw.GetCellAt(1, 1),
                cw.GetCells(),
                MDPFactory.createTransitionProbabilityFunctionForFigure17_1(cw),
                CommonFactory.CreateRandom());

            QLearningAgent <Cell <double>, CellWorldAction> qla = new QLearningAgent <Cell <double>, CellWorldAction>(
                MDPFactory.createActionsFunctionForFigure17_1(cw),
                CellWorldAction.None, 0.2, 1.0, 5,
                2.0);

            cwe.AddAgent(qla);

            output_utility_learning_rates(qla, 20, 10000, 500, 20);
        }
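QLearningAgent above is model-free: it learns action values directly and, for each observed step, applies Q(s,a) <- Q(s,a) + alpha * (r + gamma * max over a' of Q(s',a') - Q(s,a)); the constructor arguments 0.2 and 1.0 play the roles of alpha and gamma. A minimal sketch of that backup over a plain dictionary (illustrative only, not the agent's internals):

        using System.Collections.Generic;
        using System.Linq;

        static class QLearningSketch
        {
            // One Q-learning backup for the observed step (s, a) -> (sNext, r).
            // nextActions lists the actions available in sNext; empty means terminal.
            public static void Update<TState, TAction>(
                Dictionary<(TState, TAction), double> q,
                TState s, TAction a, double r, TState sNext,
                IEnumerable<TAction> nextActions, double alpha, double gamma)
            {
                double Get((TState, TAction) key) => q.TryGetValue(key, out var v) ? v : 0.0;

                var acts = nextActions.ToList();
                double bestNext = acts.Count == 0 ? 0.0 : acts.Max(an => Get((sNext, an)));
                double old = Get((s, a));
                q[(s, a)] = old + alpha * (r + gamma * bestNext - old);
            }
        }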
Example #16
        public void Prepare()
        {
            _testMap = new List <Cell>();

            for (var i = 0; i < 100; i++)
            {
                for (var j = 0; j < 100; j++)
                {
                    _testMap.Add(new Cell {
                        X = i, Y = j
                    });
                }
            }

            var r = new Random();

            for (var i = 0; i < 10000; i++)
            {
                var x = r.Next(1000);
                var y = r.Next(1000);
                var n = new Cell {
                    X = 100 + x, Y = 100 + y
                };
                if (_testMap.Contains(n))
                {
                    continue;
                }
                _testMap.Add(n);
            }

            _world = new CellWorld();

            foreach (var position in _testMap)
            {
                _world[position.X, position.Y] = true;
            }
        }
Example #17
        static void passiveADPAgentDemo()
        {
            System.Console.WriteLine("=======================");
            System.Console.WriteLine("DEMO: Passive-ADP-Agent");
            System.Console.WriteLine("=======================");
            System.Console.WriteLine("Figure 21.3");
            System.Console.WriteLine("-----------");

            CellWorld <double>   cw  = CellWorldFactory.CreateCellWorldForFig17_1();
            CellWorldEnvironment cwe = new CellWorldEnvironment(
                cw.GetCellAt(1, 1),
                cw.GetCells(),
                MDPFactory.createTransitionProbabilityFunctionForFigure17_1(cw),
                CommonFactory.CreateRandom());

            IMap <Cell <double>, CellWorldAction> fixedPolicy = CollectionFactory.CreateInsertionOrderedMap <Cell <double>, CellWorldAction>();

            fixedPolicy.Put(cw.GetCellAt(1, 1), CellWorldAction.Up);
            fixedPolicy.Put(cw.GetCellAt(1, 2), CellWorldAction.Up);
            fixedPolicy.Put(cw.GetCellAt(1, 3), CellWorldAction.Right);
            fixedPolicy.Put(cw.GetCellAt(2, 1), CellWorldAction.Left);
            fixedPolicy.Put(cw.GetCellAt(2, 3), CellWorldAction.Right);
            fixedPolicy.Put(cw.GetCellAt(3, 1), CellWorldAction.Left);
            fixedPolicy.Put(cw.GetCellAt(3, 2), CellWorldAction.Up);
            fixedPolicy.Put(cw.GetCellAt(3, 3), CellWorldAction.Right);
            fixedPolicy.Put(cw.GetCellAt(4, 1), CellWorldAction.Left);

            PassiveADPAgent <Cell <double>, CellWorldAction> padpa = new PassiveADPAgent <Cell <double>, CellWorldAction>(
                fixedPolicy, cw.GetCells(), cw.GetCellAt(1, 1),
                MDPFactory.createActionsFunctionForFigure17_1(cw),
                new ModifiedPolicyEvaluation <Cell <double>, CellWorldAction>(10, 1.0));

            cwe.AddAgent(padpa);

            output_utility_learning_rates(padpa, 20, 100, 100, 1);

            System.Console.WriteLine("=========================");
        }
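Unlike the TD agent, the Passive-ADP-Agent builds an explicit model: it counts observed transitions under the fixed policy, converts the counts into transition probability estimates, and re-solves for utilities with the supplied ModifiedPolicyEvaluation. A small sketch of the counting step; the tuple-keyed dictionaries are an assumed structure, not the agent's fields:

        using System.Collections.Generic;

        static class AdpModelSketch
        {
            // Maximum-likelihood estimate of P(next | s, a) from transition counts:
            // N(s, a, next) / N(s, a), or 0 if (s, a) has never been tried.
            public static double EstimateProbability<TState, TAction>(
                Dictionary<(TState s, TAction a, TState next), int> transitionCounts,
                Dictionary<(TState s, TAction a), int> visitCounts,
                TState s, TAction a, TState next)
            {
                int visits = visitCounts.TryGetValue((s, a), out var n) ? n : 0;
                if (visits == 0)
                {
                    return 0.0;
                }
                int hits = transitionCounts.TryGetValue((s, a, next), out var c) ? c : 0;
                return (double)hits / visits;
            }
        }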
Example #18
 public void setUp()
 {
     cw = CellWorldFactory.CreateCellWorldForFig17_1();
     mdp = MDPFactory.createMDPForFigure17_3(cw);
     vi = new ValueIteration<Cell<double>, CellWorldAction>(1.0);
 }
Example #19
        /**
         * Figure 17.1 (b) Illustration of the transition model of the environment:
         * the 'intended' outcome occurs with probability 0.8, but with probability
         * 0.2 the agent moves at right angles to the intended direction. A
         * collision with a wall results in no movement.
         *
         * @param cw
         *            the cell world from figure 17.1.
         * @return the transition probability function as described in figure 17.1.
         */
        public static ITransitionProbabilityFunction <Cell <double>, CellWorldAction> createTransitionProbabilityFunctionForFigure17_1(CellWorld <double> cw)
        {
            ITransitionProbabilityFunction <Cell <double>, CellWorldAction> tf = new createTransitionProbabilityFunctionForFigure17_1TransitionProbabilityFunction(cw);

            return(tf);
        }
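Concretely, the model described above gives the intended move probability 0.8 and each right-angle move 0.1, with any blocked move leaving the agent where it is. The sketch below distributes those probabilities; turnLeft, turnRight and moveOrStay are hypothetical helpers standing in for the cell-world geometry, not the library's API:

        using System;
        using System.Collections.Generic;

        static class TransitionModelSketch
        {
            // Outcome distribution for attempting 'intended' from 'state':
            // 0.8 intended, 0.1 for each right-angle direction; blocked moves stay put,
            // so probabilities landing on the same resulting cell are accumulated.
            public static Dictionary<TState, double> Outcomes<TState, TAction>(
                TState state, TAction intended,
                Func<TAction, TAction> turnLeft,
                Func<TAction, TAction> turnRight,
                Func<TState, TAction, TState> moveOrStay)
            {
                var dist = new Dictionary<TState, double>();
                void Add(TState s, double p) =>
                    dist[s] = (dist.TryGetValue(s, out var v) ? v : 0.0) + p;

                Add(moveOrStay(state, intended), 0.8);
                Add(moveOrStay(state, turnLeft(intended)), 0.1);
                Add(moveOrStay(state, turnRight(intended)), 0.1);
                return dist;
            }
        }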
Example #20
        /**
         * Returns the allowed actions from a specified cell within the cell world
         * described in Fig 17.1.
         *
         * @param cw
         *            the cell world from figure 17.1.
         * @return the set of actions allowed at a particular cell. This set will be
         *         empty if at a terminal state.
         */
        public static IActionsFunction <Cell <double>, CellWorldAction> createActionsFunctionForFigure17_1(CellWorld <double> cw)
        {
            ISet <Cell <double> > terminals = CollectionFactory.CreateSet <Cell <double> >();

            terminals.Add(cw.GetCellAt(4, 3));
            terminals.Add(cw.GetCellAt(4, 2));

            IActionsFunction <Cell <double>, CellWorldAction> af = new createActionsFunctionForFigure17_1ActionsFunction(terminals);

            return(af);
        }
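The behaviour that implementation class needs is simple: every non-terminal cell allows the four compass moves, and the two terminal cells allow none. A minimal sketch with a hypothetical Move enum and standard collections (the library's CellWorldAction and ISet types differ):

        using System.Collections.Generic;

        enum Move { Up, Down, Left, Right }

        static class ActionsFunctionSketch
        {
            // Allowed moves for a cell: all four directions, or none at a terminal.
            public static ISet<Move> ActionsFor<TCell>(TCell cell, ISet<TCell> terminals)
            {
                if (terminals.Contains(cell))
                {
                    return new HashSet<Move>();
                }
                return new HashSet<Move> { Move.Up, Move.Down, Move.Left, Move.Right };
            }
        }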
Example #21
 public void setUp()
 {
     cw = CellWorldFactory.CreateCellWorldForFig17_1();
 }
Example #22
 public createTransitionProbabilityFunctionForFigure17_1TransitionProbabilityFunction(CellWorld <double> cw)
 {
     this.cw = cw;
 }
Example #23
	//
	// PRIVATE METHODS
	//
	private void assertPolicyReccomends(CellWorld cw,
			MDPUtilityFunction<CellWorldPosition> uf, int x, int y,
			String actionExpected) {
		Pair<String, Double> p = cw.getTransitionModel()
				.getTransitionWithMaximumExpectedUtility(
						new CellWorldPosition(x, y), uf);

		Assert.assertEquals(actionExpected, p.getFirst());
	}
Example #24
        public Example_AForge_Animat(Base parent)
            : base(parent)
        {
            //  GUI
            {
                m_TopPanel = new Base(this);
                {
                    m_TopPanel.Dock   = Pos.Top;
                    m_TopPanel.Height = 30;

                    Alt.GUI.Temporary.Gwen.Control.Label label = new Alt.GUI.Temporary.Gwen.Control.Label(m_TopPanel);
                    label.Location           = new PointI(0, 2);
                    label.AutoSizeToContents = true;
                    label.TextColor          = Color.LightGreen;
                    label.Text = "Map";


                    label                    = new Alt.GUI.Temporary.Gwen.Control.Label(m_TopPanel);
                    label.Location           = new PointI(33, 2);
                    label.Text               = "World size:";
                    label.AutoSizeToContents = true;


                    worldSizeBox          = new Alt.GUI.Temporary.Gwen.Control.TextBox(m_TopPanel);
                    worldSizeBox.Location = new PointI(102, 0);
                    worldSizeBox.ReadOnly = true;
                    worldSizeBox.Width    = 40;


                    loadButton1                 = new Alt.GUI.Temporary.Gwen.Control.Button(m_TopPanel);
                    loadButton1.Location        = new PointI(160, 0);
                    loadButton1.Width           = 80;
                    loadButton1.Text            = "Sample 1";
                    loadButton1.Click          += new System.EventHandler(loadButton1_Click);
                    loadButton1.NormalTextColor = Color.Red * 0.8;

                    loadButton2                 = new Alt.GUI.Temporary.Gwen.Control.Button(m_TopPanel);
                    loadButton2.Location        = new PointI(250, 0);
                    loadButton2.Width           = 80;
                    loadButton2.Text            = "Sample 2";
                    loadButton2.Click          += new System.EventHandler(loadButton2_Click);
                    loadButton2.NormalTextColor = Color.Brown;

                    loadButton3                 = new Alt.GUI.Temporary.Gwen.Control.Button(m_TopPanel);
                    loadButton3.Location        = new PointI(340, 0);
                    loadButton3.Width           = 80;
                    loadButton3.Text            = "Test 1";
                    loadButton3.Click          += new System.EventHandler(loadButton3_Click);
                    loadButton3.NormalTextColor = Color.Green;

                    loadButton4                 = new Alt.GUI.Temporary.Gwen.Control.Button(m_TopPanel);
                    loadButton4.Location        = new PointI(430, 0);
                    loadButton4.Width           = 80;
                    loadButton4.Text            = "Test 2";
                    loadButton4.Click          += new System.EventHandler(loadButton4_Click);
                    loadButton4.NormalTextColor = Color.Blue;

                    loadButton5                 = new Alt.GUI.Temporary.Gwen.Control.Button(m_TopPanel);
                    loadButton5.Location        = new PointI(520, 0);
                    loadButton5.Width           = 80;
                    loadButton5.Text            = "Test 3";
                    loadButton5.Click          += new System.EventHandler(loadButton5_Click);
                    loadButton5.NormalTextColor = Color.Violet * 0.8;
                }


                m_RightPanel = new Base(this);
                {
                    m_RightPanel.Dock   = Pos.Right;
                    m_RightPanel.Width  = 140;
                    m_RightPanel.Margin = new Margin(5, 0, 0, 0);

                    Alt.GUI.Temporary.Gwen.Control.GroupBox groupBox = new Alt.GUI.Temporary.Gwen.Control.GroupBox(m_RightPanel);
                    {
                        groupBox.Location = new PointI(0, 0);
                        groupBox.Size     = new SizeI(140, 363);
                        groupBox.Text     = "Settings";


                        Alt.GUI.Temporary.Gwen.Control.Label label = new Alt.GUI.Temporary.Gwen.Control.Label(groupBox);
                        label.Location           = new PointI(0, 10);
                        label.AutoSizeToContents = true;
                        label.Text = "Learning algorithm:";

                        algorithmCombo          = new Alt.GUI.Temporary.Gwen.Control.ComboBox(groupBox);
                        algorithmCombo.Location = new PointI(0, 30);
                        algorithmCombo.Width    = 130;
                        algorithmCombo.AddItem("Q-Learning").UserData = 0;
                        algorithmCombo.AddItem("Sarsa").UserData      = 1;


                        label                    = new Alt.GUI.Temporary.Gwen.Control.Label(groupBox);
                        label.Location           = new PointI(0, 65);
                        label.AutoSizeToContents = true;
                        label.Text               = "Initial exploration rate:";

                        explorationRateBox          = new Alt.GUI.Temporary.Gwen.Control.TextBox(groupBox);
                        explorationRateBox.Location = new PointI(0, 85);
                        explorationRateBox.Width    = 130;

                        label                    = new Alt.GUI.Temporary.Gwen.Control.Label(groupBox);
                        label.Location           = new PointI(0, 110);
                        label.AutoSizeToContents = true;
                        label.Text               = "Initial learning rate:";

                        learningRateBox          = new Alt.GUI.Temporary.Gwen.Control.TextBox(groupBox);
                        learningRateBox.Location = new PointI(0, 130);
                        learningRateBox.Width    = 130;

                        label                    = new Alt.GUI.Temporary.Gwen.Control.Label(groupBox);
                        label.Location           = new PointI(0, 155);
                        label.AutoSizeToContents = true;
                        label.Text               = "Learning iterations:";

                        iterationsBox          = new Alt.GUI.Temporary.Gwen.Control.TextBox(groupBox);
                        iterationsBox.Location = new PointI(0, 175);
                        iterationsBox.Width    = 130;


                        label                    = new Alt.GUI.Temporary.Gwen.Control.Label(groupBox);
                        label.Location           = new PointI(0, 210);
                        label.AutoSizeToContents = true;
                        label.Text               = "Move reward:";

                        moveRewardBox          = new Alt.GUI.Temporary.Gwen.Control.TextBox(groupBox);
                        moveRewardBox.Location = new PointI(0, 230);
                        moveRewardBox.Width    = 130;
                        moveRewardBox.ReadOnly = true;

                        label                    = new Alt.GUI.Temporary.Gwen.Control.Label(groupBox);
                        label.Location           = new PointI(0, 255);
                        label.AutoSizeToContents = true;
                        label.Text               = "Wall reward:";

                        wallRewardBox          = new Alt.GUI.Temporary.Gwen.Control.TextBox(groupBox);
                        wallRewardBox.Location = new PointI(0, 275);
                        wallRewardBox.Width    = 130;
                        wallRewardBox.ReadOnly = true;

                        label                    = new Alt.GUI.Temporary.Gwen.Control.Label(groupBox);
                        label.Location           = new PointI(0, 300);
                        label.AutoSizeToContents = true;
                        label.Text               = "Goal reward:";

                        goalRewardBox          = new Alt.GUI.Temporary.Gwen.Control.TextBox(groupBox);
                        goalRewardBox.Location = new PointI(0, 320);
                        goalRewardBox.Width    = 130;
                        goalRewardBox.ReadOnly = true;
                    }



                    groupBox = new Alt.GUI.Temporary.Gwen.Control.GroupBox(m_RightPanel);
                    {
                        groupBox.Location = new PointI(0, 380);
                        groupBox.Size     = new SizeI(140, 173);
                        groupBox.Text     = "Learning";


                        Alt.GUI.Temporary.Gwen.Control.Label label = new Alt.GUI.Temporary.Gwen.Control.Label(groupBox);
                        label.Location           = new PointI(0, 10);
                        label.AutoSizeToContents = true;
                        label.Text = "Iteration:";

                        iterationBox          = new Alt.GUI.Temporary.Gwen.Control.TextBox(groupBox);
                        iterationBox.Location = new PointI(0, 30);
                        iterationBox.Width    = 130;
                        iterationBox.ReadOnly = true;


                        startLearningButton                 = new Alt.GUI.Temporary.Gwen.Control.Button(groupBox);
                        startLearningButton.Location        = new PointI(5, 70);
                        startLearningButton.Width           = 120;
                        startLearningButton.Text            = "Start";
                        startLearningButton.Click          += new System.EventHandler(startLearningButton_Click);
                        startLearningButton.NormalTextColor = Color.Green;

                        stopButton                 = new Alt.GUI.Temporary.Gwen.Control.Button(groupBox);
                        stopButton.Location        = new PointI(5, 100);
                        stopButton.Width           = 120;
                        stopButton.Text            = "Stop";
                        stopButton.Click          += new System.EventHandler(stopButton_Click);
                        stopButton.NormalTextColor = Color.Red;

                        showSolutionButton                 = new Alt.GUI.Temporary.Gwen.Control.Button(groupBox);
                        showSolutionButton.Location        = new PointI(5, 130);
                        showSolutionButton.Width           = 120;
                        showSolutionButton.Text            = "Show solution";
                        showSolutionButton.Click          += new System.EventHandler(showSolutionButton_Click);
                        showSolutionButton.NormalTextColor = Color.Blue;
                    }
                }


                cellWorld          = new CellWorld(this);
                cellWorld.Coloring = null;
                cellWorld.Map      = null;
            }
        }
Example #25
        protected static void output_utility_learning_rates(
            ReinforcementAgent <Cell <double>, CellWorldAction> reinforcementAgent,
            int numRuns, int numTrialsPerRun, int rmseTrialsToReport,
            int reportEveryN)
        {
            if (rmseTrialsToReport > (numTrialsPerRun / reportEveryN))
            {
                throw new IllegalArgumentException("Requesting to report too many RMSE trials, max allowed for args is "
                                                   + (numTrialsPerRun / reportEveryN));
            }

            CellWorld <double>   cw  = CellWorldFactory.CreateCellWorldForFig17_1();
            CellWorldEnvironment cwe = new CellWorldEnvironment(
                cw.GetCellAt(1, 1),
                cw.GetCells(),
                MDPFactory.createTransitionProbabilityFunctionForFigure17_1(cw),
                CommonFactory.CreateRandom());

            cwe.AddAgent(reinforcementAgent);

            IMap <int, ICollection <IMap <Cell <double>, double> > > runs = CollectionFactory.CreateInsertionOrderedMap <int, ICollection <IMap <Cell <double>, double> > >();

            for (int r = 0; r < numRuns; r++)
            {
                reinforcementAgent.reset();
                ICollection <IMap <Cell <double>, double> > trials = CollectionFactory.CreateQueue <IMap <Cell <double>, double> >();
                for (int t = 0; t < numTrialsPerRun; t++)
                {
                    cwe.executeTrial();
                    if (0 == t % reportEveryN)
                    {
                        IMap <Cell <double>, double> u = reinforcementAgent
                                                         .getUtility();
                        //if (null == u.Get(cw.getCellAt(1, 1)))
                        //{
                        //    throw new IllegalStateException(
                        //            "Bad Utility State Encountered: r=" + r
                        //                    + ", t=" + t + ", u=" + u);
                        //}
                        trials.Add(u);
                    }
                }
                runs.Put(r, trials);
            }

            IStringBuilder v4_3 = TextFactory.CreateStringBuilder();
            IStringBuilder v3_3 = TextFactory.CreateStringBuilder();
            IStringBuilder v1_3 = TextFactory.CreateStringBuilder();
            IStringBuilder v1_1 = TextFactory.CreateStringBuilder();
            IStringBuilder v3_2 = TextFactory.CreateStringBuilder();
            IStringBuilder v2_1 = TextFactory.CreateStringBuilder();

            for (int t = 0; t < (numTrialsPerRun / reportEveryN); t++)
            {
                // Use the last run
                IMap <Cell <double>, double> u = runs.Get(numRuns - 1).Get(t);
                v4_3.Append((u.ContainsKey(cw.GetCellAt(4, 3)) ? u.Get(cw.GetCellAt(4, 3)) : 0.0) + "\t");
                v3_3.Append((u.ContainsKey(cw.GetCellAt(3, 3)) ? u.Get(cw.GetCellAt(3, 3)) : 0.0) + "\t");
                v1_3.Append((u.ContainsKey(cw.GetCellAt(1, 3)) ? u.Get(cw.GetCellAt(1, 3)) : 0.0) + "\t");
                v1_1.Append((u.ContainsKey(cw.GetCellAt(1, 1)) ? u.Get(cw.GetCellAt(1, 1)) : 0.0) + "\t");
                v3_2.Append((u.ContainsKey(cw.GetCellAt(3, 2)) ? u.Get(cw.GetCellAt(3, 2)) : 0.0) + "\t");
                v2_1.Append((u.ContainsKey(cw.GetCellAt(2, 1)) ? u.Get(cw.GetCellAt(2, 1)) : 0.0) + "\t");
            }

            IStringBuilder rmseValues = TextFactory.CreateStringBuilder();

            for (int t = 0; t < rmseTrialsToReport; t++)
            {
                // Calculate the Root Mean Square Error for utility of 1,1
                // for this trial# across all runs
                double xSsquared = 0;
                for (int r = 0; r < numRuns; r++)
                {
                    IMap <Cell <double>, double> u = runs.Get(r).Get(t);
                    double val1_1 = u.Get(cw.GetCellAt(1, 1));
                    //if (null == val1_1)
                    //{
                    //    throw new IllegalStateException(
                    //            "U(1,1,) is not present: r=" + r + ", t=" + t
                    //                    + ", runs.size=" + runs.Size()
                    //                    + ", runs(r).Size()=" + runs.Get(r).Size()
                    //                    + ", u=" + u);
                    //}
                    xSsquared += System.Math.Pow(0.705 - val1_1, 2);
                }
                double rmse = System.Math.Sqrt(xSsquared / runs.Size());
                rmseValues.Append(rmse);
                rmseValues.Append("\t");
            }

            System.Console
            .WriteLine("Note: You may copy and paste the following lines into a spreadsheet to generate graphs of learning rate and RMS error in utility:");
            System.Console.WriteLine("(4,3)" + "\t" + v4_3);
            System.Console.WriteLine("(3,3)" + "\t" + v3_3);
            System.Console.WriteLine("(1,3)" + "\t" + v1_3);
            System.Console.WriteLine("(1,1)" + "\t" + v1_1);
            System.Console.WriteLine("(3,2)" + "\t" + v3_2);
            System.Console.WriteLine("(2,1)" + "\t" + v2_1);
            System.Console.WriteLine("RMSeiu" + "\t" + rmseValues);
        }
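The constant 0.705 in the loop above is the reference utility of cell (1,1) from Fig 17.3; for each reported trial the code averages the squared error of the learned U(1,1) over all runs and takes the square root. Restated as a tiny standalone helper (same arithmetic as the loop, hypothetical method name):

        using System;
        using System.Linq;

        static class RmseSketch
        {
            // Root mean square error of the learned U(1,1) values against 0.705.
            public static double Rmse(double[] learnedU11PerRun, double reference = 0.705)
            {
                double meanSquaredError = learnedU11PerRun
                    .Select(u => Math.Pow(reference - u, 2))
                    .Average();
                return Math.Sqrt(meanSquaredError);
            }
        }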
Example #26
 public void setUp()
 {
     cw  = CellWorldFactory.CreateCellWorldForFig17_1();
     mdp = MDPFactory.createMDPForFigure17_3(cw);
 }
Example #27
 public TransitionProbabilityFunctionImpl(CellWorld <Double> cw)
 {
     this.cw = cw;
 }