/// <summary>
/// Demonstrates value iteration on the 4 x 3 cell world of Fig 17.1,
/// printing the per-cell utilities corresponding to Fig 17.3 (AIMA3e).
/// </summary>
static void valueIterationDemo()
{
    System.Console.WriteLine("DEMO: Value Iteration");
    System.Console.WriteLine("=====================");
    System.Console.WriteLine("Figure 17.3");
    System.Console.WriteLine("-----------");

    CellWorld<double> world = CellWorldFactory.CreateCellWorldForFig17_1();
    IMarkovDecisionProcess<Cell<double>, CellWorldAction> mdp =
        MDPFactory.createMDPForFigure17_3(world);
    // Constructor argument 1.0 and the 0.0001 passed below are taken from
    // the original call site; see ValueIteration for their exact meaning.
    ValueIteration<Cell<double>, CellWorldAction> vi =
        new ValueIteration<Cell<double>, CellWorldAction>(1.0);
    IMap<Cell<double>, double> utilities = vi.valueIteration(mdp, 0.0001);

    // Print U(s) for every queried cell; (2,2) is absent from the world.
    int[,] coords = {
        { 1, 1 }, { 1, 2 }, { 1, 3 }, { 2, 1 }, { 2, 3 },
        { 3, 1 }, { 3, 2 }, { 3, 3 }, { 4, 1 }, { 4, 2 }, { 4, 3 }
    };
    for (int i = 0; i < coords.GetLength(0); ++i)
    {
        int x = coords[i, 0], y = coords[i, 1];
        System.Console.WriteLine("(" + x + "," + y + ") = " + utilities.Get(world.GetCellAt(x, y)));
    }
    System.Console.WriteLine("=========================");
}
// Test fixture setup: build the Fig 17.1 cell world, the Fig 17.3 MDP over
// it, and a PolicyIteration solver backed by ModifiedPolicyEvaluation(50,
// 1.0) -- presumably evaluation-iteration count and discount factor; TODO
// confirm against the ModifiedPolicyEvaluation constructor.
public void setUp() { cw = CellWorldFactory.CreateCellWorldForFig17_1(); mdp = MDPFactory.createMDPForFigure17_3(cw); pi = new PolicyIteration <Cell <double>, CellWorldAction>( new ModifiedPolicyEvaluation <Cell <double>, CellWorldAction>(50, 1.0)); }
public void Add()
{
    // Build a random batch of new live cells that collide neither with
    // cells already live in _world nor with each other.
    var batch = new List<Cell>();
    var batchWorld = new CellWorld();
    var rng = new Random();
    for (var i = 0; i < 1000; i++)
    {
        var x = rng.Next(1000);
        var y = rng.Next(1000);
        var candidate = new Cell { X = x, Y = y };
        if (batch.Contains(candidate) || _world[x, y])
        {
            continue;
        }
        batch.Add(candidate);
        batchWorld[x, y] = true;
    }

    _world = _world.Add(batchWorld);

    // Every pre-existing cell must survive the merge...
    foreach (var cell in _testMap)
    {
        Assert.IsTrue(_world[cell.X, cell.Y]);
    }
    // ...and every cell from the added batch must now be live.
    foreach (var cell in batch)
    {
        Assert.IsTrue(_world[cell.X, cell.Y]);
    }
}
/// <summary>
/// Runs a passive TD agent over the Fig 17.1 cell world under a fixed
/// policy and reports utility-estimate learning rates.
/// </summary>
static void passiveTDAgentDemo()
{
    CellWorld<double> world = CellWorldFactory.CreateCellWorldForFig17_1();
    CellWorldEnvironment env = new CellWorldEnvironment(
        world.GetCellAt(1, 1),
        world.GetCells(),
        MDPFactory.createTransitionProbabilityFunctionForFigure17_1(world),
        CommonFactory.CreateRandom());

    // Fixed policy: one action per non-terminal state, entered in the same
    // order as the original hand-written Put calls.
    IMap<Cell<double>, CellWorldAction> fixedPolicy =
        CollectionFactory.CreateMap<Cell<double>, CellWorldAction>();
    int[,] xy = { { 1, 1 }, { 1, 2 }, { 1, 3 }, { 2, 1 }, { 2, 3 }, { 3, 1 }, { 3, 2 }, { 3, 3 }, { 4, 1 } };
    CellWorldAction[] act = {
        CellWorldAction.Up, CellWorldAction.Up, CellWorldAction.Right,
        CellWorldAction.Left, CellWorldAction.Right, CellWorldAction.Left,
        CellWorldAction.Up, CellWorldAction.Right, CellWorldAction.Left
    };
    for (int i = 0; i < act.Length; ++i)
    {
        fixedPolicy.Put(world.GetCellAt(xy[i, 0], xy[i, 1]), act[i]);
    }

    // 0.2 and 1.0 are the constructor arguments from the original call;
    // see PassiveTDAgent for their exact meaning.
    PassiveTDAgent<Cell<double>, CellWorldAction> agent =
        new PassiveTDAgent<Cell<double>, CellWorldAction>(fixedPolicy, 0.2, 1.0);
    env.AddAgent(agent);
    output_utility_learning_rates(agent, 20, 500, 100, 1);
}
public void Remove()
{
    // Select 1000 random cells out of _testMap; record each one and mark
    // it in a scratch world that will be subtracted from _world.
    var removed = new List<Cell>();
    var removalWorld = new CellWorld();
    var rng = new Random();
    for (var i = 0; i < 1000; i++)
    {
        var index = rng.Next(_testMap.Count);
        var victim = _testMap[index];
        removalWorld[victim.X, victim.Y] = true;
        removed.Add(victim);
        _testMap.RemoveAt(index);
    }

    _world = _world.Remove(removalWorld);

    // Unselected cells must still be live; removed cells must be dead.
    foreach (var cell in _testMap)
    {
        Assert.IsTrue(_world[cell.X, cell.Y]);
    }
    foreach (var cell in removed)
    {
        Assert.IsFalse(_world[cell.X, cell.Y]);
    }
}
/// <summary>
/// Demonstrates policy iteration on the 4 x 3 cell world of Fig 17.1,
/// printing the recommended action for each cell (AIMA3e Fig 17.3 setup).
/// </summary>
static void policyIterationDemo()
{
    System.Console.WriteLine("DEMO: Policy Iteration");
    System.Console.WriteLine("======================");
    System.Console.WriteLine("Figure 17.3");
    System.Console.WriteLine("-----------");

    CellWorld<double> world = CellWorldFactory.CreateCellWorldForFig17_1();
    IMarkovDecisionProcess<Cell<double>, CellWorldAction> mdp =
        MDPFactory.createMDPForFigure17_3(world);
    // Arguments (50, 1.0) taken from the original call site; see
    // ModifiedPolicyEvaluation for their exact meaning.
    PolicyIteration<Cell<double>, CellWorldAction> pi =
        new PolicyIteration<Cell<double>, CellWorldAction>(
            new ModifiedPolicyEvaluation<Cell<double>, CellWorldAction>(50, 1.0));
    IPolicy<Cell<double>, CellWorldAction> policy = pi.policyIteration(mdp);

    // Print pi(s) for every queried cell; (2,2) is absent from the world.
    int[,] coords = {
        { 1, 1 }, { 1, 2 }, { 1, 3 }, { 2, 1 }, { 2, 3 },
        { 3, 1 }, { 3, 2 }, { 3, 3 }, { 4, 1 }, { 4, 2 }, { 4, 3 }
    };
    for (int i = 0; i < coords.GetLength(0); ++i)
    {
        int x = coords[i, 0], y = coords[i, 1];
        System.Console.WriteLine("(" + x + "," + y + ") = " + policy.action(world.GetCellAt(x, y)));
    }
    System.Console.WriteLine("=========================");
}
/**
 * Constructs an MDP that can be used to generate the utility values
 * detailed in Fig 17.3.
 *
 * @param cw
 *            the cell world from figure 17.1.
 * @return an MDP that can be used to generate the utility values detailed
 *         in Fig 17.3.
 */
public static MarkovDecisionProcess <Cell <Double>, CellWorldAction> createMDPForFigure17_3(
        CellWorld <Double> cw) {
    // States = all cells, initial state = (1,1); the actions, transition
    // model and rewards come from the companion Fig 17.1 factory helpers.
    return(new MDP <Cell <Double>, CellWorldAction>(cw.getCells(),
            cw.getCellAt(1, 1), createActionsFunctionForFigure17_1(cw),
            createTransitionProbabilityFunctionForFigure17_1(cw),
            createRewardFunctionForFigure17_1()));
}
/// <summary>
/// Create the cell world of Figure 17.1 in AIMA3e: a simple 4 x 3
/// environment presenting the agent with a sequential decision problem.
/// Every cell carries reward -0.04, cell (2,2) is removed (the wall), and
/// the cells (4,3) and (4,2) hold contents +1.0 and -1.0 respectively.
/// </summary>
/// <returns>a cell world representation of Fig 17.1 in AIMA3e.</returns>
public static CellWorld<double> CreateCellWorldForFig17_1()
{
    var world = new CellWorld<double>(4, 3, -0.04);
    world.RemoveCell(2, 2);                 // the blocked square
    world.GetCellAt(4, 3).setContent(1.0);  // positive terminal content
    world.GetCellAt(4, 2).setContent(-1.0); // negative terminal content
    return world;
}
/**
 * Create the cell world of Figure 17.1 in AIMA3e: a simple 4 x 3
 * environment presenting the agent with a sequential decision problem.
 * Every cell carries reward -0.04, cell (2,2) is removed (the wall), and
 * cells (4,3)/(4,2) hold contents +1.0/-1.0 respectively.
 *
 * @return a cell world representation of Fig 17.1 in AIMA3e.
 */
public static CellWorld<Double> createCellWorldForFig17_1() {
	CellWorld<Double> world = new CellWorld<Double>(4, 3, -0.04);
	world.removeCell(2, 2);                 // the blocked square
	world.getCellAt(4, 3).setContent(1.0);  // positive terminal content
	world.getCellAt(4, 2).setContent(-1.0); // negative terminal content
	return world;
}
createTransitionProbabilityFunctionForFigure17_1( CellWorld <Double> cw) {
    // Bind the given cell world to the concrete transition-probability
    // implementation (presumably the 0.8 intended / 0.2 right-angle model
    // of Fig 17.1 -- confirm in TransitionProbabilityFunctionImpl).
    TransitionProbabilityFunction <Cell <Double>, CellWorldAction> tf =
            new TransitionProbabilityFunctionImpl(cw);
    // Bug fix: removed the stray empty block and semicolon ("{ } ;") left
    // over from an anonymous-class-to-named-class conversion; they were
    // dead tokens contributing nothing.
    return(tf);
}
/// <summary>
/// Build the 4 x 3 MDP of AIMA Fig 17.1 (here indexed 3 rows x 4 cols):
/// step reward -0.04, blocked cell at (2,2), terminal states at (2,4)
/// with reward -1 and (3,4) with reward +1.
/// </summary>
/// <returns>the cell world converted to an MDP.</returns>
public static MDP <CellWorldPosition, String> CreateFourByThreeMDP()
{
    // Bug fix: the original first constructed a throw-away
    // CellWorld(3, 4, 0.4) and immediately overwrote it; only the
    // -0.04-reward world was ever used, so the dead construction is gone.
    CellWorld cw = new CellWorld(3, 4, -0.04);
    cw.MarkBlocked(2, 2);
    cw.SetTerminalState(2, 4);
    cw.SetReward(2, 4, -1);
    cw.SetTerminalState(3, 4);
    cw.SetReward(3, 4, 1);
    return(cw.AsMdp());
}
/**
 * Build the 4 x 3 MDP of AIMA Fig 17.1 (here indexed 3 rows x 4 cols):
 * step reward -0.04, blocked cell at (2,2), terminal states at (2,4)
 * with reward -1 and (3,4) with reward +1.
 *
 * @return the cell world converted to an MDP.
 */
public static MDP<CellWorldPosition, String> createFourByThreeMDP() {
	// Bug fix: the original first constructed a throw-away
	// CellWorld(3, 4, 0.4) and immediately overwrote it; only the
	// -0.04-reward world was ever used, so the dead construction is gone.
	CellWorld cw = new CellWorld(3, 4, -0.04);
	cw.markBlocked(2, 2);
	cw.setTerminalState(2, 4);
	cw.setReward(2, 4, -1);
	cw.setTerminalState(3, 4);
	cw.setReward(3, 4, 1);
	return cw.asMdp();
}
// Test fixture: 3 x 4 cell world with step reward -0.04, wall at (2,2),
// terminal states at (2,4) reward -1 and (3,4) reward +1 (Fig 17.1 layout).
public void setUp() {
    cw = new CellWorld(3, 4, -0.04);
    cw.markBlocked(2, 2);
    cw.setTerminalState(2, 4);
    cw.setReward(2, 4, -1);
    cw.setTerminalState(3, 4);
    cw.setReward(3, 4, 1);
    // Mock randomizers pinned to single fixed values so stochastic
    // transition outcomes are deterministic in tests: 0.7, 0.85, 0.95
    // (the field names indicate the probability bands they exercise).
    alwaysLessThanEightyPercent = new MockRandomizer(new double[] { 0.7 });
    betweenEightyAndNinetyPercent = new MockRandomizer(
            new double[] { 0.85 });
    greaterThanNinetyPercent = new MockRandomizer(new double[] { 0.95 });
}
/**
 * Returns the allowed actions from a specified cell within the cell world
 * described in Fig 17.1.
 *
 * @param cw
 *            the cell world from figure 17.1.
 * @return the set of actions allowed at a particular cell. This set will be
 *         empty if at a terminal state.
 */
public static ActionsFunction <Cell <Double>, CellWorldAction> createActionsFunctionForFigure17_1(
        CellWorld <Double> cw) {
    // The two terminal states of Fig 17.1; no actions are allowed there.
    Set <Cell <Double> > terminals = new Set <Cell <Double> >();
    terminals.add(cw.getCellAt(4, 3));
    terminals.add(cw.getCellAt(4, 2));
    // NOTE(review): the actions function is never constructed -- af stays
    // null (the anonymous-class body below was commented out during
    // porting), so every caller of this factory receives null and will
    // fail on first use. The TODO still needs to be implemented.
    ActionsFunction <Cell <Double>, CellWorldAction> af = null; //new ActionsFunction<Cell<Double>, CellWorldAction>() { //}; // TODO
    return(af);
}
/// <summary>
/// Runs a Q-learning agent on the Fig 17.1 cell world and reports
/// utility-estimate learning rates.
/// </summary>
static void qLearningAgentDemo()
{
    CellWorld<double> world = CellWorldFactory.CreateCellWorldForFig17_1();
    CellWorldEnvironment env = new CellWorldEnvironment(
        world.GetCellAt(1, 1),
        world.GetCells(),
        MDPFactory.createTransitionProbabilityFunctionForFigure17_1(world),
        CommonFactory.CreateRandom());

    // Constructor arguments (None, 0.2, 1.0, 5, 2.0) are taken verbatim
    // from the original call; see QLearningAgent for their meanings.
    QLearningAgent<Cell<double>, CellWorldAction> agent =
        new QLearningAgent<Cell<double>, CellWorldAction>(
            MDPFactory.createActionsFunctionForFigure17_1(world),
            CellWorldAction.None, 0.2, 1.0, 5, 2.0);
    env.AddAgent(agent);
    output_utility_learning_rates(agent, 20, 10000, 500, 20);
}
public void Prepare()
{
    // Dense 100 x 100 block of cells anchored at the origin.
    _testMap = new List<Cell> { };
    for (var i = 0; i < 100; i++)
    {
        for (var j = 0; j < 100; j++)
        {
            _testMap.Add(new Cell { X = i, Y = j });
        }
    }

    // Plus up to 10000 random distinct cells offset by (100, 100);
    // duplicates are simply skipped.
    var rng = new Random();
    for (var i = 0; i < 10000; i++)
    {
        var cell = new Cell { X = 100 + rng.Next(1000), Y = 100 + rng.Next(1000) };
        if (_testMap.Contains(cell))
        {
            continue;
        }
        _testMap.Add(cell);
    }

    // Materialize the reference map into the world under test.
    _world = new CellWorld();
    foreach (var cell in _testMap)
    {
        _world[cell.X, cell.Y] = true;
    }
}
/// <summary>
/// Runs a passive ADP agent (AIMA3e Fig 21.3 setting) over the Fig 17.1
/// cell world under a fixed policy and reports utility learning rates.
/// </summary>
static void passiveADPAgentDemo()
{
    System.Console.WriteLine("=======================");
    System.Console.WriteLine("DEMO: Passive-ADP-Agent");
    System.Console.WriteLine("=======================");
    System.Console.WriteLine("Figure 21.3");
    System.Console.WriteLine("-----------");

    CellWorld<double> world = CellWorldFactory.CreateCellWorldForFig17_1();
    CellWorldEnvironment env = new CellWorldEnvironment(
        world.GetCellAt(1, 1),
        world.GetCells(),
        MDPFactory.createTransitionProbabilityFunctionForFigure17_1(world),
        CommonFactory.CreateRandom());

    // Fixed policy entered in the same order as the original Put calls;
    // the insertion-ordered map preserves that order.
    IMap<Cell<double>, CellWorldAction> fixedPolicy =
        CollectionFactory.CreateInsertionOrderedMap<Cell<double>, CellWorldAction>();
    int[,] xy = { { 1, 1 }, { 1, 2 }, { 1, 3 }, { 2, 1 }, { 2, 3 }, { 3, 1 }, { 3, 2 }, { 3, 3 }, { 4, 1 } };
    CellWorldAction[] act = {
        CellWorldAction.Up, CellWorldAction.Up, CellWorldAction.Right,
        CellWorldAction.Left, CellWorldAction.Right, CellWorldAction.Left,
        CellWorldAction.Up, CellWorldAction.Right, CellWorldAction.Left
    };
    for (int i = 0; i < act.Length; ++i)
    {
        fixedPolicy.Put(world.GetCellAt(xy[i, 0], xy[i, 1]), act[i]);
    }

    // Arguments (10, 1.0) for the evaluator are from the original call;
    // see ModifiedPolicyEvaluation for their exact meaning.
    PassiveADPAgent<Cell<double>, CellWorldAction> agent =
        new PassiveADPAgent<Cell<double>, CellWorldAction>(
            fixedPolicy, world.GetCells(), world.GetCellAt(1, 1),
            MDPFactory.createActionsFunctionForFigure17_1(world),
            new ModifiedPolicyEvaluation<Cell<double>, CellWorldAction>(10, 1.0));
    env.AddAgent(agent);
    output_utility_learning_rates(agent, 20, 100, 100, 1);
    System.Console.WriteLine("=========================");
}
// Test fixture setup: Fig 17.1 world, Fig 17.3 MDP, and a ValueIteration
// solver constructed with 1.0 -- presumably the discount factor; TODO
// confirm against the ValueIteration constructor.
public void setUp() { cw = CellWorldFactory.CreateCellWorldForFig17_1(); mdp = MDPFactory.createMDPForFigure17_3(cw); vi = new ValueIteration<Cell<double>, CellWorldAction>(1.0); }
/**
 * Figure 17.1 (b): the 'intended' outcome of an action occurs with
 * probability 0.8, but with probability 0.2 the agent moves at right
 * angles to the intended direction. A collision with a wall results in no
 * movement.
 *
 * @param cw
 *            the cell world from figure 17.1.
 * @return the transition probability function as described in figure 17.1.
 */
public static ITransitionProbabilityFunction <Cell <double>, CellWorldAction> createTransitionProbabilityFunctionForFigure17_1(CellWorld <double> cw)
{
    // The model itself lives in the dedicated implementation class; this
    // factory merely binds it to the given cell world.
    return new createTransitionProbabilityFunctionForFigure17_1TransitionProbabilityFunction(cw);
}
/**
 * Returns the allowed actions from a specified cell within the cell world
 * described in Fig 17.1.
 *
 * @param cw
 *            the cell world from figure 17.1.
 * @return the set of actions allowed at a particular cell. This set will be
 *         empty if at a terminal state.
 */
public static IActionsFunction <Cell <double>, CellWorldAction> createActionsFunctionForFigure17_1(CellWorld <double> cw)
{
    // (4,3) and (4,2) are the terminal states of Fig 17.1; the actions
    // function built below treats membership in this set as "no actions".
    ISet<Cell<double>> terminalCells = CollectionFactory.CreateSet<Cell<double>>();
    terminalCells.Add(cw.GetCellAt(4, 3));
    terminalCells.Add(cw.GetCellAt(4, 2));
    return new createActionsFunctionForFigure17_1ActionsFunction(terminalCells);
}
// Test fixture setup: only the Fig 17.1 cell world is needed here.
public void setUp() { cw = CellWorldFactory.CreateCellWorldForFig17_1(); }
// Captures the cell world whose geometry this transition-probability
// function is defined over.
public createTransitionProbabilityFunctionForFigure17_1TransitionProbabilityFunction(CellWorld <double> cw) { this.cw = cw; }
// // PRIVATE METHODS // private void assertPolicyReccomends(CellWorld cw, MDPUtilityFunction<CellWorldPosition> uf, int x, int y, String actionExpected) { Pair<String, Double> p = cw.getTransitionModel() .getTransitionWithMaximumExpectedUtility( new CellWorldPosition(x, y), uf); Assert.assertEquals(actionExpected, p.getFirst()); }
// Builds the Animat demo screen: a top panel with map-loading buttons
// (two samples, three tests) and a world-size read-out; a right-hand
// column with a "Settings" group (algorithm combo: Q-Learning/Sarsa,
// exploration/learning-rate and iteration text boxes, read-only reward
// boxes) and a "Learning" group (iteration read-out, Start/Stop and
// "Show solution" buttons); and the central cell-world view, initially
// with no coloring and no map. NOTE(review): all control positions and
// sizes are hard-coded pixel values.
public Example_AForge_Animat(Base parent) : base(parent) { // GUI { m_TopPanel = new Base(this); { m_TopPanel.Dock = Pos.Top; m_TopPanel.Height = 30; Alt.GUI.Temporary.Gwen.Control.Label label = new Alt.GUI.Temporary.Gwen.Control.Label(m_TopPanel); label.Location = new PointI(0, 2); label.AutoSizeToContents = true; label.TextColor = Color.LightGreen; label.Text = "Map"; label = new Alt.GUI.Temporary.Gwen.Control.Label(m_TopPanel); label.Location = new PointI(33, 2); label.Text = "World size:"; label.AutoSizeToContents = true; worldSizeBox = new Alt.GUI.Temporary.Gwen.Control.TextBox(m_TopPanel); worldSizeBox.Location = new PointI(102, 0); worldSizeBox.ReadOnly = true; worldSizeBox.Width = 40; loadButton1 = new Alt.GUI.Temporary.Gwen.Control.Button(m_TopPanel); loadButton1.Location = new PointI(160, 0); loadButton1.Width = 80; loadButton1.Text = "Sample 1"; loadButton1.Click += new System.EventHandler(loadButton1_Click); loadButton1.NormalTextColor = Color.Red * 0.8; loadButton2 = new Alt.GUI.Temporary.Gwen.Control.Button(m_TopPanel); loadButton2.Location = new PointI(250, 0); loadButton2.Width = 80; loadButton2.Text = "Sample 2"; loadButton2.Click += new System.EventHandler(loadButton2_Click); loadButton2.NormalTextColor = Color.Brown; loadButton3 = new Alt.GUI.Temporary.Gwen.Control.Button(m_TopPanel); loadButton3.Location = new PointI(340, 0); loadButton3.Width = 80; loadButton3.Text = "Test 1"; loadButton3.Click += new System.EventHandler(loadButton3_Click); loadButton3.NormalTextColor = Color.Green; loadButton4 = new Alt.GUI.Temporary.Gwen.Control.Button(m_TopPanel); loadButton4.Location = new PointI(430, 0); loadButton4.Width = 80; loadButton4.Text = "Test 2"; loadButton4.Click += new System.EventHandler(loadButton4_Click); loadButton4.NormalTextColor = Color.Blue; loadButton5 = new Alt.GUI.Temporary.Gwen.Control.Button(m_TopPanel); loadButton5.Location = new PointI(520, 0); loadButton5.Width = 80; loadButton5.Text = "Test 3"; loadButton5.Click += new 
System.EventHandler(loadButton5_Click); loadButton5.NormalTextColor = Color.Violet * 0.8; } m_RightPanel = new Base(this); { m_RightPanel.Dock = Pos.Right; m_RightPanel.Width = 140; m_RightPanel.Margin = new Margin(5, 0, 0, 0); Alt.GUI.Temporary.Gwen.Control.GroupBox groupBox = new Alt.GUI.Temporary.Gwen.Control.GroupBox(m_RightPanel); { groupBox.Location = new PointI(0, 0); groupBox.Size = new SizeI(140, 363); groupBox.Text = "Settings"; Alt.GUI.Temporary.Gwen.Control.Label label = new Alt.GUI.Temporary.Gwen.Control.Label(groupBox); label.Location = new PointI(0, 10); label.AutoSizeToContents = true; label.Text = "Learning algorithm:"; algorithmCombo = new Alt.GUI.Temporary.Gwen.Control.ComboBox(groupBox); algorithmCombo.Location = new PointI(0, 30); algorithmCombo.Width = 130; algorithmCombo.AddItem("Q-Learning").UserData = 0; algorithmCombo.AddItem("Sarsa").UserData = 1; label = new Alt.GUI.Temporary.Gwen.Control.Label(groupBox); label.Location = new PointI(0, 65); label.AutoSizeToContents = true; label.Text = "Initial exploration rate:"; explorationRateBox = new Alt.GUI.Temporary.Gwen.Control.TextBox(groupBox); explorationRateBox.Location = new PointI(0, 85); explorationRateBox.Width = 130; label = new Alt.GUI.Temporary.Gwen.Control.Label(groupBox); label.Location = new PointI(0, 110); label.AutoSizeToContents = true; label.Text = "Initial learning rate:"; learningRateBox = new Alt.GUI.Temporary.Gwen.Control.TextBox(groupBox); learningRateBox.Location = new PointI(0, 130); learningRateBox.Width = 130; label = new Alt.GUI.Temporary.Gwen.Control.Label(groupBox); label.Location = new PointI(0, 155); label.AutoSizeToContents = true; label.Text = "Learning iterations:"; iterationsBox = new Alt.GUI.Temporary.Gwen.Control.TextBox(groupBox); iterationsBox.Location = new PointI(0, 175); iterationsBox.Width = 130; label = new Alt.GUI.Temporary.Gwen.Control.Label(groupBox); label.Location = new PointI(0, 210); label.AutoSizeToContents = true; label.Text = "Move reward:"; 
moveRewardBox = new Alt.GUI.Temporary.Gwen.Control.TextBox(groupBox); moveRewardBox.Location = new PointI(0, 230); moveRewardBox.Width = 130; moveRewardBox.ReadOnly = true; label = new Alt.GUI.Temporary.Gwen.Control.Label(groupBox); label.Location = new PointI(0, 255); label.AutoSizeToContents = true; label.Text = "Wall reward:"; wallRewardBox = new Alt.GUI.Temporary.Gwen.Control.TextBox(groupBox); wallRewardBox.Location = new PointI(0, 275); wallRewardBox.Width = 130; wallRewardBox.ReadOnly = true; label = new Alt.GUI.Temporary.Gwen.Control.Label(groupBox); label.Location = new PointI(0, 300); label.AutoSizeToContents = true; label.Text = "Goal reward:"; goalRewardBox = new Alt.GUI.Temporary.Gwen.Control.TextBox(groupBox); goalRewardBox.Location = new PointI(0, 320); goalRewardBox.Width = 130; goalRewardBox.ReadOnly = true; } groupBox = new Alt.GUI.Temporary.Gwen.Control.GroupBox(m_RightPanel); { groupBox.Location = new PointI(0, 380); groupBox.Size = new SizeI(140, 173); groupBox.Text = "Learning"; Alt.GUI.Temporary.Gwen.Control.Label label = new Alt.GUI.Temporary.Gwen.Control.Label(groupBox); label.Location = new PointI(0, 10); label.AutoSizeToContents = true; label.Text = "Iteration:"; iterationBox = new Alt.GUI.Temporary.Gwen.Control.TextBox(groupBox); iterationBox.Location = new PointI(0, 30); iterationBox.Width = 130; iterationBox.ReadOnly = true; startLearningButton = new Alt.GUI.Temporary.Gwen.Control.Button(groupBox); startLearningButton.Location = new PointI(5, 70); startLearningButton.Width = 120; startLearningButton.Text = "Start"; startLearningButton.Click += new System.EventHandler(startLearningButton_Click); startLearningButton.NormalTextColor = Color.Green; stopButton = new Alt.GUI.Temporary.Gwen.Control.Button(groupBox); stopButton.Location = new PointI(5, 100); stopButton.Width = 120; stopButton.Text = "Stop"; stopButton.Click += new System.EventHandler(stopButton_Click); stopButton.NormalTextColor = Color.Red; showSolutionButton = new 
Alt.GUI.Temporary.Gwen.Control.Button(groupBox); showSolutionButton.Location = new PointI(5, 130); showSolutionButton.Width = 120; showSolutionButton.Text = "Show solution"; showSolutionButton.Click += new System.EventHandler(showSolutionButton_Click); showSolutionButton.NormalTextColor = Color.Blue; } } cellWorld = new CellWorld(this); cellWorld.Coloring = null; cellWorld.Map = null; } }
// Runs the given reinforcement agent for numRuns runs of numTrialsPerRun
// trials each on the Fig 17.1 cell world, sampling the agent's utility
// map every reportEveryN trials. Then prints, tab-separated for
// spreadsheet import: the utility estimates of six reference cells over
// the LAST run, followed by the RMS error of U(1,1) against 0.705 across
// all runs for the first rmseTrialsToReport sampled trials. Throws
// IllegalArgumentException when rmseTrialsToReport exceeds
// numTrialsPerRun / reportEveryN.
protected static void output_utility_learning_rates( ReinforcementAgent <Cell <double>, CellWorldAction> reinforcementAgent, int numRuns, int numTrialsPerRun, int rmseTrialsToReport, int reportEveryN) { if (rmseTrialsToReport > (numTrialsPerRun / reportEveryN)) { throw new IllegalArgumentException("Requesting to report too many RMSE trials, max allowed for args is " + (numTrialsPerRun / reportEveryN)); } CellWorld <double> cw = CellWorldFactory.CreateCellWorldForFig17_1(); CellWorldEnvironment cwe = new CellWorldEnvironment( cw.GetCellAt(1, 1), cw.GetCells(), MDPFactory.createTransitionProbabilityFunctionForFigure17_1(cw), CommonFactory.CreateRandom()); cwe.AddAgent(reinforcementAgent); IMap <int, ICollection <IMap <Cell <double>, double> > > runs = CollectionFactory.CreateInsertionOrderedMap <int, ICollection <IMap <Cell <double>, double> > >(); for (int r = 0; r < numRuns; r++) { reinforcementAgent.reset(); ICollection <IMap <Cell <double>, double> > trials = CollectionFactory.CreateQueue <IMap <Cell <double>, double> >(); for (int t = 0; t < numTrialsPerRun; t++) { cwe.executeTrial(); if (0 == t % reportEveryN) { IMap <Cell <double>, double> u = reinforcementAgent .getUtility(); //if (null == u.Get(cw.getCellAt(1, 1))) //{ // throw new IllegalStateException( // "Bad Utility State Encountered: r=" + r // + ", t=" + t + ", u=" + u); //} trials.Add(u); } } runs.Put(r, trials); } IStringBuilder v4_3 = TextFactory.CreateStringBuilder(); IStringBuilder v3_3 = TextFactory.CreateStringBuilder(); IStringBuilder v1_3 = TextFactory.CreateStringBuilder(); IStringBuilder v1_1 = TextFactory.CreateStringBuilder(); IStringBuilder v3_2 = TextFactory.CreateStringBuilder(); IStringBuilder v2_1 = TextFactory.CreateStringBuilder(); for (int t = 0; t < (numTrialsPerRun / reportEveryN); t++) { // Use the last run IMap <Cell <double>, double> u = runs.Get(numRuns - 1).Get(t); v4_3.Append((u.ContainsKey(cw.GetCellAt(4, 3)) ? 
u.Get(cw .GetCellAt(4, 3)) : 0.0) + "\t"); v3_3.Append((u.ContainsKey(cw.GetCellAt(3, 3)) ? u.Get(cw .GetCellAt(3, 3)) : 0.0) + "\t"); v1_3.Append((u.ContainsKey(cw.GetCellAt(1, 3)) ? u.Get(cw .GetCellAt(1, 3)) : 0.0) + "\t"); v1_1.Append((u.ContainsKey(cw.GetCellAt(1, 1)) ? u.Get(cw .GetCellAt(1, 1)) : 0.0) + "\t"); v3_2.Append((u.ContainsKey(cw.GetCellAt(3, 2)) ? u.Get(cw .GetCellAt(3, 2)) : 0.0) + "\t"); v2_1.Append((u.ContainsKey(cw.GetCellAt(2, 1)) ? u.Get(cw .GetCellAt(2, 1)) : 0.0) + "\t"); } IStringBuilder rmseValues = TextFactory.CreateStringBuilder(); for (int t = 0; t < rmseTrialsToReport; t++) { // Calculate the Root Mean Square Error for utility of 1,1 // for this trial# across all runs double xSsquared = 0; for (int r = 0; r < numRuns; r++) { IMap <Cell <double>, double> u = runs.Get(r).Get(t); double val1_1 = u.Get(cw.GetCellAt(1, 1)); //if (null == val1_1) //{ // throw new IllegalStateException( // "U(1,1,) is not present: r=" + r + ", t=" + t // + ", runs.size=" + runs.Size() // + ", runs(r).Size()=" + runs.Get(r).Size() // + ", u=" + u); //} xSsquared += System.Math.Pow(0.705 - val1_1, 2); } double rmse = System.Math.Sqrt(xSsquared / runs.Size()); rmseValues.Append(rmse); rmseValues.Append("\t"); } System.Console .WriteLine("Note: You may copy and paste the following lines into a spreadsheet to generate graphs of learning rate and RMS error in utility:"); System.Console.WriteLine("(4,3)" + "\t" + v4_3); System.Console.WriteLine("(3,3)" + "\t" + v3_3); System.Console.WriteLine("(1,3)" + "\t" + v1_3); System.Console.WriteLine("(1,1)" + "\t" + v1_1); System.Console.WriteLine("(3,2)" + "\t" + v3_2); System.Console.WriteLine("(2,1)" + "\t" + v2_1); System.Console.WriteLine("RMSeiu" + "\t" + rmseValues); }
// Test fixture setup: Fig 17.1 cell world plus the Fig 17.3 MDP over it.
public void setUp() { cw = CellWorldFactory.CreateCellWorldForFig17_1(); mdp = MDPFactory.createMDPForFigure17_3(cw); }
// Captures the cell world this transition-probability implementation
// operates over.
public TransitionProbabilityFunctionImpl(CellWorld <Double> cw) { this.cw = cw; }