/// <summary>
/// Demonstrates a Passive-TD-Agent learning state utilities in the 4x3 cell
/// world of Fig 17.1 (AIMA3e) while following a fixed policy, then prints
/// learning-rate and RMSE data via output_utility_learning_rates.
/// </summary>
static void passiveTDAgentDemo()
{
    // Banner output, consistent with the other demo methods (e.g. passiveADPAgentDemo).
    System.Console.WriteLine("======================");
    System.Console.WriteLine("DEMO: Passive-TD-Agent");
    System.Console.WriteLine("======================");

    CellWorld<double> cw = CellWorldFactory.CreateCellWorldForFig17_1();
    CellWorldEnvironment cwe = new CellWorldEnvironment(
        cw.GetCellAt(1, 1),
        cw.GetCells(),
        MDPFactory.createTransitionProbabilityFunctionForFigure17_1(cw),
        CommonFactory.CreateRandom());

    // Fixed policy for the agent to follow; the terminal cells (4,2) and
    // (4,3) deliberately have no action. Use an insertion-ordered map for
    // consistency with passiveADPAgentDemo.
    IMap<Cell<double>, CellWorldAction> fixedPolicy = CollectionFactory.CreateInsertionOrderedMap<Cell<double>, CellWorldAction>();
    fixedPolicy.Put(cw.GetCellAt(1, 1), CellWorldAction.Up);
    fixedPolicy.Put(cw.GetCellAt(1, 2), CellWorldAction.Up);
    fixedPolicy.Put(cw.GetCellAt(1, 3), CellWorldAction.Right);
    fixedPolicy.Put(cw.GetCellAt(2, 1), CellWorldAction.Left);
    fixedPolicy.Put(cw.GetCellAt(2, 3), CellWorldAction.Right);
    fixedPolicy.Put(cw.GetCellAt(3, 1), CellWorldAction.Left);
    fixedPolicy.Put(cw.GetCellAt(3, 2), CellWorldAction.Up);
    fixedPolicy.Put(cw.GetCellAt(3, 3), CellWorldAction.Right);
    fixedPolicy.Put(cw.GetCellAt(4, 1), CellWorldAction.Left);

    // alpha = 0.2, gamma = 1.0 (no discounting), as in the original demo.
    PassiveTDAgent<Cell<double>, CellWorldAction> ptda = new PassiveTDAgent<Cell<double>, CellWorldAction>(fixedPolicy, 0.2, 1.0);
    cwe.AddAgent(ptda);

    output_utility_learning_rates(ptda, 20, 500, 100, 1);

    System.Console.WriteLine("=========================");
}
/// <summary>
/// Create the cell world as defined in Figure 17.1 in AIMA3e: a simple
/// 4 x 3 environment that presents the agent with a sequential decision
/// problem.
/// </summary>
/// <returns>a cell world representation of Fig 17.1 in AIMA3e.</returns>
public static CellWorld<double> CreateCellWorldForFig17_1()
{
    // Every non-terminal cell carries the default reward of -0.04.
    CellWorld<double> world = new CellWorld<double>(4, 3, -0.04);

    // (2,2) is the obstacle square in Fig 17.1, so it is removed outright.
    world.RemoveCell(2, 2);

    // The two terminal squares: -1 at (4,2) and +1 at (4,3).
    world.GetCellAt(4, 2).setContent(-1.0);
    world.GetCellAt(4, 3).setContent(1.0);

    return world;
}
/// <summary>
/// Returns the allowed actions from a specified cell within the cell world
/// described in Fig 17.1.
/// </summary>
/// <param name="cw">the cell world from figure 17.1.</param>
/// <returns>the set of actions allowed at a particular cell. This set will be
/// empty if at a terminal state.</returns>
public static IActionsFunction<Cell<double>, CellWorldAction> createActionsFunctionForFigure17_1(CellWorld<double> cw)
{
    // The +1 cell (4,3) and the -1 cell (4,2) are the terminal states;
    // no actions are available from them.
    ISet<Cell<double>> terminalStates = CollectionFactory.CreateSet<Cell<double>>();
    terminalStates.Add(cw.GetCellAt(4, 3));
    terminalStates.Add(cw.GetCellAt(4, 2));

    return new createActionsFunctionForFigure17_1ActionsFunction(terminalStates);
}
/// <summary>
/// Constructs an MDP that can be used to generate the utility values detailed in Fig 17.3.
/// </summary>
/// <param name="cw">the cell world from figure 17.1.</param>
/// <returns>an MDP that can be used to generate the utility values detailed in Fig 17.3.</returns>
public static IMarkovDecisionProcess<Cell<double>, CellWorldAction> createMDPForFigure17_3(CellWorld<double> cw)
{
    // Assemble the MDP from its constituent parts; the start state is (1,1).
    var actionsFunction = createActionsFunctionForFigure17_1(cw);
    var transitionFunction = createTransitionProbabilityFunctionForFigure17_1(cw);
    var rewardFunction = createRewardFunctionForFigure17_1();

    return new MDP<Cell<double>, CellWorldAction>(
        cw.GetCells(),
        cw.GetCellAt(1, 1),
        actionsFunction,
        transitionFunction,
        rewardFunction);
}
/// <summary>
/// Demonstrates a Q-Learning-Agent learning in the 4x3 cell world of
/// Fig 17.1 (AIMA3e), then prints learning-rate and RMSE data via
/// output_utility_learning_rates.
/// </summary>
static void qLearningAgentDemo()
{
    // Banner output, consistent with the other demo methods (e.g. passiveADPAgentDemo).
    System.Console.WriteLine("======================");
    System.Console.WriteLine("DEMO: Q-Learning-Agent");
    System.Console.WriteLine("======================");

    CellWorld<double> cw = CellWorldFactory.CreateCellWorldForFig17_1();
    CellWorldEnvironment cwe = new CellWorldEnvironment(
        cw.GetCellAt(1, 1),
        cw.GetCells(),
        MDPFactory.createTransitionProbabilityFunctionForFigure17_1(cw),
        CommonFactory.CreateRandom());

    // alpha = 0.2, gamma = 1.0, Ne = 5, Rplus = 2.0 — parameters as in the
    // original demo; CellWorldAction.None denotes the "no-op" action.
    QLearningAgent<Cell<double>, CellWorldAction> qla = new QLearningAgent<Cell<double>, CellWorldAction>(
        MDPFactory.createActionsFunctionForFigure17_1(cw),
        CellWorldAction.None, 0.2, 1.0, 5, 2.0);
    cwe.AddAgent(qla);

    output_utility_learning_rates(qla, 20, 10000, 500, 20);

    System.Console.WriteLine("=========================");
}
/// <summary>
/// Value iteration on the Fig 17.1 world should converge to the utilities
/// published in Fig 17.3 of AIMA3e (within DELTA_THRESHOLD).
/// </summary>
public void testValueIterationForFig17_3()
{
    IMap<Cell<double>, double> U = vi.valueIteration(mdp, 0.0001);

    // Expected utilities per cell, in (x, y) order matching Fig 17.3;
    // note (2,2) does not exist and (4,2)/(4,3) are the terminal states.
    int[] xs = { 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4 };
    int[] ys = { 1, 2, 3, 1, 3, 1, 2, 3, 1, 2, 3 };
    double[] expected = { 0.705, 0.762, 0.812, 0.655, 0.868, 0.611, 0.660, 0.918, 0.388, -1.0, 1.0 };

    for (int i = 0; i < expected.Length; i++)
    {
        Assert.AreEqual(expected[i], U.Get(cw.GetCellAt(xs[i], ys[i])), DELTA_THRESHOLD);
    }
}
/// <summary>
/// Moving Up from (1,1) should land the agent in the adjacent cell (1,2).
/// </summary>
public void testMoveUpIntoAdjacentCellChangesPositionCorrectly()
{
    Cell<double> result = cw.Result(cw.GetCellAt(1, 1), CellWorldAction.Up);

    Assert.AreEqual(1, result.getX());
    Assert.AreEqual(2, result.getY());
}
/// <summary>
/// Checks the transition model from state (1,1): the intended direction
/// succeeds with probability 0.8, each perpendicular direction occurs with
/// probability 0.1, and bumping a wall leaves the agent in place.
/// </summary>
public void testMDPTransitionModel()
{
    // All transitions below originate from the corner cell (1,1).
    Cell<double> current = cw.GetCellAt(1, 1);

    // Up: 0.8 to (1,2); wall-bump (0.1 Left) stays at (1,1); 0.1 Right to (2,1).
    Assert.AreEqual(0.8, mdp.transitionProbability(cw.GetCellAt(1, 2), current, CellWorldAction.Up));
    Assert.AreEqual(0.1, mdp.transitionProbability(cw.GetCellAt(1, 1), current, CellWorldAction.Up));
    Assert.AreEqual(0.1, mdp.transitionProbability(cw.GetCellAt(2, 1), current, CellWorldAction.Up));
    Assert.AreEqual(0.0, mdp.transitionProbability(cw.GetCellAt(1, 3), current, CellWorldAction.Up));

    // Down: 0.8 wall-bump + 0.1 Left wall-bump stays at (1,1); 0.1 Right to (2,1).
    Assert.AreEqual(0.9, mdp.transitionProbability(cw.GetCellAt(1, 1), current, CellWorldAction.Down));
    Assert.AreEqual(0.1, mdp.transitionProbability(cw.GetCellAt(2, 1), current, CellWorldAction.Down));
    Assert.AreEqual(0.0, mdp.transitionProbability(cw.GetCellAt(3, 1), current, CellWorldAction.Down));
    Assert.AreEqual(0.0, mdp.transitionProbability(cw.GetCellAt(1, 2), current, CellWorldAction.Down));

    // Left: 0.8 wall-bump + 0.1 Down wall-bump stays at (1,1); 0.1 Up to (1,2).
    Assert.AreEqual(0.9, mdp.transitionProbability(cw.GetCellAt(1, 1), current, CellWorldAction.Left));
    Assert.AreEqual(0.0, mdp.transitionProbability(cw.GetCellAt(2, 1), current, CellWorldAction.Left));
    Assert.AreEqual(0.0, mdp.transitionProbability(cw.GetCellAt(3, 1), current, CellWorldAction.Left));
    Assert.AreEqual(0.1, mdp.transitionProbability(cw.GetCellAt(1, 2), current, CellWorldAction.Left));

    // Right: 0.8 to (2,1); 0.1 Down wall-bump stays at (1,1); 0.1 Up to (1,2).
    Assert.AreEqual(0.8, mdp.transitionProbability(cw.GetCellAt(2, 1), current, CellWorldAction.Right));
    Assert.AreEqual(0.1, mdp.transitionProbability(cw.GetCellAt(1, 1), current, CellWorldAction.Right));
    Assert.AreEqual(0.1, mdp.transitionProbability(cw.GetCellAt(1, 2), current, CellWorldAction.Right));
    Assert.AreEqual(0.0, mdp.transitionProbability(cw.GetCellAt(1, 3), current, CellWorldAction.Right));
}
/// <summary>
/// Policy iteration on the Fig 17.1 world should produce the optimal
/// policy shown in Figure 17.2 (a) of AIMA3e.
/// </summary>
public void testPolicyIterationForFig17_2()
{
    IPolicy<Cell<double>, CellWorldAction> policy = pi.policyIteration(mdp);

    // Expected action per non-terminal cell, in (x, y) order.
    int[] xs = { 1, 1, 1, 2, 2, 3, 3, 3, 4 };
    int[] ys = { 1, 2, 3, 1, 3, 1, 2, 3, 1 };
    CellWorldAction[] expected =
    {
        CellWorldAction.Up, CellWorldAction.Up, CellWorldAction.Right,
        CellWorldAction.Left, CellWorldAction.Right, CellWorldAction.Left,
        CellWorldAction.Up, CellWorldAction.Right, CellWorldAction.Left
    };

    for (int i = 0; i < expected.Length; i++)
    {
        Assert.AreEqual(expected[i], policy.action(cw.GetCellAt(xs[i], ys[i])));
    }

    // The terminal states (4,2) and (4,3) have no action.
    Assert.IsNull(policy.action(cw.GetCellAt(4, 2)));
    Assert.IsNull(policy.action(cw.GetCellAt(4, 3)));
}
/// <summary>
/// Runs the given reinforcement learning agent in the 4x3 cell world of
/// Fig 17.1 and writes tab-separated utility learning curves (for six
/// selected cells) plus per-trial RMS error of U(1,1) to the console, in a
/// form suitable for pasting into a spreadsheet.
/// </summary>
/// <param name="reinforcementAgent">the agent whose utility estimates are tracked;
/// it is reset before each run.</param>
/// <param name="numRuns">number of independent runs to execute.</param>
/// <param name="numTrialsPerRun">number of trials executed per run.</param>
/// <param name="rmseTrialsToReport">how many recorded trials to include in the RMSE
/// output; must not exceed numTrialsPerRun / reportEveryN.</param>
/// <param name="reportEveryN">record the agent's utility estimates every N trials.</param>
protected static void output_utility_learning_rates(
    ReinforcementAgent<Cell<double>, CellWorldAction> reinforcementAgent,
    int numRuns, int numTrialsPerRun, int rmseTrialsToReport,
    int reportEveryN)
{
    // Guard: the RMSE report can only cover trials that were recorded.
    if (rmseTrialsToReport > (numTrialsPerRun / reportEveryN))
    {
        throw new IllegalArgumentException("Requesting to report too many RMSE trials, max allowed for args is " + (numTrialsPerRun / reportEveryN));
    }

    // Fresh world/environment for this experiment; agent starts at (1,1).
    CellWorld<double> cw = CellWorldFactory.CreateCellWorldForFig17_1();
    CellWorldEnvironment cwe = new CellWorldEnvironment(
        cw.GetCellAt(1, 1),
        cw.GetCells(),
        MDPFactory.createTransitionProbabilityFunctionForFigure17_1(cw),
        CommonFactory.CreateRandom());
    cwe.AddAgent(reinforcementAgent);

    // runs: run index -> ordered snapshots of the agent's utility map,
    // one snapshot taken every reportEveryN trials.
    IMap<int, ICollection<IMap<Cell<double>, double>>> runs =
        CollectionFactory.CreateInsertionOrderedMap<int, ICollection<IMap<Cell<double>, double>>>();
    for (int r = 0; r < numRuns; r++)
    {
        // Each run starts from a freshly reset agent.
        reinforcementAgent.reset();
        ICollection<IMap<Cell<double>, double>> trials =
            CollectionFactory.CreateQueue<IMap<Cell<double>, double>>();
        for (int t = 0; t < numTrialsPerRun; t++)
        {
            cwe.executeTrial();
            if (0 == t % reportEveryN)
            {
                // Snapshot the agent's current utility estimates.
                IMap<Cell<double>, double> u = reinforcementAgent
                    .getUtility();
                //if (null == u.Get(cw.getCellAt(1, 1)))
                //{
                //    throw new IllegalStateException(
                //        "Bad Utility State Encountered: r=" + r
                //        + ", t=" + t + ", u=" + u);
                //}
                trials.Add(u);
            }
        }
        runs.Put(r, trials);
    }

    // Learning curves for six representative cells; each builder
    // accumulates one tab-separated row of utility values over time.
    IStringBuilder v4_3 = TextFactory.CreateStringBuilder();
    IStringBuilder v3_3 = TextFactory.CreateStringBuilder();
    IStringBuilder v1_3 = TextFactory.CreateStringBuilder();
    IStringBuilder v1_1 = TextFactory.CreateStringBuilder();
    IStringBuilder v3_2 = TextFactory.CreateStringBuilder();
    IStringBuilder v2_1 = TextFactory.CreateStringBuilder();
    for (int t = 0; t < (numTrialsPerRun / reportEveryN); t++)
    {
        // Use the last run
        IMap<Cell<double>, double> u = runs.Get(numRuns - 1).Get(t);

        // A cell may not yet have a utility estimate; report 0.0 then.
        v4_3.Append((u.ContainsKey(cw.GetCellAt(4, 3)) ? u.Get(cw.GetCellAt(4, 3)) : 0.0) + "\t");
        v3_3.Append((u.ContainsKey(cw.GetCellAt(3, 3)) ? u.Get(cw.GetCellAt(3, 3)) : 0.0) + "\t");
        v1_3.Append((u.ContainsKey(cw.GetCellAt(1, 3)) ? u.Get(cw.GetCellAt(1, 3)) : 0.0) + "\t");
        v1_1.Append((u.ContainsKey(cw.GetCellAt(1, 1)) ? u.Get(cw.GetCellAt(1, 1)) : 0.0) + "\t");
        v3_2.Append((u.ContainsKey(cw.GetCellAt(3, 2)) ? u.Get(cw.GetCellAt(3, 2)) : 0.0) + "\t");
        v2_1.Append((u.ContainsKey(cw.GetCellAt(2, 1)) ? u.Get(cw.GetCellAt(2, 1)) : 0.0) + "\t");
    }

    IStringBuilder rmseValues = TextFactory.CreateStringBuilder();
    for (int t = 0; t < rmseTrialsToReport; t++)
    {
        // Calculate the Root Mean Square Error for utility of 1,1
        // for this trial# across all runs
        double xSsquared = 0;
        for (int r = 0; r < numRuns; r++)
        {
            IMap<Cell<double>, double> u = runs.Get(r).Get(t);
            double val1_1 = u.Get(cw.GetCellAt(1, 1));
            //if (null == val1_1)
            //{
            //    throw new IllegalStateException(
            //        "U(1,1,) is not present: r=" + r + ", t=" + t
            //        + ", runs.size=" + runs.Size()
            //        + ", runs(r).Size()=" + runs.Get(r).Size()
            //        + ", u=" + u);
            //}
            // 0.705 is the true U(1,1) computed by value iteration (Fig 17.3).
            xSsquared += System.Math.Pow(0.705 - val1_1, 2);
        }
        double rmse = System.Math.Sqrt(xSsquared / runs.Size());
        rmseValues.Append(rmse);
        rmseValues.Append("\t");
    }

    System.Console
        .WriteLine("Note: You may copy and paste the following lines into a spreadsheet to generate graphs of learning rate and RMS error in utility:");
    System.Console.WriteLine("(4,3)" + "\t" + v4_3);
    System.Console.WriteLine("(3,3)" + "\t" + v3_3);
    System.Console.WriteLine("(1,3)" + "\t" + v1_3);
    System.Console.WriteLine("(1,1)" + "\t" + v1_1);
    System.Console.WriteLine("(3,2)" + "\t" + v3_2);
    System.Console.WriteLine("(2,1)" + "\t" + v2_1);
    System.Console.WriteLine("RMSeiu" + "\t" + rmseValues);
}
/// <summary>
/// Demonstrates a Passive-ADP-Agent following a fixed policy in the 4x3
/// cell world of Fig 17.1 (cf. Figure 21.3 in AIMA3e), then prints
/// learning-rate and RMSE data via output_utility_learning_rates.
/// </summary>
static void passiveADPAgentDemo()
{
    System.Console.WriteLine("=======================");
    System.Console.WriteLine("DEMO: Passive-ADP-Agent");
    System.Console.WriteLine("=======================");
    System.Console.WriteLine("Figure 21.3");
    System.Console.WriteLine("-----------");

    CellWorld<double> world = CellWorldFactory.CreateCellWorldForFig17_1();
    CellWorldEnvironment environment = new CellWorldEnvironment(
        world.GetCellAt(1, 1),
        world.GetCells(),
        MDPFactory.createTransitionProbabilityFunctionForFigure17_1(world),
        CommonFactory.CreateRandom());

    // Fixed policy the agent follows; terminal cells (4,2) and (4,3)
    // deliberately have no entry. The parallel arrays preserve the same
    // insertion order as listing the Put calls individually.
    IMap<Cell<double>, CellWorldAction> fixedPolicy =
        CollectionFactory.CreateInsertionOrderedMap<Cell<double>, CellWorldAction>();
    int[] px = { 1, 1, 1, 2, 2, 3, 3, 3, 4 };
    int[] py = { 1, 2, 3, 1, 3, 1, 2, 3, 1 };
    CellWorldAction[] pa =
    {
        CellWorldAction.Up, CellWorldAction.Up, CellWorldAction.Right,
        CellWorldAction.Left, CellWorldAction.Right, CellWorldAction.Left,
        CellWorldAction.Up, CellWorldAction.Right, CellWorldAction.Left
    };
    for (int i = 0; i < pa.Length; i++)
    {
        fixedPolicy.Put(world.GetCellAt(px[i], py[i]), pa[i]);
    }

    PassiveADPAgent<Cell<double>, CellWorldAction> padpa = new PassiveADPAgent<Cell<double>, CellWorldAction>(
        fixedPolicy, world.GetCells(), world.GetCellAt(1, 1),
        MDPFactory.createActionsFunctionForFigure17_1(world),
        new ModifiedPolicyEvaluation<Cell<double>, CellWorldAction>(10, 1.0));
    environment.AddAgent(padpa);

    output_utility_learning_rates(padpa, 20, 100, 100, 1);

    System.Console.WriteLine("=========================");
}
/// <summary>
/// Demonstrates Policy Iteration on the 4x3 cell world of Fig 17.1,
/// printing the resulting action for every cell (cf. Fig 17.3 in AIMA3e).
/// </summary>
static void policyIterationDemo()
{
    System.Console.WriteLine("DEMO: Policy Iteration");
    System.Console.WriteLine("======================");
    System.Console.WriteLine("Figure 17.3");
    System.Console.WriteLine("-----------");

    CellWorld<double> world = CellWorldFactory.CreateCellWorldForFig17_1();
    IMarkovDecisionProcess<Cell<double>, CellWorldAction> mdp = MDPFactory.createMDPForFigure17_3(world);
    PolicyIteration<Cell<double>, CellWorldAction> pi = new PolicyIteration<Cell<double>, CellWorldAction>(
        new ModifiedPolicyEvaluation<Cell<double>, CellWorldAction>(50, 1.0));

    IPolicy<Cell<double>, CellWorldAction> policy = pi.policyIteration(mdp);

    // Report every existing cell, (x, y) order; (2,2) was removed from the world.
    int[] xs = { 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4 };
    int[] ys = { 1, 2, 3, 1, 3, 1, 2, 3, 1, 2, 3 };
    for (int i = 0; i < xs.Length; i++)
    {
        System.Console.WriteLine("(" + xs[i] + "," + ys[i] + ") = " + policy.action(world.GetCellAt(xs[i], ys[i])));
    }

    System.Console.WriteLine("=========================");
}
/// <summary>
/// Demonstrates Value Iteration on the 4x3 cell world of Fig 17.1,
/// printing the utility estimate for every cell (cf. Fig 17.3 in AIMA3e).
/// </summary>
static void valueIterationDemo()
{
    System.Console.WriteLine("DEMO: Value Iteration");
    System.Console.WriteLine("=====================");
    System.Console.WriteLine("Figure 17.3");
    System.Console.WriteLine("-----------");

    CellWorld<double> world = CellWorldFactory.CreateCellWorldForFig17_1();
    IMarkovDecisionProcess<Cell<double>, CellWorldAction> mdp = MDPFactory.createMDPForFigure17_3(world);
    ValueIteration<Cell<double>, CellWorldAction> vi = new ValueIteration<Cell<double>, CellWorldAction>(1.0);

    // Iterate until the maximum utility change drops below 0.0001.
    IMap<Cell<double>, double> U = vi.valueIteration(mdp, 0.0001);

    // Report every existing cell, (x, y) order; (2,2) was removed from the world.
    int[] xs = { 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4 };
    int[] ys = { 1, 2, 3, 1, 3, 1, 2, 3, 1, 2, 3 };
    for (int i = 0; i < xs.Length; i++)
    {
        System.Console.WriteLine("(" + xs[i] + "," + ys[i] + ") = " + U.Get(world.GetCellAt(xs[i], ys[i])));
    }

    System.Console.WriteLine("=========================");
}