/// <summary>
/// Demonstrates a <c>PassiveTDAgent</c> that follows a fixed policy in the cell
/// world produced by <c>CellWorldFactory.CreateCellWorldForFig17_1</c>, then
/// prints its learning curves through <c>output_utility_learning_rates</c>.
/// </summary>
static void passiveTDAgentDemo()
{
    CellWorld<double> world = CellWorldFactory.CreateCellWorldForFig17_1();
    CellWorldEnvironment environment = new CellWorldEnvironment(
        world.GetCellAt(1, 1),
        world.GetCells(),
        MDPFactory.createTransitionProbabilityFunctionForFigure17_1(world),
        CommonFactory.CreateRandom());

    // Fixed policy as two parallel tables: row i of policyCells holds the
    // (x, y) arguments for GetCellAt, policyMoves[i] the action taken there.
    // Cells (2,2), (4,2) and (4,3) intentionally get no entry.
    int[,] policyCells =
    {
        { 1, 1 }, { 1, 2 }, { 1, 3 },
        { 2, 1 }, { 2, 3 },
        { 3, 1 }, { 3, 2 }, { 3, 3 },
        { 4, 1 }
    };
    CellWorldAction[] policyMoves =
    {
        CellWorldAction.Up, CellWorldAction.Up, CellWorldAction.Right,
        CellWorldAction.Left, CellWorldAction.Right,
        CellWorldAction.Left, CellWorldAction.Up, CellWorldAction.Right,
        CellWorldAction.Left
    };

    IMap<Cell<double>, CellWorldAction> policy =
        CollectionFactory.CreateMap<Cell<double>, CellWorldAction>();
    for (int i = 0; i < policyMoves.Length; i++)
    {
        policy.Put(world.GetCellAt(policyCells[i, 0], policyCells[i, 1]), policyMoves[i]);
    }

    // NOTE(review): 0.2 and 1.0 are presumably the learning rate and discount
    // factor - confirm against the PassiveTDAgent constructor.
    PassiveTDAgent<Cell<double>, CellWorldAction> agent =
        new PassiveTDAgent<Cell<double>, CellWorldAction>(policy, 0.2, 1.0);
    environment.AddAgent(agent);

    // numRuns = 20, numTrialsPerRun = 500, rmseTrialsToReport = 100, reportEveryN = 1.
    output_utility_learning_rates(agent, 20, 500, 100, 1);
}
/// <summary>
/// Demonstrates a <c>QLearningAgent</c> in the cell world produced by
/// <c>CellWorldFactory.CreateCellWorldForFig17_1</c>, then prints its learning
/// curves through <c>output_utility_learning_rates</c>.
/// </summary>
static void qLearningAgentDemo()
{
    CellWorld<double> world = CellWorldFactory.CreateCellWorldForFig17_1();

    // Environment inputs, gathered in the same order the constructor call
    // originally evaluated them.
    var startCell = world.GetCellAt(1, 1);
    var allCells = world.GetCells();
    var transitionModel = MDPFactory.createTransitionProbabilityFunctionForFigure17_1(world);
    var random = CommonFactory.CreateRandom();
    CellWorldEnvironment environment =
        new CellWorldEnvironment(startCell, allCells, transitionModel, random);

    // NOTE(review): the numeric arguments (0.2, 1.0, 5, 2.0) are passed through
    // as-is; their meaning is defined by the QLearningAgent constructor, which
    // is not visible here - confirm before tuning.
    var actionsFunction = MDPFactory.createActionsFunctionForFigure17_1(world);
    QLearningAgent<Cell<double>, CellWorldAction> learner =
        new QLearningAgent<Cell<double>, CellWorldAction>(
            actionsFunction, CellWorldAction.None, 0.2, 1.0, 5, 2.0);
    environment.AddAgent(learner);

    // numRuns = 20, numTrialsPerRun = 10000, rmseTrialsToReport = 500, reportEveryN = 20.
    output_utility_learning_rates(learner, 20, 10000, 500, 20);
}
/// <summary>
/// Prints the "Passive-ADP-Agent" demo banner, runs a <c>PassiveADPAgent</c>
/// under a fixed policy in the cell world produced by
/// <c>CellWorldFactory.CreateCellWorldForFig17_1</c>, and prints its learning
/// curves through <c>output_utility_learning_rates</c>.
/// </summary>
static void passiveADPAgentDemo()
{
    // Banner.
    System.Console.WriteLine("=======================");
    System.Console.WriteLine("DEMO: Passive-ADP-Agent");
    System.Console.WriteLine("=======================");
    System.Console.WriteLine("Figure 21.3");
    System.Console.WriteLine("-----------");

    CellWorld<double> world = CellWorldFactory.CreateCellWorldForFig17_1();
    CellWorldEnvironment environment = new CellWorldEnvironment(
        world.GetCellAt(1, 1),
        world.GetCells(),
        MDPFactory.createTransitionProbabilityFunctionForFigure17_1(world),
        CommonFactory.CreateRandom());

    // Fixed policy as two parallel tables: row i of policyCells holds the
    // (x, y) arguments for GetCellAt, policyMoves[i] the action taken there.
    // Entries are inserted in table order into an insertion-ordered map.
    int[,] policyCells =
    {
        { 1, 1 }, { 1, 2 }, { 1, 3 },
        { 2, 1 }, { 2, 3 },
        { 3, 1 }, { 3, 2 }, { 3, 3 },
        { 4, 1 }
    };
    CellWorldAction[] policyMoves =
    {
        CellWorldAction.Up, CellWorldAction.Up, CellWorldAction.Right,
        CellWorldAction.Left, CellWorldAction.Right,
        CellWorldAction.Left, CellWorldAction.Up, CellWorldAction.Right,
        CellWorldAction.Left
    };

    IMap<Cell<double>, CellWorldAction> policy =
        CollectionFactory.CreateInsertionOrderedMap<Cell<double>, CellWorldAction>();
    for (int i = 0; i < policyMoves.Length; i++)
    {
        policy.Put(world.GetCellAt(policyCells[i, 0], policyCells[i, 1]), policyMoves[i]);
    }

    // NOTE(review): ModifiedPolicyEvaluation(10, 1.0) - presumably iteration
    // count and discount factor; confirm against its constructor.
    PassiveADPAgent<Cell<double>, CellWorldAction> agent =
        new PassiveADPAgent<Cell<double>, CellWorldAction>(
            policy,
            world.GetCells(),
            world.GetCellAt(1, 1),
            MDPFactory.createActionsFunctionForFigure17_1(world),
            new ModifiedPolicyEvaluation<Cell<double>, CellWorldAction>(10, 1.0));
    environment.AddAgent(agent);

    // numRuns = 20, numTrialsPerRun = 100, rmseTrialsToReport = 100, reportEveryN = 1.
    output_utility_learning_rates(agent, 20, 100, 100, 1);

    System.Console.WriteLine("=========================");
}
/// <summary>
/// Runs <paramref name="reinforcementAgent"/> through repeated trials in a fresh
/// Figure 17.1 cell-world environment and prints tab-separated rows to the
/// console: the utility estimates of six cells over the last run, and the
/// root-mean-square error of the estimate for cell (1,1) across all runs.
/// </summary>
/// <param name="reinforcementAgent">Agent to evaluate; it is reset at the start of every run.</param>
/// <param name="numRuns">Number of independent runs; must be at least 1.</param>
/// <param name="numTrialsPerRun">Number of trials executed in each run.</param>
/// <param name="rmseTrialsToReport">
/// Number of sampled trials for which an RMSE value is printed; may not exceed
/// <c>numTrialsPerRun / reportEveryN</c>.
/// </param>
/// <param name="reportEveryN">Sampling stride: utilities are recorded on every N-th trial; must be at least 1.</param>
/// <exception cref="IllegalArgumentException">
/// Thrown when <paramref name="reportEveryN"/> or <paramref name="numRuns"/> is
/// less than 1, or when more RMSE trials are requested than are sampled.
/// </exception>
protected static void output_utility_learning_rates(
    ReinforcementAgent <Cell <double>, CellWorldAction> reinforcementAgent,
    int numRuns, int numTrialsPerRun, int rmseTrialsToReport, int reportEveryN)
{
    // reportEveryN is used as a divisor and a modulus below, so reject
    // non-positive values up front instead of failing with a raw
    // DivideByZeroException inside the bounds check.
    if (reportEveryN < 1)
    {
        throw new IllegalArgumentException(
            "reportEveryN must be at least 1, was " + reportEveryN);
    }
    // The report reads the last run and averages over all runs, so at least
    // one run is required.
    if (numRuns < 1)
    {
        throw new IllegalArgumentException(
            "numRuns must be at least 1, was " + numRuns);
    }

    int reportedTrials = numTrialsPerRun / reportEveryN;
    if (rmseTrialsToReport > reportedTrials)
    {
        throw new IllegalArgumentException("Requesting to report too many RMSE trials, max allowed for args is "
                + reportedTrials);
    }

    CellWorld <double> cw = CellWorldFactory.CreateCellWorldForFig17_1();
    CellWorldEnvironment cwe = new CellWorldEnvironment(
        cw.GetCellAt(1, 1),
        cw.GetCells(),
        MDPFactory.createTransitionProbabilityFunctionForFigure17_1(cw),
        CommonFactory.CreateRandom());
    cwe.AddAgent(reinforcementAgent);

    // run index -> utility maps sampled during that run, in trial order.
    IMap <int, ICollection <IMap <Cell <double>, double> > > runs =
        CollectionFactory.CreateInsertionOrderedMap <int, ICollection <IMap <Cell <double>, double> > >();
    for (int r = 0; r < numRuns; r++)
    {
        reinforcementAgent.reset();
        ICollection <IMap <Cell <double>, double> > trials =
            CollectionFactory.CreateQueue <IMap <Cell <double>, double> >();
        for (int t = 0; t < numTrialsPerRun; t++)
        {
            cwe.executeTrial();
            if (0 == t % reportEveryN)
            {
                // NOTE(review): assumes getUtility() hands back a snapshot and
                // not a live map that later trials keep mutating - confirm.
                trials.Add(reinforcementAgent.getUtility());
            }
        }
        runs.Put(r, trials);
    }

    // The reported cells never change, so look them up once.
    Cell <double> cell4_3 = cw.GetCellAt(4, 3);
    Cell <double> cell3_3 = cw.GetCellAt(3, 3);
    Cell <double> cell1_3 = cw.GetCellAt(1, 3);
    Cell <double> cell1_1 = cw.GetCellAt(1, 1);
    Cell <double> cell3_2 = cw.GetCellAt(3, 2);
    Cell <double> cell2_1 = cw.GetCellAt(2, 1);

    IStringBuilder v4_3 = TextFactory.CreateStringBuilder();
    IStringBuilder v3_3 = TextFactory.CreateStringBuilder();
    IStringBuilder v1_3 = TextFactory.CreateStringBuilder();
    IStringBuilder v1_1 = TextFactory.CreateStringBuilder();
    IStringBuilder v3_2 = TextFactory.CreateStringBuilder();
    IStringBuilder v2_1 = TextFactory.CreateStringBuilder();

    // Learning curves are taken from the last run only.
    ICollection <IMap <Cell <double>, double> > lastRun = runs.Get(numRuns - 1);
    for (int t = 0; t < reportedTrials; t++)
    {
        IMap <Cell <double>, double> u = lastRun.Get(t);
        v4_3.Append(utilityOrZero(u, cell4_3) + "\t");
        v3_3.Append(utilityOrZero(u, cell3_3) + "\t");
        v1_3.Append(utilityOrZero(u, cell1_3) + "\t");
        v1_1.Append(utilityOrZero(u, cell1_1) + "\t");
        v3_2.Append(utilityOrZero(u, cell3_2) + "\t");
        v2_1.Append(utilityOrZero(u, cell2_1) + "\t");
    }

    IStringBuilder rmseValues = TextFactory.CreateStringBuilder();
    for (int t = 0; t < rmseTrialsToReport; t++)
    {
        // Root Mean Square Error of the utility estimate for (1,1) at sampled
        // trial t, taken across all runs; 0.705 is the reference value the
        // estimate is compared against.
        double xSsquared = 0;
        for (int r = 0; r < numRuns; r++)
        {
            IMap <Cell <double>, double> u = runs.Get(r).Get(t);
            // NOTE(review): unlike the curves above, this read is not guarded
            // by ContainsKey; what happens for a missing (1,1) entry depends
            // on IMap.Get - confirm the intended handling.
            double val1_1 = u.Get(cell1_1);
            xSsquared += System.Math.Pow(0.705 - val1_1, 2);
        }
        double rmse = System.Math.Sqrt(xSsquared / runs.Size());
        rmseValues.Append(rmse);
        rmseValues.Append("\t");
    }

    System.Console
            .WriteLine("Note: You may copy and paste the following lines into a spreadsheet to generate graphs of learning rate and RMS error in utility:");
    System.Console.WriteLine("(4,3)" + "\t" + v4_3);
    System.Console.WriteLine("(3,3)" + "\t" + v3_3);
    System.Console.WriteLine("(1,3)" + "\t" + v1_3);
    System.Console.WriteLine("(1,1)" + "\t" + v1_1);
    System.Console.WriteLine("(3,2)" + "\t" + v3_2);
    System.Console.WriteLine("(2,1)" + "\t" + v2_1);
    System.Console.WriteLine("RMSeiu" + "\t" + rmseValues);
}

/// <summary>
/// Returns the utility recorded for <paramref name="cell"/>, or 0.0 when the
/// map has no entry for it.
/// </summary>
private static double utilityOrZero(IMap <Cell <double>, double> utilities, Cell <double> cell)
{
    return utilities.ContainsKey(cell) ? utilities.Get(cell) : 0.0;
}