/// <summary>
/// Builds the default grid world: four axis-aligned unit-offset actions, a fixed
/// start cell, and an e-greedy model-free agent placed at that cell.
/// </summary>
public GridWorld()
{
    // The four cardinal moves as two-component offsets.
    // Diagonal offsets ({-1,-1}, {1,-1}, {-1,1}, {1,1}) are deliberately left disabled.
    availableActions = new List<int[]>
    {
        new int[] { -1, 0 },
        new int[] { 0, -1 },
        new int[] { 1, 0 },
        new int[] { 0, 1 },
    };

    // set the default agent
    startState = new int[] { 1, 1 };
    Policy<int[], int[]> explorationPolicy = new EGreedyPolicy<int[], int[]>();
    ActionValue<int[], int[]> actionValue = new ModelFreeValue<int[], int[]>(
        new IntArrayComparer(), new IntArrayComparer(), availableActions, startState);
    agent = new Agent<int[], int[]>(startState, explorationPolicy, actionValue, availableActions);
}
/// <summary>
/// Task-switch experiment entry point: runs <c>runs</c> GridWorld agents in parallel
/// against map10.bmp until each reaches <c>goalCt</c> goals, switches every world to
/// map10a.bmp for another <c>goalCt</c> goals, then writes the steps-to-goal and
/// cumulative model-use series to CSV (one column per run, one row per goal).
/// </summary>
static void Main(string[] args)
{
    int wS = 48;     // WorldSize p[0]
    bool RL = true;  // RLMethod p[1]; 'F' for QL, 'T' For MB
    double a = 0.1;  // alpha p[2]
    double g = 0.8;  // Gamma p[3]
    int tO = wS;     // timeOut p[4]
    double mR = 1;   // Manager Rewards p[5]
    Policy<int[], int[]> cP = new EGreedyPolicy<int[], int[]>(); // chosen Policy p[6]
    // NOTE(review): wS/RL/a/g/tO/mR/cP feed only the commented-out addAgent call below;
    // they are kept so that call can be re-enabled unchanged.

    // task-switch test
    int runs = 48;
    int goalCt = 10;
    List<double>[] stepsToGoal = new List<double>[runs];
    List<double>[] cumModelUse = new List<double>[runs];

    System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch();
    sw.Start();

    ParallelOptions op = new ParallelOptions() { MaxDegreeOfParallelism = 8 };
    Parallel.For(0, runs, op, run =>
    {
        cumModelUse[run] = new List<double>();

        // instantiate world and load the first map
        World thisWorld = new GridWorld();
        thisWorld.Load("C:\\Users\\Eric\\Google Drive\\Lethbridge Projects\\map10.bmp");

        // add agent
        System.Threading.Thread.Sleep(run * 100); // staggered instantiation to avoid identical random number generators
        //thisWorld.addAgent(typeof(EGreedyPolicy<,>), typeof(MultiGridWorldModel<,>), 8);
        // thisWorld.addAgent(typeof(EGreedyPolicy<,>), typeof(Boss<,>), wS, RL, a, g, tO, mR, cP);

        // first task: step until goalCt goals have been recorded
        PerformanceStats stats = new PerformanceStats();
        while (stats.stepsToGoal.Count <= goalCt)
        {
            stats = thisWorld.stepAgent("");
            if (stats.stepsToGoal.Last() == 0)
            {
                cumModelUse[run].Add(stats.modelAccesses + stats.modelUpdates);
                Console.WriteLine("run " + run.ToString() + " goal count: " + stats.stepsToGoal.Count);
            }
        }

        // switch task, then run again for another goalCt goals
        thisWorld.Load("C:\\Users\\Eric\\Google Drive\\Lethbridge Projects\\map10a.bmp");
        while (stats.stepsToGoal.Count <= goalCt * 2)
        {
            stats = thisWorld.stepAgent("");
            if (stats.stepsToGoal.Last() == 0)
            {
                cumModelUse[run].Add(stats.modelAccesses + stats.modelUpdates);
                Console.WriteLine("run " + run.ToString() + " goal count: " + stats.stepsToGoal.Count);
            }
        }

        stepsToGoal[run] = stats.stepsToGoal;
    });

    // FIX: the stopwatch was started but its result was never read — report it.
    sw.Stop();
    Console.WriteLine("elapsed: " + sw.Elapsed.ToString());

    // FIX: the two verbatim-duplicated CSV loops are now one helper, and the
    // writers are disposed deterministically (previously leaked on exception).
    WriteSeriesCsv("C:\\Users\\Eric\\Google Drive\\Lethbridge Projects\\stepsToGoal.csv", stepsToGoal);
    WriteSeriesCsv("C:\\Users\\Eric\\Google Drive\\Lethbridge Projects\\modelUse.csv", cumModelUse);
}

/// <summary>
/// Writes one CSV row per sample index i; column j holds allSeries[j][i].
/// Row count comes from the first series — all series are assumed equally long,
/// matching the original inline code. TODO(review): confirm that assumption holds
/// for cumModelUse when runs terminate at different goal counts.
/// </summary>
private static void WriteSeriesCsv(string path, List<double>[] allSeries)
{
    using (System.IO.StreamWriter writer = new System.IO.StreamWriter(path))
    {
        for (int i = 0; i < allSeries[0].Count; i++)
        {
            List<string> line = new List<string>();
            foreach (List<double> series in allSeries)
            {
                line.Add(series[i].ToString());
            }
            writer.WriteLine(string.Join(",", line));
        }
        writer.Flush();
    }
}
/// <summary>
/// Click handler: adds a feudal Boss agent to the world using default
/// hyper-parameters, then appends a new line series to chart1 for it.
/// </summary>
private void button2_Click(object sender, EventArgs e)
{
    // FIX: removed the large commented-out console-prompt block (dead code) and the
    // declare-then-assign split it forced; defaults are now initialized directly.
    // Re-add interactive prompting here if runtime configuration is ever needed.
    int wS = 16;     // WorldSize p[0]
    bool RL = false; // RLMethod p[1]; 'F' for QL, 'T' For MB
    double a = 0.5;  // alpha p[2]
    double g = 0.5;  // Gamma p[3]
    int tO = wS;     // timeOut p[4]
    double mR = 1;   // Manager Rewards p[5]
    Policy<int[], int[]> cP = new EGreedyPolicy<int[], int[]>(); // chosen Policy p[6]

    world.addAgent(typeof(EGreedyPolicy<,>), typeof(feudalRL_Library.Boss<,>), wS, RL, a, g, tO, mR, cP);

    // Name the new series after the last existing one with a "1" suffix and draw it as a line.
    chart1.Series.Add(chart1.Series.Last().Name + "1");
    chart1.Series.Last().ChartType = System.Windows.Forms.DataVisualization.Charting.SeriesChartType.Line;
}
/// <summary>
/// Task-switch experiment entry point: runs <c>runs</c> GridWorld agents in parallel
/// on map10.bmp until each reaches <c>goalCt</c> goals, switches to map10a.bmp for
/// another <c>goalCt</c> goals, and writes steps-to-goal plus cumulative model-use
/// series to CSV (one column per run, one row per goal).
/// </summary>
static void Main(string[] args)
{
    int wS = 48;     // WorldSize p[0]
    bool RL = true;  // RLMethod p[1]; 'F' for QL, 'T' For MB
    double a = 0.1;  // alpha p[2]
    double g = 0.8;  // Gamma p[3]
    int tO = wS;     // timeOut p[4]
    double mR = 1;   // Manager Rewards p[5]
    Policy<int[], int[]> cP = new EGreedyPolicy<int[], int[]>(); // chosen Policy p[6]
    // NOTE(review): the parameters above are consumed only by the commented-out
    // addAgent call below; kept so it can be re-enabled unchanged.

    // task-switch test
    int runs = 48;
    int goalCt = 10;
    List<double>[] stepsToGoal = new List<double>[runs];
    List<double>[] cumModelUse = new List<double>[runs];

    System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch();
    sw.Start();

    ParallelOptions op = new ParallelOptions() { MaxDegreeOfParallelism = 8 };
    Parallel.For(0, runs, op, run =>
    {
        cumModelUse[run] = new List<double>();

        // instantiate world and load the first map
        World thisWorld = new GridWorld();
        thisWorld.Load("C:\\Users\\Eric\\Google Drive\\Lethbridge Projects\\map10.bmp");

        // add agent
        System.Threading.Thread.Sleep(run * 100); // staggered instantiation to avoid identical random number generators
        //thisWorld.addAgent(typeof(EGreedyPolicy<,>), typeof(MultiGridWorldModel<,>), 8);
        // thisWorld.addAgent(typeof(EGreedyPolicy<,>), typeof(Boss<,>), wS, RL, a, g, tO, mR, cP);

        // first task: step until goalCt goals are logged
        PerformanceStats stats = new PerformanceStats();
        while (stats.stepsToGoal.Count <= goalCt)
        {
            stats = thisWorld.stepAgent("");
            if (stats.stepsToGoal.Last() == 0)
            {
                cumModelUse[run].Add(stats.modelAccesses + stats.modelUpdates);
                Console.WriteLine("run " + run.ToString() + " goal count: " + stats.stepsToGoal.Count);
            }
        }

        // switch task and run again for another goalCt goals
        thisWorld.Load("C:\\Users\\Eric\\Google Drive\\Lethbridge Projects\\map10a.bmp");
        while (stats.stepsToGoal.Count <= goalCt * 2)
        {
            stats = thisWorld.stepAgent("");
            if (stats.stepsToGoal.Last() == 0)
            {
                cumModelUse[run].Add(stats.modelAccesses + stats.modelUpdates);
                Console.WriteLine("run " + run.ToString() + " goal count: " + stats.stepsToGoal.Count);
            }
        }

        stepsToGoal[run] = stats.stepsToGoal;
    });

    // FIX: the stopwatch was started but its elapsed time was never reported.
    sw.Stop();
    Console.WriteLine("elapsed: " + sw.Elapsed.ToString());

    // FIX: writers are now disposed via using blocks (previously leaked on exception).
    using (System.IO.StreamWriter writer =
        new System.IO.StreamWriter("C:\\Users\\Eric\\Google Drive\\Lethbridge Projects\\stepsToGoal.csv"))
    {
        for (int i = 0; i < stepsToGoal[0].Count; i++)
        {
            List<string> line = new List<string>();
            foreach (List<double> series in stepsToGoal)
            {
                line.Add(series[i].ToString());
            }
            writer.WriteLine(string.Join(",", line));
        }
        writer.Flush();
    }

    using (System.IO.StreamWriter writer =
        new System.IO.StreamWriter("C:\\Users\\Eric\\Google Drive\\Lethbridge Projects\\modelUse.csv"))
    {
        for (int i = 0; i < cumModelUse[0].Count; i++)
        {
            List<string> line = new List<string>();
            foreach (List<double> series in cumModelUse)
            {
                line.Add(series[i].ToString());
            }
            writer.WriteLine(string.Join(",", line));
        }
        writer.Flush();
    }
}
/// <summary>
/// Builds the mountain-car task: two throttle actions (-1 and +1), a random start
/// state, a model-free e-greedy agent over the discretized (position, velocity)
/// state, and a pre-rendered hill-profile bitmap.
/// </summary>
public MountainCar()
{
    availableActions.Add(-1);
    //availableActions.Add(0); // zero-throttle action deliberately left disabled
    availableActions.Add(1);
    rndStartState(out _position, out _velocity);

    // set the default agent
    Policy<int[], int> policy = new EGreedyPolicy<int[], int>();
    ActionValue<int[], int> value = new ModelFreeValue<int[], int>(
        new IntArrayComparer(),
        EqualityComparer<int>.Default,
        availableActions,
        discretizeState(_position, _velocity));
    agent = new Agent<int[], int>(discretizeState(_position, _velocity), policy, value, availableActions);

    // Render the hill profile: sample sin(3 * position) starting from position -1.2
    // and plot one ForestGreen pixel per column, amplitude scaled into [0, 100].
    // NOTE(review): the loop draws 17 columns (i < 17) while the position step divides
    // the [-1.2, 0.5] span into (18 - 1) segments — possible off-by-one; confirm
    // against the bitmap's actual width.
    for (int column = 0; column < 17; column++)
    {
        double position = (0.5 + 1.2) / (18 - 1) * column - 1.2;
        double amplitude = Math.Sin(3 * position) * 50 + 50;
        hill.SetPixel(column, (int)(100 - amplitude), Color.ForestGreen);
    }
}
/// <summary>
/// Builds the taxi task: six discrete actions (four moves plus pickup/drop), a
/// placeholder null drop site, and a model-free e-greedy agent at the start state.
/// </summary>
public Taxi()
{
    dropSites.Add(null);
    availableActions.Add(1); // left
    availableActions.Add(2); // up
    availableActions.Add(3); // right
    availableActions.Add(4); // down
    availableActions.Add(5); // pickup
    availableActions.Add(6); // drop

    // set the default agent
    // FIX: the value table and the agent were seeded with DIFFERENT start states
    // ({1,2,1,10} vs {1,2,10,1} — last two components transposed). A single shared
    // start state is now used for both, matching the GridWorld constructor's pattern;
    // the agent's original value is taken as authoritative.
    // TODO(review): confirm which component ordering matches the state encoding.
    int[] startState = new int[4] { 1, 2, 10, 1 };
    Policy<int[], int> policy = new EGreedyPolicy<int[], int>();
    ActionValue<int[], int> value = new ModelFreeValue<int[], int>(
        new IntArrayComparer(), EqualityComparer<int>.Default, availableActions, startState);
    agent = new Agent<int[], int>(startState, policy, value, availableActions);
}