/// <summary>
/// Runs the configured number of learning episodes with the algorithm selected by
/// <paramref name="sender"/>, stores a snapshot of the learner's tables after every
/// episode, then plots the resulting policy and starts the utility-table computation.
/// </summary>
/// <param name="sender">
/// The menu item that requested the learning. It is compared against the Q-learning, SARSA,
/// SARSA(lambda) and Q(lambda) menu items to pick the learner; any other value shows an
/// error box and aborts the run.
/// </param>
private void __learn_policy(object sender)
{
    int max_iter = Properties.Settings.Default.MaxLearningIteration;
    long totall_step_counter = 0;
    var Actions = new List<GridHelper.Directions>(
        Enum.GetValues(typeof(GridHelper.Directions)).Cast<GridHelper.Directions>());

    // An episode is over as soon as the current state equals the grid's goal point.
    // The predicate does not depend on the episode, so it is built once.
    var reachedGoal = new Func<Grid, Point, long, bool>((g, s, step_counter) => { return s == g.GoalPoint; });

    // Any previous utility evaluation and episode history is discarded before learning again.
    tdl = null;
    this.policyHistory = new List<KeyValuePair<Hashtable, Hashtable>>();

    for (int i = 0; i < max_iter; i++)
    {
        // A new learner is created for every episode and seeded with the QTable of the
        // previous learner (null when there is none yet).
        var previousQTable = ql == null ? null : ql.QTable;

        if (sender == this.qLearningToolStripMenuItem)
        {
            // Q-learning
            ql = new ReinforcementLearning.QLearning(
                this.g,
                Actions,
                Properties.Settings.Default.Gamma,
                Properties.Settings.Default.Alpha,
                previousQTable);
        }
        else if (sender == this.SARSAToolStripMenuItem)
        {
            // SARSA
            ql = new ReinforcementLearning.SarsaLearning(
                this.g,
                Actions,
                Properties.Settings.Default.Gamma,
                Properties.Settings.Default.Alpha,
                previousQTable);
        }
        else if (sender == this.sARSALambdaToolStripMenuItem)
        {
            // SARSA(lambda)
            ql = new ReinforcementLearning.SarsaLambdaLearning(
                this.g,
                Actions,
                Properties.Settings.Default.Gamma,
                Properties.Settings.Default.Alpha,
                Properties.Settings.Default.Lambda,
                previousQTable);
        }
        else if (sender == this.qLambdaToolStripMenuItem)
        {
            // Q(lambda)
            ql = new ReinforcementLearning.QLambdaLearning(
                this.g,
                Actions,
                Properties.Settings.Default.Gamma,
                Properties.Settings.Default.Alpha,
                Properties.Settings.Default.Lambda,
                previousQTable);
        }
        else
        {
            // fail-safe: the caller is not one of the known learning menu items
            MessageBox.Show("Invalid learning invoke ...", "Ops!!", MessageBoxButtons.OK, MessageBoxIcon.Error);
            return;
        }

        // learn the grid for one episode
        ql.Learn(reachedGoal);

        // Keep clones of the QTable and the visited-states table for this episode.
        // Direct casts are used so that an unexpected clone type fails here instead of
        // silently storing null in the history.
        policyHistory.Add(new KeyValuePair<Hashtable, Hashtable>(
            (Hashtable)ql.QTable.Clone(),
            (Hashtable)ql.VisitedState.Clone()));

        // sum up the steps' counters
        totall_step_counter += ql.StepCounter;

        // report the progress
        this.toolStripStatus.Text = String.Format(
            "{0}% Of {1} episodes passed - Last episode's steps#: {2} - Totall episodes' step#: {3} ",
            (i + 1) * 100 / (max_iter),
            ql.GetType().Name,
            ql.StepCounter,
            totall_step_counter);
    }

    // With a non-positive episode count the loop body never runs, so there may be no
    // learner at all; the summary and the plots below would dereference null.
    if (ql == null)
    {
        this.toolStripStatus.Text = "No learning episode was run; nothing to plot...";
        return;
    }

    this.toolStripStatus.Text = String.Format(
        "The model has learned by {0} with total# {1} of steps...",
        ql.GetType().Name,
        totall_step_counter);

    this.__plot_policy(ql);
    this.__build_UTable(this.policyHistory);
}
/// <summary>
/// Starts a worker thread that, for every recorded episode, runs a TD(lambda) evaluator and
/// then an ADP evaluator from each state visited in that episode, records the utility of the
/// state after each run, and finally plots both utility tables.
/// </summary>
/// <param name="policyHistory">
/// One (QTable, visited-states) pair per learning episode. Only the visited-states table
/// (the pair's Value) is read here; its keys are enumerated as <c>Point</c> states.
/// </param>
private void __build_UTable(List<KeyValuePair<Hashtable, Hashtable>> policyHistory)
{
    System.Threading.Thread t = new System.Threading.Thread(new System.Threading.ParameterizedThreadStart((thread) =>
    {
        var origin_txt = this.toolStripStatus.Text;
        this.toolStripStatus.Text = "Calculating utility values...";

        // A run is over as soon as the current state equals the grid's goal point.
        var reachedGoal = new Func<Grid, Point, long, bool>((g, s, step_counter) => { return s == g.GoalPoint; });

        // Appends one utility sample to the history of a state, creating the history
        // the first time the state is seen.
        Action<Hashtable, Point, float> record = (progress, cell, utility) =>
        {
            var samples = progress[cell] as List<float>;
            if (samples == null)
            {
                samples = new List<float>();
                progress.Add(cell, samples);
            }
            samples.Add(utility);
        };

        // Counts finished episode passes; both passes together make 2 * Count steps.
        float c = 0;

        // The evaluators are kept in locals as well as in the fields: __learn_policy
        // resets the tdl field to null, which must not break a computation in flight.
        var td = new ReinforcementLearning.TDLambda(ql, Properties.Settings.Default.Lambda);
        tdl = td;
        var tdProgress = new Hashtable();
        TDLambdaUtilityProgress = tdProgress;
        foreach (var epic in policyHistory)
        {
            foreach (Point state in epic.Value.Keys)
            {
                td.InitialState = state;
                td.Learn(reachedGoal);
                // store the TD(lambda) utility progress for the state
                record(tdProgress, state, (float)td.UTable[state]);
            }
            this.toolStripStatus.Text = String.Format("[ {0:F1}% ] Calculating utility values...", (++c / (2 * policyHistory.Count)) * 100);
        }

        var adpEvaluator = new ReinforcementLearning.ADP(ql);
        adp = adpEvaluator;
        var adpProgress = new Hashtable();
        ADPUtilityProgress = adpProgress;
        foreach (var epic in policyHistory)
        {
            foreach (Point state in epic.Value.Keys)
            {
                adpEvaluator.InitialState = state;
                adpEvaluator.Learn(reachedGoal);
                // store the ADP utility progress for the state
                record(adpProgress, state, (float)adpEvaluator.UTable[state]);
            }
            this.toolStripStatus.Text = String.Format("[ {0:F1}% ] Calculating utility values...", (++c / (2 * policyHistory.Count)) * 100);
        }

        __plot_utility(td, adpEvaluator);
        this.toolStripStatus.Text = origin_txt;

        // NOTE(review): ThreadsPool is touched from this worker and from the caller's
        // thread without visible synchronization - confirm that is safe for its type.
        ThreadsPool.Remove(thread as System.Threading.Thread);
    }));

    // Register the thread BEFORE starting it. Started first, a fast worker could reach
    // its Remove call before the Add below, leaving a finished thread in the pool.
    ThreadsPool.Add(t);
    t.Start(t);
}
/// <summary>
/// Handles the "Load" menu item: lets the user pick a <c>.dat</c> file, restores the grid,
/// the learner's tables and (when present in the file) the utility tables from it, then
/// redraws the grid, the policy and the utilities.
/// </summary>
/// <param name="sender">The event source (not used).</param>
/// <param name="e">The event data (not used).</param>
private void loadToolStripMenuItem_Click(object sender, EventArgs e)
{
    __enable_all_menus(false);
    try
    {
        using (OpenFileDialog sfd = new OpenFileDialog())
        {
            sfd.DefaultExt = "dat";
            sfd.AddExtension = true;
            sfd.Filter = "Data files (*.dat)|*.dat";
#if !__DEBUG_PLOT__
            // nothing to do when the dialog is cancelled
            if (sfd.ShowDialog(this) != System.Windows.Forms.DialogResult.OK)
                return;
#else
            // debug-plot builds skip the dialog and always load this file
            sfd.FileName = "sarsa.dat";
#endif
            // SECURITY: BinaryFormatter is unsafe on untrusted input - a crafted file can
            // execute arbitrary code while being deserialized. Only open trusted files.
            BinaryFormatter bf = new BinaryFormatter();
            using (var fs = sfd.OpenFile())
            {
                // The objects below are read in the exact order they appear in the file.
                g = new Grid((Grid.BlockStatus[,])bf.Deserialize(fs), this.grid);

                // NOTE(review): an already existing learner was constructed with the
                // previous grid instance; confirm it picks up the grid loaded above.
                if (ql == null)
                    ql = new ReinforcementLearning.QLearning(
                        this.g,
                        new List<GridHelper.Directions>(Enum.GetValues(typeof(GridHelper.Directions)).Cast<GridHelper.Directions>()),
                        Properties.Settings.Default.Gamma,
                        Properties.Settings.Default.Alpha);
                if (tdl == null)
                    tdl = new ReinforcementLearning.TDLambda(
                        ql,
                        Properties.Settings.Default.Lambda);
                if (adp == null)
                    adp = new ReinforcementLearning.ADP(
                        ql);

                ql.QTable = (Hashtable)bf.Deserialize(fs);
                ql.VisitedStateActions = (Hashtable)bf.Deserialize(fs);
                ql.StepCounter = (long)bf.Deserialize(fs);

                // Files written without utility data simply end here; each optional
                // section is read only while there are bytes left in the stream.
                if (fs.Position < fs.Length)
                    tdl.UTable = (Hashtable)bf.Deserialize(fs);
                if (fs.Position < fs.Length)
                    this.TDLambdaUtilityProgress = (Hashtable)bf.Deserialize(fs);
                if (fs.Position < fs.Length)
                    adp.UTable = (Hashtable)bf.Deserialize(fs);
                if (fs.Position < fs.Length)
                    this.ADPUtilityProgress = (Hashtable)bf.Deserialize(fs);
            }

            __reload_grid();
            __plot_policy(ql);
            __plot_utility(tdl, adp);
            // this is the load handler, so report a load (the text used to say "saved")
            this.toolStripStatus.Text = "The QTable loaded successfully....";
        }
    }
    finally
    {
        // Re-enable the menus even when the dialog is cancelled or the file cannot be
        // read; otherwise a failed load would leave the UI with all menus disabled.
        __enable_all_menus(true);
    }
}