/// <summary>
/// Rebuilds the TD(lambda) and ADP utility tables on a background thread and
/// records, per state, how the utility value evolves across the policy history.
/// </summary>
/// <remarks>
/// For every entry of <paramref name="policyHistory"/> and every state key of that
/// entry's value table, the learner is restarted from that state and run until the
/// goal point is reached; the resulting utility of the state is appended to
/// <c>TDLambdaUtilityProgress</c> / <c>ADPUtilityProgress</c>. The status bar shows a
/// percentage while this runs, and the utilities are plotted when both passes finish.
/// The worker registers itself in <c>ThreadsPool</c> for its lifetime.
/// </remarks>
/// <param name="policyHistory">
/// Sequence of history entries; only the keys of each entry's <c>Value</c> table are
/// used (as <see cref="Point"/> states). The entry's <c>Key</c> table is not applied.
/// </param>
private void __build_UTable(List<KeyValuePair<Hashtable, Hashtable>> policyHistory)
{
    System.Threading.Thread t = new System.Threading.Thread(new System.Threading.ParameterizedThreadStart((thread) =>
    {
        // NOTE(review): this body runs on a worker thread yet touches toolStripStatus and
        // calls __plot_utility directly; if those reach UI controls they should be
        // marshalled to the UI thread (Control.Invoke/BeginInvoke) -- confirm and fix at form level.
        var origin_txt = this.toolStripStatus.Text;
        try
        {
            this.toolStripStatus.Text = "Calculating utility values...";

            // A learning run ends as soon as the agent stands on the grid's goal point.
            var reachedGoal = new Func<Grid, Point, long, bool>((g, s, step_counter) => { return s == g.GoalPoint; });

            // Appends one utility sample to the per-state series, creating the series on first use.
            Action<Hashtable, Point, float> record = (progress, state, utility) =>
            {
                if (progress.Contains(state))
                    (progress[state] as List<float>).Add(utility);
                else
                    progress.Add(state, new List<float>() { utility });
            };

            // Two passes (TD-lambda, then ADP) over the history, hence the factor 2 in the percentage.
            float c = 0;
            float totalSteps = 2 * policyHistory.Count;

            tdl = new ReinforcementLearning.TDLambda(ql, Properties.Settings.Default.Lambda);
            TDLambdaUtilityProgress = new Hashtable();
            foreach (var epic in policyHistory)
            {
                foreach (Point state in epic.Value.Keys)
                {
                    tdl.InitialState = state;
                    tdl.Learn(reachedGoal);
                    // store td-lambda utility progress for the state
                    record(TDLambdaUtilityProgress, state, (float)tdl.UTable[state]);
                }
                this.toolStripStatus.Text = String.Format("[ {0:F1}% ] Calculating utility values...", (++c / totalSteps) * 100);
            }

            adp = new ReinforcementLearning.ADP(ql);
            ADPUtilityProgress = new Hashtable();
            foreach (var epic in policyHistory)
            {
                foreach (Point state in epic.Value.Keys)
                {
                    adp.InitialState = state;
                    adp.Learn(reachedGoal);
                    // store ADP utility progress for the state
                    record(ADPUtilityProgress, state, (float)adp.UTable[state]);
                }
                this.toolStripStatus.Text = String.Format("[ {0:F1}% ] Calculating utility values...", (++c / totalSteps) * 100);
            }

            __plot_utility(tdl, adp);
        }
        finally
        {
            // Always restore the status bar and unregister, even if learning or plotting failed.
            this.toolStripStatus.Text = origin_txt;
            ThreadsPool.Remove(thread as System.Threading.Thread);
        }
    }));

    // Register before starting: a worker that finishes immediately would otherwise
    // run its Remove before this Add and leave a finished thread in the pool.
    ThreadsPool.Add(t);
    t.Start(t);
}
/// <summary>
/// Handles the "Load" menu item: lets the user pick a <c>.dat</c> file, restores the
/// grid and learner state from it, then redraws the grid, policy and utility plots.
/// </summary>
/// <remarks>
/// The file is read as a sequence of serialized objects in this order: the grid's
/// <c>Grid.BlockStatus[,]</c>, the Q-table, the visited state/action table, the step
/// counter, and then -- only if more data remains in the stream -- the TD(lambda)
/// utility table, the TD(lambda) utility progress, the ADP utility table and the ADP
/// utility progress. All menus are disabled while the handler runs and re-enabled on
/// every exit path, including cancellation and failure.
/// When compiled with <c>__DEBUG_PLOT__</c> the dialog is skipped and "sarsa.dat" is loaded.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void loadToolStripMenuItem_Click(object sender, EventArgs e)
{
    __enable_all_menus(false);
    try
    {
        using (OpenFileDialog sfd = new OpenFileDialog())
        {
            sfd.DefaultExt = "dat";
            sfd.AddExtension = true;
            sfd.Filter = "Data files (*.dat)|*.dat";
#if !__DEBUG_PLOT__
            if (sfd.ShowDialog(this) != System.Windows.Forms.DialogResult.OK)
                return;
#else
            sfd.FileName = "sarsa.dat";
#endif
            // SECURITY NOTE(review): BinaryFormatter can execute arbitrary code when fed a
            // crafted file and is removed from recent .NET versions; only load trusted files
            // and consider migrating the save/load format.
            BinaryFormatter bf = new BinaryFormatter();
            using (var fs = sfd.OpenFile())
            {
                g = new Grid((Grid.BlockStatus[,])bf.Deserialize(fs), this.grid);

                // Learners are created lazily; existing instances are reused and only their tables replaced.
                // NOTE(review): an already existing ql is not rebuilt against the newly loaded grid -- confirm this is intended.
                if (ql == null)
                    ql = new ReinforcementLearning.QLearning(
                        this.g,
                        new List<GridHelper.Directions>(Enum.GetValues(typeof(GridHelper.Directions)).Cast<GridHelper.Directions>()),
                        Properties.Settings.Default.Gamma,
                        Properties.Settings.Default.Alpha);
                if (tdl == null)
                    tdl = new ReinforcementLearning.TDLambda(ql, Properties.Settings.Default.Lambda);
                if (adp == null)
                    adp = new ReinforcementLearning.ADP(ql);

                ql.QTable = (Hashtable)bf.Deserialize(fs);
                ql.VisitedStateActions = (Hashtable)bf.Deserialize(fs);
                ql.StepCounter = (long)bf.Deserialize(fs);

                // Older files end here; each utility section below is read only if data remains.
                if (fs.Position < fs.Length)
                    tdl.UTable = (Hashtable)bf.Deserialize(fs);
                if (fs.Position < fs.Length)
                    this.TDLambdaUtilityProgress = (Hashtable)bf.Deserialize(fs);
                if (fs.Position < fs.Length)
                    adp.UTable = (Hashtable)bf.Deserialize(fs);
                if (fs.Position < fs.Length)
                    this.ADPUtilityProgress = (Hashtable)bf.Deserialize(fs);
            }

            __reload_grid();
            __plot_policy(ql);
            __plot_utility(tdl, adp);
            // This handler loads data; the previous text wrongly reported a save.
            this.toolStripStatus.Text = "The QTable loaded successfully....";
        }
    }
    finally
    {
        // Re-enable the menus even when the dialog is cancelled or loading throws.
        __enable_all_menus(true);
    }
}