Example #1
0
 /// <summary>
 /// Runs <c>MaxLearningIteration</c> learning episodes for the learner selected
 /// via the invoking menu item (Q, SARSA, SARSA(λ) or Q(λ)), re-seeding each
 /// episode with the previous episode's QTable so knowledge accumulates, then
 /// plots the final policy and kicks off utility-table construction.
 /// </summary>
 /// <param name="sender">The tool-strip menu item that triggered the learning run.</param>
 private void __learn_policy(object sender)
 {
     int max_iter = Properties.Settings.Default.MaxLearningIteration;
     // Guard the degenerate configuration: with no episodes `ql` may still be
     // null below, which would NRE at the final status/plot calls.
     if (max_iter < 1) { MessageBox.Show("MaxLearningIteration must be at least 1...", "Ops!!", MessageBoxButtons.OK, MessageBoxIcon.Error); return; }
     long total_step_counter = 0;
     var Actions = new List<GridHelper.Directions>(Enum.GetValues(typeof(GridHelper.Directions)).Cast<GridHelper.Directions>());
     tdl = null;
     this.policyHistory = new List<KeyValuePair<Hashtable, Hashtable>>();
     for (int i = 0; i < max_iter; i++)
     {
         // if the Q-Learning has been invoked?
         if (sender == this.qLearningToolStripMenuItem)
             // init the Q-learning instance (seeded with the previous episode's QTable, if any)
             ql = new ReinforcementLearning.QLearning(
                  this.g,
                  Actions,
                  Properties.Settings.Default.Gamma,
                  Properties.Settings.Default.Alpha,
                  ql == null ? null : ql.QTable);
         // if the SARSA-Learning has been invoked?
         else if (sender == this.SARSAToolStripMenuItem)
             // init the SARSA-learning instance
             ql = new ReinforcementLearning.SarsaLearning(
                  this.g,
                  Actions,
                  Properties.Settings.Default.Gamma,
                  Properties.Settings.Default.Alpha,
                  ql == null ? null : ql.QTable);
         // if the SARSA(λ)-Learning has been invoked?
         else if (sender == this.sARSALambdaToolStripMenuItem)
             ql = new ReinforcementLearning.SarsaLambdaLearning(
                  this.g,
                  Actions,
                  Properties.Settings.Default.Gamma,
                  Properties.Settings.Default.Alpha,
                  Properties.Settings.Default.Lambda,
                  ql == null ? null : ql.QTable);
         // if the Q(λ)-Learning has been invoked?
         else if (sender == this.qLambdaToolStripMenuItem)
             ql = new ReinforcementLearning.QLambdaLearning(
                  this.g,
                  Actions,
                  Properties.Settings.Default.Gamma,
                  Properties.Settings.Default.Alpha,
                  Properties.Settings.Default.Lambda,
                  ql == null ? null : ql.QTable);
         // fail-safe: unknown sender means a wiring bug in the menu handlers
         else { MessageBox.Show("Invalid learning invoke ...", "Ops!!", MessageBoxButtons.OK, MessageBoxIcon.Error); return; }
         // learn the grid; an episode terminates once the goal state is reached
         ql.Learn(new Func<Grid, Point, long, bool>((g, s, step_counter) => { return s == g.GoalPoint; }));
         // clone the QTable and the visited-state table so later episodes don't mutate this snapshot
         policyHistory.Add(new KeyValuePair<Hashtable, Hashtable>(ql.QTable.Clone() as System.Collections.Hashtable, ql.VisitedState.Clone() as System.Collections.Hashtable));
         // sum-up the steps' counters
         total_step_counter += ql.StepCounter;
         // indicate the progress
         this.toolStripStatus.Text = String.Format("{0}% Of {1} episodes passed - Last episode's steps#: {2} - Total episodes' step#: {3} ", (i + 1) * 100 / (max_iter), ql.GetType().Name, ql.StepCounter, total_step_counter);
     }
     this.toolStripStatus.Text = String.Format("The model has learned by {0} with total# {1} of steps...", ql.GetType().Name, total_step_counter);
     this.__plot_policy(ql);
     this.__build_UTable(this.policyHistory);
 }
Example #2
0
        /// <summary>
        /// Computes utility values for every visited state of every recorded episode,
        /// first with TD(λ) and then with ADP, on a background thread; progress for
        /// each state is accumulated in <c>TDLambdaUtilityProgress</c> /
        /// <c>ADPUtilityProgress</c> and the results are plotted when done.
        /// </summary>
        /// <param name="policyHistory">Per-episode (QTable, VisitedState) snapshots recorded by <c>__learn_policy</c>.</param>
        private void __build_UTable(List<KeyValuePair<Hashtable, Hashtable>> policyHistory)
        {
            // Capture the current status text on the UI thread, before the worker starts.
            var origin_txt = this.toolStripStatus.Text;
            System.Threading.Thread t = new System.Threading.Thread(new System.Threading.ParameterizedThreadStart((thread) =>
            {
                // WinForms controls must only be touched from the UI thread, so all
                // status updates from this worker are marshalled via Control.Invoke.
                Action<string> setStatus = (txt) => this.Invoke(new Action(() => { this.toolStripStatus.Text = txt; }));
                setStatus("Calculating utility values...");
                tdl = new ReinforcementLearning.TDLambda(ql, Properties.Settings.Default.Lambda);
                TDLambdaUtilityProgress = new Hashtable();
                // `c` counts processed episodes across both passes (hence the 2x divisor below).
                float c = 0;
                foreach (var epic in policyHistory)
                {
                    foreach (Point state in epic.Value.Keys)
                    {
                        tdl.InitialState = state;
                        tdl.Learn(new Func<Grid, Point, long, bool>((g, s, step_counter) => { return s == g.GoalPoint; }));
                        // store the td-lambda utility progress for the state
                        if (TDLambdaUtilityProgress.Contains(state))
                            (TDLambdaUtilityProgress[state] as List<float>).Add((float)tdl.UTable[state]);
                        else
                            TDLambdaUtilityProgress.Add(state, new List<float>() { (float)tdl.UTable[state] });
                    }
                    setStatus(String.Format("[ {0:F1}% ] Calculating utility values...", (++c / (2 * policyHistory.Count)) * 100));
                }
                adp = new ReinforcementLearning.ADP(ql);
                ADPUtilityProgress = new Hashtable();
                foreach (var epic in policyHistory)
                {
                    foreach (Point state in epic.Value.Keys)
                    {
                        adp.InitialState = state;
                        adp.Learn(new Func<Grid, Point, long, bool>((g, s, step_counter) => { return s == g.GoalPoint; }));
                        // store the ADP utility progress for the state
                        if (ADPUtilityProgress.Contains(state))
                            (ADPUtilityProgress[state] as List<float>).Add((float)adp.UTable[state]);
                        else
                            ADPUtilityProgress.Add(state, new List<float>() { (float)adp.UTable[state] });
                    }
                    setStatus(String.Format("[ {0:F1}% ] Calculating utility values...", (++c / (2 * policyHistory.Count)) * 100));
                }
                // Plotting touches controls too, so it is marshalled to the UI thread as well.
                this.Invoke(new Action(() => __plot_utility(tdl, adp)));
                setStatus(origin_txt);
                ThreadsPool.Remove(thread as System.Threading.Thread);
            }));
            // Register the thread BEFORE starting it; otherwise a fast worker could
            // call ThreadsPool.Remove before the Add below runs.
            ThreadsPool.Add(t);
            t.Start(t);
        }
Example #3
0
        /// <summary>
        /// Loads a previously-saved grid and learning state from a *.dat file,
        /// (re)creating the Q/TD(λ)/ADP learner instances as needed, then refreshes
        /// the grid and the policy/utility plots.
        /// </summary>
        private void loadToolStripMenuItem_Click(object sender, EventArgs e)
        {
            __enable_all_menus(false);
            try
            {
                using (OpenFileDialog ofd = new OpenFileDialog())
                {
                    ofd.DefaultExt = "dat";
                    ofd.AddExtension = true;
                    ofd.Filter = "Data files (*.dat)|*.dat";
#if !__DEBUG_PLOT__
                    var res = ofd.ShowDialog(this);
                    if (res == System.Windows.Forms.DialogResult.OK)
                    {
#else
                    ofd.FileName = "sarsa.dat";
#endif
                        // NOTE(review): BinaryFormatter is insecure against malicious
                        // payloads and removed in .NET 9 — consider migrating this
                        // save format to a safe serializer (e.g. System.Text.Json).
                        BinaryFormatter bf = new BinaryFormatter();
                        using (var fs = ofd.OpenFile())
                        {
                            g = new Grid((Grid.BlockStatus[,])bf.Deserialize(fs), this.grid);
                            if (ql == null)
                                ql = new ReinforcementLearning.QLearning(
                                    this.g,
                                    new List<GridHelper.Directions>(Enum.GetValues(typeof(GridHelper.Directions)).Cast<GridHelper.Directions>()),
                                    Properties.Settings.Default.Gamma,
                                    Properties.Settings.Default.Alpha);
                            if (tdl == null)
                                tdl = new ReinforcementLearning.TDLambda(
                                    ql,
                                    Properties.Settings.Default.Lambda);
                            if (adp == null)
                                adp = new ReinforcementLearning.ADP(
                                    ql);
                            ql.QTable = (Hashtable)bf.Deserialize(fs);
                            ql.VisitedStateActions = (Hashtable)bf.Deserialize(fs);
                            ql.StepCounter = (long)bf.Deserialize(fs);
                            // backward compatibility: older files do not contain a TD(λ) UTable
                            if (fs.Position < fs.Length)
                                tdl.UTable = (Hashtable)bf.Deserialize(fs);
                            // backward compatibility: older files do not contain the TD(λ) progress table
                            if (fs.Position < fs.Length)
                                this.TDLambdaUtilityProgress = (Hashtable)bf.Deserialize(fs);
                            // backward compatibility: older files do not contain an ADP UTable
                            if (fs.Position < fs.Length)
                                adp.UTable = (Hashtable)bf.Deserialize(fs);
                            // backward compatibility: older files do not contain the ADP progress table
                            if (fs.Position < fs.Length)
                                this.ADPUtilityProgress = (Hashtable)bf.Deserialize(fs);
                        }
                        __reload_grid();
                        __plot_policy(ql);
                        __plot_utility(tdl, adp);
                        // was "saved successfully" — this handler LOADS the table
                        this.toolStripStatus.Text = "The QTable loaded successfully....";
#if !__DEBUG_PLOT__
                    }
#endif
                }
            }
            finally
            {
                // always restore the menus, even if the file fails to deserialize
                __enable_all_menus(true);
            }
        }