Example #1
        // Replays each recorded policy episode through TD(lambda) and ADP on a
        // background thread, accumulating per-state utility-progress lists,
        // then plots the resulting utilities and restores the status bar.
        private void __build_UTable(List<KeyValuePair<Hashtable, Hashtable>> policyHistory)
        {
            System.Threading.Thread t = new System.Threading.Thread(new System.Threading.ParameterizedThreadStart((thread) =>
            {
                var origin_txt = this.toolStripStatus.Text;
                this.toolStripStatus.Text = "Calculating utility values...";
                List<Hashtable> tdlist = new List<Hashtable>();
                tdl = new ReinforcementLearning.TDLambda(ql, Properties.Settings.Default.Lambda);
                TDLambdaUtilityProgress = new Hashtable();
                float c = 0;
                foreach (var epic in policyHistory)
                {
                    //tdl.QTable = epic.Key;
                    foreach (Point state in epic.Value.Keys)
                    {
                        tdl.InitialState = state;
                        tdl.Learn(new Func<Grid, Point, long, bool>((g, s, step_counter) => { return s == g.GoalPoint; }));
                        // store td-lambda utility progress for the state
                        if (TDLambdaUtilityProgress.Contains(state))
                            (TDLambdaUtilityProgress[state] as List<float>).Add((float)tdl.UTable[state]);
                        else
                            TDLambdaUtilityProgress.Add(state, new List<float>() { (float)tdl.UTable[state] });
                    }
                    this.toolStripStatus.Text = String.Format("[ {0:F1}% ] Calculating utility values...", (++c / (2 * policyHistory.Count)) * 100);
                    // note: UTable likely refers to the same Hashtable instance on
                    // every pass; clone it here if per-episode snapshots are needed
                    tdlist.Add(tdl.UTable);
                }
                adp = new ReinforcementLearning.ADP(ql);
                List<Hashtable> adplist = new List<Hashtable>();
                ADPUtilityProgress = new Hashtable();
                foreach (var epic in policyHistory)
                {
                    //adp.QTable = epic.Key;
                    foreach (Point state in epic.Value.Keys)
                    {
                        adp.InitialState = state;
                        adp.Learn(new Func<Grid, Point, long, bool>((g, s, step_counter) => { return s == g.GoalPoint; }));
                        // store ADP utility progress for the state
                        if (ADPUtilityProgress.Contains(state))
                            (ADPUtilityProgress[state] as List<float>).Add((float)adp.UTable[state]);
                        else
                            ADPUtilityProgress.Add(state, new List<float>() { (float)adp.UTable[state] });
                    }
                    this.toolStripStatus.Text = String.Format("[ {0:F1}% ] Calculating utility values...", (++c / (2 * policyHistory.Count)) * 100);
                    adplist.Add(adp.UTable);
                }
                __plot_utility(tdl, adp);
                this.toolStripStatus.Text = origin_txt;
                ThreadsPool.Remove(thread as System.Threading.Thread);
            }));
            // register the thread before starting it, so the worker cannot
            // finish and call ThreadsPool.Remove before it has been added
            ThreadsPool.Add(t);
            t.Start(t);

        }
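
The worker in Example #1 assigns this.toolStripStatus.Text directly from the background thread. Because ToolStripStatusLabel is not a windowed control this often goes unnoticed, but the safe WinForms pattern is to marshal such updates onto the UI thread. Below is a minimal sketch, not part of the original listing: it assumes the enclosing class is a System.Windows.Forms.Form, and the helper name __set_status is hypothetical, chosen to match the codebase's double-underscore convention.

        // Hedged sketch (not in the original source): marshals status-bar text
        // updates onto the UI thread via Control.Invoke. Reuses the existing
        // toolStripStatus field from the listing above.
        private void __set_status(string text)
        {
            if (this.InvokeRequired)
                this.Invoke(new Action<string>(__set_status), text);
            else
                this.toolStripStatus.Text = text;
        }

Inside the worker lambda, each direct assignment to this.toolStripStatus.Text could then be replaced with a call to __set_status(...).
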
Example #2
        // Loads a grid and saved Q-learning state from a user-selected .dat
        // file, lazily constructing the learners, then refreshes the grid,
        // policy plot, and utility plots.
        private void loadToolStripMenuItem_Click(object sender, EventArgs e)
        {
            __enable_all_menus(false);
            using (OpenFileDialog ofd = new OpenFileDialog())
            {
                ofd.DefaultExt = "dat";
                ofd.AddExtension = true;
                ofd.Filter = "Data files (*.dat)|*.dat";
#if !__DEBUG_PLOT__
                var res = ofd.ShowDialog(this);
                if (res == System.Windows.Forms.DialogResult.OK)
                {
#else
            ofd.FileName = "sarsa.dat";
#endif
                    BinaryFormatter bf = new BinaryFormatter();
                    using (var fs = ofd.OpenFile())
                    {
                        g = new Grid((Grid.BlockStatus[,])bf.Deserialize(fs), this.grid);
                        if (ql == null)
                            ql = new ReinforcementLearning.QLearning(
                                this.g,
                                new List<GridHelper.Directions>(Enum.GetValues(typeof(GridHelper.Directions)).Cast<GridHelper.Directions>()),
                                Properties.Settings.Default.Gamma,
                                Properties.Settings.Default.Alpha);
                        if (tdl == null)
                            tdl = new ReinforcementLearning.TDLambda(
                                ql,
                                Properties.Settings.Default.Lambda);
                        if (adp == null)
                            adp = new ReinforcementLearning.ADP(ql);
                        ql.QTable = (Hashtable)bf.Deserialize(fs);
                        ql.VisitedStateActions = (Hashtable)bf.Deserialize(fs);
                        ql.StepCounter = (long)bf.Deserialize(fs);
                        // backward compatibility: older files may lack the tables below
                        if (fs.Position < fs.Length)
                            tdl.UTable = (Hashtable)bf.Deserialize(fs);
                        if (fs.Position < fs.Length)
                            this.TDLambdaUtilityProgress = (Hashtable)bf.Deserialize(fs);
                        if (fs.Position < fs.Length)
                            adp.UTable = (Hashtable)bf.Deserialize(fs);
                        if (fs.Position < fs.Length)
                            this.ADPUtilityProgress = (Hashtable)bf.Deserialize(fs);
                    }
                    __reload_grid();
                    __plot_policy(ql);
                    __plot_utility(tdl, adp);
                    this.toolStripStatus.Text = "The QTable saved successfully....";
#if !__DEBUG_PLOT__
                }
#endif
            }
            __enable_all_menus(true);
        }
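
The loader above deserializes in a fixed order: grid blocks, QTable, VisitedStateActions, StepCounter, then four optional trailing tables. The matching save routine is not part of this listing, so the following is a minimal sketch inferred from that read order. The handler name saveToolStripMenuItem_Click and the g.Blocks accessor are assumptions; note also that BinaryFormatter is deprecated in current .NET for security reasons.

        // Hedged sketch (not in the original source): writes the .dat file in
        // the exact order the loader reads it. g.Blocks is an assumed accessor
        // for the Grid.BlockStatus[,] array; adjust to what Grid exposes.
        private void saveToolStripMenuItem_Click(object sender, EventArgs e)
        {
            using (SaveFileDialog sfd = new SaveFileDialog())
            {
                sfd.DefaultExt = "dat";
                sfd.AddExtension = true;
                sfd.Filter = "Data files (*.dat)|*.dat";
                if (sfd.ShowDialog(this) != System.Windows.Forms.DialogResult.OK)
                    return;
                BinaryFormatter bf = new BinaryFormatter();
                using (var fs = sfd.OpenFile())
                {
                    bf.Serialize(fs, g.Blocks);          // Grid.BlockStatus[,]
                    bf.Serialize(fs, ql.QTable);
                    bf.Serialize(fs, ql.VisitedStateActions);
                    bf.Serialize(fs, ql.StepCounter);
                    // trailing tables: older loaders tolerate their absence
                    bf.Serialize(fs, tdl.UTable);
                    bf.Serialize(fs, this.TDLambdaUtilityProgress);
                    bf.Serialize(fs, adp.UTable);
                    bf.Serialize(fs, this.ADPUtilityProgress);
                }
            }
        }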