Example #1
        public void modelUpdate(action_t action)
        {
            if (!isActionOk(action))
            {
                return;                      // should be assert
            }
            if (!m_last_update_percept)
            {
                return;                                // should be assert
            }
            // Update internal model
            symbol_list_t action_syms = new symbol_list_t(0); //(UInt64) m_actions_bits);

            encodeAction(action_syms, action);

            m_ct.update(action_syms);
            m_ct.updateHistory(action_syms);
            if (m_use_self_model)
            {
                m_self_model.update(action_syms);
            }
            m_hash = hashAfterSymbols(action_syms);

            m_time_cycle++;
            m_last_update_percept = false;
        }
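The m_last_update_percept flag enforces an alternation between percept and action updates: modelUpdate(action) is only valid right after a percept update, and it flips the flag so the next update must again be a percept. A minimal sketch of that pairing, assuming the genPerceptAndUpdate call seen in Example #11 performs the percept-side update (the variables a and agent are placeholders):

            agent.modelUpdate(a);                            // action update: requires m_last_update_percept == true
            symbol_list_t percept = new symbol_list_t(0);
            agent.genPerceptAndUpdate(agent.rng, percept);   // percept update: assumed to set the flag back to true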
 public BuildModeGenerationGUI()
 {
     InitializeComponent();
     Action         = action_t.NONE;
     projectConfigs = new Hashtable();
     ResoreSettings();
 }
Example #4
        /* selects the best action determined by the MCTS statistics */
        public action_t selectBestMCTSAction(Agent agent)
        {
            ModelUndo mu          = new ModelUndo(agent);
            action_t  best_action = agent.selectRandomAction(agent.rng);
            double    best_exp    = double.NegativeInfinity;
            bool      found       = false;

            for (UInt64 a = 0; a < agent.numActions(); a++)
            {
                SearchNode n = agent.spc.findNode(agent.hashAfterAction(a));
                if (n != null)
                {
                    double noise = agent.rng.NextDouble() * 0.0001;
                    double exp   = n.expectation() + noise;
                    if (exp > best_exp)
                    {
                        best_exp    = exp;   // keep the tie-breaking noise in the stored best
                        best_action = a;
                        found       = true;
                    }
                    // agent.logbox.Items.Add("action " + a + ":" + exp + " visits " + n.visits() + " self-predicted probability :" + agent.getPredictedActionProb(a));
                }
            }

            //agent.logbox.Items.Add(" selectBestMCTSAction=" + best_action +"found ="+found );


            return(best_action);
        }
Example #5
        // probability of selecting an action according to the
        // agent's internal model of its own behaviour
        public double getPredictedActionProb(action_t action)
        {
            //return 0; // TODONE: implement

            // actions are equally likely if no internal model is used
            if (!m_use_self_model)
            {
                return(1.0 / (double)(m_actions));
            }

            // compute normalisation term, since some
            // actions may be illegal
            double        tot     = 0.0;
            symbol_list_t symlist = new symbol_list_t(0); //(UInt64)m_actions_bits);

            for (UInt64 a = 0; a < m_actions; a++)
            {
                encodeAction(symlist, a);
                tot += m_self_model.predict(symlist);
            }

            //assert(tot != 0.0);
            encodeAction(symlist, action);
            return(m_self_model.predict(symlist) / tot);
        }
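The normalisation above can be isolated into a small arithmetic sketch. This helper is not part of the original classes; in the real code the prediction values come from m_self_model.predict:

            // Standalone sketch: probability of one action given raw per-action predictions.
            static double NormalisedActionProb(double[] predictions, int action)
            {
                double tot = 0.0;
                foreach (double p in predictions)
                {
                    tot += p;
                }
                return predictions[action] / tot;   // mirrors the commented-out assert: tot must be non-zero
            }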
Example #6
        /* hash of history if we were to make a particular action */
        public hash_t hashAfterAction(action_t action)
        {
            //assert(isActionOk(action));

            symbol_list_t action_syms = new symbol_list_t(0); //(UInt64)m_actions_bits);

            encodeAction(action_syms, action);

            return(hashAfterSymbols(action_syms));
        }
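hashAfterAction lets the planners look up a child search node without committing the action to the model; Examples #4 and #8 both use it with the same pattern:

            SearchNode n = agent.spc.findNode(agent.hashAfterAction(a));
            if (n == null)
            {
                // this action has not been expanded in the search tree yet
            }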
Example #7
        public action_t search(Agent agent)
        {
            action_t best       = 0;
            string   controller = (string)agent.options["controller"];

            if (controller == "mc")
            {
                best = naiveMonteCarlo(agent);
            }
            else if (controller == "mcts")
            {
                best = mcts(agent);
            }
            else
            {
                best = agent.selectRandomAction(agent.rng);
            }
            return(best);   // TODO
        }
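The controller is chosen through the agent's options table. A hedged usage sketch (the keys are the ones read in this listing; the setup of the Agent and the search object is assumed, and the mcts branch may read further options not shown here):

            agent.options["controller"]      = "mcts";   // "mc" selects naiveMonteCarlo; anything else falls back to a random action
            agent.options["cycle-length-ms"] = "100";    // per-cycle planning budget, parsed in naiveMonteCarlo (Example #11)
            action_t chosen = search(agent);
            agent.modelUpdate(chosen);                   // commit the chosen action (Example #1)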
Example #8
        // determine the next action to play
        // determine the next child to explore, null if no such child exists
        public action_t selectAction(Agent agent, Random rng)
        {
            // TODONE: implement
            // higher values encourage more exploration, less exploitation
            double ExploreBias    = agent.horizon() * agent.maxReward();
            double UnexploredBias = 1000000000.0;

            //assert(!m_chance_node);

            action_t best_action   = 0;
            double   best_priority = double.NegativeInfinity;

            //Random rng = new Random ();
            for (UInt64 a = 0; a < agent.numActions(); a++)
            {
                SearchNode n = agent.spc.findNode(agent.hashAfterAction(a));
                //assert(n == NULL || n->m_chance_node);

                double priority, noise = rng.NextDouble() * 0.0001;

                // use UCB formula to compute priority
                if (n == null || n.visits() == 0)
                {
                    priority = UnexploredBias + noise;
                }
                else
                {
                    double pvisits = (double)(visits());
                    double cvisits = (double)(n.visits());
                    double bias    = ExploreBias * Math.Sqrt(2.0 * Math.Log(pvisits) / cvisits);
                    priority = n.expectation() + bias + noise;
                }

                if (priority > best_priority)
                {
                    best_action   = a;
                    best_priority = priority;
                }
            }

            return(best_action);
        }
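The priority above is the UCB1 rule plus a large bonus for unexplored children and a small noise term for random tie-breaking. The same formula in isolation (the names here are illustrative, not part of the original classes):

            // UCB-style priority: unexplored children get a large constant bonus; otherwise
            // mean value plus an exploration term that shrinks as the child is visited more.
            static double UcbPriority(double mean, double parentVisits, double childVisits,
                                      double exploreBias, double unexploredBias, double noise)
            {
                if (childVisits == 0.0)
                {
                    return unexploredBias + noise;
                }
                return mean + exploreBias * Math.Sqrt(2.0 * Math.Log(parentVisits) / childVisits) + noise;
            }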
Example #9
        /* convert a list of symbols to an action (passed by reference so the
           decoded value reaches the caller), false on failure */
        bool symsToAction(symbol_list_t symlist, ref action_t action)
        {
            action = 0;

            //symbol_list_t::const_reverse_iterator it = symlist.rbegin();
            //for (UInt64 c = 0; it != symlist.rend(); ++it, c++) {
            //    if (*it == On) action |= (1 << c);
            //}
            UInt16 c = 0;

            foreach (bool bit in  symlist.reverseIterator())
            {
                if (bit == true)
                {
                    action = action | ((UInt64)1 << c);
                }
                c++;
            }

            return(isActionOk(action));
        }
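The same decoding in isolation, using a plain List<bool> in place of symbol_list_t (an assumption; that class is not shown in this listing). The reverse iteration means the last symbol in the list is treated as bit 0:

            // Standalone sketch of the bit decoding performed by symsToAction.
            static ulong DecodeBits(System.Collections.Generic.List<bool> bits)
            {
                ulong value = 0;
                int   c     = 0;
                for (int i = bits.Count - 1; i >= 0; i--, c++)
                {
                    if (bits[i])
                    {
                        value |= (ulong)1 << c;
                    }
                }
                return value;
            }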
 private void cancel_Click(object sender, System.EventArgs e)
 {
     Action = action_t.NONE;
     SaveSettings();
     this.Close();
 }
Example #11
        public action_t naiveMonteCarlo(Agent agent)
        {
            DateTime ti      = DateTime.Now;
            TimeSpan elapsed = TimeSpan.Zero;

            // determine the depth and number of seconds to search
            double time_limit_ms = double.Parse((string)agent.options["cycle-length-ms"]);
            double time_limit    = time_limit_ms / 1000.0;

            // sufficient statistics to compute the sample mean for each action
            // std::vector<std::pair<reward_t, double> > r(agent.numActions());
            double [] rfirst  = new double [agent.numActions()];
            double [] rsecond = new double[agent.numActions()];

            for (int i = 0; i < (int)agent.numActions(); i++)
            {
                rfirst[i] = rsecond[i] = 0.0;
            }

            ModelUndo mu             = new ModelUndo(agent);
            UInt64    total_samples  = 0;
            UInt64    start_hist_len = agent.historySize();

            do            // we ensure each action always has one estimate
            {
                for (UInt64 i = 0; i < agent.numActions(); i++)
                {
                    // make action
                    agent.modelUpdate(i);

                    // grab percept and determine immediate reward
                    symbol_list_t percept = new symbol_list_t(0);            //agent.preceptBits ());
                    agent.genPerceptAndUpdate(agent.rng, percept);
                    double reward = agent.rewardFromPercept(percept);

                    // playout the remainder of the sequence
                    reward += playout(agent, agent.rng, agent.horizon() - 1);

                    rfirst[i]  += reward;
                    rsecond[i] += 1.0;

                    agent.modelRevert(mu);
                    //assert(start_hist_len == agent.historySize());

                    total_samples++;
                }
                elapsed = DateTime.Now - ti;
            } while (Math.Abs(elapsed.TotalMilliseconds) < time_limit_ms);

            // determine best arm, breaking ties arbitrarily
            double   best        = double.NegativeInfinity;
            action_t best_action = 0;

            for (int i = 0; i < (int)agent.numActions(); i++)
            {
                // assert(r[i].second > 0.0);
                double noise = agent.rng.NextDouble() * 0.0001;

                double x = rfirst[i] / rsecond[i] + noise;

                if (x > best)
                {
                    best        = x;
                    best_action = (UInt64)i;
                }
            }

            //agent.logbox.Items.Add( "naive monte-carlo decision based on " + total_samples + " samples.");

            for (int i = 0; i < (int)agent.numActions(); i++)
            {
                //agent.logbox.Items.Add("action " + i + ": " +( rfirst[i] / rsecond[i]));
            }
            //agent.logbox.Items.Add(" best_action:" + best_action);
            return(best_action);
        }
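The timing bookkeeping above could equivalently use System.Diagnostics.Stopwatch; this is only an alternative sketch, not what the listing does:

            double time_limit_ms = 100.0;                       // illustrative per-cycle budget
            var    sw            = System.Diagnostics.Stopwatch.StartNew();
            do
            {
                // one round-robin sweep over the actions, as in naiveMonteCarlo
            } while (sw.Elapsed.TotalMilliseconds < time_limit_ms);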
Example #12
 // action sanity check
 public bool isActionOk(action_t action)
 {
     return(action < m_actions);
 }
Example #13
        // encoding/decoding actions and percepts to/from symbol lists
        void encodeAction(symbol_list_t symlist, action_t action)
        {
            symlist.clear();

            encode(symlist, (int)action, m_actions_bits);
        }
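encode() itself does not appear in this listing. A sketch that round-trips with the symsToAction decoder of Example #9 would write the most significant of the m_actions_bits bits first, so that the last symbol is bit 0; the real implementation may order the bits differently:

            // Hypothetical encoder, consistent with the decode sketch after Example #9
            // (List<bool> again stands in for symbol_list_t).
            static void EncodeBits(System.Collections.Generic.List<bool> symlist, ulong value, int bits)
            {
                symlist.Clear();
                for (int c = bits - 1; c >= 0; c--)
                {
                    symlist.Add(((value >> c) & 1UL) == 1UL);
                }
            }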
 private void enableCoverageClicked(object sender, System.EventArgs e)
 {
     Action = action_t.ENABLE_COVERAGE_CONFIG;
     SaveSettings();
     this.Close();
 }
 private void disableCoverageClicked(object sender, EventArgs e)
 {
     Action = action_t.DISABLE_COVERAGE_CONFIG;
     this.Close();
 }
Example #18
        public UInt64 m_reward;            // the current reward

        // receives the agent's action and calculates the new environment percept
        public virtual void performAction(action_t action)
        {
            m_last_action = action;
            // return; // TODO: implement in inherited class
        }
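performAction is meant to be overridden. A purely illustrative subclass (the base class name Environment, the accessibility of m_last_action, and the numeric action_t alias are assumptions; nothing like this class appears in the listing) that rewards matching a fixed target action:

        public class MatchTargetEnvironment : Environment
        {
            private readonly action_t m_target;

            public MatchTargetEnvironment(action_t target)
            {
                m_target = target;
            }

            public override void performAction(action_t action)
            {
                m_last_action = action;                          // keep the base-class bookkeeping
                m_reward      = (action == m_target) ? 1UL : 0UL; // reward 1 for a match, else 0
            }
        }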