Example #1
        // resets the agent
        public void reset()
        {
            m_ct.clear();                     // clear the context-tree model of the environment
            if (m_use_self_model)
            {
                m_self_model.clear();         // also clear the self-model, if one is in use
            }

            m_time_cycle          = 0;
            m_total_reward        = 0.0;
            m_last_update_percept = false;
            m_hash = (UInt64)5381 << 32;      // cast so the shift happens in 64 bits instead of wrapping back to 5381
        }
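A minimal usage sketch, not part of the example above: it shows reset() wiping the agent's model between independent runs. The runTrials wrapper and the CoinFlip environment class are assumptions; reset(), averageReward() and mainLoop (Example #2) do appear in these examples.

        // Hypothetical driver: several independent trials with one agent instance.
        void runTrials(Agent agent, Hashtable options)
        {
            for (int trial = 0; trial < 10; trial++)
            {
                agent.reset();                               // wipe context tree, reward total, time cycle
                Environment env = new CoinFlip(options);     // hypothetical concrete Environment
                mainLoop(agent, env, options);               // interaction loop from Example #2
                Console.WriteLine("trial " + trial + ": average reward " + agent.averageReward());
            }
        }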
Example #2
        // The main agent/environment interaction loop
        void mainLoop(Agent ai, Environment env, Hashtable options)
        {
            //ai.logbox = logbox;
            // Determine exploration options
            bool   explore       = options.ContainsKey("exploration");
            double explore_rate  = 0;
            double explore_decay = 0;

            if (explore)
            {
                explore_rate  = Double.Parse((string)options["exploration"]);
                explore_decay = Double.Parse((string)options["explore-decay"]);

                //assert(0.0 <= explore_rate && explore_rate <= 1.0);
                //assert(0.0 <= explore_decay && explore_decay <= 1.0);
            }


            // Determine termination age
            bool  terminate_check = options.ContainsKey("terminate-age");
            age_t terminate_age   = 0;

            if (terminate_check)
            {
                terminate_age = UInt64.Parse((string)options["terminate-age"]);
                //assert(0 <= terminate_age);
            }

            // Use a single RNG for exploration decisions; constructing a new
            // Random inside the loop can reseed it with the same time value.
            Random rand01 = new Random();

            // Agent/environment interaction loop
            for (int cycle = 1; !env.isFinished(); cycle++)
            {
                // check for agent termination
                if (terminate_check && ai.age() > terminate_age)
                {
                    //log << "info: terminating agent" << std::endl;
                    //logbox.Items.Add("info: terminating agent ");
                    break;
                }

                // Get a percept from the environment
                UInt64 observation = env.getObservation();
                UInt64 reward      = env.getReward();

                // Update agent's environment model with the new percept
                ai.modelUpdate(observation, reward); // TODO: implement in agent.cpp

                // Determine best exploitive action, or explore
                action_t action;
                bool     explored = false;

                if (explore && rand01.NextDouble() < explore_rate)
                {
                    explored = true;
                    action   = ai.genRandomAction();
                }
                else
                {
                    Search s = new Search();
                    action = s.search(ai);     // TODO: implement in search.cpp
                }

                // Send an action to the environment
                env.performAction(action); // TODO: implement for each environment

                // Update agent's environment model with the chosen action
                ai.modelUpdate(action); // TODO: implement in agent.cpp

                // Log this turn
                //logbox.Items.Add("----------------------");
                //logbox.Items.Add("cycle: " + cycle);
                // logbox.Items.Add("action: " + action );
                //logbox.Items.Add("m_observation: " + env.m_observation);
                //logbox.Items.Add("m_reward: " +env.m_reward );
                //logbox.Items.Add("explored: " + (explored ? "yes" : "no") );
                //logbox.Items.Add("explore rate: "  + explore_rate );
                //logbox.Items.Add("total reward: "  + ai.reward());
                //logbox.Items.Add("average reward: "  + ai.averageReward());
                //Application.DoEvents();

                // Update exploration rate
                if (explore)
                {
                    explore_rate *= explore_decay;
                }
            }
        }
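A sketch of a possible call site for mainLoop. The option keys and their string values are exactly what mainLoop parses above (Hashtable is System.Collections); the Agent and CoinFlip constructors are assumptions, not taken from the example.

        Hashtable options = new Hashtable();
        options["exploration"]   = "0.1";      // initial exploration rate, parsed with Double.Parse
        options["explore-decay"] = "0.99";     // multiplied into the rate after every cycle
        options["terminate-age"] = "100000";   // loop stops once ai.age() exceeds this

        Agent       ai  = new Agent(options);      // constructor signature is an assumption
        Environment env = new CoinFlip(options);   // hypothetical concrete Environment
        mainLoop(ai, env, options);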
Example #3
        // revert the agent's internal model of the world to that of a
        // previous time cycle; returns false on failure
        public bool modelRevert(ModelUndo mu)
        {
            // return false; // TODONE: implement
            // assert(m_ct->historySize() > mu.historySize());
            // assert(!m_use_self_model || m_self_model->historySize() > mu.historySize());

            if (m_time_cycle < mu.age())
            {
                return(false);
            }

            // agent properties must be reverted before the context-tree update,
            // since the update below may depend on them
            m_time_cycle          = mu.age();
            m_hash                = mu.hash();
            m_total_reward        = mu.reward();
            m_last_update_percept = mu.lastUpdatePercept();

            // revert the context tree and history back to its previous state

            if (mu.lastUpdatePercept())
            {
                // the saved state ended with a percept, so undo the action(s) added since
                m_ct.revertHistory(mu.historySize());
                if (m_use_self_model)
                {
                    // undo one self-model update per history symbol added since the snapshot
                    Int64 end_size = (Int64)m_self_model.historySize();
                    for (Int64 i = 0; i < end_size - (Int64)mu.historySize(); i++)
                    {
                        m_self_model.revert();
                    }
                }
            }
            else
            {
                // if we are undoing an observation / reward percept
                Int64 end_size     = (Int64)m_ct.historySize();
                Int64 percept_bits = (Int64)(m_obs_bits + m_rew_bits);
                Int64 lim          = end_size - (Int64)mu.historySize();
                for (Int64 i = 0; i < lim; i++)
                {
                    //ORIGINAL ::  m_ct.revert(percept_bits - i - 1);
                    Int64 offset = percept_bits - i - 1;
                    m_ct.revert();
                    for (Int64 ix = 0; ix < (Int64)m_ct.size(); ix++)
                    {
                        if (ix != offset)
                        {
                            m_ct.m_history.pop_back();
                        }
                    }
                }
                if (m_use_self_model)
                {
                    m_self_model.revertHistory(mu.historySize());
                }
            }

            //assert(!m_use_self_model || m_self_model.historySize() == m_ct.historySize());

            return(true);
        }
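A sketch of the snapshot/revert pattern modelRevert is written for, e.g. inside a planning rollout. The ModelUndo(agent) constructor and the candidateAction / observation / reward locals are assumptions; the accessors match the ones read above (age(), hash(), reward(), historySize(), lastUpdatePercept()), and modelUpdate comes from Example #2.

        ModelUndo mu = new ModelUndo(agent);        // snapshot the agent's current model state

        agent.modelUpdate(candidateAction);         // speculative action update
        agent.modelUpdate(observation, reward);     // speculative percept update
        double simulatedReward = agent.reward();    // inspect the hypothetical outcome

        bool ok = agent.modelRevert(mu);            // roll the model back to the snapshot;
                                                    // false only if the snapshot is newer
                                                    // than the agent's current time cycle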