Example #1
 // construct a save point
 public ModelUndo(Agent agent)
 {
     m_age                 = agent.age();
     m_hash                = agent.hash();
     m_reward              = agent.reward();
     m_history_size        = agent.historySize();
     m_last_update_percept = agent.m_last_update_percept;
 }
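How these pieces fit together: a ModelUndo object snapshots the cheap-to-copy parts of the agent's state (age, hash, reward, history size) so that a later modelRevert (Example #4) can roll the model back. A minimal usage sketch, assuming the Agent API shown in these examples (simulatedPercept is a hypothetical stand-in, not from the listing):

 ModelUndo mu = new ModelUndo(agent);      // snapshot the agent's current state
 agent.nonCTModelUpdate(simulatedPercept); // any state-changing update
 bool ok = agent.modelRevert(mu);          // roll back; returns false on failure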
Example #2
 /* update the non-context-tree part of the agent's internal model after receiving a percept */
 public void nonCTModelUpdate(symbol_list_t percept)
 {
     if (m_use_self_model)
     {
         m_self_model.updateHistory(percept);
     }
     m_hash                = hashAfterSymbols(percept);
     m_total_reward       += rewardFromPercept(percept);
     m_last_update_percept = true;
 }
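The listing never shows hashAfterSymbols, but the 5381 seed in Example #3 below is the classic djb2 constant, which suggests a rolling hash over the symbol history. A hypothetical sketch under that assumption (the element type of symbol_list_t is also assumed):

 // hypothetical sketch of hashAfterSymbols, assuming a djb2-style rolling
 // hash over percept bits; NOT the implementation from the listing
 private UInt64 hashAfterSymbols(symbol_list_t symbols)
 {
     UInt64 h = m_hash;                  // continue from the current hash
     foreach (bool sym in symbols)       // assumes symbols enumerate as bits
     {
         h = h * 33 + (sym ? 1UL : 0UL); // djb2 step: h = h * 33 + c
     }
     return h;
 }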
Example #3
        // resets the agent
        public void reset()
        {
            m_ct.clear();
            if (m_use_self_model)
            {
                m_self_model.clear();
            }

            m_time_cycle          = 0;
            m_total_reward        = 0.0;
            m_last_update_percept = false;
            m_hash                = (UInt64)5381 << 32; // djb2-style seed; the cast is needed, see below
        }
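One detail worth flagging in reset(): the original line read m_hash = 5381 << 32;. In C#, the shift count of a 32-bit int is masked to its low five bits, so shifting by 32 is a no-op and the expression silently evaluates to 5381. Casting the operand to 64 bits first makes the shift behave as intended, since ulong shifts honor counts up to 63:

        Console.WriteLine(5381 << 32);          // 5381  (int shift: 32 & 0x1F == 0)
        Console.WriteLine((UInt64)5381 << 32);  // 23111219019776, i.e. 5381 * 2^32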
Example #4
        // revert the agent's internal model of the world
        // to that of a previous time cycle, false on failure
        public bool modelRevert(ModelUndo mu)
        {
            // assert(m_ct.historySize() > mu.historySize());
            // assert(!m_use_self_model || m_self_model.historySize() > mu.historySize());

            if (m_time_cycle < mu.age())
            {
                return false;
            }

            // agent properties must be reverted before the context update,
            // since predicates that depend on the context may also depend
            // on these properties
            m_time_cycle          = mu.age();
            m_hash                = mu.hash();
            m_total_reward        = mu.reward();
            m_last_update_percept = mu.lastUpdatePercept();

            // revert the context tree and history back to its previous state

            if (mu.lastUpdatePercept())
            { // if we are undoing an action
                m_ct.revertHistory(mu.historySize());
                if (m_use_self_model)
                {
                    Int64 lim = (Int64)m_self_model.historySize() - (Int64)mu.historySize();
                    for (Int64 i = 0; i < lim; i++)
                    {
                        m_self_model.revert();
                    }
                }
            }
            else
            {
                // if we are undoing an observation / reward
                Int64 end_size = (Int64)m_ct.historySize();
                Int64 lim      = end_size - (Int64)mu.historySize();
                for (Int64 i = 0; i < lim; i++)
                {
                    // ORIGINAL :: m_ct.revert(percept_bits - i - 1), with
                    // percept_bits = m_obs_bits + m_rew_bits; assuming revert()
                    // undoes the most recent symbol update and shrinks the
                    // history by one symbol, one call per excess symbol suffices
                    // (the previous port's inner pop_back loop discarded nearly
                    // the whole history on every pass)
                    m_ct.revert();
                }
                if (m_use_self_model)
                {
                    m_self_model.revertHistory(mu.historySize());
                }
            }

            // assert(!m_use_self_model || m_self_model.historySize() == m_ct.historySize());

            return true;
        }
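Taken together, Examples #1 and #4 form a snapshot/rollback pair, the kind of machinery a Monte-Carlo style planner uses to try many simulated futures from one real state. A hypothetical driver loop under that assumption (runOneSimulation and numSimulations are stand-ins, not from the listing):

        ModelUndo mu = new ModelUndo(agent);            // one snapshot of the real state
        for (int sim = 0; sim < numSimulations; sim++)
        {
            runOneSimulation(agent);                    // stand-in: mutates the agent's model
            if (!agent.modelRevert(mu))                 // every rollout ends with a rollback
            {
                throw new InvalidOperationException("cannot revert past the save point");
            }
        }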