// Resets the agent to its initial state: clears the context tree (and the
// self model, if in use) and zeroes the time cycle and reward statistics.
public void reset()
{
    m_ct.clear();
    if (m_use_self_model)
    {
        m_self_model.clear();
    }

    m_time_cycle = 0;
    m_total_reward = 0.0;
    m_last_update_percept = false;

    // Seed the hash with the djb2 initial value in the high 32 bits.
    // The literal must be a UInt64: "5381 << 32" on an Int32 operand masks
    // the shift count to 0 and would leave the hash equal to plain 5381.
    m_hash = 5381UL << 32;
}
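// A minimal sketch of how the 5381 seed above is conventionally used: an
// incremental djb2-style hash folded over each symbol of the interaction
// history. This is an assumption for illustration; hashUpdate is a
// hypothetical helper, not part of this agent's actual API.
static UInt64 hashUpdate(UInt64 hash, UInt64 symbol)
{
    // djb2 step: hash = hash * 33 + symbol
    return (hash << 5) + hash + symbol;
}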
// The main agent/environment interaction loop.
void mainLoop(Agent ai, Environment env, Hashtable options)
{
    //ai.logbox = logbox;

    // Determine exploration options
    bool explore = options.ContainsKey("exploration");
    double explore_rate = 0;
    double explore_decay = 0;
    if (explore)
    {
        explore_rate = Double.Parse((string)options["exploration"]);
        explore_decay = Double.Parse((string)options["explore-decay"]);
        //assert(0.0 <= explore_rate && explore_rate <= 1.0);
        //assert(0.0 <= explore_decay && explore_decay <= 1.0);
    }

    // Determine termination age
    bool terminate_check = options.ContainsKey("terminate-age");
    age_t terminate_age = 0;
    if (terminate_check)
    {
        terminate_age = UInt64.Parse((string)options["terminate-age"]);
    }

    // Create the random source and the planner once, outside the loop:
    // constructing a new Random every cycle (as the original code did)
    // reseeds from the system clock and can return identical values on
    // consecutive iterations.
    Random rand01 = new Random();
    Search s = new Search();

    // Agent/environment interaction loop
    for (int cycle = 1; !env.isFinished(); cycle++)
    {
        // Check for agent termination
        if (terminate_check && ai.age() > terminate_age)
        {
            //logbox.Items.Add("info: terminating agent");
            break;
        }

        // Get a percept from the environment
        UInt64 observation = env.getObservation();
        UInt64 reward = env.getReward();

        // Update the agent's environment model with the new percept
        ai.modelUpdate(observation, reward); // TODO: implement in agent.cpp

        // Determine the best exploitive action, or explore
        action_t action;
        bool explored = false;
        if (explore && rand01.NextDouble() < explore_rate)
        {
            explored = true;
            action = ai.genRandomAction();
        }
        else
        {
            action = s.search(ai); // TODO: implement in search.cpp
        }

        // Send the action to the environment
        env.performAction(action); // TODO: implement for each environment

        // Update the agent's environment model with the chosen action
        ai.modelUpdate(action); // TODO: implement in agent.cpp

        // Log this turn
        //logbox.Items.Add("----------------------");
        //logbox.Items.Add("cycle: " + cycle);
        //logbox.Items.Add("action: " + action);
        //logbox.Items.Add("m_observation: " + env.m_observation);
        //logbox.Items.Add("m_reward: " + env.m_reward);
        //logbox.Items.Add("explored: " + (explored ? "yes" : "no"));
        //logbox.Items.Add("explore rate: " + explore_rate);
        //logbox.Items.Add("total reward: " + ai.reward());
        //logbox.Items.Add("average reward: " + ai.averageReward());
        //Application.DoEvents();

        // Decay the exploration rate
        if (explore)
        {
            explore_rate *= explore_decay;
        }
    }
}
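// Example wiring for mainLoop. The option keys below are exactly the ones
// mainLoop reads; everything else (CoinFlip, the Agent and Environment
// constructor signatures) is a hypothetical placeholder, since those types
// are defined elsewhere in the project.
void runExample()
{
    Hashtable options = new Hashtable();
    options["exploration"] = "0.1";      // initial exploration rate
    options["explore-decay"] = "0.99";   // multiplicative decay per cycle
    options["terminate-age"] = "10000";  // stop once the agent is this old

    Environment env = new CoinFlip(options); // hypothetical environment
    Agent ai = new Agent(options);           // hypothetical constructor
    mainLoop(ai, env, options);
}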
// Revert the agent's internal model of the world to that of a previous
// time cycle; returns false on failure.
public bool modelRevert(ModelUndo mu)
{
    // assert(m_ct.historySize() > mu.historySize());
    // assert(!m_use_self_model || m_self_model.historySize() > mu.historySize());
    if (m_time_cycle < mu.age())
    {
        return false;
    }

    // Agent properties must be reverted before the context update, since
    // predicates that depend on the context may depend on them.
    m_time_cycle = mu.age();
    m_hash = mu.hash();
    m_total_reward = mu.reward();
    m_last_update_percept = mu.lastUpdatePercept();

    // Revert the context tree and history back to their previous state
    if (mu.lastUpdatePercept())
    {
        // We are undoing an action
        m_ct.revertHistory(mu.historySize());
        if (m_use_self_model)
        {
            Int64 end_size = (Int64)m_self_model.historySize();
            for (Int64 i = 0; i < end_size - (Int64)mu.historySize(); i++)
            {
                m_self_model.revert();
            }
        }
    }
    else
    {
        // We are undoing an observation / reward
        Int64 end_size = (Int64)m_ct.historySize();
        Int64 percept_bits = (Int64)(m_obs_bits + m_rew_bits);
        for (Int64 i = 0; i < end_size - (Int64)mu.historySize(); i++)
        {
            // The original C++ call was m_ct.revert(percept_bits - i - 1),
            // i.e. a revert that spares the bit at 'offset'. The port lacks
            // that overload, so the parameterless revert() plus the selective
            // history pops below stand in for it.
            Int64 offset = percept_bits - i - 1;
            m_ct.revert();
            for (Int64 ix = 0; ix < (Int64)m_ct.size(); ix++)
            {
                if (ix != offset)
                {
                    m_ct.m_history.pop_back();
                }
            }
        }
        if (m_use_self_model)
        {
            m_self_model.revertHistory(mu.historySize());
        }
    }

    //assert(!m_use_self_model || m_self_model.historySize() == m_ct.historySize());
    return true;
}
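// A minimal sketch of the ModelUndo snapshot that modelRevert consumes: it
// captures the five pieces of agent state the revert restores. The accessor
// names match the calls above; the constructor and the Agent accessors
// marked "assumed" are illustrative assumptions, not confirmed API.
public class ModelUndo
{
    private UInt64 m_age;
    private UInt64 m_hash;
    private double m_reward;
    private bool m_last_update_percept;
    private UInt64 m_history_size;

    // Snapshot the agent's state at construction time.
    public ModelUndo(Agent agent)
    {
        m_age = agent.age();
        m_hash = agent.hash();                             // assumed accessor
        m_reward = agent.reward();
        m_last_update_percept = agent.lastUpdatePercept(); // assumed accessor
        m_history_size = agent.historySize();              // assumed accessor
    }

    public UInt64 age() { return m_age; }
    public UInt64 hash() { return m_hash; }
    public double reward() { return m_reward; }
    public bool lastUpdatePercept() { return m_last_update_percept; }
    public UInt64 historySize() { return m_history_size; }
}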