Example #1
 // updates the history statistics, without touching the context tree
 public void updateHistory(symbol_list_t symlist)
 {
     for (UInt64 i = 0; i < symlist.size(); i++)
     {
         m_history.push_back(symlist.bits[(int)i]);
     }
 }
Example #2
        /* gives the estimated probability of observing a particular sequence */
        public double predict(symbol_list_t symlist)
        {
            // if we don't have enough context to make an informed
            // prediction, guess uniformly at random
            if (m_history.size() + symlist.size() <= m_depth)
            {
                double exp = -(double)(symlist.size());
                return(Math.Pow(2.0, exp));
            }

            // prob(sym1 ^ sym2 ^ ... | history) = prob(sym1 ^ sym2 ^ ... and history) / prob(history)
            double log_prob_history = logBlockProbability();

            update(symlist);
            double log_prob_syms_and_history = logBlockProbability();

            for (int i = 0; i < symlist.bits.Count; i++)
            {
                revert();
            }

            return(Math.Exp(log_prob_syms_and_history - log_prob_history));
        }
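A note on the ratio trick above: predict() works entirely in log space until the final division. As a purely illustrative example, if logBlockProbability() returns -10.0 for the history alone and -12.4 after update(symlist), the method returns exp(-12.4 - (-10.0)) = exp(-2.4) ≈ 0.091 as the conditional probability of the new symbols given the history.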
Example #3
        /* encodes an observation-reward pair onto the end of a symbol list */
        public void encodePercept(symbol_list_t symlist, UInt64 observation, UInt64 reward)
        {
            symlist.clear();

            encode(symlist, (int)observation, m_obs_bits);
            encode(symlist, (int)reward, m_rew_bits);
        }
Example #4
        // probability of selecting an action according to the
        // agent's internal model of its own behaviour
        public double getPredictedActionProb(action_t action)
        {
            // actions are equally likely if no internal model is used
            if (!m_use_self_model)
            {
                return(1.0 / (double)(m_actions));
            }

            // compute normalisation term, since some
            // actions may be illegal
            double        tot     = 0.0;
            symbol_list_t symlist = new symbol_list_t(0);

            for (UInt64 a = 0; a < m_actions; a++)
            {
                encodeAction(symlist, a);
                tot += m_self_model.predict(symlist);
            }

            //assert(tot != 0.0);
            encodeAction(symlist, action);
            return(m_self_model.predict(symlist) / tot);
        }
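A hypothetical illustration of the normalisation: if there are three actions and the self-model predicts 0.20, 0.15 and 0.25 for their encodings, then tot = 0.60 and the middle action is reported with probability 0.15 / 0.60 = 0.25. Renormalising is necessary because the legal action encodings alone need not account for all of the model's probability mass.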
Example #5
        /* generate a percept distributed according to our history statistics, and
         * update our internal agent state. this is more efficient than calling
         * genPercept and modelUpdate separately. */
        public void genPerceptAndUpdate(Random rng, symbol_list_t percept)
        {
            m_ct.genRandomSymbolsAndUpdate(rng, percept, (UInt64)(m_obs_bits + m_rew_bits));
            nonCTModelUpdate(percept);
        }
Example #6
        public void modelUpdate(action_t action)
        {
            if (!isActionOk(action))
            {
                return;                      // should be assert
            }
            if (!m_last_update_percept)
            {
                return;                                // should be assert
            }
            // Update internal model
            symbol_list_t action_syms = new symbol_list_t(0);

            encodeAction(action_syms, action);

            m_ct.update(action_syms);
            m_ct.updateHistory(action_syms);
            if (m_use_self_model)
            {
                m_self_model.update(action_syms);
            }
            m_hash = hashAfterSymbols(action_syms);

            m_time_cycle++;
            m_last_update_percept = false;
        }
Example #7
 // Encodes a value onto the end of a symbol list using "bits" symbols,
 // least significant bit first
 public void encode(symbol_list_t symlist, int value, int bits)
 {
     for (int i = 0; i < bits; i++, value /= 2)
     {
         bool sym = ((value & 1) != 0);
         symlist.push_back(sym);
     }
 }
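For instance, encode(symlist, 5, 4) appends the symbols 1, 0, 1, 0 (5 = 0b0101, least significant bit first). A minimal standalone sketch of the same scheme, with List<bool> standing in for symbol_list_t:

    using System;
    using System.Collections.Generic;

    class EncodeDemo
    {
        // same scheme as encode() above: append "bits" symbols, LSB first
        static void Encode(List<bool> symlist, int value, int bits)
        {
            for (int i = 0; i < bits; i++, value /= 2)
            {
                symlist.Add((value & 1) != 0);
            }
        }

        static void Main()
        {
            var syms = new List<bool>();
            Encode(syms, 5, 4);                        // 5 = 0b0101
            Console.WriteLine(string.Join(",", syms)); // True,False,True,False
        }
    }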
Example #8
        /* hash of history if we were to make a particular action */
        public hash_t hashAfterAction(action_t action)
        {
            //assert(isActionOk(action));

            symbol_list_t action_syms = new symbol_list_t(0);

            encodeAction(action_syms, action);

            return(hashAfterSymbols(action_syms));
        }
Example #9
        /* updates the context tree with each symbol in a list */
        public void update(symbol_list_t symlist)
        {
            for (int i = 0; i < symlist.bits.Count; i++)
            {
                update(symlist.bits[i]);
            }
        }
Example #10
 /* update the non-context tree part of an internal agent after receiving a percept */
 public void nonCTModelUpdate(symbol_list_t percept)
 {
     if (m_use_self_model)
     {
         m_self_model.updateHistory(percept);
     }
     m_hash                = hashAfterSymbols(percept);
     m_total_reward       += rewardFromPercept(percept);
     m_last_update_percept = true;
 }
Example #11
        // generate a specified number of random symbols
        // distributed according to the context tree statistics
        public void genRandomSymbols(Random rng, symbol_list_t symbols, UInt64 bits)
        {
            genRandomSymbolsAndUpdate(rng, symbols, bits);

            // restore the context tree to its original state
            for (UInt64 i = 0; i < bits; i++)
            {
                revert();
            }
        }
Example #12
 // generate a specified number of random symbols distributed according to
 // the context tree statistics and update the context tree with the newly
 // generated bits
 public void genRandomSymbolsAndUpdate(Random rng, symbol_list_t symbols, UInt64 bits)
 {
     symbols.clear();

     for (UInt64 i = 0; i < bits; i++)
     {
         // flip a biased coin for each bit: predict(false) gives the
         // probability that the next symbol is off
         double prob_off = predict(false);
         bool   rand_sym = rng.NextDouble() >= prob_off;
         symbols.push_back(rand_sym);
         update(rand_sym); // TODO: optimise this loop
     }
     }
 }
Example #13
        // generate an action distributed according
        // to our history statistics
        public action_t genAction(Random rng)
        {
            symbol_list_t syms   = new symbol_list_t(0);
            UInt64        action = 0;

            // use rejection sampling to pick an action according
            // to our historical distribution
            do
            {
                m_self_model.genRandomSymbols(rng, syms, (UInt64)m_actions_bits);
            } while (!symsToAction(syms, ref action));

            return(action);
        }
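Rejection sampling is used because m_actions need not be a power of two, so some bit patterns decode to illegal actions and must simply be redrawn. A standalone sketch of the idea (names hypothetical, with uniformly random bits standing in for the self-model's biased bits):

    // draw a value in [0, numActions) from random bit patterns,
    // resampling any pattern that decodes to an illegal action
    static int SampleLegalAction(Random rng, int numActions, int bits)
    {
        int a;
        do
        {
            a = 0;
            for (int i = 0; i < bits; i++)
            {
                a = (a << 1) | rng.Next(2); // the agent would use self-model bits here
            }
        } while (a >= numActions);          // reject illegal encodings
        return a;
    }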
Example #14
        /* computes the resultant history hash after processing a set of symbols */
        hash_t hashAfterSymbols(symbol_list_t new_syms)
        {
            hash_t rval = m_hash;

            // update the hash of the history, one symbol at a time
            for (int i = 0; i < new_syms.bits.Count; i++)
            {
                rval = hashAfterSymbol(new_syms.bits[i], rval);
            }

            return(rval);
        }
Example #15
        // update the internal agent's model of the world
        // due to receiving a percept or performing an action
        public void modelUpdate(UInt64 observation, UInt64 reward)
        {
            // Update internal model
            symbol_list_t percept = new symbol_list_t(0);

            encodePercept(percept, observation, reward);

            m_ct.update(percept);
            m_ct.updateHistory(percept);

            // update the non-context-tree state (hash, total reward, etc.)
            nonCTModelUpdate(percept);
        }
Example #16
        /* computes the current context: up to m_depth of the most
         * recently observed symbols, most recent first */
        void getContext(symbol_list_t context)
        {
            context.clear();

            // walk backwards through the history buffer
            int ri = m_history.mem.Count - 1;

            for (UInt64 c = 0; ri >= 0 && c < m_depth; --ri, c++)
            {
                context.push_back((bool)m_history.mem[ri]);
            }
        }
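For example, with m_depth = 3 and a history 0, 1, 1, 0 (oldest first), getContext() yields 0, 1, 1: at most m_depth symbols are copied, most recent first.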
Example #17
        /* converts a list of symbols to an action, returning false on failure */
        bool symsToAction(symbol_list_t symlist, ref action_t action)
        {
            action = 0;

            int c = 0;

            foreach (bool bit in symlist.reverseIterator())
            {
                if (bit)
                {
                    action |= ((UInt64)1 << c);
                }
                c++;
            }

            return(isActionOk(action));
        }
Example #18
        /* create (if necessary) all of the nodes in the current context */
        void createNodesInCurrentContext(symbol_list_t context)
        {
            CTNode ctn = m_root;

            for (UInt64 i = 0; i < context.size(); i++)
            {
                // scan context and make up new nodes as we go along
                // and insert in tree as necessary
                int    lp      = context.ibits((int)i);
                CTNode nxt_ctn = ctn.m_child[lp];
                if (nxt_ctn == null)
                {
                    CTNode p = new CTNode();
                    ctn.m_child[lp] = p;
                    nxt_ctn         = p;
                }
                ctn = nxt_ctn;
            }
        }
Example #19
        /* simulate a path through a hypothetical future for the agent
         * within its internal model of the world, returning the
         * accumulated reward. */
        double playout(Agent agent, Random rng, int playout_len)
        {
            double start_reward = agent.reward();

            Stack undos = new Stack();

            for (int i = 0; i < playout_len; i++)
            {
                undos.Push(new ModelUndo(agent));

                // generate action
                UInt64 a = agent.useSelfModel() ?
                           agent.genAction(rng) : agent.selectRandomAction(rng);
                agent.modelUpdate(a);

                // generate percept
                symbol_list_t percept = new symbol_list_t(0);
                undos.Push(new ModelUndo(agent));
                agent.genPerceptAndUpdate(rng, percept);
            }

            double rval = agent.reward() - start_reward;

            // revert the agent's model to its original state,
            // undoing the most recent change first
            while (undos.Count > 0)
            {
                agent.modelRevert((ModelUndo)undos.Pop());
            }
            return(rval);
        }
Example #20
        /* interprets a list of symbols as a reward */
        public reward_t  rewardFromPercept(symbol_list_t percept)
        {
            // assert(percept.size() == m_obs_bits_c + m_rew_bits_c);

            IEnumerator it = percept.reverseIterator().GetEnumerator();

            if (m_base2_reward_encoding)
            {
                // base2 reward encoding: reconstruct the integer reward
                int r = 0;
                for (int c = 0; c < m_rew_bits; c++)
                {
                    it.MoveNext(); //assert(it != percept.rend());
                    if ((bool)it.Current)
                    {
                        r |= (1 << c);
                    }
                }
                return((double)(r));
            }

            // otherwise, assume the reward is the number of on bits
            double reward = 0.0;

            for (int c = 0; c < m_rew_bits; c++)
            {
                it.MoveNext(); //assert(it != percept.rend());
                if ((bool)it.Current)
                {
                    reward += 1.0;
                }
            }

            return(reward);
        }
Example #21
        // Decodes the value encoded on the end of a list of symbols
        int decode(symbol_list_t symlist, int bits)
        {
            //assert(bits <= symlist.size());

            int value = 0;

            // encode() appends the least significant bit first, so read the
            // trailing "bits" symbols back-to-front, most significant bit first
            int it  = symlist.bits.Count - 1;
            int end = it - bits;

            for (; it != end; --it)
            {
                value = (symlist.bits[it] ? 1 : 0) + 2 * value;
            }

            return(value);
        }
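Decoding the same number of bits that were just encoded recovers the original value. A minimal round-trip sketch under that assumption, with List<bool> standing in for symbol_list_t:

    using System;
    using System.Collections.Generic;

    class RoundTripDemo
    {
        static void Encode(List<bool> s, int value, int bits)
        {
            for (int i = 0; i < bits; i++, value /= 2) { s.Add((value & 1) != 0); }
        }

        // mirrors decode() above: consume the trailing "bits" symbols, MSB first
        static int Decode(List<bool> s, int bits)
        {
            int value = 0;
            for (int i = s.Count - 1; i > s.Count - 1 - bits; i--)
            {
                value = (s[i] ? 1 : 0) + 2 * value;
            }
            return value;
        }

        static void Main()
        {
            var syms = new List<bool>();
            Encode(syms, 13, 6);
            Console.WriteLine(Decode(syms, 6)); // prints 13
        }
    }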
Example #22
        // perform a sample run through this node and its children,
        // returning the accumulated reward from this sample run
        public double sample(Agent agent, Random rng, int dfr)
        {
            // the search depth counts both action and percept
            // steps, hence the 2 * horizon cutoff
            if (dfr == (int)agent.horizon() * 2)
            {
                return(0.0);
            }

            ModelUndo undo   = new ModelUndo(agent);
            double    reward = 0.0;

            if (m_chance_node)
            {
                // handle chance nodes: generate a hypothetical percept
                symbol_list_t percept = new symbol_list_t(0);
                agent.genPerceptAndUpdate(rng, percept);

                // extract the reward for this transition, and
                // update the agent model
                reward = agent.rewardFromPercept(percept);

                SearchNode n = agent.spc.findOrCreateNode(agent.hash(), false);
                reward += n.sample(agent, rng, dfr + 1);
                agent.modelRevert(undo);
            }
            else
            {  // handle decision nodes
                lock (m_mutex)
                {
                    // if we need to do a playout
                    bool do_playout =
                        visits() < MinVisitsBeforeExpansion ||
                        dfr >= MaxDistanceFromRoot ||
                        agent.spc.search_node_pool.Count >= (int)agent.spc.MaxSearchNodes;

                    if (do_playout)
                    {
                        reward = playout(agent, rng, (int)agent.horizon() - dfr / 2);
                    }
                    else
                    {
                        // pick an action
                        UInt64 a = selectAction(agent, rng);

                        // update model, and recurse
                        agent.modelUpdate(a);
                        SearchNode n = agent.spc.findOrCreateNode(agent.hash(), true);
                        reward = n.sample(agent, rng, dfr + 1);
                        agent.modelRevert(undo);
                    }
                }
            }

            { // update our statistics for this node
                lock (m_mutex)
                {
                    double vc = (double)(m_visits);
                    m_mean = (m_mean * vc + reward) / (vc + 1.0);
                    m_visits++;
                }
            }

            return(reward);
        }
Example #23
        public action_t naiveMonteCarlo(Agent agent)
        {
            DateTime ti      = DateTime.Now;
            TimeSpan elapsed = TimeSpan.Zero;

            // determine the number of milliseconds to search
            double time_limit_ms = double.Parse((string)agent.options["cycle-length-ms"]);

            // sufficient statistics to compute the sample mean for each action
            double[] rfirst  = new double[agent.numActions()];
            double[] rsecond = new double[agent.numActions()];

            for (int i = 0; i < (int)agent.numActions(); i++)
            {
                rfirst[i] = rsecond[i] = 0.0;
            }

            ModelUndo mu             = new ModelUndo(agent);
            UInt64    total_samples  = 0;
            UInt64    start_hist_len = agent.historySize();

            do            // we ensure each action always has one estimate
            {
                for (UInt64 i = 0; i < agent.numActions(); i++)
                {
                    // make action
                    agent.modelUpdate(i);

                    // grab percept and determine immediate reward
                    symbol_list_t percept = new symbol_list_t(0);
                    agent.genPerceptAndUpdate(agent.rng, percept);
                    double reward = agent.rewardFromPercept(percept);

                    // playout the remainder of the sequence
                    reward += playout(agent, agent.rng, (int)agent.horizon() - 1);

                    rfirst[i]  += reward;
                    rsecond[i] += 1.0;

                    agent.modelRevert(mu);
                    //assert(start_hist_len == agent.historySize());

                    total_samples++;
                }
                elapsed = DateTime.Now - ti;
            } while (elapsed.TotalMilliseconds < time_limit_ms);

            // determine best arm, breaking ties arbitrarily
            double   best        = double.NegativeInfinity;
            action_t best_action = 0;

            for (int i = 0; i < (int)agent.numActions(); i++)
            {
                // assert(r[i].second > 0.0);
                double noise = agent.rng.NextDouble() * 0.0001;

                double x = rfirst[i] / rsecond[i] + noise;

                if (x > best)
                {
                    best        = x;
                    best_action = (UInt64)i;
                }
            }

            return(best_action);
        }
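As an illustration (numbers hypothetical): if action 0 accumulated rfirst[0] = 12.0 over rsecond[0] = 8 samples, its estimated value is 12.0 / 8 = 1.5. The tiny noise term (at most 0.0001) exists only to break exact ties between otherwise identical estimates.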
Example #24
 // generate a percept distributed according
 // to our history statistics
 public void genPercept(Random rng, symbol_list_t symlist)
 {
     m_ct.genRandomSymbols(rng, symlist, (UInt64)m_obs_bits + (UInt64)m_rew_bits);
 }
Example #25
 // get the agent's probability of receiving a particular percept
 public double perceptProbability(symbol_list_t percept)
 {
     // assert(percept.size() == m_obs_bits_c + m_rew_bits_c);
     return(m_ct.predict(percept));
 }
Example #26
        // encoding/decoding actions and percepts to/from symbol lists
        void encodeAction(symbol_list_t symlist, action_t action)
        {
            symlist.clear();

            encode(symlist, (int)action, m_actions_bits);
        }
Example #27
        /* removes the most recently observed symbol from the context tree */
        public void revert()
        {
            if (m_history.size() == 0)
            {
                return;
            }

            // 1. remove the most recent symbol from the context buffer
            symbol_t sym = m_history.back();

            m_history.pop_back();
            // compute the current context
            symbol_list_t context = new symbol_list_t(0);

            getContext(context);

            // no need to undo a context tree update if there was
            // not enough context to begin with
            if (context.size() < m_depth)
            {
                return;
            }
            // 2. determine the path to the leaf nodes
            Stack path = new Stack();

            path.Push(m_root);
            // add the path to the leaf nodes
            CTNode ctn = m_root;

            for (UInt64 i = 0; i < context.size() && ctn != null; i++)
            {
                ctn = ctn.m_child[context.ibits(i)];
                path.Push(ctn);
            }

            // 3. update the probability estimates from the leaf node back up to the root,
            //    deleting any superfluous nodes as we go
            for (; path.Count != 0; path.Pop())
            {
                ctn = (CTNode)path.Peek();
                if (ctn == null)
                {
                    break;
                }
                // undo the previous KT estimate update
                ctn.m_count[sym ? 1 : 0]--;
                double log_kt_mul = ctn.logKTMul(sym);
                ctn.m_log_prob_est -= log_kt_mul;

                // reclaim memory for any child nodes that have now seen no data
                for (int i = 0; i < 2; i++)
                {
                    if (ctn.m_child[i] != null && ctn.m_child[i].visits() == 0)
                    {
                        ctn.m_child[i] = null;
                    }
                }

                // update the weighted probabilities
                if (path.Count == (int)m_depth + 1)
                {
                    ctn.m_log_prob_weighted = ctn.logProbEstimated();
                }
                else
                {
                    // computes log P_w = log(0.5 * [P_kt + P_w0 * P_w1]), i.e.
                    // log P_w = log 0.5 + log P_kt + log(1 + exp(log P_w0 + log P_w1 - log P_kt))
                    double log_prob_on  = ctn.child(true) != null ? ctn.child(true).logProbWeighted() : 0.0;
                    double log_prob_off = ctn.child(false) != null ? ctn.child(false).logProbWeighted() : 0.0;

                    double log_one_plus_exp = log_prob_off + log_prob_on - ctn.logProbEstimated();

                    // approximate log(1 + e^x) by x when x is large; this also avoids overflow
                    if (log_one_plus_exp < 100.0)
                    {
                        log_one_plus_exp = Math.Log(1.0 + Math.Exp(log_one_plus_exp));
                    }

                    ctn.m_log_prob_weighted = log_point_five + ctn.logProbEstimated() + log_one_plus_exp;
                }
            }
        }
Example #28
 /* decodes the action encoded on the end of a symbol list */
 UInt64 decodeAction(symbol_list_t symlist)
 {
     return((UInt64)decode(symlist, (int)m_actions_bits));
 }
Example #29
 /* decodes the reward encoded on the end of a symbol list */
 public UInt64 decodeReward(symbol_list_t symlist)
 {
     return((UInt64)decode(symlist, (int)m_rew_bits));
 }
Example #30
        /* updates the context tree with a single symbol */
        void update(symbol_t sym)
        {
            // compute the current context
            symbol_list_t context = new symbol_list_t(0);

            getContext(context);
            // if we have not seen enough context, append the symbol
            // to the history buffer and skip updating the context tree
            if (context.size() < m_depth)
            {
                m_history.push_back(sym);
                return;
            }
            // 1. create new nodes in the context tree (if necessary)

            createNodesInCurrentContext(context);
            // 2. walk down the tree to the relevant leaf context, saving the path as we go
            Stack path = new Stack();

            path.Push(m_root);     // add the empty context

            // add the path to the leaf nodes
            CTNode ctn = m_root;

            for (UInt64 i = 0; i < context.size(); i++)
            {
                ctn = ctn.m_child[context.ibits((int)i)];
                path.Push(ctn);
            }
            // 3. update the probability estimates from the leaf node back up to the root

            for (; path.Count != 0; path.Pop())
            {
                CTNode n = (CTNode)path.Peek();

                // update the KT estimate and counts
                double log_kt_mul = n.logKTMul(sym);
                n.m_log_prob_est += log_kt_mul;
                n.m_count[(sym ? 1 : 0)]++;

                // update the weighted probabilities
                if (path.Count == (int)m_depth + 1)
                {
                    n.m_log_prob_weighted = n.logProbEstimated();
                }
                else
                {
                    // computes log P_w = log(0.5 * [P_kt + P_w0 * P_w1]), i.e.
                    // log P_w = log 0.5 + log P_kt + log(1 + exp(log P_w0 + log P_w1 - log P_kt))
                    double log_prob_on  = n.child(true) != null ? n.child(true).logProbWeighted() : 0.0;
                    double log_prob_off = n.child(false) != null ? n.child(false).logProbWeighted() : 0.0;

                    double log_one_plus_exp = log_prob_off + log_prob_on - n.logProbEstimated();

                    // approximate log(1 + e^x) by x when x is large; this also avoids overflow
                    if (log_one_plus_exp < 100.0)
                    {
                        log_one_plus_exp = Math.Log(1.0 + Math.Exp(log_one_plus_exp));
                    }

                    n.m_log_prob_weighted = log_point_five + n.logProbEstimated() + log_one_plus_exp;
                }
            }

            // 4. save the new symbol to the context buffer
            m_history.push_back(sym);
        }
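Both update() and revert() recompute the weighted probability with the same identity, log P_w = log 0.5 + log P_kt + log(1 + exp(log P_w0 + log P_w1 - log P_kt)), which stays numerically stable when the raw probabilities would underflow a double. A standalone sketch of just that step (helper name hypothetical):

    // returns log(0.5 * (P_kt + P_w0 * P_w1)) given log-domain inputs,
    // mirroring the weighted-probability update in the code above
    static double LogWeightedProb(double logProbEst, double logProbOn, double logProbOff)
    {
        double x = logProbOff + logProbOn - logProbEst;

        // log(1 + e^x) ~= x for large x; exponentiating directly would overflow
        double logOnePlusExp = x < 100.0 ? Math.Log(1.0 + Math.Exp(x)) : x;

        return Math.Log(0.5) + logProbEst + logOnePlusExp;
    }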