Example #1
        // Pick the index of the action with the highest upper confidence bound,
        // or a uniformly random action while the node itself is still unvisited.
        protected int SelectAction(RlNode <StateId_t, Action_t> node)
        {
            if (node == null || node.m_actions.Length == 0)
            {
                return(-1);
            }
            if (node.m_stats.m_count == 0)
            {
                return(RlUtils.random.Next(node.m_actions.Length));
            }
            double countLog       = Math.Log(node.m_stats.m_count);
            double bestUpperBound = 0.0;
            int    bestAction     = -1;

            for (int index = 0; index < node.m_actions.Length; ++index)
            {
                // Upper confidence bound for this action given the parent visit count
                var bound = getBoundLP(countLog, ref node.m_actions[index].m_stats);
                if (bestAction < 0 || bound > bestUpperBound)
                {
                    bestAction     = index;
                    bestUpperBound = bound;
                }
            }
            return(bestAction);
        }
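SelectAction delegates the per-action score to getBoundLP, which is not shown in these examples. The following is a minimal sketch of what such a bound could look like, assuming a standard UCB1 formula; the RlActionStats type and its m_total field are hypothetical stand-ins, since only m_stats.m_count is visible in the code above.

        // Hypothetical stats type; only m_count is confirmed by the examples above.
        public struct RlActionStats
        {
            public int    m_count; // times this action has been selected
            public double m_total; // assumed: accumulated reward for this action
        }

        // Sketch of a UCB1-style upper bound: mean reward plus an exploration term
        // that grows with the parent visit count (countLog = ln of the parent count).
        protected double getBoundLP(double countLog, ref RlActionStats stats)
        {
            if (stats.m_count == 0)
            {
                // Unvisited actions get an infinite bound so they are tried first
                return(double.PositiveInfinity);
            }
            double mean        = stats.m_total / stats.m_count;
            double exploration = Math.Sqrt(2.0 * countLog / stats.m_count);
            return(mean + exploration);
        }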
Example #2
        // Look up the child node for the model's current state, creating it if it
        // does not exist yet. Returns true only when a new child was added; in
        // both cases result points at the child for that state.
        public bool AddChild(RlModel <StateId_t, Action_t> model, int actorId, out RlNode <StateId_t, Action_t> result)
        {
            StateId_t stateId = model.rlModelStateGetId();

            if (m_childs.TryGetValue(stateId, out result))
            {
                return(false);
            }
            result = new RlNode <StateId_t, Action_t>(model, actorId);
            m_childs.Add(stateId, result);
            return(true);
        }
Example #3
        public void Generate()
        {
            int actorId;

            // Copy the original model into the search model
            m_searchModel.rlModelCopy(m_model);
            //m_searchModel = m_model.rlModelClone();
            m_searchModel.rlModelInitES();

            // Track whether the actor is in playout mode
            bool[] playouts = new bool[m_model.rlModelGetActorsCount()];

            // Track the number of records for each actor
            Array.Clear(m_recordsCount, 0, m_recordsCount.Length);

            // Track the current node of each actor into the tree
            RlNode <StateId_t, Action_t>[] currentNode = new RlNode <StateId_t, Action_t> [m_model.rlModelGetActorsCount()];
            for (actorId = 0; actorId < currentNode.Length; ++actorId)
            {
                currentNode[actorId] = m_searchTrees[actorId].m_root;
                m_records[actorId, m_recordsCount[actorId]++] = new RlRecord(currentNode[actorId], -1, 0.0);
            }

            // Play a game
            int gameLength = 0;

            while (!m_searchModel.rlModelStateIsTerminal(m_model, m_searchParams.maxGameLength))
            {
                int      actionIndex = -1;
                Action_t action;
                actorId = m_searchModel.rlModelGetActorToAct();

                // Find an action to be applied to the actor
                if (playouts[actorId])
                {
                    m_searchModel.rlModelRandomAction(actorId, out action);
                }
                else
                {
                    m_searchTrees[actorId].AddState(m_searchModel, actorId, ref currentNode[actorId]);
                    actionIndex = SelectAction(currentNode[actorId]);
                    if (actionIndex >= 0)
                    {
                        action            = currentNode[actorId].m_actions[actionIndex].m_action;
                        playouts[actorId] = currentNode[actorId].m_actions[actionIndex].m_stats.m_count < m_searchParams.expandThreshold;
                    }
                    else
                    {
                        m_searchModel.rlModelActionGetNone(out action);
                        playouts[actorId] = true;
                    }
                }

                // If there is no valid action for the actor, then the search is over
                if (m_searchModel.rlModelActionIsNone(ref action))
                {
                    break;
                }
                // Apply the action
                double reward = m_searchModel.rlModelApplyAction(actorId, action);
                // Save the action into the episode history
                m_records[actorId, m_recordsCount[actorId]++] = new RlRecord(currentNode[actorId], actionIndex, reward);
                gameLength++;
            }

            double[] rewards = new double[m_searchModel.rlModelGetActorsCount()];
            if (m_searchModel.rlModelStateIsTerminal(m_model, m_searchParams.maxGameLength))
            {
                m_searchModel.rlModelEstimate(m_model, ref rewards, m_searchParams.searchModelTag);
                m_search.m_avgEstimateGameLength += (gameLength - m_search.m_avgEstimateGameLength) / ++m_search.m_estimateGames;
            }
            else
            {
                m_searchModel.rlModelEvaluate(actorId, ref rewards);
                m_search.m_avgEvaluateGameLength += (gameLength - m_search.m_avgEvaluateGameLength) / ++m_search.m_evaluateGames;
            }

            // Add a final record to the episode to reflect the final rewards
            for (actorId = 0; actorId < rewards.Length; ++actorId)
            {
                m_records[actorId, m_recordsCount[actorId]++] = new RlRecord(null, -1, rewards[actorId]);
            }
        }
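Generate only records the episode: each step appends an RlRecord, and a final record carries the terminal rewards, while the update that feeds those records back into the tree statistics happens elsewhere and is not shown here. A minimal sketch of such a backpropagation pass could look like the following; the method name Backpropagate and the m_total fields are hypothetical, with only the m_count fields confirmed by the examples above.

        // Sketch only: walk one actor's episode from the last record back to the
        // root, accumulating the return-to-go and updating the statistics of every
        // visited node and selected action.
        protected void Backpropagate(int actorId)
        {
            double ret = 0.0;

            for (int index = m_recordsCount[actorId] - 1; index >= 0; --index)
            {
                RlRecord record = m_records[actorId, index];
                ret += record.m_reward;

                if (record.m_node == null)
                {
                    // The final record only carries the terminal reward
                    continue;
                }
                record.m_node.m_stats.m_count++;
                record.m_node.m_stats.m_total += ret;   // assumed field
                if (record.m_action >= 0)
                {
                    record.m_node.m_actions[record.m_action].m_stats.m_count++;
                    record.m_node.m_actions[record.m_action].m_stats.m_total += ret; // assumed field
                }
            }
        }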
Example #4
 // One step of an episode: the tree node that was visited, the index of the
 // action taken from it (-1 if none) and the reward received for that step.
 public RlRecord(RlNode <StateId_t, Action_t> node, int action, double reward)
 {
     m_node   = node;
     m_action = action;
     m_reward = reward;
 }
Example #5
 // Advance currentNode to the child that matches the model's current state,
 // creating that child (and counting it) when the state is seen for the first time.
 public RlNode <StateId_t, Action_t> AddState(RlModel <StateId_t, Action_t> model, int actorId, ref RlNode <StateId_t, Action_t> currentNode)
 {
     if (currentNode == null)
     {
         currentNode = m_root;
     }
     if (currentNode.AddChild(model, actorId, out currentNode))
     {
         m_nodesCount++;
     }
     return(currentNode);
 }
Example #6
 public RlTree()
 {
     // The tree starts with a single empty root node
     m_root = new RlNode <StateId_t, Action_t>(this);
     m_nodesCount++;
 }