Пример #1
0
 public RlEpisod(RlSearch <StateId_t, Action_t> search, RlModel <StateId_t, Action_t> model, RlTree <StateId_t, Action_t>[] searchTrees, RlSearchParams searchParams)
 {
     m_search       = search;
     m_model        = model;
     m_searchModel  = m_model.rlModelClone();
     m_searchTrees  = searchTrees;
     m_records      = new RlRecord[m_model.rlModelGetActorsCount(), searchParams.maxGameLength + 10];
     m_recordsCount = new int[m_model.rlModelGetActorsCount()];
     m_searchParams = searchParams;
     //m_searchParams.biasTermConstant = Math.Sqrt(Math.Log(maxGames));
 }
Пример #2
0
        // Thread based search
        public void Search(RlSearchParams searchParams)
        {
            m_searchParams = searchParams;
            m_searchThread = new Thread(Start);

            int actorsCount = m_model.rlModelGetActorsCount();

            m_searchTrees = new RlTree <StateId_t, Action_t> [actorsCount];
            for (int index = 0; index < m_searchTrees.Length; ++index)
            {
                m_searchTrees[index] = new RlTree <StateId_t, Action_t>();
            }

            // Start the search thread
            m_running = true;
            m_searchThread.Start();
        }
Пример #3
0
        public void Generate()
        {
            int actorId;

            // Copy the original model into the search model
            m_searchModel.rlModelCopy(m_model);
            //m_searchModel = m_model.rlModelClone();
            m_searchModel.rlModelInitES();

            // Track whether the actor is in playout mode
            bool[] playouts = new bool[m_model.rlModelGetActorsCount()];

            // Track the number of records for each actor
            Array.Clear(m_recordsCount, 0, m_recordsCount.Length);

            // Track the current node of each actor into the tree
            RlNode <StateId_t, Action_t>[] currentNode = new RlNode <StateId_t, Action_t> [m_model.rlModelGetActorsCount()];
            for (actorId = 0; actorId < currentNode.Length; ++actorId)
            {
                currentNode[actorId] = m_searchTrees[actorId].m_root;
                m_records[actorId, m_recordsCount[actorId]++] = new RlRecord(currentNode[actorId], -1, 0.0);
            }

            // Play a game
            int gameLength = 0;

            while (!m_searchModel.rlModelStateIsTerminal(m_model, m_searchParams.maxGameLength))
            {
                int      actionIndex = -1;
                Action_t action;
                actorId = m_searchModel.rlModelGetActorToAct();

                // Find an action to be applied to the actor
                if (playouts[actorId])
                {
                    m_searchModel.rlModelRandomAction(actorId, out action);
                }
                else
                {
                    m_searchTrees[actorId].AddState(m_searchModel, actorId, ref currentNode[actorId]);
                    actionIndex = SelectAction(currentNode[actorId]);
                    if (actionIndex >= 0)
                    {
                        action            = currentNode[actorId].m_actions[actionIndex].m_action;
                        playouts[actorId] = currentNode[actorId].m_actions[actionIndex].m_stats.m_count < m_searchParams.expandThreshold;
                    }
                    else
                    {
                        m_searchModel.rlModelActionGetNone(out action);
                        playouts[actorId] = true;
                    }
                }

                // If there is no valid action for the actor, then the search is over
                if (m_searchModel.rlModelActionIsNone(ref action))
                {
                    break;
                }
                // Apply the action
                double reward = m_searchModel.rlModelApplyAction(actorId, action);
                // Save the action into the episode history
                m_records[actorId, m_recordsCount[actorId]++] = new RlRecord(currentNode[actorId], actionIndex, reward);
                gameLength++;
            }

            double[] rewards = new double[m_searchModel.rlModelGetActorsCount()];;
            if (m_searchModel.rlModelStateIsTerminal(m_model, m_searchParams.maxGameLength))
            {
                m_searchModel.rlModelEstimate(m_model, ref rewards, m_searchParams.searchModelTag);
                m_search.m_avgEstimateGameLength += (gameLength - m_search.m_avgEstimateGameLength) / ++m_search.m_estimateGames;
            }
            else
            {
                m_searchModel.rlModelEvaluate(actorId, ref rewards);
                m_search.m_avgEvaluateGameLength += (gameLength - m_search.m_avgEvaluateGameLength) / ++m_search.m_evaluateGames;
            }

            // Add a final record to the episode to reflect the final rewards
            for (actorId = 0; actorId < rewards.Length; ++actorId)
            {
                m_records[actorId, m_recordsCount[actorId]++] = new RlRecord(null, -1, rewards[actorId]);
            }
        }