Exemple #1
0
        /// <summary>
        /// Selects the action with the highest mean statistic among the actions stored in the
        /// children of the root node of the search tree belonging to the actor that is about to act.
        /// </summary>
        /// <remarks>
        /// If a search thread exists, this method joins it before reading the tree. Actions whose
        /// statistics are not defined are ignored. Actions whose mean lies within the tie tolerance
        /// of the current best are collected as ties and one of them is picked with
        /// <c>RlUtils.random.Next</c>.
        /// </remarks>
        /// <param name="result">
        /// The chosen action, or the value produced by <c>rlModelActionGetNone</c> when no action
        /// with defined statistics was found.
        /// </param>
        /// <param name="bestActionsCount">Number of actions tied for the best mean (0 when none was found).</param>
        /// <param name="bestValue">Best mean found, or <see cref="Double.NaN"/> when none was found.</param>
        public void FindBestAction(out Action_t result, out int bestActionsCount, out double bestValue)
        {
            // Kept as the original single-precision literal so the comparison threshold is bit-identical.
            const double tieTolerance    = 0.0001f;
            const int    initialCapacity = 16;

            var actorId = m_model.rlModelGetActorToAct();

            bestValue        = Double.NaN;
            bestActionsCount = 0;
            // Tie buffer; grown on demand so any number of tied actions is supported.
            Action_t[] bestActions = new Action_t[initialCapacity];

            if (m_searchThread != null)
            {
                m_searchThread.Join();
            }
            m_model.rlModelActionGetNone(out result);

            // Disabled debug aid: dumps the search tree as indented XML into log.txt.
/*
 *          var document = m_searchTrees[actorId].asXML(m_model);
 *          MemoryStream stream = new MemoryStream();
 *          XmlTextWriter writer = new XmlTextWriter(stream, Encoding.Unicode);
 *          writer.Formatting = Formatting.Indented;
 *          document.WriteContentTo(writer);
 *          writer.Flush();
 *          stream.Flush();
 *          stream.Position = 0;
 *          StreamReader reader = new StreamReader(stream);
 *          String formattedXML = reader.ReadToEnd();
 *          File.AppendAllText("log.txt", "\n\n" + formattedXML);
 */
            // For each child of the root node of the search tree
            foreach (var child in m_searchTrees[actorId].m_root.m_childs)
            {   // For each action of the child node
                foreach (var action in child.Value.m_actions)
                {
                    if (!action.m_stats.IsDefined)
                    {
                        continue;
                    }
                    double value = action.m_stats.m_mean;
                    if (Double.IsNaN(bestValue) || bestValue < value)
                    {
                        // Strictly better: restart the tie list with this action (slot 0 always exists).
                        bestValue        = value;
                        bestActionsCount = 0;
                        bestActions[bestActionsCount++] = action.m_action;
                    }
                    else if (Math.Abs(bestValue - value) < tieTolerance)
                    {
                        // Tie: append, growing the buffer first if it is full.
                        if (bestActionsCount == bestActions.Length)
                        {
                            Array.Resize(ref bestActions, bestActions.Length * 2);
                        }
                        bestActions[bestActionsCount++] = action.m_action;
                    }
                }
            }

            if (bestActionsCount > 0)
            {
                result = bestActions[RlUtils.random.Next(bestActionsCount)];
            }
        }
Exemple #2
0
        /// <summary>
        /// Plays one simulated game on the search model and records, per actor, the visited
        /// nodes, the chosen action indices and the rewards into <c>m_records</c>.
        /// </summary>
        /// <remarks>
        /// The search model is first overwritten with a copy of <c>m_model</c>. Each actor starts at
        /// the root of its search tree. While an actor is not in playout mode its action comes from
        /// <c>SelectAction</c>; once in playout mode it comes from <c>rlModelRandomAction</c>.
        /// After the game, a final record carrying the estimated or evaluated reward is appended
        /// for every actor.
        /// </remarks>
        public void Generate()
        {
            // Reset the search model from the reference model
            m_searchModel.rlModelCopy(m_model);
            //m_searchModel = m_model.rlModelClone();
            m_searchModel.rlModelInitES();

            // Per-actor flag: true once the actor has switched to playout mode
            var inPlayout = new bool[m_model.rlModelGetActorsCount()];

            // Restart the per-actor record counters
            Array.Clear(m_recordsCount, 0, m_recordsCount.Length);

            // Per-actor cursor into its search tree; every actor starts at its root
            var cursors = new RlNode <StateId_t, Action_t> [m_model.rlModelGetActorsCount()];
            int actor   = 0;
            while (actor < cursors.Length)
            {
                cursors[actor] = m_searchTrees[actor].m_root;
                int rootSlot = m_recordsCount[actor]++;
                m_records[actor, rootSlot] = new RlRecord(cursors[actor], -1, 0.0);
                ++actor;
            }

            // Simulate the game
            int gameLength = 0;

            while (true)
            {
                if (m_searchModel.rlModelStateIsTerminal(m_model, m_searchParams.maxGameLength))
                {
                    break;
                }

                Action_t move;
                int      chosen = -1;
                actor = m_searchModel.rlModelGetActorToAct();

                if (!inPlayout[actor])
                {
                    // Tree phase: make sure the current state is in the tree, then select from it
                    m_searchTrees[actor].AddState(m_searchModel, actor, ref cursors[actor]);
                    chosen = SelectAction(cursors[actor]);
                    if (chosen < 0)
                    {
                        m_searchModel.rlModelActionGetNone(out move);
                        inPlayout[actor] = true;
                    }
                    else
                    {
                        move             = cursors[actor].m_actions[chosen].m_action;
                        inPlayout[actor] = cursors[actor].m_actions[chosen].m_stats.m_count < m_searchParams.expandThreshold;
                    }
                }
                else
                {
                    // Playout phase: random action
                    m_searchModel.rlModelRandomAction(actor, out move);
                }

                // No valid action for this actor ends the search
                if (m_searchModel.rlModelActionIsNone(ref move))
                {
                    break;
                }

                // Apply the action and append the step to the episode history
                double stepReward = m_searchModel.rlModelApplyAction(actor, move);
                int    stepSlot   = m_recordsCount[actor]++;
                m_records[actor, stepSlot] = new RlRecord(cursors[actor], chosen, stepReward);
                ++gameLength;
            }

            double[] rewards = new double[m_searchModel.rlModelGetActorsCount()];
            bool     reachedTerminal = m_searchModel.rlModelStateIsTerminal(m_model, m_searchParams.maxGameLength);
            if (!reachedTerminal)
            {
                // Stopped early: evaluate from the point of view of the last acting actor
                m_searchModel.rlModelEvaluate(actor, ref rewards);
                m_search.m_avgEvaluateGameLength += (gameLength - m_search.m_avgEvaluateGameLength) / ++m_search.m_evaluateGames;
            }
            else
            {
                m_searchModel.rlModelEstimate(m_model, ref rewards, m_searchParams.searchModelTag);
                m_search.m_avgEstimateGameLength += (gameLength - m_search.m_avgEstimateGameLength) / ++m_search.m_estimateGames;
            }

            // Close every actor's episode with a record holding its final reward
            for (int who = 0; who < rewards.Length; ++who)
            {
                int finalSlot = m_recordsCount[who]++;
                m_records[who, finalSlot] = new RlRecord(null, -1, rewards[who]);
            }
        }