/// <summary>
/// Selects the best action for the actor currently to act, based on the
/// accumulated statistics in that actor's search tree. All actions whose mean
/// value is within <see cref="BestValueTolerance"/> of the best mean are
/// considered tied, and one of them is returned uniformly at random.
/// </summary>
/// <param name="result">The chosen action; the model's "none" action if no action has defined statistics.</param>
/// <param name="bestActionsCount">Number of actions tied for the best value.</param>
/// <param name="bestValue">The best mean value found; NaN if no action has defined statistics.</param>
public void FindBestAction(out Action_t result, out int bestActionsCount, out double bestValue)
{
    // Tolerance for treating two action values as a tie.
    const double BestValueTolerance = 0.0001;

    var actorId = m_model.rlModelGetActorToAct();
    bestValue = Double.NaN;
    bestActionsCount = 0;

    // Growable buffer of tied-best actions; resized on demand so an unusually
    // large number of ties can no longer overflow a fixed-size array.
    Action_t[] bestActions = new Action_t[16];

    // Wait for any in-flight search thread before reading the trees.
    if (m_searchThread != null)
    {
        m_searchThread.Join();
    }

    // Default result in case no action qualifies.
    m_model.rlModelActionGetNone(out result);

    // For each child of the root node of the actor's search tree
    foreach (var child in m_searchTrees[actorId].m_root.m_childs)
    {
        // For each action recorded in the child node
        foreach (var action in child.Value.m_actions)
        {
            if (!action.m_stats.IsDefined)
            {
                continue;
            }

            double value = action.m_stats.m_mean;
            if (Double.IsNaN(bestValue) || bestValue < value)
            {
                // Strictly better: restart the tie list with this action.
                bestValue = value;
                bestActionsCount = 0;
                bestActions[bestActionsCount++] = action.m_action;
            }
            else if (Math.Abs(bestValue - value) < BestValueTolerance)
            {
                // Near-tie: add to the candidate pool.
                if (bestActionsCount == bestActions.Length)
                {
                    Array.Resize(ref bestActions, bestActions.Length * 2);
                }
                bestActions[bestActionsCount++] = action.m_action;
            }
        }
    }

    // Break ties uniformly at random.
    if (bestActionsCount > 0)
    {
        result = bestActions[RlUtils.random.Next(bestActionsCount)];
    }
}
/// <summary>
/// Generates one simulated episode: copies the real model into the search
/// model, walks each actor down its search tree (switching to random playout
/// once an action's visit count falls below the expand threshold), applies
/// actions until the state is terminal, no action is available, or the maximum
/// game length is reached, and records the per-actor episode history in
/// <c>m_records</c> for later backup. The final record of each actor holds the
/// episode's reward (estimated at a terminal state, evaluated otherwise).
/// </summary>
public void Generate()
{
    int actorId;

    // Copy the original model into the search model
    m_searchModel.rlModelCopy(m_model);
    //m_searchModel = m_model.rlModelClone();
    m_searchModel.rlModelInitES();

    // Track whether each actor has left the tree and is in playout mode
    bool[] playouts = new bool[m_model.rlModelGetActorsCount()];

    // Reset the number of history records for each actor
    Array.Clear(m_recordsCount, 0, m_recordsCount.Length);

    // Track the current node of each actor inside its search tree
    RlNode<StateId_t, Action_t>[] currentNode = new RlNode<StateId_t, Action_t>[m_model.rlModelGetActorsCount()];
    for (actorId = 0; actorId < currentNode.Length; ++actorId)
    {
        currentNode[actorId] = m_searchTrees[actorId].m_root;
        // Sentinel record: the root node with no action (-1) and zero reward.
        m_records[actorId, m_recordsCount[actorId]++] = new RlRecord(currentNode[actorId], -1, 0.0);
    }

    // Play a game
    int gameLength = 0;
    while (!m_searchModel.rlModelStateIsTerminal(m_model, m_searchParams.maxGameLength))
    {
        int actionIndex = -1;
        Action_t action;
        actorId = m_searchModel.rlModelGetActorToAct();

        // Find an action to be applied by the actor
        if (playouts[actorId])
        {
            // Playout phase: pick a uniformly random legal action.
            m_searchModel.rlModelRandomAction(actorId, out action);
        }
        else
        {
            // Tree phase: make sure the current state is in the tree, then
            // select an action by the tree policy.
            m_searchTrees[actorId].AddState(m_searchModel, actorId, ref currentNode[actorId]);
            actionIndex = SelectAction(currentNode[actorId]);
            if (actionIndex >= 0)
            {
                action = currentNode[actorId].m_actions[actionIndex].m_action;
                // Switch to playout once the chosen action has been visited
                // fewer times than the expand threshold.
                playouts[actorId] = currentNode[actorId].m_actions[actionIndex].m_stats.m_count < m_searchParams.expandThreshold;
            }
            else
            {
                m_searchModel.rlModelActionGetNone(out action);
                playouts[actorId] = true;
            }
        }

        // If there is no valid action for the actor, then the search is over
        if (m_searchModel.rlModelActionIsNone(ref action))
        {
            break;
        }

        // Apply the action
        double reward = m_searchModel.rlModelApplyAction(actorId, action);

        // Save the action into the episode history
        m_records[actorId, m_recordsCount[actorId]++] = new RlRecord(currentNode[actorId], actionIndex, reward);
        gameLength++;
    }

    double[] rewards = new double[m_searchModel.rlModelGetActorsCount()];
    if (m_searchModel.rlModelStateIsTerminal(m_model, m_searchParams.maxGameLength))
    {
        // Terminal state: take the model's own estimate of the final rewards
        // and update the running average of estimated game lengths.
        m_searchModel.rlModelEstimate(m_model, ref rewards, m_searchParams.searchModelTag);
        m_search.m_avgEstimateGameLength += (gameLength - m_search.m_avgEstimateGameLength) / ++m_search.m_estimateGames;
    }
    else
    {
        // Non-terminal cutoff: fall back to a heuristic evaluation
        // and update the running average of evaluated game lengths.
        m_searchModel.rlModelEvaluate(actorId, ref rewards);
        m_search.m_avgEvaluateGameLength += (gameLength - m_search.m_avgEvaluateGameLength) / ++m_search.m_evaluateGames;
    }

    // Add a final record to each actor's episode to reflect the final rewards
    for (actorId = 0; actorId < rewards.Length; ++actorId)
    {
        m_records[actorId, m_recordsCount[actorId]++] = new RlRecord(null, -1, rewards[actorId]);
    }
}