public RlEpisod(RlSearch<StateId_t, Action_t> search,
                RlModel<StateId_t, Action_t> model,
                RlTree<StateId_t, Action_t>[] searchTrees,
                RlSearchParams searchParams)
{
    m_search = search;
    m_model = model;
    // Search on a private clone so the original model is never mutated
    m_searchModel = m_model.rlModelClone();
    m_searchTrees = searchTrees;
    // One row of records per actor, padded a little beyond the maximum game length
    m_records = new RlRecord[m_model.rlModelGetActorsCount(), searchParams.maxGameLength + 10];
    m_recordsCount = new int[m_model.rlModelGetActorsCount()];
    m_searchParams = searchParams;
    //m_searchParams.biasTermConstant = Math.Sqrt(Math.Log(maxGames));
}
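
// The RlRecord type itself is not shown in this section. Inferred purely from the
// three-argument constructor calls in this file (a tree node, an action index, a
// reward), its minimal shape is plausibly the sketch below; the field names are
// assumptions, and it is presumably nested inside the episode class so it can use
// StateId_t and Action_t without being generic itself:
//
//   public class RlRecord
//   {
//       public RlNode<StateId_t, Action_t> m_node;  // node visited at this step (null for the final reward record)
//       public int m_actionIndex;                   // index into m_node.m_actions; -1 during playouts and for the final record
//       public double m_reward;                     // immediate reward observed at this step
//
//       public RlRecord(RlNode<StateId_t, Action_t> node, int actionIndex, double reward)
//       {
//           m_node = node;
//           m_actionIndex = actionIndex;
//           m_reward = reward;
//       }
//   }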
// Thread-based search
public void Search(RlSearchParams searchParams)
{
    m_searchParams = searchParams;
    m_searchThread = new Thread(Start);

    // Build a fresh search tree for each actor
    int actorsCount = m_model.rlModelGetActorsCount();
    m_searchTrees = new RlTree<StateId_t, Action_t>[actorsCount];
    for (int index = 0; index < m_searchTrees.Length; ++index)
    {
        m_searchTrees[index] = new RlTree<StateId_t, Action_t>();
    }

    // Start the search thread
    m_running = true;
    m_searchThread.Start();
}
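
// The Start entry point handed to the Thread constructor above is not shown in
// this section. A plausible minimal sketch, assuming the thread simply keeps
// generating episodes until m_running is cleared (both the loop body and the
// stopping condition are assumptions):
//
//   private void Start()
//   {
//       while (m_running)
//       {
//           Generate();
//       }
//   }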
public void Generate()
{
    int actorId;

    // Copy the original model into the search model
    m_searchModel.rlModelCopy(m_model);
    //m_searchModel = m_model.rlModelClone();
    m_searchModel.rlModelInitES();

    // Track whether the actor is in playout mode
    bool[] playouts = new bool[m_model.rlModelGetActorsCount()];

    // Track the number of records for each actor
    Array.Clear(m_recordsCount, 0, m_recordsCount.Length);

    // Track the current node of each actor into the tree
    RlNode<StateId_t, Action_t>[] currentNode = new RlNode<StateId_t, Action_t>[m_model.rlModelGetActorsCount()];
    for (actorId = 0; actorId < currentNode.Length; ++actorId)
    {
        currentNode[actorId] = m_searchTrees[actorId].m_root;
        m_records[actorId, m_recordsCount[actorId]++] = new RlRecord(currentNode[actorId], -1, 0.0);
    }

    // Play a game
    int gameLength = 0;
    while (!m_searchModel.rlModelStateIsTerminal(m_model, m_searchParams.maxGameLength))
    {
        int actionIndex = -1;
        Action_t action;
        actorId = m_searchModel.rlModelGetActorToAct();

        // Find an action to be applied by the actor
        if (playouts[actorId])
        {
            m_searchModel.rlModelRandomAction(actorId, out action);
        }
        else
        {
            m_searchTrees[actorId].AddState(m_searchModel, actorId, ref currentNode[actorId]);
            actionIndex = SelectAction(currentNode[actorId]);
            if (actionIndex >= 0)
            {
                action = currentNode[actorId].m_actions[actionIndex].m_action;
                // Switch to playout mode once the selected action has too few visits to keep expanding
                playouts[actorId] = currentNode[actorId].m_actions[actionIndex].m_stats.m_count < m_searchParams.expandThreshold;
            }
            else
            {
                m_searchModel.rlModelActionGetNone(out action);
                playouts[actorId] = true;
            }
        }

        // If there is no valid action for the actor, then the search is over
        if (m_searchModel.rlModelActionIsNone(ref action))
        {
            break;
        }

        // Apply the action
        double reward = m_searchModel.rlModelApplyAction(actorId, action);

        // Save the action into the episode history
        m_records[actorId, m_recordsCount[actorId]++] = new RlRecord(currentNode[actorId], actionIndex, reward);
        gameLength++;
    }

    double[] rewards = new double[m_searchModel.rlModelGetActorsCount()];
    if (m_searchModel.rlModelStateIsTerminal(m_model, m_searchParams.maxGameLength))
    {
        m_searchModel.rlModelEstimate(m_model, ref rewards, m_searchParams.searchModelTag);
        m_search.m_avgEstimateGameLength += (gameLength - m_search.m_avgEstimateGameLength) / ++m_search.m_estimateGames;
    }
    else
    {
        m_searchModel.rlModelEvaluate(actorId, ref rewards);
        m_search.m_avgEvaluateGameLength += (gameLength - m_search.m_avgEvaluateGameLength) / ++m_search.m_evaluateGames;
    }

    // Add a final record to the episode to reflect the final rewards
    for (actorId = 0; actorId < rewards.Length; ++actorId)
    {
        m_records[actorId, m_recordsCount[actorId]++] = new RlRecord(null, -1, rewards[actorId]);
    }
}
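
// SelectAction is not shown in this section. Given the commented-out
// biasTermConstant = Math.Sqrt(Math.Log(maxGames)) hint in the constructor, a
// UCB1-style selection rule is one plausible sketch. Here m_actions and
// m_stats.m_count are taken from Generate above, while m_stats.m_mean and the
// use of m_searchParams.biasTermConstant as the exploration constant are
// assumptions:
//
//   private int SelectAction(RlNode<StateId_t, Action_t> node)
//   {
//       if (node.m_actions == null || node.m_actions.Length == 0)
//       {
//           return -1; // no actions in the tree: the caller falls back to a playout
//       }
//
//       // Total visits over all actions of this node (the "parent" count in UCB1)
//       int parentCount = 0;
//       for (int index = 0; index < node.m_actions.Length; ++index)
//       {
//           parentCount += node.m_actions[index].m_stats.m_count;
//       }
//
//       int best = -1;
//       double bestValue = double.NegativeInfinity;
//       for (int index = 0; index < node.m_actions.Length; ++index)
//       {
//           var stats = node.m_actions[index].m_stats;
//           // Unvisited actions get infinite priority; otherwise mean value plus the UCB exploration bias
//           double value = stats.m_count == 0
//               ? double.PositiveInfinity
//               : stats.m_mean + m_searchParams.biasTermConstant * Math.Sqrt(Math.Log(parentCount) / stats.m_count);
//           if (value > bestValue)
//           {
//               bestValue = value;
//               best = index;
//           }
//       }
//       return best;
//   }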