protected int SelectAction(RlNode<StateId_t, Action_t> node)
{
    if (node == null || node.m_actions.Length == 0)
    {
        return -1;
    }

    // An unvisited node has no statistics yet; pick a uniformly random action
    if (node.m_stats.m_count == 0)
    {
        return RlUtils.random.Next(node.m_actions.Length);
    }

    // Otherwise pick the action with the highest upper confidence bound
    double countLog = Math.Log(node.m_stats.m_count);
    double bestUpperBound = 0.0;
    int bestAction = -1;

    for (int index = 0; index < node.m_actions.Length; ++index)
    {
        double bound = getBoundLP(countLog, ref node.m_actions[index].m_stats);
        if (bestAction < 0 || bound > bestUpperBound)
        {
            bestAction = index;
            bestUpperBound = bound;
        }
    }

    return bestAction;
}
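// NOTE: SelectAction calls getBoundLP, which is not defined in this excerpt.
// Below is a minimal sketch assuming the standard UCB1 bound (mean reward plus
// an exploration term that grows with the parent's log visit count). The stats
// type name RlStats and its fields m_count / m_totalReward are assumptions
// reconstructed from usage; the project's actual bound may differ.
protected double getBoundLP(double countLog, ref RlStats stats)
{
    if (stats.m_count == 0)
    {
        // Unvisited actions get an effectively infinite bound so they are tried first
        return double.MaxValue;
    }

    double mean = stats.m_totalReward / stats.m_count;               // exploitation term
    double exploration = Math.Sqrt(2.0 * countLog / stats.m_count);  // exploration term
    return mean + exploration;
}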
public bool AddChild(RlModel<StateId_t, Action_t> model, int actorId, out RlNode<StateId_t, Action_t> result)
{
    StateId_t stateId = model.rlModelStateGetId();

    // If the state is already in the tree, reuse the existing node
    if (m_childs.TryGetValue(stateId, out result))
    {
        return false;
    }

    // Otherwise create a new node for the state and register it
    result = new RlNode<StateId_t, Action_t>(model, actorId);
    m_childs.Add(stateId, result);
    return true;
}
public void Generate()
{
    int actorId;

    // Copy the original model into the search model
    m_searchModel.rlModelCopy(m_model);
    //m_searchModel = m_model.rlModelClone();
    m_searchModel.rlModelInitES();

    // Track whether each actor is in playout mode
    bool[] playouts = new bool[m_model.rlModelGetActorsCount()];

    // Track the number of records for each actor
    Array.Clear(m_recordsCount, 0, m_recordsCount.Length);

    // Track the current node of each actor in the tree
    RlNode<StateId_t, Action_t>[] currentNode = new RlNode<StateId_t, Action_t>[m_model.rlModelGetActorsCount()];
    for (actorId = 0; actorId < currentNode.Length; ++actorId)
    {
        currentNode[actorId] = m_searchTrees[actorId].m_root;
        m_records[actorId, m_recordsCount[actorId]++] = new RlRecord(currentNode[actorId], -1, 0.0);
    }

    // Play a game
    int gameLength = 0;
    while (!m_searchModel.rlModelStateIsTerminal(m_model, m_searchParams.maxGameLength))
    {
        int actionIndex = -1;
        Action_t action;
        actorId = m_searchModel.rlModelGetActorToAct();

        // Find an action to be applied by the actor
        if (playouts[actorId])
        {
            // Playout phase: pick a random action outside the tree
            m_searchModel.rlModelRandomAction(actorId, out action);
        }
        else
        {
            // Tree phase: descend to (or create) the node for the current state,
            // then select an action by its upper confidence bound
            m_searchTrees[actorId].AddState(m_searchModel, actorId, ref currentNode[actorId]);
            actionIndex = SelectAction(currentNode[actorId]);
            if (actionIndex >= 0)
            {
                action = currentNode[actorId].m_actions[actionIndex].m_action;
                // Switch to playout mode once the selected action's visit count
                // falls below the expansion threshold
                playouts[actorId] = currentNode[actorId].m_actions[actionIndex].m_stats.m_count < m_searchParams.expandThreshold;
            }
            else
            {
                m_searchModel.rlModelActionGetNone(out action);
                playouts[actorId] = true;
            }
        }

        // If there is no valid action for the actor, the search is over
        if (m_searchModel.rlModelActionIsNone(ref action))
        {
            break;
        }

        // Apply the action
        double reward = m_searchModel.rlModelApplyAction(actorId, action);

        // Save the action in the episode history
        m_records[actorId, m_recordsCount[actorId]++] = new RlRecord(currentNode[actorId], actionIndex, reward);
        gameLength++;
    }

    double[] rewards = new double[m_searchModel.rlModelGetActorsCount()];
    if (m_searchModel.rlModelStateIsTerminal(m_model, m_searchParams.maxGameLength))
    {
        // The game reached a terminal state: estimate the final rewards
        m_searchModel.rlModelEstimate(m_model, ref rewards, m_searchParams.searchModelTag);
        m_search.m_avgEstimateGameLength += (gameLength - m_search.m_avgEstimateGameLength) / ++m_search.m_estimateGames;
    }
    else
    {
        // The game ended without reaching a terminal state (no valid action):
        // evaluate the current state instead
        m_searchModel.rlModelEvaluate(actorId, ref rewards);
        m_search.m_avgEvaluateGameLength += (gameLength - m_search.m_avgEvaluateGameLength) / ++m_search.m_evaluateGames;
    }

    // Add a final record to each episode to reflect the final rewards
    for (actorId = 0; actorId < rewards.Length; ++actorId)
    {
        m_records[actorId, m_recordsCount[actorId]++] = new RlRecord(null, -1, rewards[actorId]);
    }
}
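// A hypothetical driver showing how Generate() is typically used in this kind
// of MCTS-style search: each call plays one simulated episode, after which the
// recorded per-actor episodes (m_records) are backed up into the search trees.
// The names Run and Backpropagate are assumptions for illustration; only
// Generate() appears in this excerpt, and the actual backup step is not shown.
public void Run(int episodes)
{
    for (int episode = 0; episode < episodes; ++episode)
    {
        // Play one episode: tree phase, playout phase, and final reward records
        Generate();

        // Back up the recorded rewards into the per-actor search trees
        // (assumed; the backup method is not part of this excerpt)
        Backpropagate();
    }
}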
public RlRecord(RlNode<StateId_t, Action_t> node, int action, double reward)
{
    m_node = node;
    m_action = action;
    m_reward = reward;
}
public RlNode<StateId_t, Action_t> AddState(RlModel<StateId_t, Action_t> model, int actorId, ref RlNode<StateId_t, Action_t> currentNode)
{
    // Start from the root when no current node is given
    if (currentNode == null)
    {
        currentNode = m_root;
    }

    // Descend to the child for the model's current state, creating it if needed
    if (currentNode.AddChild(model, actorId, out currentNode))
    {
        m_nodesCount++;
    }
    return currentNode;
}
public RlTree()
{
    m_root = new RlNode<StateId_t, Action_t>(this);
    m_nodesCount++;
}
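// For reference, the methods above imply a node layout roughly like the sketch
// below. It is reconstructed from usage (m_childs, m_actions, m_stats, m_action)
// and is an assumption, not the project's actual declarations; the names
// RlNodeSketch, ActionEntry, and RlStats are hypothetical.
public class RlNodeSketch<StateId_t, Action_t>
{
    public struct ActionEntry
    {
        public Action_t m_action;  // the action itself
        public RlStats m_stats;    // per-action visit count and accumulated reward
    }

    // Children keyed by the id of the state they represent; AddChild reuses an
    // existing node when the same state is reached again (transpositions)
    public Dictionary<StateId_t, RlNodeSketch<StateId_t, Action_t>> m_childs =
        new Dictionary<StateId_t, RlNodeSketch<StateId_t, Action_t>>();

    // The actions available in this state, each paired with its own statistics
    public ActionEntry[] m_actions;

    // Aggregate visit statistics for the node itself
    public RlStats m_stats;
}

public struct RlStats
{
    public int m_count;           // number of visits
    public double m_totalReward;  // sum of observed rewards
}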