/// <summary>
/// Runs one frame's worth of MCTS, cycling round-robin over the trees rooted at
/// <c>InitialNodes</c>, and reports the best first action once the search is over.
/// </summary>
/// <returns>
/// The action returned by <c>CheckAlwaysBestAction</c> when it is non-null; otherwise
/// <c>null</c> when the per-frame iteration budget is exhausted (search continues on the
/// next call, <c>InProgress</c> is left untouched) or when no best child exists; otherwise
/// the action of <c>BestFirstChild</c>.
/// </returns>
public GOB.Action Run()
{
    // Default actions that should always be taken
    var alwaysBestAction = CheckAlwaysBestAction(InitialNodes[0]);
    if (alwaysBestAction != null)
    {
        return alwaysBestAction;
    }

    var startTime = Time.realtimeSinceStartup;
    this.CurrentIterationsInFrame = 0;
    int currentMCTS = 0;

    while (CurrentIterations < MaxIterations)
    {
        if (CurrentIterationsInFrame >= MaxIterationsProcessedPerFrame)
        {
            // Frame budget used up: record the time spent and resume on the next call.
            TotalProcessingTime += Time.realtimeSinceStartup - startTime;
            return null;
        }

        MCTSNode root = InitialNodes[currentMCTS];
        MCTSNode selectedNode = Selection(root);
        if (selectedNode == root)
        {
            // Selection handed back the root itself; stop searching.
            break;
        }

        Reward reward = Playout(selectedNode.State);
        Backpropagate(selectedNode, reward);

        CurrentIterationsInFrame++;
        CurrentIterations++;

        // Advance to the next tree, wrapping after NumberOfRuns trees.
        currentMCTS++;
        if (currentMCTS == NumberOfRuns)
        {
            currentMCTS = 0;
        }
    }

    BestFirstChild = BestChildFromSeveral(InitialNodes);

    // Rebuild the best action sequence by repeatedly following BestChild.
    BestActionSequence.Clear();
    MCTSNode child = BestFirstChild;
    while (child != null)
    {
        BestActionSequence.Add(child.Action);
        child = BestChild(child);
    }

    InProgress = false;

    // Account for the elapsed time on every exit path, including the one where
    // no best child was found (previously that path dropped the measurement).
    TotalProcessingTime += Time.realtimeSinceStartup - startTime;

    return BestFirstChild != null ? BestFirstChild.Action : null;
}
/// <summary>
/// Builds a fresh tree from a child of <c>CurrentStateWorldModel</c>, grows it in
/// <c>PlayoutIterations</c> consecutive runs that share the per-frame iteration budget,
/// samples each root child's Q/N after every run, and picks the child with the best
/// average sample via <c>BestAverageChild</c>.
/// </summary>
/// <remarks>
/// Each run performs <c>MaxIterationsProcessedPerFrame / runs</c> iterations. The previous
/// version compared the shared frame counter against that quota without resetting it, so
/// only the first run did any work and the averaging had no effect.
/// A non-positive <c>PlayoutIterations</c> is treated as a single run (it used to divide by zero
/// when it was 0).
/// </remarks>
/// <returns>The action of the selected root child, or <c>null</c> when there is none.</returns>
public GOB.Action Run()
{
    var startTime = Time.realtimeSinceStartup;
    this.CurrentIterationsInFrame = 0;

    int runs = this.PlayoutIterations > 0 ? this.PlayoutIterations : 1;
    int iterationsPerRun = this.MaxIterationsProcessedPerFrame / runs;

    MCTSNode root = new MCTSNode(this.CurrentStateWorldModel.GenerateChildWorldModel());

    // Per root child: running sum of Q/N samples and how many samples were taken.
    List<float> sums = new List<float>();
    List<int> sampleCounts = new List<int>();

    for (int run = 0; run < runs; run++)
    {
        for (int iteration = 0; iteration < iterationsPerRun; iteration++)
        {
            MCTSNode newNode = Selection(root);
            Reward reward = Playout(newNode.State);
            Backpropagate(newNode, reward);
            this.CurrentIterationsInFrame++;
        }

        for (int j = 0; j < root.ChildNodes.Count; j++)
        {
            if (sums.Count <= j)
            {
                // Child j appeared during this run; start tracking it.
                sums.Add(0f);
                sampleCounts.Add(0);
            }

            MCTSNode child = root.ChildNodes[j];
            if (child.N > 0)
            {
                // Skip unvisited children so Q/N never divides by zero.
                sums[j] += child.Q / child.N;
                sampleCounts[j]++;
            }
        }
    }

    // Average over the runs in which each child actually had a sample, so children
    // that were expanded late are not penalised by runs they did not exist in.
    List<float> results = new List<float>(sums.Count);
    for (int j = 0; j < sums.Count; j++)
    {
        results.Add(sampleCounts[j] > 0 ? sums[j] / sampleCounts[j] : float.NegativeInfinity);
    }

    this.TotalProcessingTime += Time.realtimeSinceStartup - startTime;
    this.InProgress = false;

    MCTSNode best = BestAverageChild(root, results);

    BestActionSequence.Clear();
    GOB.Action bestAction = best != null ? best.Action : null;
    if (bestAction != null)
    {
        BestActionSequence.Add(bestAction);
    }

    return bestAction;
}
/// <summary>
/// Chooses the child of <paramref name="node"/> that is reported as the final result of the
/// search, scoring each child according to the current <c>strategy</c>:
/// Max = Q/N, Robust = N, MaxRobust = Q/N + N,
/// Secure = Q/N - (C * sqrt(parent N)) / sqrt(child N).
/// </summary>
/// <remarks>
/// Side effect: <c>BestActionSequence</c> is cleared when the chosen child has no action.
/// </remarks>
/// <param name="node">Parent node whose children are compared.</param>
/// <returns>The highest-scoring child, or <c>null</c> when the node has no children.</returns>
private MCTSNode BestChild(MCTSNode node)
{
    if (node.ChildNodes.Count == 0)
    {
        return null;
    }

    // Numerator of the penalty term used by the Secure strategy.
    float securePenaltyScale = C * (float)Math.Sqrt(node.N);

    MCTSNode winner = null;
    float winnerScore = 0;

    foreach (MCTSNode candidate in node.ChildNodes)
    {
        // Stays 0 for any strategy value not handled below.
        float score = 0;

        switch (strategy)
        {
            case BestStrategy.Max:
                score = candidate.Q / candidate.N;
                break;

            case BestStrategy.Robust:
                score = candidate.N;
                break;

            case BestStrategy.MaxRobust:
                score = candidate.Q / candidate.N + candidate.N;
                break;

            case BestStrategy.Secure:
                score = candidate.Q / candidate.N - securePenaltyScale / Mathf.Sqrt(candidate.N);
                break;
        }

        // The first child is always accepted; later ones must strictly beat the leader.
        if (winner == null || score > winnerScore)
        {
            winner = candidate;
            winnerScore = score;
        }
    }

    if (winner == null || winner.Action == null)
    {
        BestActionSequence.Clear();
    }

    return winner;
}
/// <summary>
/// Returns the child of <paramref name="node"/> whose entry in <paramref name="results"/>
/// is the largest, where <c>results[i]</c> is the score of <c>node.ChildNodes[i]</c>.
/// </summary>
/// <remarks>
/// NaN scores are skipped: a NaN stored as the running best would make every later
/// <c>&gt;</c> comparison false and lock in that child. If every score is NaN the first
/// child is returned, matching the previous behaviour. Entries of
/// <paramref name="results"/> beyond the number of children are ignored instead of
/// indexing out of range.
/// Side effect: <c>BestActionSequence</c> is cleared when no child is chosen or the chosen
/// child has no action.
/// </remarks>
/// <param name="node">Parent node whose children are ranked.</param>
/// <param name="results">Per-child scores, index-aligned with <c>node.ChildNodes</c>.</param>
/// <returns>The best-scoring child, or <c>null</c> when there is nothing to choose from.</returns>
private MCTSNode BestAverageChild(MCTSNode node, List<float> results)
{
    MCTSNode bestChild = null;
    MCTSNode fallbackChild = null;
    float bestReward = 0;

    for (int i = 0; i < results.Count && i < node.ChildNodes.Count; i++)
    {
        MCTSNode child = node.ChildNodes[i];
        float newReward = results[i];

        if (fallbackChild == null)
        {
            fallbackChild = child;
        }

        if (float.IsNaN(newReward))
        {
            // Not comparable; never let it become the running best.
            continue;
        }

        if (bestChild == null || newReward > bestReward)
        {
            bestChild = child;
            bestReward = newReward;
        }
    }

    if (bestChild == null)
    {
        // Every score was NaN (or there were none): fall back to the first child, if any.
        bestChild = fallbackChild;
    }

    if (bestChild == null || bestChild.Action == null)
    {
        BestActionSequence.Clear();
    }

    return bestChild;
}
/// <summary>
/// Advances the search rooted at <c>InitialNode</c> by at most
/// <c>MaxIterationsProcessedPerFrame</c> iterations (and never past <c>MaxIterations</c>
/// in total), then rebuilds <c>BestActionSequence</c> from the current tree.
/// </summary>
/// <remarks>
/// <c>InProgress</c> is set to <c>false</c> once <c>CurrentIterations</c> reaches
/// <c>MaxIterations</c>. <c>TotalProcessingTime</c> is increased by the wall-clock time
/// spent in the search loop of this call, rather than by the frame's <c>Time.deltaTime</c>.
/// </remarks>
/// <returns>
/// The action of the best child of the root, or <c>null</c> when the root has no best child.
/// </returns>
public GOB.Action Run()
{
    var startTime = Time.realtimeSinceStartup;
    this.CurrentIterationsInFrame = 0;
    MCTSNode initialNode = this.InitialNode;

    while (CurrentIterations < MaxIterations && CurrentIterationsInFrame < MaxIterationsProcessedPerFrame)
    {
        MCTSNode selectedNode = Selection(initialNode);
        Reward reward = Playout(selectedNode.State);
        Backpropagate(selectedNode, reward);

        CurrentIterations++;
        CurrentIterationsInFrame++;
    }

    if (CurrentIterations >= MaxIterations)
    {
        InProgress = false;
    }

    // Measure what this call actually spent searching; Time.deltaTime is the duration of
    // the whole previous frame and is unrelated to the work done here.
    TotalProcessingTime += Time.realtimeSinceStartup - startTime;

    BestActionSequence.Clear();
    BestFirstChild = BestChild(initialNode);
    if (BestFirstChild == null)
    {
        return null;
    }

    // Follow the best child at every level to build the full action sequence.
    MCTSNode child = BestFirstChild;
    while (child != null)
    {
        BestActionSequence.Add(child.Action);
        child = BestChild(child);
    }

    return BestFirstChild.Action;
}