/// <summary>
/// Runs one search iteration: descends to a leaf via findMostPromisingLeafNode,
/// scores it (directly if the game is over there, otherwise by expanding it and
/// running a random playout from one randomly picked child), and hands the score
/// to backpropagateScore.
/// </summary>
/// <param name="currNode">Node at which the descent starts.</param>
private void RegularMCTSSearch(Node<GameState> currNode)
{
    /* find the most promising leaf node */
    currNode = findMostPromisingLeafNode(currNode);

    float score;
    Game game = new Game(currNode.Value);
    if (game.IsOver())
    {
        /* the leaf is a game ending state: use its actual score */
        score = game.GetScore();
    }
    else
    {
        /* create children of normal leaf */
        createChildren(currNode);

        /* choose a random child and evaluate it with a random playout */
        int randomChildIndex = RandomGen2.Next(0, currNode.Children.Count);
        currNode = currNode.Children[randomChildIndex];
        score = simulateRandomPlayout(currNode);
    }

    /* update the tree with the new score and visit counts, starting at the evaluated node */
    backpropagateScore(currNode, score);
}
/// <summary>
/// Plays randomly chosen moves from the state stored in <paramref name="currNode"/>
/// until the game reports that it is over, then returns the game's score.
/// The playout runs on a separate Game built from the node's state.
/// </summary>
/// <param name="currNode">Node whose state is the starting position of the playout.</param>
/// <returns>The value of GetScore() once the playout has ended.</returns>
private float simulateRandomPlayout(Node<GameState> currNode)
{
    Game playout = new Game(currNode.Value);

    while (true)
    {
        if (playout.IsOver())
        {
            return playout.GetScore();
        }

        List<Tuple<int, int>> legalMoves = playout.GetMoves();
        int pick = RandomGen2.Next(0, legalMoves.Count);
        playout.DoMove(legalMoves[pick]);
    }
}
/// <summary>
/// Samples the index of one of the root's children at random, with probability
/// proportional to that child's temperature-adjusted visit weight as returned by
/// applyTemperature.
/// </summary>
/// <param name="temperature">Temperature forwarded to applyTemperature.</param>
/// <returns>
/// Index into rootNode.Children of the sampled child. Falls back to
/// rootNode.Children.Count - 1 when the running probability sum never reaches the
/// random draw (rounding, or a zero / non-finite weight total); that fallback is -1
/// when the root has no children.
/// </returns>
public int findBestChildVisitCountStochastic(float temperature)
{
    // NOTE(review): assumes applyTemperature returns one non-negative weight per
    // root child, in the same order as rootNode.Children — confirm against its definition.
    List<float> weights = applyTemperature(rootNode, temperature);
    int childCount = rootNode.Children.Count;

    // Normalise by the weights' own sum so the result is a distribution whether or
    // not applyTemperature already normalised them.
    float totalWeight = 0.0f;
    for (int i = 0; i < childCount; ++i)
    {
        totalWeight += weights[i];
    }

    float randomNr = RandomGen2.NextFloat();
    float probabilitySum = 0.0f;
    for (int i = 0; i < childCount; ++i)
    {
        probabilitySum += weights[i] / totalWeight;
        if (probabilitySum >= randomNr)
        {
            return i;
        }
    }

    // Cumulative sum fell short of the draw: keep the last child.
    return childCount - 1;
}
/// <summary>
/// Picks one of the moves reported by <paramref name="game"/> at a random index.
/// </summary>
/// <param name="game">Game whose GetMoves() result is sampled.</param>
/// <returns>The move at the index drawn from RandomGen2.Next(0, number of moves).</returns>
public Tuple<int, int> GetMove(Game game)
{
    List<Tuple<int, int>> legalMoves = game.GetMoves();
    int pick = RandomGen2.Next(0, legalMoves.Count);
    return legalMoves[pick];
}
/// <summary>
/// Walks down the tree from <paramref name="currNode"/>, at every level moving to the
/// child with the highest selection score (value term plus exploration term), until a
/// node without children is reached. Children that tie exactly for the highest score
/// are chosen between at random.
/// </summary>
/// <param name="currNode">
/// Node where the descent starts. If it has children and its noiseAdded flag is not
/// set, addDirichletNoise is called on it before the first selection step.
/// </param>
/// <returns>The childless node at which the descent stopped.</returns>
private Node<GameState> findMostPromisingLeafNode(Node<GameState> currNode)
{
    bool isRootNode = true; // only the node the descent starts from is treated as the search root
    List<int> tiedChildren = new List<int>(); // indices of all children sharing the best score

    while (currNode.HasChild)
    {
        if (isRootNode && !currNode.noiseAdded)
        {
            addDirichletNoise(currNode);
        }
        isRootNode = false;

        int childCount = currNode.Children.Count;

        // Same for every child of this node, so compute it once per level.
        float sqrtParentVisits = (float)Math.Sqrt(currNode.visits + currNode.virtualVisits);

        /* find best child node (best UCT value) to expand */
        float bestUCTScore = float.NegativeInfinity;
        int bestChildIndex = -1;
        tiedChildren.Clear();

        // if nn_policy is null then also all children have no nn output,
        // but possibly a score from an endgame position
        for (int i = 0; i < childCount; ++i)
        {
            var child = currNode.Children[i];

            // value term: the child's own q_value once visited, otherwise the negated
            // parent q_value lowered by the first-play-urgency reduction
            float childWinrate;
            if (child.visits != 0)
            {
                childWinrate = child.q_value;
            }
            else
            {
                childWinrate = -currNode.q_value - Params.FPU_REDUCTION;
            }

            // prior: policy output when available, otherwise assume the policy is equal
            // for all children (not found yet, because of virtual visits)
            float prior;
            if (currNode.nn_policy != null)
            {
                prior = currNode.nn_policy[child.moveIndex];
            }
            else
            {
                prior = 1.0f / childCount;
            }

            // exploration term
            float explorationTerm = Params.C_PUCT * prior * sqrtParentVisits
                / (float)(child.visits + child.virtualVisits + 1);

            float uctScore = childWinrate + explorationTerm;
            if (uctScore > bestUCTScore)
            {
                bestUCTScore = uctScore;
                bestChildIndex = i;
                // The new leader is itself a tie candidate; leaving it out would mean
                // it could never be picked once a later child matches its score.
                tiedChildren.Clear();
                tiedChildren.Add(i);
            }
            else if (uctScore == bestUCTScore)
            {
                // exact comparison is intended: it catches children with identical inputs
                tiedChildren.Add(i);
            }
        }

        if (tiedChildren.Count > 1)
        {
            bestChildIndex = tiedChildren[RandomGen2.Next(0, tiedChildren.Count)];
        }
        else if (tiedChildren.Count == 1)
        {
            bestChildIndex = tiedChildren[0];
        }

        currNode = currNode.Children[bestChildIndex];
    }

    return currNode;
}