Пример #1
0
        /// <summary>
        /// Runs a single search iteration starting at <paramref name="currNode"/>:
        /// selects a leaf with findMostPromisingLeafNode, obtains a score for it and
        /// hands that score to backpropagateScore.
        /// </summary>
        /// <remarks>
        /// If the game at the selected leaf is over, the leaf is scored with
        /// Game.GetScore() and backpropagation starts at the leaf. Otherwise the leaf
        /// is expanded with createChildren, one child is picked via RandomGen2.Next,
        /// the child is scored with simulateRandomPlayout and backpropagation starts
        /// at that child.
        /// </remarks>
        /// <param name="currNode">Node at which leaf selection starts.</param>
        private void RegularMCTSSearch(Node <GameState> currNode)
        {
            /* find the most promising leaf node */
            Node <GameState> leaf = findMostPromisingLeafNode(currNode);

            /* if the leaf node is a game ending state use correct score */
            Game game = new Game(leaf.Value);
            if (game.IsOver())
            {
                float finalScore = game.GetScore();
                backpropagateScore(leaf, finalScore);
                return;
            }

            /* create children of normal leaf */
            createChildren(leaf);

            /* choose a random child and evaluate it with a random playout */
            int chosenChildIndex = RandomGen2.Next(0, leaf.Children.Count);
            Node <GameState> chosenChild = leaf.Children[chosenChildIndex];
            float playoutScore = simulateRandomPlayout(chosenChild);

            /* update the tree with the new score and visit counts */
            backpropagateScore(chosenChild, playoutScore);
        }
Пример #2
0
        /// <summary>
        /// Builds a game from the state held by <paramref name="currNode"/> and keeps
        /// applying moves picked with RandomGen2.Next until Game.IsOver() reports true.
        /// </summary>
        /// <param name="currNode">Node whose Value is used to construct the playout game.</param>
        /// <returns>The value of Game.GetScore() once the playout has finished.</returns>
        private float simulateRandomPlayout(Node <GameState> currNode)
        {
            Game playout = new Game(currNode.Value);

            while (true)
            {
                if (playout.IsOver())
                {
                    return playout.GetScore();
                }

                // Pick one of the currently available moves at a random index and play it.
                List <Tuple <int, int> > candidates = playout.GetMoves();
                int pick = RandomGen2.Next(0, candidates.Count);
                playout.DoMove(candidates[pick]);
            }
        }
Пример #3
0
        /// <summary>
        /// Samples the index of one child of rootNode, with probability proportional to
        /// the per-child weights returned by applyTemperature(rootNode, temperature).
        /// </summary>
        /// <param name="temperature">Forwarded unchanged to applyTemperature.</param>
        /// <returns>
        /// An index into rootNode.Children, or -1 when rootNode has no children.
        /// </returns>
        public int findBestChildVisitCountStochastic(float temperature)
        {
            // NOTE(review): assumes applyTemperature returns one non-negative weight per
            // child, in the same order as rootNode.Children - confirm against its definition.
            List <float> visits   = applyTemperature(rootNode, temperature);
            float        randomNr = RandomGen2.NextFloat();

            int childCount = rootNode.Children.Count;
            if (childCount == 0)
            {
                return(-1);
            }

            float sumVisits = 0.0f;
            foreach (float weight in visits)
            {
                sumVisits += weight;
            }

            // A zero (or NaN) total would turn every probability into NaN, so no
            // comparison below could succeed; pick a child index at random instead.
            if (!(sumVisits > 0.0f))
            {
                return(RandomGen2.Next(0, childCount));
            }

            // Walk the cumulative distribution until it reaches the random threshold.
            float probabilitySum = 0.0f;
            for (int i = 0; i < childCount; ++i)
            {
                probabilitySum += visits[i] / sumVisits;
                if (probabilitySum >= randomNr)
                {
                    return(i);
                }
            }

            // Floating-point rounding can leave the cumulative sum slightly below the
            // threshold; fall back to the last child in that case.
            return(childCount - 1);
        }
Пример #4
0
        /// <summary>
        /// Returns one of the moves reported by <paramref name="game"/>, chosen at an
        /// index obtained from RandomGen2.Next.
        /// </summary>
        /// <param name="game">Game whose GetMoves() result is sampled.</param>
        /// <returns>The move stored at the randomly drawn index.</returns>
        public Tuple <int, int> GetMove(Game game)
        {
            List <Tuple <int, int> > candidates = game.GetMoves();
            int pick = RandomGen2.Next(0, candidates.Count);

            return(candidates[pick]);
        }
Пример #5
0
        /// <summary>
        /// Descends from <paramref name="currNode"/> until a node without children is
        /// reached. At every level the child with the highest score is followed, where
        /// the score is a q-value term plus an exploration term.
        /// </summary>
        /// <remarks>
        /// <para>
        /// q-value term: the child's q_value if it has been visited, otherwise the
        /// negated parent q_value minus Params.FPU_REDUCTION.
        /// </para>
        /// <para>
        /// Exploration term: Params.C_PUCT * prior * sqrt(parent visits + virtual visits)
        /// / (child visits + child virtual visits + 1). The prior is taken from the
        /// parent's nn_policy at the child's moveIndex, or is 1 / child count when
        /// nn_policy is null.
        /// </para>
        /// <para>
        /// When several children share the highest score exactly, one of them is picked
        /// with RandomGen2.Next. addDirichletNoise is called on the starting node only,
        /// and only if its noiseAdded flag is not set.
        /// </para>
        /// </remarks>
        /// <param name="currNode">Node at which the descent starts.</param>
        /// <returns>The first node reached for which HasChild is false.</returns>
        private Node <GameState> findMostPromisingLeafNode(Node <GameState> currNode)
        {
            bool isRootNode = true; // only the node passed in is treated as the search root

            while (currNode.HasChild)
            {
                if (isRootNode && !currNode.noiseAdded)
                {
                    addDirichletNoise(currNode);
                }
                isRootNode = false;

                // Identical for every child of this node, so compute it once per level.
                float parentVisitsSqrt = (float)Math.Sqrt(currNode.visits + currNode.virtualVisits);

                /* find best child node (best UCT value) to expand */
                float      bestUCTScore = float.NegativeInfinity;
                List <int> bestChildren = new List <int>(); // every index sharing bestUCTScore

                // if nn_policy is null then also all children have no nn output, but possibly a score from endgame position
                for (int i = 0; i < currNode.Children.Count; ++i)
                {
                    var child = currNode.Children[i];

                    // q_value (unvisited children get the reduced, negated parent value)
                    float childWinrate = child.visits != 0
                                         ? child.q_value
                                         : -currNode.q_value - Params.FPU_REDUCTION;

                    // prior: policy output if available, otherwise equal for all children
                    // (the policy may be missing because of virtual visits)
                    float prior = currNode.nn_policy != null
                                  ? currNode.nn_policy[child.moveIndex]
                                  : 1.0f / currNode.Children.Count;

                    // exploration
                    float explorationTerm = Params.C_PUCT * prior * parentVisitsSqrt /
                                            (float)(child.visits + child.virtualVisits + 1);

                    float uctScore = childWinrate + explorationTerm;

                    if (uctScore > bestUCTScore)
                    {
                        // New strict maximum: restart the tie list with this child so that
                        // it stays a candidate if later children reach the same score.
                        bestChildren.Clear();
                        bestChildren.Add(i);
                        bestUCTScore = uctScore;
                    }
                    else if (uctScore == bestUCTScore)
                    {
                        bestChildren.Add(i);
                    }
                }

                // -1 is kept when no child qualified (e.g. every score was NaN), so the
                // indexer below fails exactly as it did before for that degenerate case.
                int bestChildIndex = -1;
                if (bestChildren.Count == 1)
                {
                    bestChildIndex = bestChildren[0];
                }
                else if (bestChildren.Count > 1)
                {
                    bestChildIndex = bestChildren[RandomGen2.Next(0, bestChildren.Count)];
                }

                currNode = currNode.Children[bestChildIndex];
            }
            return(currNode);
        }