Example #1
0
        /// <summary>
        /// Runs repeated playouts starting at <paramref name="currentNode"/> until a playout-count
        /// or time budget is exhausted, then returns the option chosen by <c>GetBestOption</c> for <c>root</c>.
        /// </summary>
        /// <param name="currentNode">Node that every playout starts from.</param>
        /// <param name="accumulativeMillisecond">
        /// Unix-millisecond timestamp that the per-turn budget (<c>maxTurnCalculateTime</c>) is measured from
        /// (presumably the moment the current turn's calculation began; confirm against the caller).
        /// </param>
        /// <returns>
        /// The first entry of the root's current-player options when <c>root</c> reports that the turn has
        /// ended or the game is over; otherwise the option returned by <c>GetBestOption</c>.
        /// </returns>
        public PlayerTask Simulate(UCTNode currentNode, long accumulativeMillisecond)
        {
            // Nothing to search when the turn or the game has already finished: hand back the first option.
            if (root.IsCurrentPlayerTurnEnd() || root.IsGameOver())
            {
                return(root.currentPOGame.CurrentPlayer.Options()[0]);
            }

            int  onePlayoutTimes   = 0;
            long startMilliseconds = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();

            if (_debug_)
            {
                Log.Instance().Append(currentNode.FullPrint());
            }

            while (onePlayoutTimes < maxOnePlayOutTimes)
            {
                // Sample the clock once per iteration so both budgets are checked against the same instant.
                long now                = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
                bool withinOptionBudget = now - startMilliseconds < this.maxOptionCalculateTime;
                bool withinTurnBudget   = now - accumulativeMillisecond < this.maxTurnCalculateTime;
                if (!withinOptionBudget || !withinTurnBudget)
                {
                    break;
                }

                UCTNode currentPlayerLeafNode = OnePlayOut(currentNode, availableOptionTypes);

                if (_debug_)
                {
                    Log.Instance().Append(currentPlayerLeafNode.FullPathPrint());
                    Log.Instance().Append(currentPlayerLeafNode.FullPrint());
                }

                double playoutReward = EvaluateUCTNode(currentPlayerLeafNode, maxTurnCalculateTime);

                if (_debug_)
                {
                    Log.Instance().Append("reward : " + playoutReward.ToString());
                }

                UpdateReward(currentPlayerLeafNode, playoutReward);

                onePlayoutTimes++;
            }

            // NOTE(review): playouts start at currentNode, but the termination checks above and the final
            // evaluation/selection below use root. Confirm this asymmetry is intended.
            double     rootReward = EvaluateUCTNode(root, maxTurnCalculateTime);
            PlayerTask bestOption = GetBestOption(root, rootReward);

            // Remember the largest number of playouts completed by any single call.
            if (onePlayoutTimes > maxRealOnePlayOutTimes)
            {
                maxRealOnePlayOutTimes = onePlayoutTimes;
            }

            Debug.Assert(bestOption != null);
            return(bestOption);
        }