Beispiel #1
0
        /// <summary>
        /// Returns the options of the current player in the game state held by
        /// <paramref name="currentNode"/> whose task type is contained in
        /// <paramref name="availableOptionTypes"/>.
        /// </summary>
        /// <param name="currentNode">Node whose <c>currentPOGame</c> supplies the candidate options.</param>
        /// <param name="availableOptionTypes">Task types that pass the filter.</param>
        /// <param name="playCards">Forwarded unchanged to <c>CurrentPlayer.Options</c>.</param>
        /// <returns>
        /// A new list with the matching options in their original order. The list is empty
        /// when the node carries no game state.
        /// </returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="currentNode"/> is null.</exception>
        public List <PlayerTask> AvailableOptions(UCTNode currentNode, List <PlayerTaskType> availableOptionTypes, bool playCards = true)
        {
            if (currentNode == null)
            {
                // A null node has nothing to print; dereferencing it for diagnostics
                // would itself fail, so report the problem explicitly instead.
                Log.Instance().Append("AvailableOptions: currentNode is null");
                throw new System.ArgumentNullException(nameof(currentNode));
            }

            if (currentNode.currentPOGame == null)
            {
                // Record where in the tree the game state went missing, then report
                // "no options" rather than dereferencing the null game below.
                Log.Instance().Append(currentNode.FullPathPrint());
                Log.Instance().Append(currentNode.FullPrint());
                return new List<PlayerTask>();
            }

            List<PlayerTask> options = currentNode.currentPOGame.CurrentPlayer.Options(playCards);

            // FindAll always returns a fresh list, so the caller never aliases `options`.
            return options.FindAll(option => availableOptionTypes.Contains(option.PlayerTaskType));
        }
Beispiel #2
0
        /// <summary>
        /// Runs playouts from <paramref name="currentNode"/> until an iteration or time
        /// budget is exhausted, then asks <c>GetBestOption</c> for the option to play at <c>root</c>.
        /// </summary>
        /// <param name="currentNode">Node passed to <c>OnePlayOut</c> on every iteration.</param>
        /// <param name="accumulativeMillisecond">
        /// Unix-millisecond value subtracted from the current time and compared against
        /// <c>maxTurnCalculateTime</c>.
        /// NOTE(review): presumably the timestamp at which this turn's computation started; confirm with callers.
        /// </param>
        /// <returns>
        /// The first option of the current player when <c>root</c> reports turn end or game over;
        /// otherwise the result of <c>GetBestOption</c>.
        /// </returns>
        public PlayerTask Simulate(UCTNode currentNode, long accumulativeMillisecond)
        {
            // NOTE(review): the early exits inspect the `root` field, not `currentNode`; confirm this is intended.
            if (root.IsCurrentPlayerTurnEnd() || root.IsGameOver())
            {
                return root.currentPOGame.CurrentPlayer.Options()[0];
            }

            long startedAt = DateTimeOffset.Now.ToUnixTimeMilliseconds();

            if (_debug_)
            {
                Log.Instance().Append(currentNode.FullPrint());
            }

            // Three budgets bound the loop: playout count, time spent in this call,
            // and time measured against the caller-supplied accumulativeMillisecond.
            int playouts;
            for (playouts = 0;
                 playouts < maxOnePlayOutTimes
                 && DateTimeOffset.Now.ToUnixTimeMilliseconds() - startedAt < this.maxOptionCalculateTime
                 && DateTimeOffset.Now.ToUnixTimeMilliseconds() - accumulativeMillisecond < this.maxTurnCalculateTime;
                 playouts++)
            {
                UCTNode leaf = OnePlayOut(currentNode, availableOptionTypes);

                if (_debug_)
                {
                    Log.Instance().Append(leaf.FullPathPrint());
                    Log.Instance().Append(leaf.FullPrint());
                }

                double leafReward = EvaluateUCTNode(leaf, maxTurnCalculateTime);

                if (_debug_)
                {
                    Log.Instance().Append("reward : " + leafReward.ToString());
                }

                UpdateReward(leaf, leafReward);
            }

            double     rootReward = EvaluateUCTNode(root, maxTurnCalculateTime);
            PlayerTask bestOption = GetBestOption(root, rootReward);

            // Keep the largest number of playouts completed by any single call.
            if (playouts > maxRealOnePlayOutTimes)
            {
                maxRealOnePlayOutTimes = playouts;
            }

            Debug.Assert(bestOption != null);
            return bestOption;
        }
Beispiel #3
0
        /// <summary>
        /// Walks down the tree from <paramref name="startNode"/> by repeatedly choosing a
        /// random available option, reusing an existing child for that option index or
        /// simulating the option and attaching a new child.
        /// </summary>
        /// <param name="startNode">Node at which the random walk begins.</param>
        /// <param name="availableOptionTypes">Task types the walk may choose from (passed to <c>AvailableOptions</c>).</param>
        /// <param name="step">Initial step counter; incremented once per move.</param>
        /// <param name="depth">Step limit. As in the original condition, the walk still continues
        /// past this limit while <c>GetEndTurnOption</c> returns null for the current node.</param>
        /// <returns>
        /// The node at which the walk stopped. When a simulation yields no game state the
        /// walk stops at the last node that has one.
        /// </returns>
        public UCTNode MonteCarloSearch(UCTNode startNode, List <PlayerTaskType> availableOptionTypes, int step, int depth)
        {
            UCTNode           currentNode = startNode;
            List <PlayerTask> optionsList = AvailableOptions(currentNode, availableOptionTypes);

            // `&&` binds tighter than `||`, so the original condition let
            // `GetEndTurnOption(currentNode) == null` alone keep the loop running, even
            // with no options left, and indexing the empty list then threw. A non-empty
            // option list is now a hard requirement; the rest of the condition is unchanged.
            while (optionsList.Count > 0 &&
                   ((step < depth && !currentNode.IsGameOver()) ||
                    GetEndTurnOption(currentNode) == null))
            {
                int        rndOptionNo = rnd.Next(optionsList.Count);
                PlayerTask rndOption   = optionsList[rndOptionNo];
                UCTNode    existing    = currentNode.GetChild(rndOptionNo);

                if (existing != null)
                {
                    // A child for this option index already exists: descend into it.
                    currentNode = existing;
                }
                else
                {
                    // No child yet: simulate the option to obtain the successor state.
                    POGame.POGame simulatedPOGame = SimulateOption(currentNode.currentPOGame, rndOption);

                    // The simulation sometimes returns null. Stop at the current node instead
                    // of attaching a child with no game state, which the next
                    // AvailableOptions call would dereference.
                    if (simulatedPOGame == null)
                    {
                        Console.WriteLine(currentNode.FullPathPrint());
                        Console.WriteLine(currentNode.FullPrint());
                        break;
                    }

                    currentNode = currentNode.AddChild(currentNode, rndOption, rndOptionNo, simulatedPOGame);
                }

                step++;

                // Refresh the candidate options for the node just moved to.
                optionsList = AvailableOptions(currentNode, availableOptionTypes);
            }

            return currentNode;
        }
Beispiel #4
0
        /// <summary>
        /// Runs playouts from <paramref name="currentNode"/> using <c>availableOpOptionTypes</c>,
        /// scores each resulting leaf with <c>EvaluateGameCurrentOpponent</c>, feeds the score to
        /// <c>UpdateReward</c>, and finally returns <c>GetWorstChildUCTNode(currentNode)</c>.
        /// </summary>
        /// <param name="currentNode">Node from which every playout starts and whose child is returned.</param>
        /// <param name="accumulativeMillisecond">
        /// Unix-millisecond value subtracted from the current time and compared against
        /// <c>maxTurnCalculateTime</c>.
        /// NOTE(review): presumably the timestamp at which this turn's computation started; confirm with callers.
        /// </param>
        /// <returns>The node returned by <c>GetWorstChildUCTNode</c> for <paramref name="currentNode"/>.</returns>
        public UCTNode SimulateOppenont(UCTNode currentNode, long accumulativeMillisecond)
        {
            if (_debug_)
            {
                Log.Instance().Append("simulateOppenont start");
            }

            // Bounded by both a playout count and the turn time budget.
            for (int playouts = 0;
                 playouts < maxOpOnePlayOutTimes
                 && DateTimeOffset.Now.ToUnixTimeMilliseconds() - accumulativeMillisecond < maxTurnCalculateTime;
                 playouts++)
            {
                UCTNode leaf = OnePlayOut(currentNode, availableOpOptionTypes);

                if (_debug_)
                {
                    Log.Instance().Append(leaf.FullPathPrint());
                    Log.Instance().Append(leaf.FullPrint());
                }

                double leafReward = EvaluateGameCurrentOpponent(leaf.currentPOGame);

                if (_debug_)
                {
                    Log.Instance().Append("reward" + leafReward.ToString());
                }

                UpdateReward(leaf, leafReward);
            }

            if (_debug_)
            {
                Log.Instance().Append("simulateOppenont end");
            }

            return GetWorstChildUCTNode(currentNode);
        }