예제 #1
0
        /// <summary>
        /// Performs a single playout: walks down the tree from <paramref name="currentNode"/>
        /// via <c>GetBestChildUCTNode</c> while each visited node has as many children as it
        /// has available options, then continues from the reached node with
        /// <c>MonteCarloSearch</c>.
        /// </summary>
        /// <param name="currentNode">Node the descent starts from.</param>
        /// <param name="availableOptionTypes">Option types forwarded to <c>AvailableOptions</c> and <c>MonteCarloSearch</c>.</param>
        /// <returns>The node returned by <c>MonteCarloSearch</c>.</returns>
        public UCTNode OnePlayOut(UCTNode currentNode, List <PlayerTaskType> availableOptionTypes)
        {
            Debug.Assert(currentNode.currentPOGame.CurrentPlayer.Options().Any());

            // The descent does not advance this counter, so the rollout below is always
            // started with a step of zero.
            // NOTE(review): this makes the maxRealMCSearchTimes update below close to a
            // no-op — confirm whether counting descent steps was intended.
            int descentSteps = 0;
            UCTNode cursor = currentNode;

            // Descend while the game is still running and every available option of the
            // current node already has a child.
            while (!cursor.IsGameOver() &&
                   AvailableOptions(cursor, availableOptionTypes).Count() == cursor.childDict.Count())
            {
                UCTNode preferredChild = GetBestChildUCTNode(cursor);

                // No selectable child: stop descending and roll out from here.
                if (preferredChild == null)
                {
                    break;
                }

                cursor = preferredChild;
            }

            UCTNode leafNode = MonteCarloSearch(cursor, availableOptionTypes, descentSteps, mcSearchDepth);

            // Track the largest step count seen so far.
            if (descentSteps > maxRealMCSearchTimes)
            {
                maxRealMCSearchTimes = descentSteps;
            }

            return leafNode;
        }
예제 #2
0
        /// <summary>
        /// Runs playouts from <paramref name="currentNode"/> until one of the budgets
        /// (playout count, per-option time, per-turn time) is exhausted, back-propagating
        /// each playout's evaluation, and then returns the option chosen by
        /// <c>GetBestOption</c> for <c>root</c>.
        /// </summary>
        /// <param name="currentNode">Node every playout starts from.</param>
        /// <param name="accumulativeMillisecond">
        /// Unix-millisecond value subtracted from the current time and compared against
        /// <c>maxTurnCalculateTime</c>.
        /// </param>
        /// <returns>
        /// The first option of the root's current player when the root's turn has ended or
        /// the game is over; otherwise the result of <c>GetBestOption</c>.
        /// </returns>
        public PlayerTask Simulate(UCTNode currentNode, long accumulativeMillisecond)
        {
            // Nothing to search: the turn is finished or the game has ended.
            if (root.IsCurrentPlayerTurnEnd() || root.IsGameOver())
            {
                return root.currentPOGame.CurrentPlayer.Options()[0];
            }

            int  playoutsDone = 0;
            long searchStart  = DateTimeOffset.Now.ToUnixTimeMilliseconds();

            if (_debug_)
            {
                Log.Instance().Append(currentNode.FullPrint());
            }

            // Keep sampling while all three budgets still have room.
            while (playoutsDone < maxOnePlayOutTimes &&
                   DateTimeOffset.Now.ToUnixTimeMilliseconds() - searchStart < this.maxOptionCalculateTime &&
                   DateTimeOffset.Now.ToUnixTimeMilliseconds() - accumulativeMillisecond < this.maxTurnCalculateTime)
            {
                UCTNode playoutLeaf = OnePlayOut(currentNode, availableOptionTypes);

                if (_debug_)
                {
                    Log.Instance().Append(playoutLeaf.FullPathPrint());
                    Log.Instance().Append(playoutLeaf.FullPrint());
                }

                double leafReward = EvaluateUCTNode(playoutLeaf, maxTurnCalculateTime);

                if (_debug_)
                {
                    Log.Instance().Append("reward : " + leafReward.ToString());
                }

                UpdateReward(playoutLeaf, leafReward);
                playoutsDone++;
            }

            // The final decision is made relative to the root's own evaluation.
            double     rootReward = EvaluateUCTNode(root, maxTurnCalculateTime);
            PlayerTask bestOption = GetBestOption(root, rootReward);

            // Track the largest number of playouts performed in a single call.
            if (playoutsDone > maxRealOnePlayOutTimes)
            {
                maxRealOnePlayOutTimes = playoutsDone;
            }

            Debug.Assert(bestOption != null);
            return bestOption;
        }
예제 #3
0
        /// <summary>
        /// Random rollout starting at <paramref name="startNode"/>. On every iteration a
        /// random available option is picked; the matching child is reused when it already
        /// exists, otherwise the option is simulated and a new child is added. The rollout
        /// moves to that child and repeats.
        /// </summary>
        /// <param name="startNode">Node the rollout begins at.</param>
        /// <param name="availableOptionTypes">Option types forwarded to <c>AvailableOptions</c>.</param>
        /// <param name="step">Step count the rollout starts from; incremented once per move.</param>
        /// <param name="depth">
        /// Step limit. The rollout is allowed to run past it for as long as
        /// <c>GetEndTurnOption</c> returns null for the current node.
        /// </param>
        /// <returns>
        /// The last node reached. If simulating an option yields no game state, the node
        /// that option was tried from is returned.
        /// </returns>
        public UCTNode MonteCarloSearch(UCTNode startNode, List <PlayerTaskType> availableOptionTypes, int step, int depth)
        {
            UCTNode           currentNode = startNode;
            List <PlayerTask> optionsList = AvailableOptions(currentNode, availableOptionTypes);

            // "&&" binds tighter than "||", so the previous unparenthesised condition let the
            // "no end-turn option" clause override every other guard, including the empty
            // option list check, which then failed on optionsList[0]. The option-count guard
            // now applies unconditionally; the depth/game-over limit can still be overridden
            // while no end-turn option exists.
            while (optionsList.Count > 0 &&
                   ((step < depth && !currentNode.IsGameOver()) ||
                    GetEndTurnOption(currentNode) == null))
            {
                int        rndOptionNo = rnd.Next(optionsList.Count);
                PlayerTask rndOption   = optionsList[rndOptionNo];
                UCTNode    tmpNode     = currentNode.GetChild(rndOptionNo);

                if (tmpNode == null)
                {
                    // Child for this option does not exist yet: simulate the option.
                    POGame.POGame simulatedPOGame = SimulateOption(currentNode.currentPOGame, rndOption);

                    // Simulation sometimes yields no game state. Report where it happened
                    // and stop here instead of attaching a child that has no game, which
                    // the next AvailableOptions call would have to operate on.
                    if (simulatedPOGame == null)
                    {
                        Console.WriteLine(currentNode.FullPathPrint());
                        Console.WriteLine(currentNode.FullPrint());
                        break;
                    }

                    currentNode = currentNode.AddChild(currentNode, rndOption, rndOptionNo, simulatedPOGame);
                }
                else
                {
                    // Child already exists: just move to it.
                    currentNode = tmpNode;
                }
                step++;

                // Refresh the candidate options for the node we moved to.
                optionsList = AvailableOptions(currentNode, availableOptionTypes);
            }
            return(currentNode);
        }