Example #1
0
        /// <summary>
        /// Returns the current player's options in <paramref name="currentNode"/>'s game state
        /// whose task type is contained in <paramref name="availableOptionTypes"/>.
        /// </summary>
        /// <param name="currentNode">Node whose <c>currentPOGame</c> supplies the options; must not be null.</param>
        /// <param name="availableOptionTypes">Task types that are allowed through the filter.</param>
        /// <param name="playCards">Forwarded unchanged to <c>CurrentPlayer.Options</c>.</param>
        /// <returns>
        /// A new list with the matching options in their original order. The list is empty when the
        /// node carries no game state.
        /// </returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="currentNode"/> is null.</exception>
        public List<PlayerTask> AvailableOptions(UCTNode currentNode, List<PlayerTaskType> availableOptionTypes, bool playCards = true)
        {
            if (currentNode == null)
            {
                // Nothing can be logged about a null node: the previous diagnostics dereferenced
                // it and crashed with a NullReferenceException before writing anything.
                throw new System.ArgumentNullException(nameof(currentNode));
            }

            List<PlayerTask> availableOptions = new List<PlayerTask>();

            if (currentNode.currentPOGame == null)
            {
                // Record where the broken node sits in the tree, then report "no options"
                // instead of dereferencing the missing game state below.
                Log.Instance().Append(currentNode.FullPathPrint());
                Log.Instance().Append(currentNode.FullPrint());
                return availableOptions;
            }

            List<PlayerTask> options = currentNode.currentPOGame.CurrentPlayer.Options(playCards);

            foreach (PlayerTask option in options)
            {
                if (availableOptionTypes.Contains(option.PlayerTaskType))
                {
                    availableOptions.Add(option);
                }
            }
            return availableOptions;
        }
Example #2
0
        /// <summary>
        /// Creates a simulator rooted at <paramref name="poGame"/> and stores the search parameters.
        /// </summary>
        /// <param name="poGame">Game state passed to <c>Root</c> to build the root node.</param>
        /// <param name="randomSeed">Seed stored in <c>rndSeed</c>; used to create a generator when <paramref name="rnd"/> is null.</param>
        /// <param name="rnd">Random generator to use, or null to create one from <paramref name="randomSeed"/>.</param>
        /// <param name="ucb1Coef">Coefficient stored for the UCB value computation.</param>
        /// <param name="mcSearchDepth">Stored as <c>mcSearchDepth</c>.</param>
        /// <param name="mcOpSearchDepth">Stored as <c>mcOpSearchDepth</c>.</param>
        /// <param name="maxOpOnePlayOutTimes">Stored as <c>maxOpOnePlayOutTimes</c>.</param>
        /// <param name="maxOnePlayOutTimes">Stored as <c>maxOnePlayOutTimes</c>.</param>
        /// <param name="BoardCoef">Stored as <c>BoardCoef</c>.</param>
        /// <param name="MinionsCoef">Stored as <c>MinionsCoef</c>.</param>
        /// <param name="OpMinionsCoef">Stored as <c>OpMinionsCoef</c>.</param>
        public UCTSimulator(SabberStoneCoreAi.POGame.POGame poGame, int randomSeed, Random rnd,
                            double ucb1Coef, int mcSearchDepth, int mcOpSearchDepth, int maxOpOnePlayOutTimes, int maxOnePlayOutTimes,
                            int[,] BoardCoef, int[] MinionsCoef, int[] OpMinionsCoef)
        {
            this.root    = Root(poGame);
            this.rndSeed = randomSeed;
            // Fall back to a seeded generator only when the caller did not supply one.
            this.rnd     = rnd ?? new Random(this.rndSeed);

            // Search limits.
            this.ucb1Coef             = ucb1Coef;
            this.mcSearchDepth        = mcSearchDepth;
            this.mcOpSearchDepth      = mcOpSearchDepth;
            this.maxOpOnePlayOutTimes = maxOpOnePlayOutTimes;
            this.maxOnePlayOutTimes   = maxOnePlayOutTimes;

            // Evaluation coefficients.
            this.BoardCoef     = BoardCoef;
            this.MinionsCoef   = MinionsCoef;
            this.OpMinionsCoef = OpMinionsCoef;

            // Task types explored for the searching player.
            PlayerTaskType[] ownTypes =
            {
                PlayerTaskType.CHOOSE,
                PlayerTaskType.HERO_ATTACK,
                PlayerTaskType.HERO_POWER,
                PlayerTaskType.MINION_ATTACK,
                PlayerTaskType.PLAY_CARD
            };
            foreach (PlayerTaskType ownType in ownTypes)
            {
                availableOptionTypes.Add(ownType);
            }

            // Task types explored when simulating the opponent.
            PlayerTaskType[] opponentTypes =
            {
                PlayerTaskType.HERO_ATTACK,
                PlayerTaskType.HERO_POWER,
                PlayerTaskType.MINION_ATTACK
            };
            foreach (PlayerTaskType opponentType in opponentTypes)
            {
                availableOpOptionTypes.Add(opponentType);
            }
        }
Example #3
0
        /// <summary>
        /// Looks up the child stored under key <paramref name="j"/> in <c>childDict</c>.
        /// </summary>
        /// <param name="j">Key of the child in <c>childDict</c>.</param>
        /// <returns>The child node, or null when no child is stored under that key.</returns>
        public UCTNode GetChild(int j)
        {
            UCTNode found;

            if (!this.childDict.TryGetValue(j, out found))
            {
                return null;
            }
            return found;
        }
Example #4
0
        /// <summary>
        /// Simulates the end-turn option of <paramref name="currentNode"/> and wraps the resulting
        /// game state in a fresh, parentless node.
        /// </summary>
        /// <param name="currentNode">Node whose game state is advanced by ending the turn.</param>
        /// <returns>
        /// A new node holding the simulated game and the end-turn option, or null when
        /// <c>GetEndTurnOption</c> finds no end-turn option.
        /// </returns>
        public UCTNode SimulateEndTurn(UCTNode currentNode)
        {
            PlayerTask endTurn = GetEndTurnOption(currentNode);
            if (endTurn == null)
            {
                return null;
            }

            POGame.POGame afterEndTurn = SimulateOption(currentNode.currentPOGame, endTurn);

            // The new node has no parent and zeroed statistics.
            return new UCTNode(0, 0, 0, 0, null, endTurn, afterEndTurn, new Dictionary<int, UCTNode>());
        }
Example #5
0
        /// <summary>
        /// Scores a leaf by ending the turn, simulating the opponent and taking the value of the
        /// node returned by <c>SimulateOppenont</c>.
        /// </summary>
        /// <param name="currentPlayerLeafNode">Leaf node to evaluate.</param>
        /// <param name="accumulativeMillisecond">Forwarded unchanged to <c>SimulateOppenont</c>.</param>
        /// <returns>
        /// <c>Double.MinValue</c> when the turn cannot be ended from the leaf; the result of
        /// <c>EvaluateGameCurrentPlayer</c> on the leaf's game when the opponent simulation yields
        /// no node; otherwise the UCB value of the node the opponent simulation returned.
        /// </returns>
        public double EvaluateUCTNode(UCTNode currentPlayerLeafNode, long accumulativeMillisecond)
        {
            UCTNode opRootNode = SimulateEndTurn(currentPlayerLeafNode);
            if (opRootNode == null)
            {
                // No end-turn option (choice turn): leave the reward at its minimum.
                return Double.MinValue;
            }

            UCTNode opponentNode = SimulateOppenont(opRootNode, accumulativeMillisecond);
            if (opponentNode == null)
            {
                // Opponent had nothing to simulate: score the leaf's own game state.
                return EvaluateGameCurrentPlayer(currentPlayerLeafNode.currentPOGame);
            }

            return UCBValue(opponentNode);
        }
Example #6
0
        /// <summary>
        /// Descends from <paramref name="currentNode"/> through fully expanded nodes by best UCB
        /// child, then continues from the reached node with <c>MonteCarloSearch</c>.
        /// </summary>
        /// <param name="currentNode">Node the playout starts from.</param>
        /// <param name="availableOptionTypes">Task types considered during the playout.</param>
        /// <returns>The node returned by <c>MonteCarloSearch</c>.</returns>
        public UCTNode OnePlayOut(UCTNode currentNode, List<PlayerTaskType> availableOptionTypes)
        {
            // NOTE(review): step is never incremented in this method, so the search always
            // starts at 0 and the maxRealMCSearchTimes update below only fires for negative values.
            int step = 0;

            Debug.Assert(currentNode.currentPOGame.CurrentPlayer.Options().Count > 0);

            UCTNode cursor = currentNode;

            // Walk down while every available option of the cursor already has a child.
            while (!cursor.IsGameOver() &&
                   AvailableOptions(cursor, availableOptionTypes).Count == cursor.childDict.Count)
            {
                UCTNode next = GetBestChildUCTNode(cursor);
                if (next == null)
                {
                    break;
                }
                cursor = next;
            }

            UCTNode leafNode = MonteCarloSearch(cursor, availableOptionTypes, step, mcSearchDepth);

            if (step > maxRealMCSearchTimes)
            {
                maxRealMCSearchTimes = step;
            }
            return leafNode;
        }
Example #7
0
        /// <summary>
        /// Runs playouts from <paramref name="currentNode"/> until a playout-count or time limit is
        /// hit, then returns the option chosen by <c>GetBestOption</c> for the root.
        /// </summary>
        /// <param name="currentNode">Node the playouts start from.</param>
        /// <param name="accumulativeMillisecond">
        /// Unix timestamp in milliseconds that the turn budget <c>maxTurnCalculateTime</c> is
        /// measured from (it is subtracted from the current Unix time in the loop condition).
        /// </param>
        /// <returns>
        /// The root's first option when the root's turn has ended or the game is over; otherwise
        /// the option selected by <c>GetBestOption</c>.
        /// </returns>
        public PlayerTask Simulate(UCTNode currentNode, long accumulativeMillisecond)
        {
            if (root.IsCurrentPlayerTurnEnd() || root.IsGameOver())
            {
                return root.currentPOGame.CurrentPlayer.Options()[0];
            }

            double reward            = 0;
            int    onePlayoutTimes   = 0;
            long   startMilliseconds = DateTimeOffset.Now.ToUnixTimeMilliseconds();

            if (_debug_)
            {
                Log.Instance().Append(currentNode.FullPrint());
            }

            // Stop on whichever limit is reached first: playout count, per-option time budget
            // or per-turn time budget.
            while (onePlayoutTimes < maxOnePlayOutTimes &&
                   DateTimeOffset.Now.ToUnixTimeMilliseconds() - startMilliseconds < this.maxOptionCalculateTime &&
                   DateTimeOffset.Now.ToUnixTimeMilliseconds() - accumulativeMillisecond < this.maxTurnCalculateTime)
            {
                UCTNode currentPlayerLeafNode = OnePlayOut(currentNode, availableOptionTypes);

                if (_debug_)
                {
                    Log.Instance().Append(currentPlayerLeafNode.FullPathPrint());
                    Log.Instance().Append(currentPlayerLeafNode.FullPrint());
                }

                // Pass the turn-start timestamp, not the budget length. The evaluation compares
                // "now - accumulativeMillisecond" against maxTurnCalculateTime, so passing the
                // budget made that check fail immediately and the opponent was never simulated.
                reward = EvaluateUCTNode(currentPlayerLeafNode, accumulativeMillisecond);

                if (_debug_)
                {
                    Log.Instance().Append("reward : " + reward.ToString());
                }

                UpdateReward(currentPlayerLeafNode, reward);

                onePlayoutTimes++;
            }

            // Value of ending the turn right away, used by GetBestOption as the baseline.
            reward = EvaluateUCTNode(root, accumulativeMillisecond);
            PlayerTask bestOption = GetBestOption(root, reward);

            if (onePlayoutTimes > maxRealOnePlayOutTimes)
            {
                maxRealOnePlayOutTimes = onePlayoutTimes;
            }
            Debug.Assert(bestOption != null);
            return bestOption;
        }
Example #8
0
        /// <summary>
        /// Creates a node for <paramref name="simulatedPOGame"/> and registers it in
        /// <paramref name="parent"/>'s <c>childDict</c> under key <paramref name="j"/>.
        /// </summary>
        /// <param name="parent">Node that receives the new child.</param>
        /// <param name="option">Option stored on the new child; asserted non-null.</param>
        /// <param name="j">Key of the child in the parent's <c>childDict</c>.</param>
        /// <param name="simulatedPOGame">Game state stored on the new child; asserted non-null.</param>
        /// <returns>The newly created child node.</returns>
        public UCTNode AddChild(UCTNode parent, PlayerTask option, int j, POGame.POGame simulatedPOGame)
        {
            Debug.Assert(simulatedPOGame != null);
            Debug.Assert(option != null);

            var noChildrenYet = new Dictionary<int, UCTNode>();
            var child         = new UCTNode(0, 0, j, 0, parent, option, simulatedPOGame, noChildrenYet);

            // Add (not the indexer) so that a duplicate key still throws.
            parent.childDict.Add(j, child);
            return child;
        }
Example #9
0
 /// <summary>
 /// Creates a parentless node for <paramref name="poGame"/> with zeroed statistics, no option
 /// and an empty child dictionary.
 /// </summary>
 /// <param name="poGame">Game state stored as <c>currentPOGame</c>.</param>
 public UCTNode(SabberStoneCoreAi.POGame.POGame poGame)
 {
     // Game state and tree links.
     this.currentPOGame = poGame;
     this.parent        = null;
     this.option        = null;
     this.childDict     = new Dictionary <int, UCTNode>();

     // Statistics start at zero.
     this.reward = 0;
     this.nj     = 0;
     this.j      = 0;
     this.N      = 0;
 }
Example #10
0
        /// <summary>
        /// Applies <paramref name="reward"/> to <paramref name="leafNode"/> and each of its
        /// ancestors that has a parent; the parentless top node is not updated.
        /// </summary>
        /// <param name="leafNode">Node the update starts from.</param>
        /// <param name="reward">Reward passed to each node's <c>UpdateReward</c>.</param>
        public void UpdateReward(UCTNode leafNode, double reward)
        {
            for (UCTNode visited = leafNode; visited.parent != null; visited = visited.parent)
            {
                visited.UpdateReward(reward);
            }
        }
Example #11
0
        /// <summary>
        /// Builds a text listing of the options on the path from this node up to the parentless
        /// top node, one <c>PlayerTaskPrint</c> result per line, starting with this node.
        /// </summary>
        /// <returns>The concatenated lines.</returns>
        public string FullPathPrint()
        {
            var path = new StringBuilder();

            // "this" is never null, so checking before the first iteration is equivalent
            // to the do/while form.
            for (UCTNode visited = this; visited != null; visited = visited.parent)
            {
                path.AppendLine(PlayerTaskPrint(visited.option));
            }
            return path.ToString();
        }
Example #12
0
        /// <summary>
        /// Finds the first option of type <c>END_TURN</c> among the current player's options in
        /// <paramref name="currentNode"/>'s game state.
        /// </summary>
        /// <param name="currentNode">Node whose game state is inspected.</param>
        /// <param name="playCards">Forwarded unchanged to <c>CurrentPlayer.Options</c>.</param>
        /// <returns>The end-turn option, or null when there is none.</returns>
        public PlayerTask GetEndTurnOption(UCTNode currentNode, bool playCards = false)
        {
            List<PlayerTask> candidates = currentNode.currentPOGame.CurrentPlayer.Options(playCards);

            // List<T>.Find returns the first match, or null for a reference type with no match.
            return candidates.Find(candidate => candidate.PlayerTaskType == PlayerTaskType.END_TURN);
        }
Example #13
0
        /// <summary>
        /// Random playout: from <paramref name="startNode"/>, repeatedly picks a random available
        /// option, follows the matching child (creating it by simulation when missing) and stops
        /// once the depth limit or game end is reached.
        /// </summary>
        /// <param name="startNode">Node the playout starts from.</param>
        /// <param name="availableOptionTypes">Task types that may be picked.</param>
        /// <param name="step">Number of steps already counted towards <paramref name="depth"/>.</param>
        /// <param name="depth">Step limit; it is ignored while the current node has no end-turn option.</param>
        /// <returns>The last node reached that carries a valid game state.</returns>
        public UCTNode MonteCarloSearch(UCTNode startNode, List<PlayerTaskType> availableOptionTypes, int step, int depth)
        {
            UCTNode          currentNode = startNode;
            List<PlayerTask> optionsList = AvailableOptions(currentNode, availableOptionTypes);

            // Keep going while within the depth limit and the game is running, or, regardless
            // of depth, while the node has no end-turn option. In both cases there must be
            // something to pick. The explicit parentheses matter: without them "||" bound looser
            // than the "&&" chain, so a node with no end-turn option and no available options
            // re-entered the loop and indexed into an empty list.
            while (optionsList.Count > 0 &&
                   ((step < depth && !currentNode.IsGameOver()) ||
                    GetEndTurnOption(currentNode) == null))
            {
                int        rndOptionNo   = rnd.Next(optionsList.Count);
                PlayerTask rndOption     = optionsList[rndOptionNo];
                UCTNode    existingChild = currentNode.GetChild(rndOptionNo);

                if (existingChild != null)
                {
                    currentNode = existingChild;
                }
                else
                {
                    POGame.POGame simulatedPOGame = SimulateOption(currentNode.currentPOGame, rndOption);

                    // Simulation sometimes yields no game. Report it and stop at the last valid
                    // node instead of attaching a child with a null game state, which the
                    // following option lookup would dereference.
                    if (simulatedPOGame == null)
                    {
                        Console.WriteLine(currentNode.FullPathPrint());
                        Console.WriteLine(currentNode.FullPrint());
                        break;
                    }

                    currentNode = currentNode.AddChild(currentNode, rndOption, rndOptionNo, simulatedPOGame);
                }
                step++;

                // Refresh the options for the node we just moved to.
                optionsList = AvailableOptions(currentNode, availableOptionTypes);
            }
            return currentNode;
        }
Example #14
0
 /// <summary>
 /// Creates a node from explicitly supplied statistics, tree links and game state.
 /// </summary>
 /// <param name="reward">Initial value of <c>reward</c>.</param>
 /// <param name="nj">Initial value of <c>nj</c>.</param>
 /// <param name="j">Initial value of <c>j</c>.</param>
 /// <param name="N">Initial value of <c>N</c>.</param>
 /// <param name="parent">Parent node, or null for a node without parent.</param>
 /// <param name="option">Option stored on the node.</param>
 /// <param name="poGame">Game state stored as <c>currentPOGame</c>.</param>
 /// <param name="childDict">Dictionary that holds this node's children.</param>
 public UCTNode(double reward,
                int nj,
                int j,
                int N,
                UCTNode parent,
                PlayerTask option,
                SabberStoneCoreAi.POGame.POGame poGame,
                Dictionary <int, UCTNode> childDict)
 {
     // Tree links and game state.
     this.parent        = parent;
     this.childDict     = childDict;
     this.option        = option;
     this.currentPOGame = poGame;

     // Statistics.
     this.reward = reward;
     this.nj     = nj;
     this.j      = j;
     this.N      = N;
 }
Example #15
0
        /// <summary>
        /// Returns the child of <paramref name="node"/> with the lowest UCB value.
        /// </summary>
        /// <param name="node">Node whose children are compared.</param>
        /// <returns>
        /// The first child with the strictly lowest value, or null when there are no children
        /// or no child scores below <c>Double.MaxValue</c>.
        /// </returns>
        public UCTNode GetWorstChildUCTNode(UCTNode node)
        {
            UCTNode lowestChild = null;
            double  lowestValue = Double.MaxValue;

            foreach (UCTNode child in node.childDict.Values)
            {
                double value = UCBValue(child);
                if (value < lowestValue)
                {
                    lowestValue = value;
                    lowestChild = child;
                }
            }
            return lowestChild;
        }
Example #16
0
        /// <summary>
        /// Returns the child of <paramref name="node"/> with the highest UCB value.
        /// </summary>
        /// <param name="node">Node whose children are compared.</param>
        /// <returns>
        /// The first child with the strictly highest value, or null when there are no children
        /// or no child scores above <c>Double.MinValue</c>.
        /// </returns>
        public UCTNode GetBestChildUCTNode(UCTNode node)
        {
            UCTNode highestChild = null;
            double  highestValue = Double.MinValue;

            foreach (UCTNode child in node.childDict.Values)
            {
                double value = UCBValue(child);
                if (value > highestValue)
                {
                    highestValue = value;
                    highestChild = child;
                }
            }
            return highestChild;
        }
Example #17
0
        /// <summary>
        /// Runs opponent playouts from <paramref name="currentNode"/> until the playout-count or
        /// turn-time limit is reached, then returns its child with the lowest UCB value.
        /// </summary>
        /// <param name="currentNode">Root node of the opponent's search.</param>
        /// <param name="accumulativeMillisecond">
        /// Value subtracted from the current Unix time in milliseconds; the difference is
        /// compared against <c>maxTurnCalculateTime</c>.
        /// </param>
        /// <returns>The result of <c>GetWorstChildUCTNode</c> for <paramref name="currentNode"/>.</returns>
        public UCTNode SimulateOppenont(UCTNode currentNode, long accumulativeMillisecond)
        {
            if (_debug_)
            {
                Log.Instance().Append("simulateOppenont start");
            }

            for (int playouts = 0;
                 playouts < maxOpOnePlayOutTimes &&
                 DateTimeOffset.Now.ToUnixTimeMilliseconds() - accumulativeMillisecond < maxTurnCalculateTime;
                 playouts++)
            {
                UCTNode opLeafNode = OnePlayOut(currentNode, availableOpOptionTypes);

                if (_debug_)
                {
                    Log.Instance().Append(opLeafNode.FullPathPrint());
                    Log.Instance().Append(opLeafNode.FullPrint());
                }

                double opReward = EvaluateGameCurrentOpponent(opLeafNode.currentPOGame);

                if (_debug_)
                {
                    Log.Instance().Append("reward" + opReward.ToString());
                }

                UpdateReward(opLeafNode, opReward);
            }

            if (_debug_)
            {
                Log.Instance().Append("simulateOppenont end");
            }
            return GetWorstChildUCTNode(currentNode);
        }
Example #18
0
        /// <summary>
        /// Chooses between ending the turn and the options of <paramref name="node"/>'s children,
        /// whichever has the highest value.
        /// </summary>
        /// <param name="node">Node whose end-turn option and children are compared.</param>
        /// <param name="rootUCTValue">Value assigned to the end-turn option, when one exists.</param>
        /// <returns>
        /// The option of the child whose UCB value strictly exceeds the running best; otherwise the
        /// end-turn option, which may be null.
        /// </returns>
        public PlayerTask GetBestOption(UCTNode node, double rootUCTValue)
        {
            PlayerTask chosen = GetEndTurnOption(node);

            // Ending the turn is the baseline only when that option is actually available.
            double highestValue = Double.MinValue;
            if (chosen != null)
            {
                highestValue = rootUCTValue;
            }

            foreach (UCTNode child in node.childDict.Values)
            {
                double value = UCBValue(child);
                if (value > highestValue)
                {
                    highestValue = value;
                    chosen       = child.option;
                }
            }
            Debug.Assert(highestValue != Double.MinValue);
            return chosen;
        }
Example #19
0
 /// <summary>
 /// Computes the UCB value of <paramref name="node"/> from its <c>reward</c>, its <c>nj</c>,
 /// its parent's <c>N</c> and the simulator's <c>ucb1Coef</c>.
 /// </summary>
 /// <param name="node">Node to score; it and its parent are asserted non-null.</param>
 /// <returns>The result of the four-argument <c>UCBValue</c> overload.</returns>
 public double UCBValue(UCTNode node)
 {
     // Check the node itself first: asserting on node.parent before node would dereference
     // a null node and throw instead of tripping the assertion.
     Debug.Assert(node != null);
     Debug.Assert(node.parent != null);
     return UCBValue(node.reward, node.nj, node.parent.N, ucb1Coef);
 }