예제 #1
0
        /// <summary>
        /// Checks the trained average strategy of player 1 against the Kuhn Poker Nash equilibrium,
        /// which is parameterised by alpha: the probability of betting a Jack on the first turn.
        /// </summary>
        public void NashEquilibriumPlayer1Test()
        {
            // Opening decision while holding a Jack: nothing has been played yet.
            var infoSet = new InformationSet<GameAction>();
            infoSet.CardBucket    = (int)CardValue.Jack;
            infoSet.ActionHistory = new List<GameAction>();

            var jackStrategy = trainer.GameNodes[infoSet.GetHashCode()].calculateAverageStrategy();

            // alpha (Jack's bet probability) has to stay inside [0, 1/3], give or take the tolerance.
            var   alpha      = jackStrategy[(int)GameAction.Bet];
            float upperBound = 1f / 3f;
            float lowerBound = 0f;

            Assert.IsTrue(alpha < (upperBound + tolerance));
            Assert.IsTrue(alpha > (lowerBound - tolerance));

            // Pass and Bet are the only two actions in Kuhn Poker, so their probabilities add up to ~1.
            var total = jackStrategy[(int)GameAction.Pass] + jackStrategy[(int)GameAction.Bet];

            Assert.IsTrue(total < 1 + tolerance);
            Assert.IsTrue(total > 1 - tolerance);

            // Same (empty) history, but now holding a King: the bet probability should be 3 * alpha.
            infoSet.CardBucket = (int)CardValue.King;
            var kingStrategy = trainer.GameNodes[infoSet.GetHashCode()].calculateAverageStrategy();

            var kingBet         = kingStrategy[(int)GameAction.Bet];
            var kingBetExpected = (3 * alpha);

            Assert.IsTrue(Math.Abs(kingBet - kingBetExpected) < tolerance);

            // Holding a Queen after player 1 passed and player 2 bet:
            // calling (i.e. betting) should happen with probability alpha + 1/3.
            infoSet.CardBucket    = (int)CardValue.Queen;
            infoSet.ActionHistory = new List<GameAction> { GameAction.Pass, GameAction.Bet };
            var queenStrategy = trainer.GameNodes[infoSet.GetHashCode()].calculateAverageStrategy();

            var queenCall         = queenStrategy[(int)GameAction.Bet];
            var queenCallExpected = (alpha + 1f / 3f);

            Assert.IsTrue(Math.Abs(queenCall - queenCallExpected) < tolerance);
        }
예제 #2
0
        /// <summary>
        /// Checks the trained average strategy of player 2 against the Kuhn Poker Nash equilibrium:
        /// never call with a Jack, call with a Queen 1/3 of the time, and bet a Jack 1/3 of the time
        /// after player 1 passed.
        /// </summary>
        public void NashEquilibriumPlayer2Test()
        {
            // Player 2 holds a Jack and faces a bet from player 1.
            var infoSet = new InformationSet<GameAction>();
            infoSet.CardBucket    = (int)CardValue.Jack;
            infoSet.ActionHistory = new List<GameAction> { GameAction.Bet };

            var jackFacingBet = trainer.GameNodes[infoSet.GetHashCode()].calculateAverageStrategy();

            // A Jack should never call (i.e. bet) once player 1 has bet.
            var jackCall         = jackFacingBet[(int)GameAction.Bet];
            var jackCallExpected = 0f;

            Assert.IsTrue(Math.Abs(jackCall - jackCallExpected) < tolerance);

            // A Queen facing the same bet should call (i.e. bet) 1/3 of the time.
            infoSet.CardBucket = (int)CardValue.Queen;
            var queenFacingBet = trainer.GameNodes[infoSet.GetHashCode()].calculateAverageStrategy();

            var queenCall         = queenFacingBet[(int)GameAction.Bet];
            var queenCallExpected = (1f / 3f);

            Assert.IsTrue(Math.Abs(queenCall - queenCallExpected) < tolerance);

            // A Jack that was passed (i.e. checked) to should bet 1/3 of the time.
            infoSet.CardBucket    = (int)CardValue.Jack;
            infoSet.ActionHistory = new List<GameAction> { GameAction.Pass };
            var jackFacingPass = trainer.GameNodes[infoSet.GetHashCode()].calculateAverageStrategy();

            var jackBet         = jackFacingPass[(int)GameAction.Bet];
            var jackBetExpected = (1f / 3f);

            Assert.IsTrue(Math.Abs(jackBet - jackBetExpected) < tolerance);
        }
예제 #3
0
        /// <summary>
        /// Verifies that training produced the expected number of game nodes and that a handful of
        /// representative information sets can be looked up in the trainer's node dictionary.
        /// </summary>
        public void TrainedOneHandTest()
        {
            // The expected count depends heavily on the abstraction in use
            // (i.e. stack size, bet size, hand strength and actions).
            int expectedNodeCount = 15908;

            Assert.AreEqual(expectedNodeCount, trainer.GameNodes.Count);

            // Sample information sets that must be present if the tree is complete.
            var probes = new[]
            {
                // Best start hand, no action taken yet.
                new InformationSet<ActionBucket>
                {
                    CardBucket    = (int)StartHandBucket.Best,
                    ActionHistory = new List<ActionBucket>()
                },
                // Best start hand after a call followed by a low bet.
                new InformationSet<ActionBucket>
                {
                    CardBucket    = (int)StartHandBucket.Best,
                    ActionHistory = new List<ActionBucket> { ActionBucket.Call, ActionBucket.LowBet }
                },
                // Best start hand after four consecutive low bets.
                new InformationSet<ActionBucket>
                {
                    CardBucket    = (int)StartHandBucket.Best,
                    ActionHistory = new List<ActionBucket>
                    {
                        ActionBucket.LowBet, ActionBucket.LowBet, ActionBucket.LowBet, ActionBucket.LowBet
                    }
                },
                // Worst start hand after three consecutive low bets.
                new InformationSet<ActionBucket>
                {
                    CardBucket    = (int)StartHandBucket.Worst,
                    ActionHistory = new List<ActionBucket>
                    {
                        ActionBucket.LowBet, ActionBucket.LowBet, ActionBucket.LowBet
                    }
                }
            };

            // Each probe is looked up by its hash code, in the order listed above.
            foreach (var probe in probes)
            {
                var gameNode = trainer.GameNodes[probe.GetHashCode()];

                Assert.IsNotNull(gameNode);
            }
        }
예제 #4
0
        /// <summary>
        /// Looks up the trained node that belongs to the given hand bucket and action history
        /// and returns its average strategy.
        /// </summary>
        /// <param name="handBucket">Hand bucket, stored as the information set's CardBucket.</param>
        /// <param name="actions">Action history, stored as the information set's ActionHistory.</param>
        /// <returns>The result of calculateAverageStrategy() on the node found in trainedTree.</returns>
        private List<float> getOptimalStrategy(byte handBucket, List<ActionBucket> actions)
        {
            // The hash code of the information set is the key into the trained tree.
            var lookupKey = new InformationSet<ActionBucket>();
            lookupKey.CardBucket    = handBucket;
            lookupKey.ActionHistory = actions;

            var node = trainedTree[lookupKey.GetHashCode()];
            Assert.IsNotNull(node);

            return node.calculateAverageStrategy();
        }
예제 #5
0
        /// <summary>
        /// Recursive step of the Counterfactual Regret Minimization algorithm.
        /// The player to act is derived from the length of the action history (even = player index 0,
        /// odd = player index 1). Terminal histories return a payoff directly; otherwise the node for
        /// the current information set is fetched from (or added to) GameNodes, every action is
        /// explored recursively and the node's regret sums are updated.
        /// </summary>
        /// <param name="cards">Hand cards of player 1 and 2, indexed by player (0 or 1)</param>
        /// <param name="actions">Action history so far; it is copied, not modified, for each child call</param>
        /// <param name="probability0">Accumulated action probability of player 1</param>
        /// <param name="probability1">Accumulated action probability of player 2</param>
        /// <returns>
        /// The utility of this history from the point of view of the player to act: a terminal payoff
        /// (+/-1 or +/-2), or the strategy-weighted sum of the negated child utilities.
        /// </returns>
        private float CalculateCounterFactualRegret(int[] cards, List<GameAction> actions, float probability0, float probability1)
        {
            int plays    = actions.Count;
            int player   = plays % 2;
            int opponent = 1 - player;

            // Terminal states can only occur once at least two actions have been played.
            if (plays > 1)
            {
                bool lastWasPass     = actions[plays - 1] == GameAction.Pass;
                bool previousWasPass = actions[plays - 2] == GameAction.Pass;
                bool playerHoldsBest = cards[player] > cards[opponent];

                if (lastWasPass)
                {
                    if (!previousWasPass)
                    {
                        // A pass right after a bet: the player to act collects 1.
                        return 1f;
                    }

                    // Two passes in a row: showdown for 1.
                    return playerHoldsBest ? 1f : -1f;
                }

                if (!previousWasPass)
                {
                    // Two bets in a row: showdown for 2.
                    return playerHoldsBest ? 2f : -2f;
                }

                // Pass followed by bet: the first player still has to respond, so keep going.
            }

            var infoSet = new InformationSet<GameAction>();
            infoSet.CardBucket    = cards[player];
            infoSet.ActionHistory = actions;

            // Nodes are keyed by the hash code of their information set and created on first visit.
            int key = infoSet.GetHashCode();
            RegretGameNode<GameAction> node;

            if (!GameNodes.TryGetValue(key, out node))
            {
                node = new RegretGameNode<GameAction>(Settings.NumberOfActions)
                {
                    InfoSet = infoSet
                };
                GameNodes.Add(key, node);
            }

            // Probability contributed by the acting player and by the opponent, respectively.
            float ownProbability      = (player == 0) ? probability0 : probability1;
            float opponentProbability = (player == 0) ? probability1 : probability0;

            var strategy  = node.calculateStrategy(ownProbability);
            var utilities = new List<float>(Settings.NumberOfActions)
            {
                0, 0
            };
            float nodeUtility = 0;

            for (int a = 0; a < Settings.NumberOfActions; a++)
            {
                // Index 0 stands for Pass, every other index for Bet.
                var nextHistory = new List<GameAction>(actions)
                {
                    (a == 0) ? GameAction.Pass : GameAction.Bet
                };

                // Only the acting player's probability is scaled by the chance of taking action a.
                float childUtility;
                if (player == 0)
                {
                    childUtility = CalculateCounterFactualRegret(cards, nextHistory, probability0 * strategy[a], probability1);
                }
                else
                {
                    childUtility = CalculateCounterFactualRegret(cards, nextHistory, probability0, probability1 * strategy[a]);
                }

                // The child value is from the opponent's point of view, hence the sign flip.
                utilities[a] = -childUtility;

                nodeUtility += strategy[a] * utilities[a];
            }

            // Accumulate the regret of each action, weighted by the opponent's probability.
            for (int a = 0; a < Settings.NumberOfActions; a++)
            {
                float actionRegret = utilities[a] - nodeUtility;
                node.RegretSum[a] += opponentProbability * actionRegret;
            }

            return nodeUtility;
        }