/// <summary>
/// Verifies that the trained average strategy of player 1 lies in the Kuhn Poker
/// Nash equilibrium family, which is parameterised by alpha (the probability that
/// player 1 opens with a bet while holding the Jack).
/// </summary>
public void NashEquilibriumPlayer1Test()
{
    // Player 1 holds the Jack and acts first, so the action history is still empty.
    var probe = new InformationSet<GameAction>()
    {
        CardBucket = (int)CardValue.Jack,
        ActionHistory = new List<GameAction>()
    };
    var jackStrategy = trainer.GameNodes[probe.GetHashCode()].calculateAverageStrategy();

    // In equilibrium the Jack's bet probability (alpha) lies between 0 and 1/3.
    var alpha = jackStrategy[(int)GameAction.Bet];
    float alphaMin = 0f;
    float alphaMax = 1f / 3f;
    Assert.IsTrue((alphaMax + tolerance) > alpha);
    Assert.IsTrue((alphaMin - tolerance) < alpha);

    // Kuhn Poker only offers two actions, so pass and bet probabilities must add up to roughly one.
    var sum = jackStrategy[(int)GameAction.Pass] + jackStrategy[(int)GameAction.Bet];
    bool sumsToOne = sum < 1 + tolerance && sum > 1 - tolerance;
    Assert.IsTrue(sumsToOne);

    // The remaining expectations for player 1 are expressed in terms of alpha.
    // King, first to act: bets with probability 3 * alpha.
    probe.CardBucket = (int)CardValue.King;
    var kingStrategy = trainer.GameNodes[probe.GetHashCode()].calculateAverageStrategy();
    var betProbability = kingStrategy[(int)GameAction.Bet];
    var betProbabilityExpected = (3 * alpha);
    Assert.IsTrue(Math.Abs(betProbability - betProbabilityExpected) < tolerance);

    // Queen, after player 1 passed and player 2 bet: calls (bets) with probability alpha + 1/3.
    probe.CardBucket = (int)CardValue.Queen;
    probe.ActionHistory = new List<GameAction>() { GameAction.Pass, GameAction.Bet };
    var queenStrategy = trainer.GameNodes[probe.GetHashCode()].calculateAverageStrategy();
    betProbability = queenStrategy[(int)GameAction.Bet];
    betProbabilityExpected = (alpha + 1f / 3f);
    Assert.IsTrue(Math.Abs(betProbability - betProbabilityExpected) < tolerance);
}
/// <summary>
/// Verifies that the trained average strategy of player 2 matches the expected
/// Kuhn Poker Nash equilibrium probabilities at three of its information sets.
/// </summary>
public void NashEquilibriumPlayer2Test()
{
    // Player 2 holds the Jack and is facing a bet from player 1.
    var probe = new InformationSet<GameAction>()
    {
        CardBucket = (int)CardValue.Jack,
        ActionHistory = new List<GameAction>() { GameAction.Bet }
    };
    var strategy = trainer.GameNodes[probe.GetHashCode()].calculateAverageStrategy();

    // Jack facing a bet: never calls.
    var betProbability = strategy[(int)GameAction.Bet];
    var betProbabilityExpected = 0f;
    Assert.IsTrue(Math.Abs(betProbability - betProbabilityExpected) < tolerance);

    // Queen facing a bet: calls one third of the time.
    probe.CardBucket = (int)CardValue.Queen;
    strategy = trainer.GameNodes[probe.GetHashCode()].calculateAverageStrategy();
    betProbability = strategy[(int)GameAction.Bet];
    betProbabilityExpected = (1f / 3f);
    Assert.IsTrue(Math.Abs(betProbability - betProbabilityExpected) < tolerance);

    // Jack after player 1 passed (checked): bets one third of the time.
    probe.CardBucket = (int)CardValue.Jack;
    probe.ActionHistory = new List<GameAction> { GameAction.Pass };
    strategy = trainer.GameNodes[probe.GetHashCode()].calculateAverageStrategy();
    betProbability = strategy[(int)GameAction.Bet];
    betProbabilityExpected = (1f / 3f);
    Assert.IsTrue(Math.Abs(betProbability - betProbabilityExpected) < tolerance);
}
/// <summary>
/// Checks that training produced the expected number of game nodes and that a
/// handful of representative information sets can be looked up in the node dictionary.
/// </summary>
public void TrainedOneHandTest()
{
    // This count depends heavily on the abstraction (i.e. stack size, bet size, hand strength and actions).
    int expectedNodeCount = 15908;
    Assert.AreEqual(expectedNodeCount, trainer.GameNodes.Count);

    // Sample information sets that have to be present after training.
    var probes = new[]
    {
        new InformationSet<ActionBucket>()
        {
            CardBucket = (int)StartHandBucket.Best,
            ActionHistory = new List<ActionBucket>()
        },
        new InformationSet<ActionBucket>()
        {
            CardBucket = (int)StartHandBucket.Best,
            ActionHistory = new List<ActionBucket>() { ActionBucket.Call, ActionBucket.LowBet }
        },
        new InformationSet<ActionBucket>()
        {
            CardBucket = (int)StartHandBucket.Best,
            ActionHistory = new List<ActionBucket>()
            {
                ActionBucket.LowBet, ActionBucket.LowBet, ActionBucket.LowBet, ActionBucket.LowBet
            }
        },
        new InformationSet<ActionBucket>()
        {
            CardBucket = (int)StartHandBucket.Worst,
            ActionHistory = new List<ActionBucket>()
            {
                ActionBucket.LowBet, ActionBucket.LowBet, ActionBucket.LowBet
            }
        }
    };

    // Every sampled information set must resolve to a node in the dictionary.
    foreach (var probe in probes)
    {
        var node = trainer.GameNodes[probe.GetHashCode()];
        Assert.IsNotNull(node);
    }
}
/// <summary>
/// Looks up the trained node for the given hand bucket and action history and
/// returns its average strategy.
/// </summary>
/// <param name="handBucket">Card bucket of the hand that identifies the information set.</param>
/// <param name="actions">Action history that identifies the information set.</param>
/// <returns>The average strategy computed by the node found in the trained tree.</returns>
private List<float> getOptimalStrategy(byte handBucket, List<ActionBucket> actions)
{
    // The trained tree is keyed by the hash code of the information set.
    var key = (new InformationSet<ActionBucket>()
    {
        CardBucket = handBucket,
        ActionHistory = actions
    }).GetHashCode();

    var node = trainedTree[key];
    Assert.IsNotNull(node);

    return node.calculateAverageStrategy();
}
/// <summary>
/// Recursively implements the Counterfactual Regret Minimization algorithm.
/// Walks the game tree below the given action history, accumulates the regret of
/// every visited information set in <c>GameNodes</c> and returns the node utility.
/// </summary>
/// <param name="cards">Hand cards of player 1 (index 0) and player 2 (index 1)</param>
/// <param name="actions">Action history; its length determines which player acts next</param>
/// <param name="probability0">Accumulated action probability of player 1</param>
/// <param name="probability1">Accumulated action probability of player 2</param>
/// <returns>
/// The payoff at a terminal history, otherwise the strategy-weighted utility of the node;
/// in both cases from the point of view of the player who is to act at this history.
/// </returns>
private float CalculateCounterFactualRegret(int[] cards, List<GameAction> actions, float probability0, float probability1)
{
    int plays = actions.Count;
    int player = plays % 2;
    int opponent = 1 - player;

    // Terminal histories need at least two actions.
    if (plays > 1)
    {
        bool isLastActionPass = actions[plays - 1] == GameAction.Pass;
        bool isSecondLastActionPass = actions[plays - 2] == GameAction.Pass;
        bool isPlayerCardHigher = cards[player] > cards[opponent];

        if (isLastActionPass)
        {
            if (isSecondLastActionPass)
            {
                // Pass after pass: showdown for a payoff of 1.
                return isPlayerCardHigher ? 1 : -1;
            }

            // Pass after a bet: the player to act collects 1 without a showdown.
            return 1;
        }

        if (!isSecondLastActionPass)
        {
            // Bet after bet: showdown for a payoff of 2.
            return isPlayerCardHigher ? 2 : -2;
        }
    }

    int actionCount = Settings.NumberOfActions;

    var infoSet = new InformationSet<GameAction>()
    {
        CardBucket = cards[player],
        ActionHistory = actions
    };

    // Fetch the node of this information set, creating it on the first visit.
    RegretGameNode<GameAction> node;
    var hash = infoSet.GetHashCode();
    if (!GameNodes.TryGetValue(hash, out node))
    {
        node = new RegretGameNode<GameAction>(actionCount);
        node.InfoSet = infoSet;
        GameNodes.Add(hash, node);
    }

    var strategy = node.calculateStrategy(player == 0 ? probability0 : probability1);

    // Sized from the configured action count. The previous two-element list literal
    // only set the list's capacity from the setting, so more than two actions would
    // have indexed past its end.
    var utilities = new float[actionCount];
    float nodeUtility = 0;

    for (int i = 0; i < actionCount; i++)
    {
        var nextAction = (i == 0) ? GameAction.Pass : GameAction.Bet;
        var nextHistory = new List<GameAction>(actions) { nextAction };

        // The recursive call is evaluated for the other player, hence the sign flip.
        utilities[i] = player == 0
            ? -CalculateCounterFactualRegret(cards, nextHistory, probability0 * strategy[i], probability1)
            : -CalculateCounterFactualRegret(cards, nextHistory, probability0, probability1 * strategy[i]);

        nodeUtility += strategy[i] * utilities[i];
    }

    // Accumulate each action's regret, weighted by the other player's accumulated probability.
    float opponentProbability = player == 0 ? probability1 : probability0;
    for (int i = 0; i < actionCount; i++)
    {
        float regret = utilities[i] - nodeUtility;
        node.RegretSum[i] += opponentProbability * regret;
    }

    return nodeUtility;
}