// Cache of discretized hand buckets, keyed by a player's hole cards followed by the
// parent's community-card text. A child node adopts its parent's dictionary instance,
// so an entry computed once is reused by nodes below it.
public Dictionary<string, uint> buckets = new Dictionary<string, uint>();

/// <summary>
/// Builds a tree node for <paramref name="state"/>, records its unexplored actions and
/// derives the policy key used to look this node up in the policy table.
/// </summary>
/// <param name="state">Game state represented by this node.</param>
/// <param name="parent">Parent node, or null for a root node.</param>
public Node(LimitHoldEmState state, Node parent = null)
{
    this.state = state;
    this.parent = parent;
    unexploredActions = this.state.GetActions();

    // One cache key per player; with no parent the key is just the hole cards.
    string[] cacheKeys = { state.holeCards[0], state.holeCards[1] };
    uint streetToDiscretize = 0;

    if (parent != null)
    {
        // Share (not copy) the parent's cache.
        buckets = parent.buckets;
        cacheKeys[0] += parent.state.communityCards;
        cacheKeys[1] += parent.state.communityCards;
        streetToDiscretize = parent.state.street;
    }

    // Both players' buckets are stored together, so probing the current player's key is
    // enough to tell whether discretization is still needed.
    bool alreadyCached = buckets.ContainsKey(cacheKeys[state.playerIndex]);
    if (!alreadyCached)
    {
        for (uint player = 0; player < 2; ++player)
        {
            buckets[cacheKeys[player]] = state.Discretize(streetToDiscretize, player);
        }
    }

    // The key uses the bucket of the player who is NOT to act in this state
    // (presumably the player whose action led here - confirm against the search code).
    uint otherPlayer = (state.playerIndex == 0) ? 1u : 0;
    policyKey = buckets[cacheKeys[otherPlayer]].ToString() + state.previousActions;
}
/// <summary>
/// Creates a copy of this state whose card list, pot contributions and hole cards are
/// separate collection instances, so changes to the copy's collections do not affect
/// this instance. All other members are copied member-wise.
/// </summary>
/// <returns>The copied state, typed as <see cref="object"/>.</returns>
public object Clone()
{
    // Start from a shallow copy, then replace each collection with its own duplicate.
    LimitHoldEmState copy = (LimitHoldEmState)MemberwiseClone();

    copy.cards = new List<string>(cards);
    copy.potContributions = (uint[])potContributions.Clone();
    copy.holeCards = (string[])holeCards.Clone();

    return copy;
}
/// <summary>
/// Takes the first unexplored action off this node's list, applies it to the node's
/// state and attaches the resulting state as a new child node.
/// </summary>
/// <returns>The newly created child node.</returns>
public Node Expand()
{
    // Consume the head of the unexplored list; indexing throws if the list is empty.
    LimitHoldEmAction nextAction = unexploredActions[0];
    unexploredActions.RemoveAt(0);

    Node child = new Node(state.Step(nextAction), this);
    children.Add(child);
    return child;
}
// Single generator shared by every rollout. Constructing a new Random per step (as the
// previous code did) can yield identically seeded generators on runtimes that seed from
// the system clock, which makes successive "random" choices correlated.
private static readonly Random rolloutRandom = new Random();

/// <summary>
/// Plays uniformly random actions from the state held by <paramref name="node"/> until a
/// terminal state is reached.
/// </summary>
/// <param name="node">Node whose state is the starting point of the playout.</param>
/// <returns>The reward stored on the terminal state that was reached.</returns>
public double Rollout(Node node)
{
    LimitHoldEmState currentState = node.state;
    while (!currentState.isTerminal)
    {
        List<LimitHoldEmAction> actions = currentState.GetActions();

        int choice;
        // Random instances are not thread-safe; the lock keeps the shared generator
        // as safe to use as the per-call instance it replaces.
        lock (rolloutRandom)
        {
            choice = rolloutRandom.Next(0, actions.Count);
        }

        currentState = currentState.Step(actions[choice]);
    }
    return currentState.reward;
}
/// <summary>
/// Runs <paramref name="n"/> search iterations starting from a freshly reset state,
/// printing a progress line and incrementing the episode counter for each one.
/// </summary>
/// <param name="n">Number of search iterations to run.</param>
public void Update(uint n)
{
    LimitHoldEmState initialState = new LimitHoldEmState();
    initialState.Reset();

    Node rootNode = new Node(initialState);
    MonteCarleTreeSearch search = new MonteCarleTreeSearch(this, rootNode, true);

    uint iteration = 0;
    while (iteration < n)
    {
        Console.WriteLine("Iteration " + iteration + " of " + n);

        // Each iteration resets the search before searching again.
        search.Reset();
        search.Search();

        ++nEpisodes;
        ++iteration;
    }
}
/// <summary>
/// Populates the policy table with a fresh <c>InformationState</c> for every
/// (bucket, action history) key reachable from a reset game state.
/// </summary>
/// <remarks>
/// The betting tree is walked breadth-first. For every action taken from a state, one
/// entry per bucket of that state's street is created under the key
/// <c>bucket + previousActions</c> of the resulting state. Entries for the empty action
/// history are created up front using the first street's bucket count.
/// </remarks>
public void Initialize()
{
    // Number of buckets per street, indexed by the state's street value.
    uint[] nBuckets = new uint[4] { 169, 1000, 500, 200 };

    LimitHoldEmState initialState = new LimitHoldEmState();
    initialState.Reset();

    // Keys with an empty action history: just the bucket number.
    for (uint bucket = 0; bucket < nBuckets[0]; ++bucket)
    {
        policy[bucket.ToString()] = new InformationState();
    }

    // A Queue gives O(1) dequeue; removing the head of a List shifts every element.
    Queue<LimitHoldEmState> pending = new Queue<LimitHoldEmState>();
    pending.Enqueue((LimitHoldEmState)initialState.Clone());

    while (pending.Count != 0)
    {
        LimitHoldEmState state = pending.Dequeue();

        foreach (LimitHoldEmAction action in state.GetActions())
        {
            LimitHoldEmState newState = state.Step(action);

            // The bucket count comes from the street of the state the action was
            // taken in, not from the street of the resulting state.
            uint bucketCount = nBuckets[state.street];
            for (uint bucket = 0; bucket < bucketCount; ++bucket)
            {
                policy[bucket.ToString() + newState.previousActions] = new InformationState();
            }

            if (!newState.isTerminal)
            {
                pending.Enqueue(newState);
            }
        }
    }
}
/// <summary>
/// Interactive console loop: the user plays hand after hand against the supplied policy.
/// The user's seat alternates after every hand. The loop only ends when the user enters
/// "q"/"quit" or standard input is closed.
/// </summary>
/// <param name="lhePolicy">Policy whose stored action values drive the computer player.</param>
/// <param name="userIndex">Seat (0 or 1) the user occupies in the first hand.</param>
static void Play(LHEPolicy lhePolicy, uint userIndex = 0)
{
    LimitHoldEmState state = new LimitHoldEmState();
    double winnings = 0;

    while (true)
    {
        state.Reset();

        while (!state.isTerminal)
        {
            LimitHoldEmAction selectedAction;
            if (state.playerIndex == userIndex)
            {
                Console.WriteLine("Your cards: " + state.holeCards[userIndex]);
                Console.WriteLine("Community cards: " + state.communityCards);
                Console.WriteLine("Total winnings so far: " + winnings);
                uint pot = state.potContributions[0] + state.potContributions[1];
                Console.WriteLine("Pot: " + pot);

                selectedAction = ReadUserAction(state.GetActions());
            }
            else
            {
                string selectedActionName;
                selectedAction = ChoosePolicyAction(lhePolicy, state, out selectedActionName);
                Console.WriteLine("\nSmoothUCT: " + selectedActionName + "\n");
            }

            state = state.Step(selectedAction);
        }

        uint opponent = (userIndex == 0) ? 1u : 0;
        Console.WriteLine("\nSmoothUCT cards: " + state.holeCards[opponent]);

        // A positive reward means seat 0 won the hand, a negative one means seat 1 won.
        bool userWon = (state.reward > 0 && userIndex == 0) || (state.reward < 0 && userIndex == 1);
        if (userWon)
        {
            // The winner gains what the other seat put into the pot.
            double handWinnings = state.potContributions[opponent];
            Console.WriteLine("You win " + handWinnings);
            winnings += handWinnings;
        }
        else if (state.reward == 0)
        {
            Console.WriteLine("Split pot");
        }
        else
        {
            double handWinnings = state.potContributions[userIndex];
            Console.WriteLine("SmoothUCT wins " + handWinnings);
            winnings -= handWinnings;
        }

        // Swap seats for the next hand.
        userIndex = (userIndex + 1) % 2;
        Console.WriteLine("==========================================================================================");
    }
}

// Reads lines from the console until the user enters an action ("c", "f" or "r") that is
// in legalActions. "q"/"quit" or end of input terminates the process.
private static LimitHoldEmAction ReadUserAction(List<LimitHoldEmAction> legalActions)
{
    while (true)
    {
        // ReadLine returns null once standard input is closed; treat that as a quit
        // request instead of dereferencing null.
        string line = Console.ReadLine();
        string input = (line == null) ? "q" : line.Trim().ToLowerInvariant();

        if (input == "q" || input == "quit")
        {
            Environment.Exit(0);
        }

        LimitHoldEmAction action;
        switch (input)
        {
            case "c": action = LimitHoldEmAction.Call; break;
            case "f": action = LimitHoldEmAction.Fold; break;
            case "r": action = LimitHoldEmAction.Raise; break;
            default: continue; // unrecognized input: silently read another line
        }

        // Stepping with an action the state does not offer would create an action
        // history that has no policy entry, so the computer's next lookup would fail.
        if (legalActions.Contains(action))
        {
            return action;
        }
        Console.WriteLine("That action is not available right now.");
    }
}

// Returns the legal action with the highest stored value from the acting player's point
// of view, and its one-letter code through bestActionName. Falls back to Call ("c") when
// the state offers no actions.
private static LimitHoldEmAction ChoosePolicyAction(LHEPolicy lhePolicy, LimitHoldEmState state, out string bestActionName)
{
    uint bucket = state.Discretize(state.street, state.playerIndex);
    string policyKey = bucket.ToString() + state.previousActions;

    LimitHoldEmAction bestAction = LimitHoldEmAction.Call;
    bestActionName = "c";
    double bestValue = double.MinValue;

    foreach (LimitHoldEmAction action in state.GetActions())
    {
        string actionName = ActionCode(action);
        double actionValue = lhePolicy.policy[policyKey + actionName].value;

        // Seat 1 prefers low stored values, so flip the sign before comparing.
        if (state.playerIndex == 1)
        {
            actionValue *= -1;
        }

        if (actionValue > bestValue)
        {
            bestValue = actionValue;
            bestAction = action;
            bestActionName = actionName;
        }
    }

    return bestAction;
}

// Maps an action to the one-letter code used in policy keys and console output.
private static string ActionCode(LimitHoldEmAction action)
{
    switch (action)
    {
        case LimitHoldEmAction.Fold: return "f";
        case LimitHoldEmAction.Raise: return "r";
        default: return "c";
    }
}
/// <summary>
/// Applies <paramref name="action"/> for the player to act and returns the resulting
/// state. This instance is not modified; all changes are made on a clone.
/// </summary>
/// <param name="action">Fold, Raise or Call (Call also covers checking).</param>
/// <returns>
/// The successor state. When the hand ends, <c>isTerminal</c> is set and <c>reward</c> is
/// positive if player 0 wins, negative if player 1 wins, and left at its default on a tie.
/// </returns>
public LimitHoldEmState Step(LimitHoldEmAction action)
{
    LimitHoldEmState next = (LimitHoldEmState)Clone();

    // The acting seat is read once; the clone starts with the same seat as this state.
    uint actor = next.playerIndex;
    uint other = (actor == 0) ? 1u : 0;

    if (action == LimitHoldEmAction.Fold)
    {
        next.previousActions += "f";
        next.isTerminal = true;

        // The folder forfeits what they have put in; the sign says who lost it.
        next.reward = next.potContributions[actor];
        if (actor == 0) // max player is folding
        {
            next.reward *= -1;
        }
    }
    else if (action == LimitHoldEmAction.Raise)
    {
        next.previousActions += "r";
        ++next.nStreetBets;

        // Match the other seat, then add one bet: 2 on streets 0-1, 4 afterwards.
        uint betSize = (next.street < 2) ? 2u : 4u;
        next.potContributions[actor] = next.potContributions[other] + betSize;
        next.playerIndex = other;
    }
    else if (action == LimitHoldEmAction.Call)
    {
        next.previousActions += "c";
        next.potContributions[actor] = next.potContributions[other];

        bool firstToActPreflop = next.street == 0 && actor == 0;
        bool firstToActPostflop = next.street > 0 && actor == 1;

        if (next.nStreetBets == 0 && (firstToActPreflop || firstToActPostflop))
        {
            // player checks: no bet this street and the other seat has yet to act
            next.playerIndex = other;
        }
        else
        {
            // player calls: the betting round is over
            switch (next.street)
            {
                case 0:
                    // advance from preflop to flop
                    next.communityCards = next.Deal() + " " + next.Deal() + " " + next.Deal();
                    break;

                case 1:
                case 2:
                    // advance from flop to turn or turn to river
                    next.communityCards += " " + next.Deal();
                    break;

                default:
                    // end of hand
                    next.isTerminal = true;
                    ulong boardMask = Hand.ParseHand(next.communityCards);
                    ulong strength0 = Hand.Evaluate(boardMask | Hand.ParseHand(next.holeCards[0]), 7);
                    ulong strength1 = Hand.Evaluate(boardMask | Hand.ParseHand(next.holeCards[1]), 7);

                    // ties would set reward to 0.0 but that is already the default
                    if (strength0 != strength1)
                    {
                        // both players are guaranteed to have contributed same amount
                        next.reward = next.potContributions[0];
                        if (strength1 > strength0)
                        {
                            next.reward *= -1;
                        }
                    }
                    break;
            }

            // Start the next street: no bets yet, seat 1 acts first.
            next.nStreetBets = 0;
            ++next.street;
            next.playerIndex = 1u;
        }
    }

    return next;
}
/// <summary>
/// Resets the state held by the current root and replaces the root with a new,
/// parentless node built from the state that <c>Reset</c> returns.
/// </summary>
public void Reset()
{
    root = new Node(root.state.Reset());
}