// Picks a betting action for the given deck.
//
// The deck's betting state is stored in the `state` member, then every action
// index in [0, numActions) is scored by feeding actionPlusState(action, state)
// through the network and keeping the first element of the output. The final
// choice is either the result of getMaxAct or of getExploreAction, decided by
// comparing eps against a fresh random draw.
public int chooseBet(Deck deck)
{
    state = deck.getBettingState();

    // One network score per candidate action, evaluated in ascending order.
    float[] scores = new float[numActions];
    int candidate = 0;
    while (candidate < numActions)
    {
        var netInput = actionPlusState(candidate, state);
        var netOutput = net.forward(netInput);
        scores[candidate] = netOutput[0];
        candidate++;
    }

    // NOTE(review): the greedy branch is taken when eps exceeds the random
    // draw, so a larger eps means MORE exploitation here - confirm this
    // matches how eps is scheduled elsewhere.
    if (eps > r.NextDouble())
    {
        // Exploit: take the action selected from the scores by getMaxAct.
        return getMaxAct(scores);
    }

    // Explore: defer to getExploreAction for a non-greedy pick.
    return getExploreAction(scores);
}
// Chooses the player's action for the current hand (0 for stand, 1 for hit).
//
// Builds the network input from the player's hand value, the dealer's shown
// card, the deck, the hand's ace value and two 0/1 flags telling whether the
// hand can be doubled or split, and stores it in the `state` member. Each
// action index in [0, numActions) is then scored with the first element of
// the network output. The pick comes from getMaxAct or getExploreAction
// depending on a random draw against eps, and is finally passed through
// shouldSplit, whose result is returned.
public int choosePlayerAction(Hand playerHand, Hand dealerHand, Deck deck)
{
    int playerTotal = playerHand.getValue();
    int dealerUpcard = dealerHand.getDealerShowing();
    var aceFeature = playerHand.getAceValue();

    // Encode the availability of double / split as 1.0 (allowed) or 0.0 (not allowed).
    var doubleFlag = playerHand.canDouble() ? 1.0f : 0.0f;
    var splitFlag = playerHand.canSplit() ? 1.0f : 0.0f;

    state = genInputVec(playerTotal, dealerUpcard, deck, aceFeature, doubleFlag, splitFlag);

    // One network score per candidate action, evaluated in ascending order.
    float[] scores = new float[numActions];
    for (int idx = 0; idx < numActions; idx++)
    {
        var netOutput = net.forward(actionPlusState(idx, state));
        scores[idx] = netOutput[0];
    }

    // NOTE(review): the greedy branch is taken when eps exceeds the random
    // draw, so a larger eps means MORE exploitation here - confirm this
    // matches how eps is scheduled elsewhere.
    int picked;
    if (eps > r.NextDouble())
    {
        // Exploit: action selected from the scores (and the hand) by getMaxAct.
        picked = getMaxAct(scores, playerHand);
    }
    else
    {
        // Explore: defer to getExploreAction for a non-greedy pick.
        picked = getExploreAction(scores, playerHand);
    }

    // shouldSplit gets the final say and may replace the picked action.
    return shouldSplit(playerHand, dealerHand, picked, playerHand.canSplit());
}