/// <summary>
/// Go over all actions and return the one with the highest Q value,
/// where the Q values are read from the last layer of the neural network.
/// </summary>
/// <param name="control">The game control; supplies the number of actions and the legality check.</param>
/// <param name="state">The state of control to use.</param>
/// <param name="isLegal">If true, the returned action has to be considered legal in the control.</param>
/// <returns>
/// The action with the highest network output among the qualifying actions.
/// If no action qualifies (for example none is legal), action 0 is returned.
/// </returns>
protected override Actione getMaxAction(GameControlBase control, State state, bool isLegal)
{
    int maxID = 0;

    // Start below every possible output, so that an action whose Q value is
    // zero or negative can still be selected as the maximum.
    double max = double.NegativeInfinity;

    bool multiOutput = IsMultidimensionalOutput;
    if (multiOutput)
    {
        // A single forward pass: the last layer holds one value per action
        NeuralNet.Feed(CreateInputArray(state.Board));
    }

    for (int id = 0; id < control.ActionNum; id++)
    {
        int outputIndex = id;
        if (!multiOutput)
        {
            // Feed the state and the action to the neural network; the last layer's first value is read
            NeuralNet.Feed(CreateInputArray(state.Board, id));
            outputIndex = 0;
        }

        double q = NeuralNet.Activations[NeuralNet.Activations.Count - 1][outputIndex];

        // The legality check is only performed for actions that would beat the current maximum
        if (q > max && (!isLegal || control.IsLegalAction(new Actione(id))))
        {
            maxID = id;
            max = q;
        }
    }

    return new Actione(maxID); // Return the best action
}
/// <summary>
/// Execute a single move for the given bot, or a random legal move when no bot is given.
/// Does nothing if the control is already in a terminal state.
/// </summary>
/// <param name="against">The bot that should act; if null, a random legal action is taken instead.</param>
protected void BotMove(Bot against)
{
    // No move is made once the game has ended
    if (Control.IsTerminalState())
    {
        return;
    }

    if (against != null)
    {
        against.TakeAction(Control, Control.GetState());
        return;
    }

    // Keep drawing random actions until one is legal, then perform it
    Actione candidate;
    do
    {
        candidate = new Actione(rand.Next(Control.ActionNum));
    }
    while (!Control.IsLegalAction(candidate));

    Control.DoAction(candidate);
}
/// <summary>
/// Rank all actions by their Q-table value for the given state and return the best one.
/// </summary>
/// <param name="control">The game control used for the legality check.</param>
/// <param name="state">The state whose ID selects the Q-table row.</param>
/// <param name="isLegal">If true, the returned action has to be considered legal in the control.</param>
/// <returns>
/// The highest-ranked action, or the highest-ranked legal action when <paramref name="isLegal"/> is true.
/// Returns null when a legal action is required but none is found.
/// </returns>
protected override Actione getMaxAction(GameControlBase control, State state, bool isLegal)
{
    double[] rankedActions = new double[ActionNum];
    double[] rankedValues = new double[ActionNum];

    // Pair every action index with its Q value for this state
    for (int action = 0; action < ActionNum; action++)
    {
        rankedActions[action] = action;
        rankedValues[action] = Q_Table[state.ID, action];
    }

    // Order both arrays by descending Q value; whenever a later entry is larger
    // than the one in the current slot, the two entries trade places.
    for (int slot = 0; slot < ActionNum; slot++)
    {
        for (int probe = slot + 1; probe < ActionNum; probe++)
        {
            if (rankedValues[probe] > rankedValues[slot])
            {
                Swap(rankedActions, slot, probe);
                Swap(rankedValues, slot, probe);
            }
        }
    }

    // Without the legality requirement the top-ranked action is the answer
    if (!isLegal)
    {
        return new Actione((int)rankedActions[0]);
    }

    // Otherwise walk down the ranking until a legal action shows up
    for (int rank = 0; rank < ActionNum; rank++)
    {
        int actionId = (int)rankedActions[rank];
        if (control.IsLegalAction(new Actione(actionId)))
        {
            return new Actione(actionId);
        }
    }

    return null;
}
/// <summary>
/// A wrapper function to the recursive function, that returns the best action
/// </summary>
/// <param name="control">Control that the state is in</param>
/// <param name="s">The current control state</param>
/// <param name="alpha">Has to be int.MinValue</param>
/// <param name="beta">Has to be int.MaxValue</param>
/// <param name="player">The player that is maximizing</param>
/// <returns>
/// The action with the highest evaluated reward. If every evaluated action produced
/// the same reward (or no action was evaluated), a randomly drawn legal action is returned instead.
/// </returns>
private Actione BestMove(GameControlBase control, State s, double alpha, double beta, Players player)
{
    // This is needed in order to register the next action as the opponent's
    Players opponent = (player == Players.Player1) ? Players.Player2 : Players.Player1;

    int maxID = 0;
    double maxReward = double.NegativeInfinity;

    // Track whether all evaluated rewards are the same by comparing against the
    // first evaluated one; this must not depend on action 0 being legal.
    bool anyEvaluated = false;
    double firstReward = 0;
    bool allSame = true;

    // Work on a clone so that the simulated moves never touch the caller's state
    State currState = (State)s.Clone();

    for (int i = 0; i < control.ActionNum; i++)
    {
        // Reset the working copy BEFORE the legality check, so that the check does not
        // see the move simulated in a previous iteration.
        // NOTE(review): assumes s.Copy(x) copies s into x, as the original usage implies - confirm.
        s.Copy(currState);

        if (!control.IsLegalAction(new Actione(i), currState))
        {
            continue;
        }

        // Make the action as the player
        control.RegisterAction(currState, new Actione(i), player);

        double reward;
        if (control.IsTerminalState(currState))
        {
            // If it is terminal, the reward is the one that the control returns
            reward = control.GetReward(player, currState);
        }
        else
        {
            // If not terminal, use the discounted result of the recursive minimax search
            reward = 0.9 * GetMaxRewardRec(control, currState, false, opponent, alpha, beta, 1, MaxDepth);
        }

        if (!anyEvaluated)
        {
            anyEvaluated = true;
            firstReward = reward;
        }
        else if (reward != firstReward) // Exact comparison, as in the original check
        {
            allSame = false;
        }

        if (reward > maxReward)
        {
            maxID = i;
            maxReward = reward;
        }

        // Alpha beta pruning
        alpha = Math.Max(alpha, maxReward);
        if (beta <= alpha)
        {
            break;
        }
    }

    if (!allSame)
    {
        return new Actione(maxID);
    }

    // If all rewards were the same, do a random action.
    // Legality is checked here through the one-argument overload (no state is passed).
    Actione botAction = new Actione(rand.Next(control.ActionNum));
    while (!control.IsLegalAction(botAction))
    {
        botAction = new Actione(rand.Next(control.ActionNum));
    }

    return botAction;
}
/// <summary>
/// The actual recursive function that returns the best reward reachable from a state
/// (maximum when maximizing, minimum when minimizing), using alpha beta pruning.
/// </summary>
/// <param name="control">Control used to check legality, register actions and query rewards</param>
/// <param name="s">The state to search from; simulated moves are made on a clone of it</param>
/// <param name="IsMaxing">Is the current player the player that is maximizing</param>
/// <param name="player">The current player</param>
/// <param name="alpha">The best option for maximizing player</param>
/// <param name="beta">The best option for minimizing player</param>
/// <param name="level">The current depth of the search</param>
/// <param name="maxLevel">The depth after which the search stops and returns 0</param>
/// <returns>
/// 0 when <paramref name="level"/> exceeds <paramref name="maxLevel"/>; otherwise the best value found.
/// If no action is legal, the initial bound is returned (int.MinValue when maximizing, int.MaxValue otherwise).
/// </returns>
private double GetMaxRewardRec(GameControlBase control, State s, bool IsMaxing, Players player, double alpha, double beta, int level, int maxLevel)
{
    // If reached the max depth
    if (level > maxLevel)
    {
        return 0;
    }

    // This is needed in order to register the next action as the opponent's
    Players opponent = (player == Players.Player1) ? Players.Player2 : Players.Player1;

    // Terminal rewards are queried for `player` in the maximizing branch
    // and for `opponent` in the minimizing branch.
    Players rewardFor = IsMaxing ? player : opponent;

    double bestVal = IsMaxing ? int.MinValue : int.MaxValue;
    State currState = (State)s.Clone();

    for (int i = 0; i < control.ActionNum; i++)
    {
        // Reset the working copy BEFORE the legality check, so that the check does not
        // see the move simulated in a previous iteration.
        // NOTE(review): assumes s.Copy(x) copies s into x, as the original usage implies - confirm.
        s.Copy(currState);

        if (!control.IsLegalAction(new Actione(i), currState))
        {
            continue;
        }

        control.RegisterAction(currState, new Actione(i), player);

        double reward;
        if (control.IsTerminalState(currState))
        {
            // If it is terminal, the reward is the one that the control returns
            reward = control.GetReward(rewardFor, currState);
        }
        else
        {
            // If not terminal, recurse one level deeper for the other side and discount the result
            reward = 0.9 * GetMaxRewardRec(control, currState, !IsMaxing, opponent, alpha, beta, level + 1, maxLevel);
        }

        if (IsMaxing)
        {
            bestVal = Math.Max(reward, bestVal);
            alpha = Math.Max(alpha, bestVal);
        }
        else
        {
            bestVal = Math.Min(reward, bestVal);
            beta = Math.Min(beta, bestVal);
        }

        // Alpha beta pruning
        if (beta <= alpha)
        {
            break;
        }
    }

    return bestVal;
}