/// <summary>
/// Cycles the opponent setting of the first learnable control on the screen
/// (Bot -> Minimax -> Noone -> Bot) and shows a notification with the new value.
/// </summary>
/// <returns>True if a learnable control was found and updated; otherwise false.</returns>
public static bool ChangeOpponent()
{
    foreach (ControlBase candidate in Screen.controls.Keys)
    {
        if (!candidate.IsLearnable)
        {
            continue;
        }

        GameControlBase game = (GameControlBase)candidate;

        // Any value outside the three-step cycle is left untouched.
        switch (game.PlayAgainst)
        {
            case Against.Bot:
                game.PlayAgainst = Against.Minimax;
                break;
            case Against.Minimax:
                game.PlayAgainst = Against.Noone;
                break;
            case Against.Noone:
                game.PlayAgainst = Against.Bot;
                break;
        }

        string message = "Player's turn changed to " + game.PlayAgainst.ToString();
        Screen.GetGUI().ShowNotification(message, new Vector2(300, 10), 1000, gameTime);
        return true;
    }

    return false;
}
/// <summary>
/// Prepares the bot for learning: creates the random generator, chooses the network
/// dimensions, builds the network if none was loaded, and resets the replay memory
/// and epsilon. Has to be called before Learn().
/// </summary>
/// <param name="control">The game control whose feature and action counts size the network.</param>
/// <param name="player">The player that the network will be playing.</param>
public override void Setup(GameControlBase control, GameControlBase.Players player)
{
    base.Setup(control, player);
    rand = new Random();

    int inputSize;
    int outputSize;
    if (IsMultidimensionalOutput)
    {
        // The input is only the state; there is one output per action.
        inputSize = control.FeatureNum;
        outputSize = control.ActionNum;
    }
    else
    {
        // The input is the state together with the action; there is a single output.
        inputSize = control.FeatureNum + control.ActionNum;
        outputSize = 1;
    }

    Dimensions = new List<int> { inputSize, 110, 50, outputSize };

    // Keep a network that was already loaded in another way.
    if (NeuralNet == null)
    {
        NeuralNet = new NetworkVectors(Dimensions);
    }

    OldNeuralNet = (NetworkVectors)NeuralNet.Clone();
    Control = control;
    BotTurn = player;
    ReplayMem = new List<Transition>();
    Epsilon = 1;
}
/// <summary>
/// Adds a <c>GameKeyBoardControl</c> component to this game object and stores it in
/// <c>m_curControl</c>, unless one is already stored.
/// </summary>
private void InitCurControl()
{
    if (m_curControl != null)
    {
        return;
    }

    m_curControl = gameObject.AddComponent<GameKeyBoardControl>();
}
/// <summary>
/// Go over all actions and return the one with the highest Q value.
/// </summary>
/// <remarks>
/// The running maximum starts at negative infinity, so actions whose Q value is zero
/// or negative can still be selected. If no action qualifies at all (for example when
/// <paramref name="isLegal"/> is true and no action is legal), action 0 is returned.
/// </remarks>
/// <param name="control">The control that supplies the action count and the legality check.</param>
/// <param name="state">The state of control to use.</param>
/// <param name="isLegal">If true, the returned action has to be considered legal in the control.</param>
/// <returns>The action with the highest Q value among the candidates.</returns>
protected override Actione getMaxAction(GameControlBase control, State state, bool isLegal)
{
    int maxID = 0;

    // Seeding with 0 would make every non-positive Q value lose against the default
    // action 0, which is never checked for legality.
    double max = double.NegativeInfinity;

    if (IsMultidimensionalOutput)
    {
        // Feed the state once; the output layer holds one Q value per action.
        NeuralNet.Feed(CreateInputArray(state.Board));
        for (int id = 0; id < control.ActionNum; id++)
        {
            double q = NeuralNet.Activations[NeuralNet.Activations.Count - 1][id];
            if (q > max && (!isLegal || control.IsLegalAction(new Actione(id))))
            {
                maxID = id;
                max = q;
            }
        }
    }
    else
    {
        for (int id = 0; id < control.ActionNum; id++)
        {
            // Feed the state and the action; the single output is that pair's Q value.
            // The network is fed for every action, exactly as before, so the activations
            // left behind after this call are unchanged.
            NeuralNet.Feed(CreateInputArray(state.Board, id));
            double q = NeuralNet.Activations[NeuralNet.Activations.Count - 1][0];
            if (q > max && (!isLegal || control.IsLegalAction(new Actione(id))))
            {
                maxID = id;
                max = q;
            }
        }
    }

    return new Actione(maxID); // Return the best action
}
/// <summary>
/// Starts learning on the first learnable control found on the screen.
/// </summary>
/// <returns>True if a learnable control was found; otherwise false.</returns>
public static bool StartLearn()
{
    foreach (ControlBase candidate in Screen.controls.Keys)
    {
        if (!candidate.IsLearnable)
        {
            continue;
        }

        ((GameControlBase)candidate).StartLearn();
        return true;
    }

    return false;
}
/// <summary>
/// Prepares the bot for learning: stores the control and the bot's turn, caches the
/// state and action counts, and allocates the Q table if it does not exist yet.
/// Has to be called before Learn().
/// </summary>
/// <param name="control">The game control that supplies the state and action counts.</param>
/// <param name="player">The player that the bot will be playing.</param>
public override void Setup(GameControlBase control, GameControlBase.Players player)
{
    base.Setup(control, player);
    Control = control;

    var stateCount = control.StateNum;
    var actionCount = control.ActionNum;

    // Keep an existing table (for example a previously loaded one).
    if (Q_Table == null)
    {
        Q_Table = new double[stateCount, actionCount];
    }

    BotTurn = player;
    ActionNum = actionCount;
    StateNum = stateCount;
}
/// <summary>
/// Saves the bot of the first learnable control on the screen under <c>NET_SAVE_NAME</c>.
/// </summary>
/// <returns>True if a learnable control was found; otherwise false.</returns>
public static bool SaveBot()
{
    foreach (ControlBase candidate in Screen.controls.Keys)
    {
        if (!candidate.IsLearnable)
        {
            continue;
        }

        GameControlBase game = (GameControlBase)candidate;
        NetworkLoader.SaveLearningBot(NET_SAVE_NAME, game.GetBot());
        return true;
    }

    return false;
}
/// <summary>
/// Loads the bot stored under <c>OPPONENT_SAVE_NAME</c>, sets it up for the first
/// learnable control on the screen, and installs it as that control's opponent.
/// </summary>
/// <returns>True if a learnable control was found; otherwise false.</returns>
internal static bool LoadOpponent()
{
    foreach (ControlBase candidate in Game1.Screen.controls.Keys)
    {
        if (!candidate.IsLearnable)
        {
            continue;
        }

        GameControlBase game = (GameControlBase)candidate;
        LearningBot loaded = NetworkLoader.LoadLearningBot(OPPONENT_SAVE_NAME);

        // The loaded bot keeps the turn it was saved with.
        loaded.Setup(game, loaded.BotTurn);
        game.SetOpponent(loaded);
        return true;
    }

    return false;
}
/// <summary>
/// Loads the bot stored under <c>NET_SAVE_NAME</c>, sets it up for the first
/// learnable control on the screen, and installs it as that control's bot.
/// </summary>
/// <returns>True if a learnable control was found; otherwise false.</returns>
public static bool LoadBot()
{
    foreach (ControlBase candidate in Screen.controls.Keys)
    {
        if (!candidate.IsLearnable)
        {
            continue;
        }

        GameControlBase game = (GameControlBase)candidate;
        LearningBot loaded = NetworkLoader.LoadLearningBot(NET_SAVE_NAME);

        // The loaded bot keeps the turn it was saved with.
        loaded.Setup(game, loaded.BotTurn);
        game.SetBot(loaded);
        return true;
    }

    return false;
}
/// <summary>
/// Ranks all actions by their Q value in the given state and returns the best one.
/// </summary>
/// <param name="control">The control used for the legality check.</param>
/// <param name="state">The state of control to use; its ID selects the Q table row.</param>
/// <param name="isLegal">If true, the returned action has to be considered legal in the control.</param>
/// <returns>
/// The highest-ranked action, or the highest-ranked legal action when
/// <paramref name="isLegal"/> is true; null if no action is legal.
/// </returns>
protected override Actione getMaxAction(GameControlBase control, State state, bool isLegal)
{
    // Parallel arrays: action ids and the Q value of each action in this state.
    double[] rankedIds = new double[ActionNum];
    double[] rankedValues = new double[ActionNum];
    for (int action = 0; action < ActionNum; action++)
    {
        rankedIds[action] = action;
        rankedValues[action] = Q_Table[state.ID, action];
    }

    // Order both arrays by descending Q value, swapping a larger later value into the current slot.
    for (int slot = 0; slot < ActionNum; slot++)
    {
        for (int probe = slot; probe < ActionNum; probe++)
        {
            if (rankedValues[probe] > rankedValues[slot])
            {
                Swap(rankedIds, slot, probe);
                Swap(rankedValues, slot, probe);
            }
        }
    }

    if (!isLegal)
    {
        // No legality requirement: the top-ranked action wins.
        return new Actione((int)rankedIds[0]);
    }

    // Walk the ranking and return the first action the control accepts.
    for (int rank = 0; rank < ActionNum; rank++)
    {
        int actionId = (int)rankedIds[rank];
        if (control.IsLegalAction(new Actione(actionId)))
        {
            return new Actione(actionId);
        }
    }

    return null;
}
/// <summary>
/// A wrapper function to the recursive function, that returns the best action.
/// If every evaluated action yields the same reward, a random legal action is returned instead.
/// </summary>
/// <param name="control">Control that the state is in</param>
/// <param name="s">The current control state</param>
/// <param name="alpha">Has to be int.MinValue</param>
/// <param name="beta">Has to be int.MaxValue</param>
/// <param name="player">The player that is maximizing</param>
/// <returns>The action with the highest evaluated reward, or a random legal action on a full tie.</returns>
private Actione BestMove(GameControlBase control, State s, double alpha, double beta, Players player)
{
    // This is needed in order to register the next action as the opponent's
    Players opponent;
    if (player == Players.Player1)
    {
        opponent = Players.Player2;
    }
    else
    {
        opponent = Players.Player1;
    }

    int maxID = 0;
    double maxReward = -1000000;
    double Reward = 0;
    bool allSame = true;          // Are all the rewards the same?
    bool seenLegalAction = false; // Has a legal action been evaluated yet?
    Actione botAction;
    State currState = (State)s.Clone(); // Work on a clone so the original state is preserved

    for (int i = 0; i < control.ActionNum; i++)
    {
        if (control.IsLegalAction(new Actione(i), currState))
        {
            // Make the action as the player.
            // NOTE(review): s.Copy(currState) presumably resets currState to the contents of s - confirm.
            s.Copy(currState);
            control.RegisterAction(currState, new Actione(i), player);

            if (control.IsTerminalState(currState)) // If its terminal, reward is the reward that the control returns
            {
                Reward = control.GetReward(player, currState);
            }
            else // If not terminal, use the discounted result of the recursive minimax search
            {
                Reward = 0.9 * GetMaxRewardRec(control, currState, false, opponent, alpha, beta, 1, MaxDepth);
            }

            // Only compare from the second evaluated action onwards. Skipping by "i != 0"
            // compared the first legal action against the sentinel whenever action 0 was
            // illegal, which wrongly disabled the random tie-break below.
            if (seenLegalAction && Reward != maxReward)
            {
                allSame = false;
            }
            seenLegalAction = true;

            if (Reward > maxReward)
            {
                maxID = i;
                maxReward = Reward;
            }

            // Alpha beta pruning
            alpha = Math.Max(alpha, maxReward);
            if (beta <= alpha)
            {
                break;
            }
        }
    }

    if (!allSame)
    {
        return new Actione(maxID);
    }

    // If all rewards were the same, do a random action.
    // NOTE(review): legality here is checked without a state argument, unlike the loop above - confirm intended.
    botAction = new Actione(rand.Next(control.ActionNum));
    while (!control.IsLegalAction(botAction))
    {
        botAction = new Actione(rand.Next(control.ActionNum));
    }

    return botAction;
}
/// <summary>
/// Returns the action that the minimax search considers best for the player whose
/// turn it currently is in the given control. The <paramref name="isLegal"/> flag is not used here.
/// </summary>
/// <param name="control">The control to search in; its current turn is the maximizing player.</param>
/// <param name="state">The state to start the search from.</param>
/// <param name="isLegal">Ignored by this implementation.</param>
protected override Actione getMaxAction(GameControlBase control, State state, bool isLegal)
{
    // Start the search with the widest possible alpha-beta window.
    double alpha = int.MinValue;
    double beta = int.MaxValue;

    return BestMove(control, state, alpha, beta, control.CurrTurn);
}
/// <summary>
/// Recursive minimax search with alpha-beta pruning. Returns the best reward reachable
/// from the given state, discounting each deeper level by 0.9.
/// </summary>
/// <param name="control">Control that the state is in</param>
/// <param name="s">The state to search from</param>
/// <param name="IsMaxing">Is the current player the player that is maximizing</param>
/// <param name="player">The current player</param>
/// <param name="alpha">The best option for maximizing player</param>
/// <param name="beta">The best option for minimizing player</param>
/// <param name="level">The current search depth</param>
/// <param name="maxLevel">The depth after which the search stops and returns 0</param>
/// <returns>The best reward found, or 0 once the depth limit is exceeded.</returns>
private double GetMaxRewardRec(GameControlBase control, State s, bool IsMaxing, Players player, double alpha, double beta, int level, int maxLevel)
{
    // Stop once the depth limit is exceeded.
    if (level > maxLevel)
    {
        return 0;
    }

    // The next action has to be registered as the other player's.
    Players other = (player == Players.Player1) ? Players.Player2 : Players.Player1;

    double best;
    double reward = 0;
    State scratch = (State)s.Clone();

    if (IsMaxing)
    {
        best = int.MinValue;
        for (int action = 0; action < control.ActionNum; action++)
        {
            if (!control.IsLegalAction(new Actione(action), scratch))
            {
                continue;
            }

            s.Copy(scratch);
            control.RegisterAction(scratch, new Actione(action), player);

            if (control.IsTerminalState(scratch))
            {
                // Terminal: take the reward reported by the control for the current player.
                reward = control.GetReward(player, scratch);
            }
            else
            {
                // Not terminal: descend one level as the minimizing side.
                reward = 0.9 * GetMaxRewardRec(control, scratch, false, other, alpha, beta, level + 1, maxLevel);
            }

            best = Math.Max(reward, best);

            // Alpha beta pruning
            alpha = Math.Max(alpha, best);
            if (beta <= alpha)
            {
                break;
            }
        }
    }
    else
    {
        best = int.MaxValue;
        for (int action = 0; action < control.ActionNum; action++)
        {
            if (!control.IsLegalAction(new Actione(action), scratch))
            {
                continue;
            }

            s.Copy(scratch);
            control.RegisterAction(scratch, new Actione(action), player);

            if (control.IsTerminalState(scratch))
            {
                // Terminal: take the reward reported by the control for the other player.
                reward = control.GetReward(other, scratch);
            }
            else
            {
                // Not terminal: descend one level as the maximizing side.
                reward = 0.9 * GetMaxRewardRec(control, scratch, true, other, alpha, beta, level + 1, maxLevel);
            }

            best = Math.Min(reward, best);

            // Alpha beta pruning
            beta = Math.Min(beta, best);
            if (beta <= alpha)
            {
                break;
            }
        }
    }

    return best;
}
/// <summary>
/// Marks the <c>LearningBot</c> as set up so that it is ready to <c>Learn()</c>.
/// This base implementation only sets <c>IsSetup</c>; it does not use its arguments.
/// </summary>
/// <param name="control">The game control the <c>LearningBot</c> will play in</param>
/// <param name="player">The turn the <c>LearningBot</c> will take</param>
public virtual void Setup(GameControlBase control, GameControlBase.Players player)
{
    IsSetup = true;
}
/// <summary>
/// Picks the best legal action (according to the bot's policy) for the given game
/// state and performs it on the control.
/// </summary>
/// <param name="control">The control the action is performed on.</param>
/// <param name="state">The game state the action is chosen for.</param>
public void TakeAction(GameControlBase control, State state)
{
    control.DoAction(getMaxAction(control, state, true));
}
/// <summary>
/// Go over all actions and return the one with the highest value.
/// Each concrete bot supplies its own way of valuing actions.
/// </summary>
/// <param name="control">The control the action is chosen for.</param>
/// <param name="state">The state of control to use.</param>
/// <param name="isLegal">If true, the returned action has to be considered legal in the control.</param>
/// <returns>The action the bot values highest in the given state.</returns>
protected abstract Actione getMaxAction(GameControlBase control, State state, bool isLegal);