示例#1
0
        /// <summary>
        /// Goes over all actions and returns the one with the highest Q value, as read from the
        /// last activation layer of the neural network.
        /// </summary>
        /// <remarks>
        /// The running maximum starts at negative infinity, so actions whose Q value is zero or
        /// negative can still be selected. If no action qualifies (for example, <paramref name="isLegal"/>
        /// is true and no action is legal), action 0 is returned.
        /// </remarks>
        /// <param name="control">The game control; supplies the number of actions and the legality check.</param>
        /// <param name="state">The state of control to use.</param>
        /// <param name="isLegal">If true, the returned action has to be considered legal in the control.</param>
        /// <returns>The action with the highest Q value among the considered actions.</returns>
        protected override Actione getMaxAction(GameControlBase control, State state, bool isLegal)
        {
            bool   scoresAllActionsAtOnce = IsMultidimensionalOutput;
            int    maxID = 0;
            double max   = double.NegativeInfinity; // Not 0: Q values may be zero or negative

            if (scoresAllActionsAtOnce)
            {
                // A single feed of the state yields one output per action
                NeuralNet.Feed(CreateInputArray(state.Board));
            }

            for (int id = 0; id < control.ActionNum; id++)
            {
                int outputIndex = id;

                if (!scoresAllActionsAtOnce)
                {
                    // Feed the state and the action to the neural network, then read output 0
                    NeuralNet.Feed(CreateInputArray(state.Board, id));
                    outputIndex = 0;
                }

                double qValue = NeuralNet.Activations[NeuralNet.Activations.Count - 1][outputIndex];

                // Compare the value first, so legality is only queried for actions that would raise the maximum
                if (qValue > max && (!isLegal || control.IsLegalAction(new Actione(id))))
                {
                    maxID = id;
                    max   = qValue;
                }
            }

            return(new Actione(maxID)); // Return the best action
        }
示例#2
0
        /// <summary>
        /// Plays one move for the opposing side. Does nothing when the control is already in a terminal state.
        /// </summary>
        /// <param name="against">The bot that should move; when null, a random legal action is played instead.</param>
        protected void BotMove(Bot against)
        {
            if (Control.IsTerminalState())
            {
                return; // Game is over, nothing to play
            }

            if (against != null)
            {
                // Let the bot choose and perform its own action
                against.TakeAction(Control, Control.GetState());
                return;
            }

            // No bot given: keep drawing random actions until a legal one comes up.
            // NOTE(review): this does not terminate if the control has no legal action in a non-terminal state.
            Actione randomAction;
            do
            {
                randomAction = new Actione(rand.Next(Control.ActionNum));
            }
            while (!Control.IsLegalAction(randomAction));

            Control.DoAction(randomAction);
        }
示例#3
0
        /// <summary>
        /// Goes over all actions of the given state in the Q table and returns the one with the highest Q value.
        /// </summary>
        /// <remarks>
        /// Done as a single pass over the table row. Ties are resolved in favor of the lowest action index.
        /// </remarks>
        /// <param name="control">The game control; used for the legality check when <paramref name="isLegal"/> is true.</param>
        /// <param name="state">The state of control to use; its ID selects the Q table row.</param>
        /// <param name="isLegal">If true, the returned action has to be considered legal in the control.</param>
        /// <returns>
        /// The best action, or null when there is no action to return (no action is legal while
        /// <paramref name="isLegal"/> is true, or there are no actions at all).
        /// </returns>
        protected override Actione getMaxAction(GameControlBase control, State state, bool isLegal)
        {
            int    bestID    = -1; // -1 means no acceptable action was found yet
            double bestValue = double.NegativeInfinity;

            for (int id = 0; id < ActionNum; id++)
            {
                double value = Q_Table[state.ID, id];

                // Once a candidate exists, only a strictly higher value can replace it
                if (bestID >= 0 && !(value > bestValue))
                {
                    continue;
                }

                // Legality is only queried for actions that would become the new best
                if (isLegal && !control.IsLegalAction(new Actione(id)))
                {
                    continue;
                }

                bestID    = id;
                bestValue = value;
            }

            if (bestID < 0)
            {
                return(null);
            }

            return(new Actione(bestID));
        }
示例#4
0
        /// <summary>
        /// A wrapper function to the recursive function, that returns the best action.
        /// Every legal action is tried on a working copy of the state and scored either by the
        /// control's reward (terminal state) or by the discounted result of the recursive search.
        /// If all evaluated actions scored the same, a random legal action is returned instead.
        /// </summary>
        /// <param name="control">Control that the state is in</param>
        /// <param name="s">The current control state</param>
        /// <param name="alpha">Has to be int.MinValue</param>
        /// <param name="beta">Has to be int.MaxValue</param>
        /// <param name="player">The player that is maximizing</param>
        /// <returns>The chosen action.</returns>
        /// <exception cref="InvalidOperationException">No action is legal in the given state.</exception>
        private Actione BestMove(GameControlBase control, State s, double alpha, double beta, Players player)
        {
            const double Discount = 0.9; // Factor applied to rewards that come from deeper levels

            // This is needed in order to register the next action as the opponent's
            Players opponent = (player == Players.Player1) ? Players.Player2 : Players.Player1;

            int    maxID        = 0;
            double maxReward    = double.NegativeInfinity;
            bool   evaluatedAny = false; // Has at least one legal action been scored?
            bool   allSame      = true;  // Are all the rewards the same?

            State currState = (State)s.Clone(); // Working copy, so the original state is not touched

            for (int i = 0; i < control.ActionNum; i++)
            {
                Actione candidate = new Actione(i);

                // Reset the working copy BEFORE the legality check, so legality is not judged against
                // the state left over from the previous candidate.
                // NOTE(review): presumably s.Copy(x) copies s into x - confirm against State.Copy.
                s.Copy(currState);
                if (!control.IsLegalAction(candidate, currState))
                {
                    continue;
                }

                // Make the action as the player
                control.RegisterAction(currState, candidate, player);

                double reward;
                if (control.IsTerminalState(currState)) // If its terminal, reward is the reward that the control returns
                {
                    reward = control.GetReward(player, currState);
                }
                else // If not terminal, use the discounted result of the minimax recursive function
                {
                    reward = Discount * GetMaxRewardRec(control, currState, false, opponent, alpha, beta, 1, MaxDepth);
                }

                // While all rewards so far are equal, maxReward holds that common value,
                // so any difference from it means the rewards are not all the same.
                if (evaluatedAny && reward != maxReward)
                {
                    allSame = false;
                }

                if (!evaluatedAny || reward > maxReward)
                {
                    maxID     = i;
                    maxReward = reward;
                }
                evaluatedAny = true;

                // Alpha beta pruning
                alpha = Math.Max(alpha, maxReward);
                if (beta <= alpha)
                {
                    break;
                }
            }

            if (!evaluatedAny)
            {
                // Without this guard the random fallback below would never terminate
                throw new InvalidOperationException("BestMove found no legal action in the given state.");
            }

            if (!allSame)
            {
                return(new Actione(maxID));
            }

            // If all rewards were the same, do a random legal action
            Actione botAction;
            do
            {
                botAction = new Actione(rand.Next(control.ActionNum));
            }
            while (!control.IsLegalAction(botAction));

            return(botAction);
        }
示例#5
0
        /// <summary>
        /// The actual recursive function that returns the best reward reachable from a state,
        /// alternating between a maximizing and a minimizing level with alpha-beta pruning.
        /// </summary>
        /// <remarks>
        /// Terminal rewards are queried for <paramref name="player"/> on a maximizing level and for
        /// the other player on a minimizing level. Rewards coming back from deeper levels are
        /// multiplied by 0.9. If no action is legal on a level, int.MinValue (maximizing) or
        /// int.MaxValue (minimizing) is returned.
        /// </remarks>
        /// <param name="control">The game control used to check legality, register actions and get rewards</param>
        /// <param name="s">The state to search from</param>
        /// <param name="IsMaxing">Is the current player the player that is maximizing</param>
        /// <param name="player">The current player</param>
        /// <param name="alpha">The best option for maximizing player</param>
        /// <param name="beta">The best option for minimizing player</param>
        /// <param name="level">The current depth of the search</param>
        /// <param name="maxLevel">The maximum depth; beyond it the function returns 0</param>
        /// <returns>The best value found for this level, or 0 when the maximum depth was exceeded.</returns>
        private double GetMaxRewardRec(GameControlBase control, State s, bool IsMaxing, Players player, double alpha, double beta, int level, int maxLevel)
        {
            const double Discount = 0.9; // Factor applied to rewards that come from deeper levels

            // If reached the max depth
            if (level > maxLevel)
            {
                return(0);
            }

            // This is needed in order to register the next action as the opponent's
            Players opponent = (player == Players.Player1) ? Players.Player2 : Players.Player1;

            // Terminal rewards are always taken for the maximizing side
            Players rewardFor = IsMaxing ? player : opponent;

            double bestVal = IsMaxing ? int.MinValue : int.MaxValue;

            State currState = (State)s.Clone(); // Working copy, so the given state is not touched

            for (int i = 0; i < control.ActionNum; i++)
            {
                Actione candidate = new Actione(i);

                // Reset the working copy BEFORE the legality check, so legality is not judged against
                // the state left over from the previous candidate.
                // NOTE(review): presumably s.Copy(x) copies s into x - confirm against State.Copy.
                s.Copy(currState);
                if (!control.IsLegalAction(candidate, currState))
                {
                    continue;
                }

                control.RegisterAction(currState, candidate, player);

                double reward;
                if (control.IsTerminalState(currState)) // If its terminal, reward is the reward that the control returns
                {
                    reward = control.GetReward(rewardFor, currState);
                }
                else // If not terminal, use the discounted result of the next level
                {
                    reward = Discount * GetMaxRewardRec(control, currState, !IsMaxing, opponent, alpha, beta, level + 1, maxLevel);
                }

                if (IsMaxing)
                {
                    bestVal = Math.Max(reward, bestVal);
                    alpha   = Math.Max(alpha, bestVal);
                }
                else
                {
                    bestVal = Math.Min(reward, bestVal);
                    beta    = Math.Min(beta, bestVal);
                }

                // Alpha beta pruning
                if (beta <= alpha)
                {
                    break;
                }
            }

            return(bestVal);
        }