Exemplo n.º 1
0
        private double GetMaxQ(VirusBoard state)
        {
            double max = -10;

            Move[] actions = state.GetPossibleMoves(playerNumber);
            foreach (Move a in actions)
            {
                double value = 0;
                if (!Q[state.CustomHash()].ContainsKey(a.CustomHash()))
                {
                    value = 0;
                }
                else
                {
                    value = Q[state.CustomHash()][a.CustomHash()];
                }
                if (value > max)
                {
                    max = value;
                }
            }
            if (Q[state.CustomHash()].ContainsKey(0))
            {
                if (Q[state.CustomHash()][0] > max)
                {
                    max = Q[state.CustomHash()][0];
                }
            }
            return(max);
        }
Exemplo n.º 2
0
        public override Move Move(Virus percept)
        {
            VirusBoard currentState = percept.GetBoardCopy();

            Move[] actions = currentState.GetPossibleMoves(playerNumber);
            Move   action  = actions[0];

            double max = double.NegativeInfinity;

            foreach (Move a in actions)
            {
                VirusBoard newState = currentState.GetUpdated(a);
                double     q        = 0;
                if (Q.ContainsKey(currentState.CustomHash()))
                {
                    if (Q[currentState.CustomHash()].ContainsKey(a.CustomHash()))
                    {
                        q = Q[currentState.CustomHash()][a.CustomHash()];
                    }
                }
                q += MinValue(newState, 0);
                if (q > max)
                {
                    max    = q;
                    action = a;
                }
                if (max == 1)
                {
                    break;
                }
            }

            return(action);
        }
Exemplo n.º 3
0
        private Move GetMaxExplorationFunctionA(VirusBoard state)
        {
            double max    = double.NegativeInfinity;
            Move   action = default(Move);

            Move[] actions = state.GetPossibleMoves(playerNumber);

            bool berandom = random.NextDouble() < RandomRate;

            foreach (Move a in actions)
            {
                double value = 0;

                if (Q[state.CustomHash()].ContainsKey(a.CustomHash()))
                {
                    if (Q[state.CustomHash()][a.CustomHash()] >= 1)
                    {
                        value  = 1;
                        max    = value;
                        action = a;
                        break;
                    }
                    else if (Q[state.CustomHash()][a.CustomHash()] <= -1)
                    {
                        value = -1;
                    }
                    else
                    {
                        if (berandom)
                        {
                            value = random.NextDouble();
                        }
                        else
                        {
                            value = Q[state.CustomHash()][a.CustomHash()] + ((explore && !(RandomRate > 0)) ? ExplorationRate(N[state.CustomHash()][a.CustomHash()]) : 0);
                        }
                    }
                }
                else
                {
                    value = 1;
                }
                if (value > max)
                {
                    max    = value;
                    action = a;
                }
            }
            return(action);
        }
Exemplo n.º 4
0
        //Calc maxValue
        private double MaxValue(VirusBoard state, int iteration)
        {
            iteration++;
            if (state.winner == playerNumber)
            {
                return(double.PositiveInfinity);
            }
            if (state.winner != playerNumber && state.winner != 0)
            {
                return(double.NegativeInfinity);
            }

            if (iteration < searchLength)
            {
                Move[] actions = state.GetPossibleMoves(playerNumber);

                double max = double.NegativeInfinity;
                foreach (Move a in actions)
                {
                    VirusBoard newState = state.GetUpdated(a);

                    double q = Utility(state, newState);
                    if (Q.ContainsKey(state.CustomHash()))
                    {
                        if (Q[state.CustomHash()].ContainsKey(a.CustomHash()))
                        {
                            q = Q[state.CustomHash()][a.CustomHash()];
                        }
                    }

                    q += MinValue(newState, iteration);
                    if (q > max)
                    {
                        max = q;
                    }
                    if (max == double.PositiveInfinity)
                    {
                        return(max);
                    }
                }

                return(max);
            }
            else
            {
                return(0);
            }
        }
Exemplo n.º 5
0
        public override Move Move(Virus percept)
        {
            //Checking if we're at an terminal state
            byte       winner   = percept.Winner;
            VirusBoard newState = percept.GetBoardCopy();

            if (!Q.ContainsKey(newState.CustomHash()))
            {
                Q.Add(newState.CustomHash(), new Dictionary <UInt32, double>());
            }


            if (learn && !prevState.Equals(default(VirusBoard)))
            {
                if (!N.ContainsKey(prevState.CustomHash()))
                {
                    N.Add(prevState.CustomHash(), new Dictionary <UInt32, int>());
                }
                if (!N[prevState.CustomHash()].ContainsKey(prevAction.CustomHash()))
                {
                    N[prevState.CustomHash()].Add(prevAction.CustomHash(), 0);
                }
                if (!Q.ContainsKey(prevState.CustomHash()))
                {
                    Q.Add(prevState.CustomHash(), new Dictionary <UInt32, double>());
                }
                if (!Q[prevState.CustomHash()].ContainsKey(prevAction.CustomHash()))
                {
                    Q[prevState.CustomHash()].Add(prevAction.CustomHash(), initvalue);
                }

                if (winner == playerNumber)
                {
                    if (!Q[newState.CustomHash()].ContainsKey(0))
                    {
                        Q[newState.CustomHash()].Add(0, 1);
                    }
                }
                else if (winner != playerNumber && winner != 0)
                {
                    if (!Q[newState.CustomHash()].ContainsKey(0))
                    {
                        Q[newState.CustomHash()].Add(0, -1);
                    }
                }

                N[prevState.CustomHash()][prevAction.CustomHash()]++;
                Q[prevState.CustomHash()][prevAction.CustomHash()] =
                    Q[prevState.CustomHash()][prevAction.CustomHash()]
                    + LearningRate(N[prevState.CustomHash()][prevAction.CustomHash()])
                    * (prevReward + discount * GetMaxQ(newState) - Q[prevState.CustomHash()][prevAction.CustomHash()]);
            }

            prevState  = newState;
            prevAction = GetMaxExplorationFunctionA(newState);
            prevReward = 0;
            return(prevAction);
        }
Exemplo n.º 6
0
        public override Move Move(Virus percept)
        {
            VirusBoard newState = percept.GetBoardCopy();

            if (!Q.ContainsKey(newState.CustomHash()))
            {
                Q.Add(newState.CustomHash(), new Dictionary <UInt32, double>());
            }

            if (learn && !prevState.Equals(default(VirusBoard)))
            {
                if (!N.ContainsKey(prevState.CustomHash()))
                {
                    N.Add(prevState.CustomHash(), new Dictionary <UInt32, int>());
                }
                if (!N[prevState.CustomHash()].ContainsKey(prevAction.CustomHash()))
                {
                    N[prevState.CustomHash()].Add(prevAction.CustomHash(), 0);
                }

                N[prevState.CustomHash()][prevAction.CustomHash()]++;
                Q[prevState.CustomHash()][prevAction.CustomHash()] =
                    Q[prevState.CustomHash()][prevAction.CustomHash()]
                    + LearningRate(N[prevState.CustomHash()][prevAction.CustomHash()])
                    * (prevReward + discount * GetMaxQ(newState) - Q[prevState.CustomHash()][prevAction.CustomHash()]);
            }

            prevState  = newState;
            prevAction = GetMaxExplorationFunctionA(newState);
            prevReward = 0;
            if (learn && !Q[prevState.CustomHash()].ContainsKey(prevAction.CustomHash()))
            {
                Q[prevState.CustomHash()].Add(prevAction.CustomHash(), initvalue);
            }
            return(prevAction);
        }
Exemplo n.º 7
0
        public override void EndGame(Virus percept)
        {
            if (learn)
            {
                double reward = 0;
                byte   winner = percept.Winner;
                if (winner == playerNumber)
                {
                    reward = 1;
                }
                else if (winner != playerNumber && winner != 0)
                {
                    reward = -1;
                }
                else
                {
                    reward = 0;
                }
                if (!N.ContainsKey(prevState.CustomHash()))
                {
                    N.Add(prevState.CustomHash(), new Dictionary <UInt32, int>());
                }
                if (!N[prevState.CustomHash()].ContainsKey(prevAction.CustomHash()))
                {
                    N[prevState.CustomHash()].Add(prevAction.CustomHash(), 0);
                }

                N[prevState.CustomHash()][prevAction.CustomHash()]++;
                Q[prevState.CustomHash()][prevAction.CustomHash()] =
                    Q[prevState.CustomHash()][prevAction.CustomHash()]
                    + LearningRate(N[prevState.CustomHash()][prevAction.CustomHash()])
                    * (reward - Q[prevState.CustomHash()][prevAction.CustomHash()]);
            }

            prevState  = default(VirusBoard);
            prevAction = default(Move);
            prevReward = 0;
        }