Beispiel #1
0
        /// <summary>
        /// Returns the largest Q-value stored for <paramref name="state"/>, considering every
        /// move <c>playerNumber</c> can make plus the entry stored under <c>default(Move)</c>.
        /// </summary>
        /// <param name="state">Board whose stored values are inspected.</param>
        /// <returns>
        /// The maximum found. Moves without a stored value count as 0, and the result is never
        /// lower than -10 (the value returned when there are no legal moves and no
        /// <c>default(Move)</c> entry).
        /// </returns>
        /// <remarks>
        /// A state that has no table in <c>Q</c> is treated like an empty table instead of
        /// failing with a <c>KeyNotFoundException</c>.
        /// </remarks>
        private double GetMaxQ(VirusBoard state)
        {
            double max = -10;

            Move[] actions = state.GetPossibleMoves(playerNumber);

            if (!Q.ContainsKey(state))
            {
                // Nothing stored for this state: each legal move counts as 0 and there is
                // no default(Move) entry to consult.
                return(actions.Length > 0 ? 0 : max);
            }

            // Fetch the per-state table once instead of on every lookup.
            var stateValues = Q[state];

            foreach (Move a in actions)
            {
                double value;
                if (!stateValues.TryGetValue(a, out value))
                {
                    value = 0;
                }
                if (value > max)
                {
                    max = value;
                }
            }

            // NOTE(review): the default(Move) entry presumably holds a value for the state
            // itself (e.g. a terminal outcome) - confirm against the code that writes it.
            double defaultMoveValue;
            if (stateValues.TryGetValue(default(Move), out defaultMoveValue) && defaultMoveValue > max)
            {
                max = defaultMoveValue;
            }
            return(max);
        }
Beispiel #2
0
        /// <summary>
        /// Picks, among the moves available to <c>playerNumber</c>, the one with the highest
        /// score, where a move's score is its stored Q-value plus <c>1 / N[state][move]</c>.
        /// </summary>
        /// <param name="state">Board for which a move is chosen.</param>
        /// <returns>
        /// The first move reaching the highest score above -10, or <c>default(Move)</c> when
        /// there are no moves or none scores above -10.
        /// </returns>
        /// <remarks>
        /// Moves without a stored Q-value score 1. An empty table for <paramref name="state"/>
        /// is added to <c>Q</c> as soon as at least one move is examined.
        /// </remarks>
        private Move GetMaxExplorationFunctionA(VirusBoard state)
        {
            Move[] candidates = state.GetPossibleMoves(playerNumber);

            Move   best      = default(Move);
            double bestScore = -10;

            foreach (Move candidate in candidates)
            {
                // Make sure the state has a (possibly empty) table before reading from it.
                if (!Q.ContainsKey(state))
                {
                    Q.Add(state, new Dictionary<Move, double>());
                }

                double score = Q[state].ContainsKey(candidate)
                    ? Q[state][candidate] + 1 / (double)N[state][candidate]
                    : 1;

                // Strict comparison: on ties the earlier move is kept.
                if (score > bestScore)
                {
                    bestScore = score;
                    best      = candidate;
                }
            }

            return(best);
        }
Beispiel #3
0
 /// <summary>
 /// Finishes a game: runs <c>Move</c> once more on the final percept (the returned move is
 /// discarded) and then resets the remembered previous state, action and reward.
 /// </summary>
 /// <param name="percept">Game object describing the final position.</param>
 public override void EndGame(Virus percept)
 {
     // Reuse the regular move step for the final position; only its side effects matter.
     Move(percept);

     // Start the next game without a remembered transition.
     prevReward = 0;
     prevAction = default(Move);
     prevState  = default(VirusBoard);
 }
Beispiel #4
0
 /// <summary>
 /// Creates a memory entry from a start board, the action taken, the resulting board and
 /// an explicitly supplied reward.
 /// </summary>
 /// <param name="start">Stored in <c>StartState</c>.</param>
 /// <param name="action">Stored in <c>Action</c>.</param>
 /// <param name="end">Stored in <c>EndState</c>.</param>
 /// <param name="reward">Stored in <c>Reward</c>.</param>
 public VirusMemory(VirusBoard start, Move action, VirusBoard end, double reward)
 {
     this.StartState = start;
     this.EndState   = end;
     this.Action     = action;
     this.Reward     = reward;
 }
Beispiel #5
0
        /// <summary>
        /// Returns the largest Q-value stored for <paramref name="state"/>, considering every
        /// move <c>playerNumber</c> can make plus the entry stored under action key 0.
        /// Tables are addressed by <c>CustomHash()</c> of the board and of each move.
        /// </summary>
        /// <param name="state">Board whose stored values are inspected.</param>
        /// <returns>
        /// The maximum found. Moves without a stored value count as 0, and the result is never
        /// lower than -10 (the value returned when there are no legal moves and no key-0 entry).
        /// </returns>
        /// <remarks>
        /// A state that has no table in <c>Q</c> is treated like an empty table instead of
        /// failing with a <c>KeyNotFoundException</c>.
        /// </remarks>
        private double GetMaxQ(VirusBoard state)
        {
            double max = -10;

            Move[] actions = state.GetPossibleMoves(playerNumber);

            // Hash the board once; the original recomputed it for every lookup.
            var stateKey = state.CustomHash();
            if (!Q.ContainsKey(stateKey))
            {
                // Nothing stored for this state: each legal move counts as 0 and there is
                // no key-0 entry to consult.
                return(actions.Length > 0 ? 0 : max);
            }

            var stateValues = Q[stateKey];

            foreach (Move a in actions)
            {
                double value;
                if (!stateValues.TryGetValue(a.CustomHash(), out value))
                {
                    value = 0;
                }
                if (value > max)
                {
                    max = value;
                }
            }

            // NOTE(review): key 0 presumably holds a value for the state itself (e.g. a
            // terminal outcome) - confirm against the code that writes it.
            double keyZeroValue;
            if (stateValues.TryGetValue(0, out keyZeroValue) && keyZeroValue > max)
            {
                max = keyZeroValue;
            }
            return(max);
        }
Beispiel #6
0
        /// <summary>
        /// Chooses a move for <c>playerNumber</c> by scoring each legal move as its stored
        /// Q-value (0 when none is stored) plus <c>MinValue</c> of the resulting board.
        /// </summary>
        /// <param name="percept">Game object from which a copy of the board is taken.</param>
        /// <returns>
        /// The first move reaching the highest score, the first legal move when no score
        /// exceeds negative infinity, or <c>default(Move)</c> when there are no legal moves.
        /// </returns>
        public override Move Move(Virus percept)
        {
            VirusBoard currentState = percept.GetBoardCopy();

            Move[] actions = currentState.GetPossibleMoves(playerNumber);
            if (actions.Length < 1)
            {
                // Nothing to choose from; indexing actions[0] would throw.
                return(default(Move));
            }

            // Hash the current board once instead of on every lookup inside the loop.
            var    stateKey = currentState.CustomHash();
            Move   action   = actions[0];
            double max      = double.NegativeInfinity;

            foreach (Move a in actions)
            {
                VirusBoard newState = currentState.GetUpdated(a);

                double q = 0;
                if (Q.ContainsKey(stateKey))
                {
                    double learned;
                    if (Q[stateKey].TryGetValue(a.CustomHash(), out learned))
                    {
                        q = learned;
                    }
                }

                q += MinValue(newState, 0);
                if (q > max)
                {
                    max    = q;
                    action = a;
                }

                // Stop searching once the best score is exactly 1.
                // NOTE(review): 1 presumably marks a known win - confirm.
                if (max == 1)
                {
                    break;
                }
            }

            return(action);
        }
Beispiel #7
0
 /// <summary>
 /// Creates a memory entry from a start board, the action taken and the resulting board.
 /// The reward is obtained from <c>VirusNameSpace.Agents.MemoryQAgent.Reward(start, end)</c>.
 /// </summary>
 /// <param name="start">Stored in <c>StartState</c>.</param>
 /// <param name="action">Stored in <c>Action</c>.</param>
 /// <param name="end">Stored in <c>EndState</c>.</param>
 public VirusMemory(VirusBoard start, Move action, VirusBoard end)
 {
     this.StartState = start;
     this.EndState   = end;
     this.Action     = action;

     // Derived from the two boards rather than passed in by the caller.
     this.Reward = VirusNameSpace.Agents.MemoryQAgent.Reward(start, end);
 }
Beispiel #8
0
        /// <summary>
        /// Chooses a move for <c>playerNumber</c> by scoring each legal move as
        /// <c>Utility(current, next)</c> plus <c>MinValue(next, 0)</c>.
        /// </summary>
        /// <param name="percept">Game object from which a copy of the board is taken.</param>
        /// <returns>
        /// The first move reaching the highest score, the first legal move when no score
        /// exceeds negative infinity, or <c>default(Move)</c> when there are no legal moves.
        /// </returns>
        public override Move Move(Virus percept)
        {
            VirusBoard currentState = percept.GetBoardCopy();

            Move[] actions = currentState.GetPossibleMoves(playerNumber);
            if (actions.Length < 1)
            {
                // Nothing to choose from; indexing actions[0] would throw.
                return(default(Move));
            }

            Move   action = actions[0];
            double max    = double.NegativeInfinity;

            foreach (Move a in actions)
            {
                VirusBoard newState = currentState.GetUpdated(a);
                double     q        = Utility(currentState, newState) + MinValue(newState, 0);

                if (q > max)
                {
                    max    = q;
                    action = a;
                }

                // A score of positive infinity cannot be beaten, so stop looking.
                if (double.IsPositiveInfinity(max))
                {
                    break;
                }
            }

            return(action);
        }
Beispiel #9
0
        /// <summary>
        /// Chooses a move either greedily or at random. One draw from <c>random</c> decides:
        /// when it exceeds <c>randomRatio</c> the greedy branch runs, otherwise the random one.
        /// </summary>
        /// <param name="percept">Game object from which a copy of the board is taken.</param>
        /// <returns>
        /// <c>default(Move)</c> when there are no legal moves. In the greedy branch, one of the
        /// moves with the highest <c>TakeablePieces</c> count (reduced by one for long moves).
        /// In the random branch, any legal move. With <c>deterministic</c> set, the first
        /// candidate is always taken instead of a random one.
        /// </returns>
        public override Move Move(Virus percept)
        {
            VirusBoard state   = percept.GetBoardCopy();
            Move[]     actions = state.GetPossibleMoves(playerNumber);

            if (actions.Length < 1)
            {
                return(default(Move));
            }

            bool greedy = random.NextDouble() > randomRatio;
            if (!greedy)
            {
                // Random branch: any legal move will do.
                return(deterministic ? actions[0] : actions[random.Next(actions.Length)]);
            }

            // Greedy branch: collect every move that ties for the best score.
            List<Move> best      = new List<Move>();
            int        bestScore = -1;

            foreach (Move candidate in actions)
            {
                int score = state.TakeablePieces(candidate);
                if (candidate.IsLongMove)
                {
                    // Long moves are scored one lower.
                    score--;
                }

                if (score > bestScore)
                {
                    // New best: drop the previous ties; the move is added just below.
                    bestScore = score;
                    best.Clear();
                }
                if (score == bestScore)
                {
                    best.Add(candidate);
                }
            }

            return(deterministic ? best[0] : best[random.Next(best.Count)]);
        }
Beispiel #10
0
        /// <summary>
        /// Performs one learning step for the previously chosen move (when <c>learn</c> is set
        /// and a previous state is remembered) and then picks the next move through
        /// <c>GetMaxExplorationFunctionA</c>.
        /// </summary>
        /// <param name="percept">Game object supplying the current board and winner.</param>
        /// <returns>The chosen move, which is also remembered as <c>prevAction</c>.</returns>
        /// <remarks>
        /// The update applied to the previous state/action entry is
        /// <c>Q += LearningRate(N) * (prevReward + discount * GetMaxQ(newState) - Q)</c>, with
        /// <c>N</c> being the visit count after incrementing it. When the percept reports a
        /// winner, an entry under action key 0 is added for the new state if missing: 1 if the
        /// winner is <c>playerNumber</c>, -1 for any other non-zero winner.
        /// </remarks>
        public override Move Move(Virus percept)
        {
            // Winner reported for the current position; used to seed the key-0 entry below.
            byte       winner   = percept.Winner;
            VirusBoard newState = percept.GetBoardCopy();

            // Hash each board/move once; the original recomputed the hashes for every lookup.
            var newStateKey = newState.CustomHash();

            if (!Q.ContainsKey(newStateKey))
            {
                Q.Add(newStateKey, new Dictionary<UInt32, double>());
            }

            if (learn && !prevState.Equals(default(VirusBoard)))
            {
                var prevStateKey  = prevState.CustomHash();
                var prevActionKey = prevAction.CustomHash();

                // Make sure the visit counter for the previous state/action exists.
                if (!N.ContainsKey(prevStateKey))
                {
                    N.Add(prevStateKey, new Dictionary<UInt32, int>());
                }
                var visits = N[prevStateKey];
                if (!visits.ContainsKey(prevActionKey))
                {
                    visits.Add(prevActionKey, 0);
                }

                // Make sure the Q entry for the previous state/action exists.
                if (!Q.ContainsKey(prevStateKey))
                {
                    Q.Add(prevStateKey, new Dictionary<UInt32, double>());
                }
                var prevValues = Q[prevStateKey];
                if (!prevValues.ContainsKey(prevActionKey))
                {
                    prevValues.Add(prevActionKey, initvalue);
                }

                // Seed the key-0 entry of the new state once a winner is known.
                var newValues = Q[newStateKey];
                if (winner == playerNumber)
                {
                    if (!newValues.ContainsKey(0))
                    {
                        newValues.Add(0, 1);
                    }
                }
                else if (winner != 0)
                {
                    if (!newValues.ContainsKey(0))
                    {
                        newValues.Add(0, -1);
                    }
                }

                visits[prevActionKey]++;

                double oldQ = prevValues[prevActionKey];
                prevValues[prevActionKey] =
                    oldQ
                    + LearningRate(visits[prevActionKey])
                    * (prevReward + discount * GetMaxQ(newState) - oldQ);
            }

            prevState  = newState;
            prevAction = GetMaxExplorationFunctionA(newState);
            prevReward = 0;
            return(prevAction);
        }
Beispiel #11
0
        /// <summary>
        /// Picks a move for <c>playerNumber</c> from the stored Q-values of
        /// <paramref name="state"/>, optionally randomized or boosted by an exploration bonus.
        /// </summary>
        /// <param name="state">Board for which a move is chosen.</param>
        /// <returns>
        /// The first move whose stored value is at least 1 if there is one; otherwise the
        /// first move with the highest score; <c>default(Move)</c> when there are no moves.
        /// </returns>
        /// <remarks>
        /// Scoring per move: no stored value gives 1; a stored value of -1 or less gives -1;
        /// otherwise a random number when this call was selected to be random (one draw
        /// against <c>RandomRate</c>), else the stored value plus <c>ExplorationRate(N)</c>
        /// when <c>explore</c> is set and <c>RandomRate</c> is not positive. A state without a
        /// table in <c>Q</c> is treated as having no stored values instead of failing with a
        /// <c>KeyNotFoundException</c>.
        /// </remarks>
        private Move GetMaxExplorationFunctionA(VirusBoard state)
        {
            double max    = double.NegativeInfinity;
            Move   action = default(Move);

            Move[] actions = state.GetPossibleMoves(playerNumber);

            // Decided once per call, before any move is scored.
            bool berandom = random.NextDouble() < RandomRate;

            // Hash the board and fetch its table once instead of on every lookup.
            var stateKey    = state.CustomHash();
            var stateValues = Q.ContainsKey(stateKey) ? Q[stateKey] : null;

            foreach (Move a in actions)
            {
                var    actionKey = a.CustomHash();
                double learned;
                double value;

                if (stateValues == null || !stateValues.TryGetValue(actionKey, out learned))
                {
                    // Nothing stored for this move yet.
                    value = 1;
                }
                else if (learned >= 1)
                {
                    // Take this move immediately and stop looking.
                    max    = 1;
                    action = a;
                    break;
                }
                else if (learned <= -1)
                {
                    value = -1;
                }
                else if (berandom)
                {
                    value = random.NextDouble();
                }
                else
                {
                    value = learned + ((explore && !(RandomRate > 0)) ? ExplorationRate(N[stateKey][actionKey]) : 0);
                }

                if (value > max)
                {
                    max    = value;
                    action = a;
                }
            }
            return(action);
        }
Beispiel #12
0
        /// <summary>
        /// Maximizing half of the depth-limited search: returns the best score
        /// <c>playerNumber</c> can reach from <paramref name="state"/>.
        /// </summary>
        /// <param name="state">Board to evaluate.</param>
        /// <param name="iteration">Depth of the caller; incremented on entry.</param>
        /// <returns>
        /// Positive infinity if <c>playerNumber</c> has won, negative infinity if another
        /// player has won, 0 once the incremented depth reaches <c>searchLength</c>, otherwise
        /// the maximum over all moves of the move score plus <c>MinValue</c> of the resulting
        /// board (negative infinity when there are no moves).
        /// </returns>
        private double MaxValue(VirusBoard state, int iteration)
        {
            iteration++;
            if (state.winner == playerNumber)
            {
                return(double.PositiveInfinity);
            }
            if (state.winner != 0)
            {
                // Someone other than playerNumber has won.
                return(double.NegativeInfinity);
            }

            if (iteration >= searchLength)
            {
                // Depth limit reached: contribute nothing further.
                return(0);
            }

            Move[] actions = state.GetPossibleMoves(playerNumber);

            // Hash the board once; the original recomputed it for every lookup in the loop.
            var stateKey = state.CustomHash();

            double max = double.NegativeInfinity;
            foreach (Move a in actions)
            {
                VirusBoard newState = state.GetUpdated(a);

                // A stored Q-value, when present, replaces the heuristic utility.
                double q = Utility(state, newState);
                if (Q.ContainsKey(stateKey))
                {
                    double learned;
                    if (Q[stateKey].TryGetValue(a.CustomHash(), out learned))
                    {
                        q = learned;
                    }
                }

                q += MinValue(newState, iteration);
                if (q > max)
                {
                    max = q;
                }
                if (double.IsPositiveInfinity(max))
                {
                    // Cannot be improved upon; skip the remaining moves.
                    return(max);
                }
            }

            return(max);
        }
Beispiel #13
0
        /// <summary>
        /// Minimizing half of the depth-limited search: returns the lowest score the other
        /// player can force from <paramref name="state"/>.
        /// </summary>
        /// <param name="state">Board to evaluate.</param>
        /// <param name="iteration">Depth of the caller; the search continues at one more.</param>
        /// <returns>
        /// Positive infinity if <c>playerNumber</c> has won, negative infinity if another
        /// player has won, 0 once the depth limit <c>searchLength</c> is reached, otherwise
        /// the minimum over all opponent moves of <c>Utility</c> plus <c>MaxValue</c> of the
        /// resulting board (positive infinity when the opponent has no moves).
        /// </returns>
        private double MinValue(VirusBoard state, int iteration)
        {
            int depth = iteration + 1;

            var winner = state.winner;
            if (winner == playerNumber)
            {
                return(double.PositiveInfinity);
            }
            if (winner != 0)
            {
                // Someone other than playerNumber has won.
                return(double.NegativeInfinity);
            }

            bool withinHorizon = depth < searchLength;
            if (!withinHorizon)
            {
                return(0);
            }

            // The opponent is player 2 when we are player 1, and player 1 otherwise.
            byte rival = 1;
            if (playerNumber == 1)
            {
                rival = 2;
            }

            double lowest = double.PositiveInfinity;
            foreach (Move reply in state.GetPossibleMoves(rival))
            {
                VirusBoard next  = state.GetUpdated(reply);
                double     score = Utility(state, next) + MaxValue(next, depth);

                if (score < lowest)
                {
                    lowest = score;
                }
                if (double.IsNegativeInfinity(lowest))
                {
                    // Cannot get any worse; skip the remaining replies.
                    return(lowest);
                }
            }

            return(lowest);
        }
Beispiel #14
0
        /// <summary>
        /// Finishes a game. When <c>learn</c> is set, applies a final update to the Q entry of
        /// the remembered previous state/action using the game outcome as reward; afterwards
        /// the remembered state, action and reward are reset.
        /// </summary>
        /// <param name="percept">Game object supplying the winner.</param>
        /// <remarks>
        /// The reward is 1 if <c>playerNumber</c> won, -1 if another player won, and 0 when
        /// the winner is 0. The update is <c>Q += LearningRate(N) * (reward - Q)</c>, with
        /// <c>N</c> being the visit count after incrementing it. The Q entry for the previous
        /// state/action is expected to exist already; only the visit counter is created on
        /// demand.
        /// </remarks>
        public override void EndGame(Virus percept)
        {
            if (learn)
            {
                byte   winner = percept.Winner;
                double reward = 0;
                if (winner == playerNumber)
                {
                    reward = 1;
                }
                else if (winner != 0)
                {
                    reward = -1;
                }

                // Hash once; the original recomputed both hashes for every lookup.
                var prevStateKey  = prevState.CustomHash();
                var prevActionKey = prevAction.CustomHash();

                if (!N.ContainsKey(prevStateKey))
                {
                    N.Add(prevStateKey, new Dictionary<UInt32, int>());
                }
                var visits = N[prevStateKey];
                if (!visits.ContainsKey(prevActionKey))
                {
                    visits.Add(prevActionKey, 0);
                }
                visits[prevActionKey]++;

                var    prevValues = Q[prevStateKey];
                double oldQ       = prevValues[prevActionKey];
                prevValues[prevActionKey] =
                    oldQ + LearningRate(visits[prevActionKey]) * (reward - oldQ);
            }

            prevState  = default(VirusBoard);
            prevAction = default(Move);
            prevReward = 0;
        }
Beispiel #15
0
        /// <summary>
        /// Scores the step from <paramref name="currentState"/> to
        /// <paramref name="nextState"/> as one tenth of the change in piece balance, where the
        /// balance of a board is the number of cells equal to <c>playerNumber</c> minus the
        /// number of other non-zero cells.
        /// </summary>
        /// <param name="currentState">Board before the move.</param>
        /// <param name="nextState">Board after the move.</param>
        /// <returns><c>(balance(next) - balance(current)) * 0.1</c>.</returns>
        private double Utility(VirusBoard currentState, VirusBoard nextState)
        {
            int before = CountPieceBalance(currentState);
            int after  = CountPieceBalance(nextState);

            double change = after - before;
            return(change * 0.1);
        }

        /// <summary>
        /// Counts cells of <paramref name="position"/> equal to <c>playerNumber</c> as +1 and
        /// every other non-zero cell as -1; cells equal to 0 are ignored.
        /// </summary>
        private int CountPieceBalance(VirusBoard position)
        {
            int balance = 0;

            foreach (byte cell in position.board)
            {
                if (cell == playerNumber)
                {
                    balance++;
                }
                else if (cell != 0)
                {
                    balance--;
                }
            }

            return(balance);
        }
Beispiel #16
0
        /// <summary>
        /// Performs one learning step for the previously chosen move (when <c>learn</c> is set
        /// and a previous state is remembered) and then picks the next move through
        /// <c>GetMaxExplorationFunctionA</c>.
        /// </summary>
        /// <param name="percept">Game object from which a copy of the board is taken.</param>
        /// <returns>The chosen move, which is also remembered as <c>prevAction</c>.</returns>
        /// <remarks>
        /// The update applied to the previous state/action entry is
        /// <c>Q += LearningRate(N) * (prevReward + discount * GetMaxQ(newState) - Q)</c>, with
        /// <c>N</c> being the visit count after incrementing it. When learning, the Q entry for
        /// the newly chosen move is created with <c>initvalue</c> if it does not exist yet.
        /// </remarks>
        public override Move Move(Virus percept)
        {
            VirusBoard newState = percept.GetBoardCopy();

            // Hash each board/move once; the original recomputed the hashes for every lookup.
            var newStateKey = newState.CustomHash();

            if (!Q.ContainsKey(newStateKey))
            {
                Q.Add(newStateKey, new Dictionary<UInt32, double>());
            }

            if (learn && !prevState.Equals(default(VirusBoard)))
            {
                var prevStateKey  = prevState.CustomHash();
                var prevActionKey = prevAction.CustomHash();

                // Make sure the visit counter for the previous state/action exists.
                if (!N.ContainsKey(prevStateKey))
                {
                    N.Add(prevStateKey, new Dictionary<UInt32, int>());
                }
                var visits = N[prevStateKey];
                if (!visits.ContainsKey(prevActionKey))
                {
                    visits.Add(prevActionKey, 0);
                }
                visits[prevActionKey]++;

                var    prevValues = Q[prevStateKey];
                double oldQ       = prevValues[prevActionKey];
                prevValues[prevActionKey] =
                    oldQ
                    + LearningRate(visits[prevActionKey])
                    * (prevReward + discount * GetMaxQ(newState) - oldQ);
            }

            prevState  = newState;
            prevAction = GetMaxExplorationFunctionA(newState);
            prevReward = 0;

            if (learn)
            {
                // Give the chosen move an initial value so a later update finds an entry.
                var newValues    = Q[newStateKey];
                var newActionKey = prevAction.CustomHash();
                if (!newValues.ContainsKey(newActionKey))
                {
                    newValues.Add(newActionKey, initvalue);
                }
            }
            return(prevAction);
        }