Beispiel #1
0
        /// <summary>
        /// Chooses the next move for the observed board. When learning is enabled and a
        /// previous state/action pair has been recorded, the Q-value of that pair is first
        /// updated from the stored reward and the best Q-value of the new state.
        /// </summary>
        /// <param name="percept">The game as currently observed by this agent.</param>
        /// <returns>The move selected by <c>GetMaxExplorationFunctionA</c> for the current board.</returns>
        public override Move Move(Virus percept)
        {
            // The winner is read to detect whether the observed state is terminal.
            byte       winner   = percept.Winner;
            VirusBoard newState = percept.GetBoardCopy();

            // Hash each board/action once and reuse the key instead of re-hashing on every lookup.
            // NOTE(review): assumes CustomHash() is deterministic and side-effect free - confirm.
            var newStateKey = newState.CustomHash();

            // The new state always gets a Q table, even when not learning.
            if (!Q.ContainsKey(newStateKey))
            {
                Q.Add(newStateKey, new Dictionary<UInt32, double>());
            }

            if (learn && !prevState.Equals(default(VirusBoard)))
            {
                var prevStateKey  = prevState.CustomHash();
                var prevActionKey = prevAction.CustomHash();

                // Make sure the visit counter for (prevState, prevAction) exists.
                if (!N.ContainsKey(prevStateKey))
                {
                    N.Add(prevStateKey, new Dictionary<UInt32, int>());
                }
                var visitCounts = N[prevStateKey];
                if (!visitCounts.ContainsKey(prevActionKey))
                {
                    visitCounts.Add(prevActionKey, 0);
                }

                // Make sure the Q-value for (prevState, prevAction) exists.
                if (!Q.ContainsKey(prevStateKey))
                {
                    Q.Add(prevStateKey, new Dictionary<UInt32, double>());
                }
                var prevValues = Q[prevStateKey];
                if (!prevValues.ContainsKey(prevActionKey))
                {
                    prevValues.Add(prevActionKey, initvalue);
                }

                // In a decided game, store the outcome under action key 0 of the new state
                // (+1 for our win, -1 for any other winner) unless a value is already there.
                var newValues = Q[newStateKey];
                if (winner == playerNumber)
                {
                    if (!newValues.ContainsKey(0))
                    {
                        newValues.Add(0, 1);
                    }
                }
                else if (winner != 0)
                {
                    if (!newValues.ContainsKey(0))
                    {
                        newValues.Add(0, -1);
                    }
                }

                // Q(s,a) += rate(visits) * (reward + discount * maxQ(s') - Q(s,a))
                visitCounts[prevActionKey]++;
                double oldValue = prevValues[prevActionKey];
                prevValues[prevActionKey] =
                    oldValue
                    + LearningRate(visitCounts[prevActionKey])
                    * (prevReward + discount * GetMaxQ(newState) - oldValue);
            }

            // Remember what we saw and did so the next call can learn from it.
            prevState  = newState;
            prevAction = GetMaxExplorationFunctionA(newState);
            prevReward = 0;
            return prevAction;
        }
Beispiel #2
0
        /// <summary>
        /// Chooses the next move for the observed board. When learning is enabled and a
        /// previous state/action pair has been recorded, the Q-value of that pair is first
        /// updated from the stored reward and the best Q-value of the new state.
        /// </summary>
        /// <param name="percept">The game as currently observed by this agent.</param>
        /// <returns>The move selected by <c>GetMaxExplorationFunctionA</c> for the current board.</returns>
        public override Move Move(Virus percept)
        {
            VirusBoard newState = percept.GetBoardCopy();

            // Hash each board/action once and reuse the key instead of re-hashing on every lookup.
            // NOTE(review): assumes CustomHash() is deterministic and side-effect free - confirm.
            var newStateKey = newState.CustomHash();

            // The new state always gets a Q table, even when not learning.
            if (!Q.ContainsKey(newStateKey))
            {
                Q.Add(newStateKey, new Dictionary<UInt32, double>());
            }

            if (learn && !prevState.Equals(default(VirusBoard)))
            {
                var prevStateKey  = prevState.CustomHash();
                var prevActionKey = prevAction.CustomHash();

                // Make sure the visit counter for (prevState, prevAction) exists.
                if (!N.ContainsKey(prevStateKey))
                {
                    N.Add(prevStateKey, new Dictionary<UInt32, int>());
                }
                var visitCounts = N[prevStateKey];
                if (!visitCounts.ContainsKey(prevActionKey))
                {
                    visitCounts.Add(prevActionKey, 0);
                }

                // The Q entry for the previous pair is normally created at the end of the
                // previous call, but only if learning was enabled then. Create it here as
                // well so enabling learning mid-game cannot fail with KeyNotFoundException.
                if (!Q.ContainsKey(prevStateKey))
                {
                    Q.Add(prevStateKey, new Dictionary<UInt32, double>());
                }
                var prevValues = Q[prevStateKey];
                if (!prevValues.ContainsKey(prevActionKey))
                {
                    prevValues.Add(prevActionKey, initvalue);
                }

                // Q(s,a) += rate(visits) * (reward + discount * maxQ(s') - Q(s,a))
                visitCounts[prevActionKey]++;
                double oldValue = prevValues[prevActionKey];
                prevValues[prevActionKey] =
                    oldValue
                    + LearningRate(visitCounts[prevActionKey])
                    * (prevReward + discount * GetMaxQ(newState) - oldValue);
            }

            // Remember what we saw and did so the next call can learn from it.
            prevState  = newState;
            prevAction = GetMaxExplorationFunctionA(newState);
            prevReward = 0;

            if (learn)
            {
                // Seed the Q-value of the pair just chosen so the next update can read it.
                var chosenActionKey = prevAction.CustomHash();
                var currentValues   = Q[newStateKey];
                if (!currentValues.ContainsKey(chosenActionKey))
                {
                    currentValues.Add(chosenActionKey, initvalue);
                }
            }
            return prevAction;
        }
Beispiel #3
0
        /// <summary>
        /// Finishes a game: when learning is enabled and a previous state/action pair has
        /// been recorded, moves that pair's Q-value toward the final reward (+1 if this
        /// player won, -1 if another player won, 0 otherwise). The remembered state, action
        /// and reward are then cleared.
        /// </summary>
        /// <param name="percept">The finished game as observed by this agent.</param>
        public override void EndGame(Virus percept)
        {
            // Skip learning when no move was recorded (EndGame before any Move, or called twice):
            // there is no state/action pair to update and the table lookups below would fail.
            if (learn && !prevState.Equals(default(VirusBoard)))
            {
                double reward = 0;
                byte   winner = percept.Winner;
                if (winner == playerNumber)
                {
                    reward = 1;
                }
                else if (winner != 0)
                {
                    reward = -1;
                }

                // Hash once and reuse the key instead of re-hashing on every lookup.
                // NOTE(review): assumes CustomHash() is deterministic and side-effect free - confirm.
                var prevStateKey  = prevState.CustomHash();
                var prevActionKey = prevAction.CustomHash();

                // Make sure the visit counter for (prevState, prevAction) exists.
                if (!N.ContainsKey(prevStateKey))
                {
                    N.Add(prevStateKey, new Dictionary<UInt32, int>());
                }
                var visitCounts = N[prevStateKey];
                if (!visitCounts.ContainsKey(prevActionKey))
                {
                    visitCounts.Add(prevActionKey, 0);
                }

                // Make sure the Q-value exists too, so the update cannot throw
                // KeyNotFoundException if the pair was never seeded.
                if (!Q.ContainsKey(prevStateKey))
                {
                    Q.Add(prevStateKey, new Dictionary<UInt32, double>());
                }
                var prevValues = Q[prevStateKey];
                if (!prevValues.ContainsKey(prevActionKey))
                {
                    prevValues.Add(prevActionKey, initvalue);
                }

                // Terminal update, no successor state: Q(s,a) += rate(visits) * (reward - Q(s,a))
                visitCounts[prevActionKey]++;
                double oldValue = prevValues[prevActionKey];
                prevValues[prevActionKey] =
                    oldValue
                    + LearningRate(visitCounts[prevActionKey])
                    * (reward - oldValue);
            }

            // Reset so the next game starts without a remembered state or action.
            prevState  = default(VirusBoard);
            prevAction = default(Move);
            prevReward = 0;
        }