Exemplo n.º 1
0
        /// <summary>
        /// Looks for the playable intent (enum values 5 through 13) whose resulting state has
        /// the highest recorded value among the states stored in <c>ticTacTardStateWithActions</c>.
        /// </summary>
        /// <param name="action">State, together with its current intent, to evaluate moves from.</param>
        /// <returns>
        /// The best-valued playable intent, or <c>action.intent</c> when no candidate leads to a
        /// stored state whose value is greater than -1.
        /// </returns>
        private Intent GetBestIntent(TicTacTardStateWithAction action)
        {
            Intent bestIntent = action.intent;
            float  bestValue  = -1;

            // Throw-away player used only to simulate the candidate moves.
            TicTacTardPlayer simulatedPlayer = new TicTacTardPlayer(2, "1");

            for (int candidateIndex = 5; candidateIndex <= 13; candidateIndex++)
            {
                Intent candidate = (Intent)candidateIndex;

                if (!TicTacTardGame.CanPlayIntent(action, candidate))
                {
                    continue;
                }

                TicTacTardState successor = TicTacTardGame.PlayAction(action, simulatedPlayer, candidate, false);
                TicTacTardState known     = ticTacTardStateWithActions.Find(saved => saved.IsSameState(successor));

                // Negated ">" (rather than "<=") so that a non-comparable value is skipped as well.
                if (known == null || !(known.value > bestValue))
                {
                    continue;
                }

                bestValue  = known.value;
                bestIntent = candidate;
            }

            return bestIntent;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Runs <see cref="SimulateEpisode"/> from <paramref name="state"/> and, when
        /// <paramref name="offPolicy"/> is set, afterwards recomputes the value of every stored
        /// state and switches each one to the intent returned by <see cref="GetBestIntent"/>.
        /// </summary>
        /// <param name="state">State the episodes are simulated from.</param>
        /// <param name="isFirstVisit">Forwarded unchanged to <see cref="SimulateEpisode"/>.</param>
        /// <param name="offPolicy">When true, values and intents are refreshed here after the simulation.</param>
        /// <returns>
        /// The stability flag returned by the simulation, forced to <c>false</c> when at least
        /// one stored intent was changed by the off-policy refresh.
        /// </returns>
        public bool ComputeInitIntent(TicTacTardStateWithAction state, bool isFirstVisit, bool offPolicy)
        {
            bool policyStable = SimulateEpisode(state, isFirstVisit, offPolicy);

            if (!offPolicy)
            {
                return policyStable;
            }

            // Value of a stored state = accumulated score divided by its visit count.
            foreach (TicTacTardStateWithAction stored in ticTacTardStateWithActions)
            {
                stored.value = stored.WinScore / stored.Visits;
            }

            for (int index = 0; index < ticTacTardStateWithActions.Count; index++)
            {
                TicTacTardStateWithAction entry    = ticTacTardStateWithActions[index];
                Intent                    improved = GetBestIntent(entry);

                bool intentChanged = improved != Intent.Nothing && improved != entry.intent;

                if (intentChanged)
                {
                    entry.intent = improved;
                    policyStable = false;
                }
            }

            return policyStable;
        }
        /// <summary>
        /// Clears <c>isInit</c>, creates the two players for the current <c>gameType</c> when the
        /// player list is empty, resets both players' scores and makes the first player the
        /// current one.
        /// </summary>
        /// <remarks>
        /// In <c>HumanVBot</c> mode the bot's policy is computed up front by calling
        /// <c>ComputeInitIntent</c> repeatedly until it reports a stable policy or the
        /// iteration cap is reached.
        /// </remarks>
        /// <param name="isHuman">Not read by this method; kept so existing callers keep compiling.</param>
        public void InitIntent(bool isHuman)
        {
            // Upper bound on policy-improvement rounds, so a never-stabilising policy cannot hang the game.
            const int maxPolicyIterations = 100;

            isInit = false;

            if (player.Count == 0)
            {
                switch (gameType)
                {
                case TicTacTardGameType.HumanVHuman:
                    for (int i = 0; i < 2; i++)
                    {
                        player.Add(new TicTacTardPlayer(i, i.ToString()));
                    }
                    break;

                case TicTacTardGameType.HumanVBot:
                    // Keep a typed reference to the bot instead of re-casting player[0] with "as",
                    // which would yield null (and a NullReferenceException) on a type mismatch.
                    TicTacTardAndroid bot = new TicTacTardAndroid(0, "0");

                    player.Add(bot);
                    player.Add(new TicTacTardPlayer(1, "1"));

                    TicTacTardStateWithAction currentStateWithAction =
                        new TicTacTardStateWithAction(currentState, GetRandomPossibleMove(currentState));

                    int  safeLoopIteration = 0;
                    bool policyIsStable    = false;

                    while (!policyIsStable && safeLoopIteration < maxPolicyIterations)
                    {
                        ++safeLoopIteration;
                        policyIsStable = bot.ComputeInitIntent(currentStateWithAction, true, true);
                    }

                    // Report only a genuine failure to converge: the policy may legitimately
                    // become stable on the very last allowed iteration.
                    if (!policyIsStable)
                    {
                        Debug.LogError("safeLoopIteration trigger : ExitComputeIntent");
                    }

                    Debug.Log("Bot generate " + bot.ticTacTardStateWithActions.Count + " states");
                    break;

                case TicTacTardGameType.BotVBot:
                    for (int i = 0; i < 2; i++)
                    {
                        player.Add(new TicTacTardAndroid(i, i.ToString()));
                    }
                    break;
                }
            }

            ((TicTacTardPlayer)player[0]).ResetScore();
            ((TicTacTardPlayer)player[1]).ResetScore();
            currentPlayer = (TicTacTardPlayer)player[0];
        }
Exemplo n.º 4
0
        /// <summary>
        /// Chooses between the intent already attached to <paramref name="state"/> and a random
        /// possible move, based on a random draw.
        /// </summary>
        /// <param name="state">State to choose an intent for.</param>
        /// <returns>
        /// <c>state.intent</c> when nine actions have already been played or when the draw is
        /// below 5; otherwise the first random possible move that is not <c>Intent.Nothing</c>.
        /// </returns>
        private Intent EpsilonGreedy(TicTacTardStateWithAction state)
        {
            // Drawn before the full-board check so the random generator is always advanced.
            int roll = Random.Range(0, 10);

            bool keepCurrentIntent = state.nbActionPlayed == 9 || roll < 5;

            if (keepCurrentIntent)
            {
                return state.intent;
            }

            Intent explored = TicTacTardGame.GetRandomPossibleMove(state);

            // Re-draw until an actual move comes back.
            while (explored == Intent.Nothing)
            {
                explored = TicTacTardGame.GetRandomPossibleMove(state);
            }

            return explored;
        }
Exemplo n.º 5
0
        /// <summary>
        /// Resets the score and visit count of every stored state, registers
        /// <paramref name="state"/> if no stored state matches it, then generates
        /// <c>nbEpisode</c> episodes from it and accumulates their rewards backwards through
        /// each episode.
        /// </summary>
        /// <param name="state">State every episode is generated from.</param>
        /// <param name="isFirstVisit">
        /// When true, a state is only credited at a step if the episode reports no matching
        /// state up to the preceding index; when false, every step is credited.
        /// </param>
        /// <param name="offPolicy">
        /// When false, values and intents of all stored states are refreshed after each episode.
        /// </param>
        /// <returns>
        /// <c>false</c> if any intent was changed by the per-episode refresh; otherwise <c>true</c>.
        /// </returns>
        public bool SimulateEpisode(TicTacTardStateWithAction state, bool isFirstVisit, bool offPolicy)
        {
            bool policyStable = true;
            bool alreadyKnown = false;

            foreach (TicTacTardStateWithAction stored in ticTacTardStateWithActions)
            {
                stored.WinScore = 0;
                stored.Visits   = 0;

                // Non-short-circuit OR: every stored state is compared, as before.
                alreadyKnown |= stored.IsSameState(state);
            }

            if (!alreadyKnown)
            {
                ticTacTardStateWithActions.Add(state);
            }

            for (int episodeIndex = 0; episodeIndex < nbEpisode; episodeIndex++)
            {
                TicTacEpisode episode          = GenerateEpisodeFromState(state);
                float         cumulativeReward = 0;

                // Walk the episode from the second-to-last state back to the first.
                int step = episode.EpisodeStates.Count - 2;

                while (step >= 0)
                {
                    TicTacTardStateWithAction visited = episode.EpisodeStates[step];

                    cumulativeReward += episode.EpisodeStates[step + 1].reward;

                    bool seenEarlier = episode.FoundSameStateUntilIndex(step - 1, visited);
                    bool creditStep  = !(isFirstVisit && seenEarlier);

                    if (creditStep)
                    {
                        visited.WinScore += cumulativeReward;
                        visited.Visits   += 1;
                    }

                    --step;
                }

                if (offPolicy)
                {
                    continue;
                }

                // On-policy: refresh every value, then every intent, after each episode.
                foreach (TicTacTardStateWithAction stored in ticTacTardStateWithActions)
                {
                    stored.value = stored.WinScore / stored.Visits;
                }

                foreach (TicTacTardStateWithAction stored in ticTacTardStateWithActions)
                {
                    Intent improved = GetBestIntent(stored);

                    if (improved == Intent.Nothing || improved == stored.intent)
                    {
                        continue;
                    }

                    stored.intent = improved;
                    policyStable  = false;
                }
            }

            return policyStable;
        }
Exemplo n.º 6
0
        /// <summary>
        /// Plays one simulated game starting from a copy of <paramref name="state"/> and records
        /// every state reached in a new <c>TicTacEpisode</c>.
        /// </summary>
        /// <remarks>
        /// Two throw-away players alternate. While the current token equals this object's
        /// <c>Token</c>, the move played is the tracked intent (first chosen by
        /// <c>EpsilonGreedy</c>, then the intent of the state just reached); otherwise a random
        /// possible move is played. States not yet present in <c>ticTacTardStateWithActions</c>
        /// are added to it with a random intent.
        /// </remarks>
        /// <param name="state">Starting state of the episode.</param>
        /// <returns>The episode holding the sequence of states reached after each accepted move.</returns>
        private TicTacEpisode GenerateEpisodeFromState(TicTacTardStateWithAction state)
        {
            TicTacEpisode episode = new TicTacEpisode();

            // First move to try for this object's own token.
            Intent initialIntent = EpsilonGreedy(state);

            TicTacTardPlayer fakeOpponent1      = new TicTacTardPlayer(0, "0");
            TicTacTardPlayer fakeOpponent2      = new TicTacTardPlayer(1, "1");
            TicTacTardPlayer currentPlayer      = fakeOpponent1;
            string           tokenCurrentPlayer = Token;

            // Work on a copy so the caller's state object keeps its own reward/score/visits.
            TicTacTardStateWithAction currentState = new TicTacTardStateWithAction(state, state.intent);

            currentState.reward   = 0;
            currentState.WinScore = 0;
            currentState.Visits   = 0;

            // Counts every loop pass, including rejected moves, to bound the retries below.
            int safeLoopIteration = 0;

            while (currentState.nbActionPlayed < 9 && !fakeOpponent1.playerWon && !fakeOpponent2.playerWon && safeLoopIteration < 200)
            {
                ++safeLoopIteration;

                TicTacTardState newState;
                if (tokenCurrentPlayer == Token)
                {
                    // Own turn: play the tracked intent.
                    newState =
                        TicTacTardGame.PlayAction(currentState, currentPlayer, initialIntent, false);

                    // NOTE(review): a null result presumably means the move was rejected - confirm in PlayAction.
                    // Pick another move and retry on the next pass.
                    if (newState == null)
                    {
                        initialIntent = TicTacTardGame.GetRandomPossibleMove(currentState);
                        continue;
                    }
                }
                else
                {
                    // Other turn: play a random possible move.
                    newState =
                        TicTacTardGame.PlayAction(currentState, currentPlayer, TicTacTardGame.GetRandomPossibleMove(currentState), false);

                    // Retry with a fresh random move on the next pass.
                    if (newState == null)
                    {
                        continue;
                    }
                }

                // Reuse the stored entry for this state when one exists, so scores accumulate on a single object.
                TicTacTardStateWithAction existingState = ticTacTardStateWithActions.Find(stateSaved => newState.IsSameState(stateSaved));

                if (existingState == null)
                {
                    // Unknown state: register it with a random intent.
                    TicTacTardStateWithAction initNewState =
                        new TicTacTardStateWithAction(newState, TicTacTardGame.GetRandomPossibleMove(newState));

                    initNewState.prevState = currentState;
                    currentState           = initNewState;
                    initialIntent          = currentState.intent;
                    ticTacTardStateWithActions.Add(currentState);
                }
                else
                {
                    currentState  = existingState;
                    initialIntent = currentState.intent;
                }

                episode.EpisodeStates.Add(currentState);

                // Hand the turn to the other fake player.
                currentPlayer      = tokenCurrentPlayer == fakeOpponent1.Token ? fakeOpponent2 : fakeOpponent1;
                tokenCurrentPlayer = currentPlayer.Token;
            }

            if (safeLoopIteration >= 200)
            {
                Debug.LogError("Safe loopIteration trigger : exit generate episode");
            }

            // Only the last state reached gets its reward set here: 1 when fakeOpponent1 won, 0 otherwise.
            if (fakeOpponent1.playerWon)
            {
                currentState.reward = 1;
            }
            else
            {
                currentState.reward = 0;
            }

            return(episode);
        }