/// <summary>
/// Runs <c>nbEpisode</c> simulated episodes starting from <paramref name="state"/>,
/// accumulating the observed returns into the <c>WinScore</c> / <c>Visits</c> statistics of
/// the states met along the way. Unless <paramref name="offPolicy"/> is set, the value and
/// the intent of every tracked state are refreshed after each episode.
/// </summary>
/// <param name="state">
/// State the episodes start from. It is added to the tracked states when no equivalent
/// state is tracked yet; in that case its statistics are reset as well.
/// </param>
/// <param name="isFirstVisit">
/// When true, a state is only credited at a time step for which
/// <c>FoundSameStateUntilIndex(t - 1, ...)</c> reports false (presumably: the state does not
/// occur earlier in the episode - confirm against that helper). When false, every
/// occurrence is credited.
/// </param>
/// <param name="offPolicy">When true, values and intents are left untouched.</param>
/// <returns>
/// True when no intent was changed during the whole run (always true when
/// <paramref name="offPolicy"/> is set); false as soon as one intent was replaced.
/// </returns>
public bool SimulateEpisode(TicTacTardStateWithAction state, bool isFirstVisit, bool offPolicy)
{
    bool foundSameState = false;
    bool isStable = true;

    // Start from clean statistics for every tracked state.
    foreach (TicTacTardStateWithAction action in ticTacTardStateWithActions)
    {
        action.WinScore = 0;
        action.Visits = 0;
        if (action.IsSameState(state))
        {
            foundSameState = true;
        }
    }

    if (!foundSameState)
    {
        // The state was not part of the reset loop above: clear its statistics too so that
        // stale counters cannot leak into the averages computed below.
        state.WinScore = 0;
        state.Visits = 0;
        ticTacTardStateWithActions.Add(state);
    }

    for (int i = 0; i < nbEpisode; ++i)
    {
        TicTacEpisode episode = GenerateEpisodeFromState(state);
        float g = 0;

        // Walk the episode backwards, accumulating the return that follows each step.
        for (int t = episode.EpisodeStates.Count - 2; t >= 0; --t)
        {
            TicTacTardStateWithAction currentState = episode.EpisodeStates[t];
            g = g + episode.EpisodeStates[t + 1].reward;

            bool foundSameStateInEpisode = episode.FoundSameStateUntilIndex(t - 1, currentState);
            if (!isFirstVisit || !foundSameStateInEpisode)
            {
                currentState.WinScore = currentState.WinScore + g;
                currentState.Visits += 1;
            }
        }

        if (!offPolicy)
        {
            foreach (TicTacTardStateWithAction action in ticTacTardStateWithActions)
            {
                // A state that has not been visited has no sample to average: dividing would
                // produce NaN (0 / 0) and poison the comparisons made when picking the best
                // intent. Keep its previous value instead.
                if (action.Visits > 0)
                {
                    action.value = action.WinScore / action.Visits;
                }
            }

            foreach (TicTacTardStateWithAction action in ticTacTardStateWithActions)
            {
                Intent intent = GetBestIntent(action);
                if (intent != Intent.Nothing && intent != action.intent)
                {
                    action.intent = intent;
                    isStable = false;
                }
            }
        }
    }

    return isStable;
}
/// <summary>
/// Plays one simulated game starting from a copy of <paramref name="state"/> and returns the
/// sequence of states reached after each successfully played move.
/// </summary>
/// <remarks>
/// States met for the first time are appended to <c>ticTacTardStateWithActions</c> with a
/// random possible move as their intent. Once the loop ends, the reward of the last state
/// held by the loop is set to 1 when the first simulated player has won and to 0 otherwise.
/// </remarks>
/// <param name="state">State the simulated game starts from.</param>
/// <returns>The episode holding every state reached during the simulation.</returns>
private TicTacEpisode GenerateEpisodeFromState(TicTacTardStateWithAction state)
{
    // Hard cap on loop turns so that repeatedly rejected moves cannot spin forever.
    const int MaxLoopIterations = 200;

    TicTacEpisode generated = new TicTacEpisode();
    Intent pendingIntent = EpsilonGreedy(state);

    TicTacTardPlayer firstPlayer = new TicTacTardPlayer(0, "0");
    TicTacTardPlayer secondPlayer = new TicTacTardPlayer(1, "1");
    TicTacTardPlayer activePlayer = firstPlayer;
    string activeToken = Token;

    // Work on a copy with cleared statistics rather than on the caller's instance.
    TicTacTardStateWithAction cursor = new TicTacTardStateWithAction(state, state.intent)
    {
        reward = 0,
        WinScore = 0,
        Visits = 0
    };

    int iterations = 0;
    while (cursor.nbActionPlayed < 9
           && !firstPlayer.playerWon
           && !secondPlayer.playerWon
           && iterations < MaxLoopIterations)
    {
        ++iterations;

        // On our own token the pending intent is played; otherwise a random possible move.
        bool isOwnTurn = activeToken == Token;
        Intent moveToPlay = isOwnTurn
            ? pendingIntent
            : TicTacTardGame.GetRandomPossibleMove(cursor);

        TicTacTardState reached = TicTacTardGame.PlayAction(cursor, activePlayer, moveToPlay, false);
        if (reached == null)
        {
            // No state came back: retry this turn, with a fresh random intent on our side.
            if (isOwnTurn)
            {
                pendingIntent = TicTacTardGame.GetRandomPossibleMove(cursor);
            }

            continue;
        }

        TicTacTardStateWithAction known = ticTacTardStateWithActions.Find(saved => reached.IsSameState(saved));
        if (known != null)
        {
            cursor = known;
        }
        else
        {
            // First time this state is met: register it with a random possible move.
            TicTacTardStateWithAction created =
                new TicTacTardStateWithAction(reached, TicTacTardGame.GetRandomPossibleMove(reached));
            created.prevState = cursor;
            cursor = created;
            ticTacTardStateWithActions.Add(cursor);
        }

        pendingIntent = cursor.intent;
        generated.EpisodeStates.Add(cursor);

        // Hand the turn over to the other simulated player.
        activePlayer = activeToken == firstPlayer.Token ? secondPlayer : firstPlayer;
        activeToken = activePlayer.Token;
    }

    if (iterations >= MaxLoopIterations)
    {
        Debug.LogError("Safe loopIteration trigger : exit generate episode");
    }

    if (!firstPlayer.playerWon)
    {
        cursor.reward = 0;
    }
    else
    {
        cursor.reward = 1;
    }

    return generated;
}