/// <summary>
    /// Chooses the intent with the highest weighted value among the tiles that are
    /// still empty in <paramref name="currentState"/>.
    /// </summary>
    /// <param name="currentState">State whose grid and current policy are inspected.</param>
    /// <returns>
    /// The best-scoring playable intent, or the state's current policy when no tile is empty.
    /// </returns>
    private Intent GetBestIntent(State currentState)
    {
        // Fall back to the existing policy if nothing beats the initial threshold.
        Intent chosenIntent = currentState.ticTacToePolicy;
        float  highestValue = float.MinValue;

        for (int tile = 0; tile < 9; ++tile)
        {
            Intent candidate = (Intent)tile;

            // Occupied tiles cannot be played, so they are never scored.
            if (!ticTacToeController.GridIsEmpty(ticTacToeController.GetPositionFromIntent(candidate), currentState.currentGrid))
            {
                continue;
            }

            // Score = sum of (state value * weight) over every state this intent can lead to.
            float weightedValue = 0;
            foreach (KeyValuePair<State, float> outcome in GetPossibleStatesFromIntent(currentState, candidate))
            {
                weightedValue += outcome.Key.stateValue * outcome.Value;
            }

            // Strict comparison: on ties the lowest-numbered tile wins.
            if (weightedValue > highestValue)
            {
                highestValue = weightedValue;
                chosenIntent = candidate;
            }
        }

        return chosenIntent;
    }
    /// <summary>
    /// Returns a uniformly random intent whose tile is empty in <paramref name="currentGrid"/>.
    /// </summary>
    /// <param name="currentGrid">Grid to search for empty tiles.</param>
    /// <returns>
    /// A random intent pointing at an empty tile. When the grid has no empty tile an
    /// arbitrary intent is returned, so callers must not assume the result is playable.
    /// </returns>
    private Intent GetRandomValidIntent(Cell[][] currentGrid)
    {
        // Enumerate the free tiles up front instead of sampling blindly with a retry cap:
        // capped rejection sampling can give up and return an occupied tile even though
        // free tiles exist (roughly 1 time in 10 when a single tile is left).
        List<Intent> validIntents = new List<Intent>(9);
        for (int i = 0; i < 9; ++i)
        {
            Intent candidate = (Intent)i;
            if (ticTacToeController.GridIsEmpty(ticTacToeController.GetPositionFromIntent(candidate), currentGrid))
            {
                validIntents.Add(candidate);
            }
        }

        if (validIntents.Count == 0)
        {
            // Full grid: there is no valid answer. Keep the historical behaviour of
            // handing back an arbitrary intent rather than throwing.
            return (Intent)Random.Range(0, 9);
        }

        // The integer overload of Random.Range excludes the upper bound.
        return validIntents[Random.Range(0, validIntents.Count)];
    }
    /// <summary>
    /// Marks the cell addressed by <paramref name="intent"/> with <paramref name="type"/>.
    /// The grid is modified in place and the same instance is returned.
    /// </summary>
    /// <param name="intent">Tile to mark (Tile0 .. Tile8).</param>
    /// <param name="grid">Grid to modify.</param>
    /// <param name="type">Cell type written into the addressed cell.</param>
    /// <returns>The same <paramref name="grid"/> instance that was passed in.</returns>
    private Cell[][] ApplyIntentToGrid(Intent intent, Cell[][] grid, CellType type)
    {
        int first;
        int second;

        // Translate the intent into the two grid indices; the assignment happens once below.
        switch (intent)
        {
            case Intent.Tile0: first = 0; second = 2; break;
            case Intent.Tile1: first = 1; second = 2; break;
            case Intent.Tile2: first = 2; second = 2; break;
            case Intent.Tile3: first = 0; second = 1; break;
            case Intent.Tile4: first = 1; second = 1; break;
            case Intent.Tile5: first = 2; second = 1; break;
            case Intent.Tile6: first = 0; second = 0; break;
            case Intent.Tile7: first = 1; second = 0; break;
            case Intent.Tile8: first = 2; second = 0; break;

            default:
                // Any other intent leaves the grid untouched.
                return grid;
        }

        grid[first][second].cellTicTacToeType = type;
        return grid;
    }
    /// <summary>
    /// Replaces the policy of every known state with the intent returned by
    /// <c>GetBestIntent</c>, and re-runs the prediction step if any policy changed.
    /// </summary>
    /// <param name="currentState">State forwarded to <c>MonteCarloPrediction</c> when a re-run is needed.</param>
    /// <returns><c>true</c> when no state's policy changed; otherwise <c>false</c>.</returns>
    private bool MonteCarloImprovement(State currentState)
    {
        bool anyPolicyChanged = false;

        foreach (var knownState in _allStates)
        {
            Intent previousPolicy = knownState.ticTacToePolicy;
            knownState.ticTacToePolicy = GetBestIntent(knownState);
            anyPolicyChanged |= previousPolicy != knownState.ticTacToePolicy;
        }

        if (anyPolicyChanged)
        {
            // The policy moved, so refresh the estimates; the prediction's own result is not used here.
            MonteCarloPrediction(currentState, episodesNumber, everyVisit, onPolicy);
        }

        return !anyPolicyChanged;
    }
    /// <summary>
    /// Plays <paramref name="intent"/> as a circle on a copy of the current grid and
    /// returns the matching known state, registering a new state if none matches.
    /// </summary>
    /// <param name="currentState">State whose grid is copied before the move is applied.</param>
    /// <param name="intent">Tile on which the circle is placed.</param>
    /// <returns>
    /// The state found by <c>GetStateFromGrid</c> for the grid after the circle move, or a
    /// newly created state (added to <c>_allStates</c>) when no such state is found.
    /// </returns>
    private State GetNextState(State currentState, Intent intent)
    {
        Cell[][] boardAfterMove = ApplyIntentToGrid(intent, CopyGrid(currentState.currentGrid), CellType.Circle);

        State knownState = GetStateFromGrid(boardAfterMove);
        if (knownState != null)
        {
            return knownState;
        }

        // Unknown position: answer with a random cross, provided the picked tile is really free.
        Intent replyIntent = GetRandomValidIntent(boardAfterMove);
        if (ticTacToeController.GridIsEmpty(ticTacToeController.GetPositionFromIntent(replyIntent), boardAfterMove))
        {
            Vector3 replyPosition = ticTacToeController.GetPositionFromIntent(replyIntent);
            boardAfterMove[(int)replyPosition.x][(int)replyPosition.z].cellTicTacToeType = CellType.Cross;
        }

        // Register the resulting position with a random starting policy and a zero value.
        State createdState = new State
        {
            currentGrid     = boardAfterMove,
            ticTacToePolicy = GetRandomValidIntent(boardAfterMove),
            stateValue      = 0
        };
        _allStates.Add(createdState);

        return createdState;
    }
    /// <summary>
    /// Builds the set of states reachable after playing <paramref name="intent"/> as a
    /// circle and then placing a cross on any one of the remaining empty tiles.
    /// </summary>
    /// <param name="currentState">State whose grid is copied as the starting position.</param>
    /// <param name="intent">Tile on which the circle is placed.</param>
    /// <returns>
    /// A dictionary mapping each reachable state to an equal weight of
    /// 1 / (number of empty tiles after the circle move). Empty when no tile is left.
    /// States that <c>GetStateFromGrid</c> does not find are created on the fly and are
    /// not added to <c>_allStates</c>.
    /// </returns>
    private Dictionary <State, float> GetPossibleStatesFromIntent(State currentState, Intent intent)
    {
        Cell[][] boardAfterMove = ApplyIntentToGrid(intent, CopyGrid(currentState.currentGrid), CellType.Circle);

        // Collect every tile the cross could still take, in ascending tile order.
        List<Intent> replies = new List<Intent>();
        for (int tile = 0; tile < 9; tile++)
        {
            Intent candidate = (Intent)tile;
            if (ticTacToeController.GridIsEmpty(ticTacToeController.GetPositionFromIntent(candidate), boardAfterMove))
            {
                replies.Add(candidate);
            }
        }

        var outcomes = new Dictionary<State, float>();
        foreach (Intent reply in replies)
        {
            Cell[][] boardAfterReply = ApplyIntentToGrid(reply, CopyGrid(boardAfterMove), CellType.Cross);

            State outcome = GetStateFromGrid(boardAfterReply);
            if (outcome == null)
            {
                // Position not known yet: use a fresh, zero-valued placeholder state.
                outcome = new State()
                {
                    stateValue      = 0.0f,
                    currentGrid     = boardAfterReply,
                    ticTacToePolicy = GetRandomValidIntent(boardAfterReply),
                    nS      = 0.0f,
                    returnS = 0.0f
                };
            }

            // Every reply gets the same weight.
            outcomes.Add(outcome, 1.0f / replies.Count);
        }

        return outcomes;
    }
    /// <summary>
    /// Estimates state values from simulated games starting at <paramref name="currentState"/>
    /// and then updates every known state's policy with <c>GetBestIntent</c>.
    /// </summary>
    /// <param name="currentState">State each simulated game starts from.</param>
    /// <param name="iteration">Number of games to simulate (see the on-policy note below).</param>
    /// <param name="everyVisit">
    /// When <c>true</c>, every occurrence of a state in an episode contributes to its return;
    /// when <c>false</c>, only the first occurrence does.
    /// </param>
    /// <param name="onPolicy">
    /// When <c>true</c>, values and policies are updated right after the first simulated game
    /// and the method returns immediately.
    /// </param>
    /// <returns>
    /// <c>true</c> when the policy update changed no state's policy; <c>false</c> otherwise,
    /// and also when <paramref name="onPolicy"/> is set but no game was simulated.
    /// </returns>
    private bool MonteCarloPrediction(State currentState, int iteration, bool everyVisit = false, bool onPolicy = false)
    {
        // Reset the per-state accumulators so the averages only reflect this run.
        foreach (var state in _allStates)
        {
            state.nS      = 0;
            state.returnS = 0;
        }

        for (int episode = 0; episode < iteration; ++episode)
        {
            _simulatedSARs = new List <SAR>();
            SimulateGame(currentState);
            AccumulateEpisodeReturns(everyVisit);

            if (onPolicy)
            {
                // NOTE(review): returning here means at most one episode is ever simulated
                // when onPolicy is true, whatever `iteration` says. Behaviour preserved as-is;
                // confirm whether the remaining episodes were meant to run.
                UpdateStateValuesFromReturns();
                return ImprovePolicyGreedily(currentState);
            }
        }

        if (onPolicy)
        {
            // Only reachable when iteration <= 0: nothing was simulated, nothing is updated.
            return false;
        }

        UpdateStateValuesFromReturns();
        return ImprovePolicyGreedily(currentState);
    }

    /// <summary>
    /// Walks the last simulated episode backwards and adds the accumulated reward to the
    /// return and visit count of each visited state. The final entry only contributes its reward.
    /// </summary>
    private void AccumulateEpisodeReturns(bool everyVisit)
    {
        float g = 0;
        for (int j = _simulatedSARs.Count - 2; j >= 0; --j)
        {
            g += _simulatedSARs[j + 1].reward;

            bool visitedEarlier = false;
            if (!everyVisit)
            {
                // First-visit mode: skip this step if the same STATE already occurred earlier
                // in the episode. Comparing the SAR entries themselves would not detect a
                // repeated state, because each step has its own entry.
                for (int k = 0; k < j; k++)
                {
                    if (_simulatedSARs[k].state == _simulatedSARs[j].state)
                    {
                        visitedEarlier = true;
                        break;
                    }
                }
            }

            if (visitedEarlier)
            {
                continue;
            }

            _simulatedSARs[j].state.returnS += g;
            ++_simulatedSARs[j].state.nS;
        }
    }

    /// <summary>
    /// Sets each visited state's value to its average return. States that were never visited
    /// keep their previous value instead of being overwritten with 0 / 0 = NaN.
    /// </summary>
    private void UpdateStateValuesFromReturns()
    {
        foreach (var state in _allStates)
        {
            if (state.nS > 0)
            {
                state.stateValue = state.returnS / state.nS;
            }
        }
    }

    /// <summary>
    /// Replaces every known state's policy with the result of <c>GetBestIntent</c>.
    /// Returns <c>true</c> when no policy changed.
    /// </summary>
    private bool ImprovePolicyGreedily(State currentState)
    {
        bool policyStable = true;
        foreach (var state in _allStates)
        {
            if (state == currentState)
            {
                Debug.Log("updateStatePolicy");
            }

            Intent previousPolicy = state.ticTacToePolicy;
            state.ticTacToePolicy = GetBestIntent(state);
            if (previousPolicy != state.ticTacToePolicy)
            {
                policyStable = false;
            }
        }

        return policyStable;
    }