/// <summary>
/// Runs <c>maxEpochs</c> training episodes, updating <c>_qMatrix</c> in place after
/// every randomly chosen transition, and marks each visited state on the maze.
/// </summary>
/// <remarks>
/// Every episode resets the maze via <c>SetAllStateToUncharted</c>, starts from a state
/// index obtained from <c>Random.Next(0, _stateRewards.Length)</c>, and keeps stepping
/// until the state just entered either has a reward below 50 or equals <c>goalState</c>.
/// NOTE(review): the meaning of the 50 threshold depends on how <c>_stateRewards</c> is
/// populated, which is not visible here - confirm against the reward setup.
/// </remarks>
/// <returns>
/// The value of <c>_learningState</c> after training, which is always
/// <see cref="QualityLearningState.Trained"/>.
/// </returns>
private QualityLearningState Train()
    {
        _learningState = QualityLearningState.Training;

        int episode = 0;
        while (episode < maxEpochs)
        {
            // Fresh episode: clear the maze markings and pick a starting state.
            _environmentMaze.SetAllStateToUncharted();
            int fromState = Random.Next(0, _stateRewards.Length);
            _environmentMaze.GoToState(fromState, _learningState);

            while (true)
            {
                int toState = GetRandNextState(fromState, _environmentMatrix);
                List<int> actionsFromTarget = GetPossibleActions(toState, _environmentMatrix);
                double bestFollowUpQ = MaxQ(_qMatrix, actionsFromTarget, toState);

                // Blend the previous estimate with the newly observed target value.
                var retainedPart = (1 - learningRate) * _qMatrix[fromState][toState];
                var learnedPart = learningRate * (_stateRewards[toState] + (discountRate * bestFollowUpQ));
                _qMatrix[fromState][toState] = retainedPart + learnedPart;

                // Take the step and show it on the maze before testing the exit conditions.
                fromState = toState;
                _environmentMaze.GoToState(fromState, _learningState);

                // The episode ends on a low-reward state or once the goal has been reached.
                if (_stateRewards[fromState] < 50.0f || fromState == goalState)
                {
                    break;
                }
            }

            ++episode;
        }

        _learningState = QualityLearningState.Trained;
        return _learningState;
    }
    /// <summary>
    /// Recolors the sprite attached to the maze state at index <paramref name="state"/>
    /// according to the supplied learning phase.
    /// </summary>
    /// <param name="state">Index into <c>environmentMazeStates</c> of the state to recolor.</param>
    /// <param name="learningState">
    /// Selects the color: <c>trainingColor</c> when it equals
    /// <see cref="QualityLearningState.Training"/>, otherwise <c>trainedColor</c>.
    /// </param>
    /// <remarks>
    /// If the state object yields no usable <c>SpriteRenderer</c>, an error is logged and
    /// nothing is recolored.
    /// </remarks>
    public void GoToState(int state, QualityLearningState learningState)
    {
        SpriteRenderer stateRenderer = environmentMazeStates[state].GetComponent<SpriteRenderer>();

        // Nothing to tint without a renderer; report it and bail out.
        if (!stateRenderer)
        {
            Debug.LogError($"SpriteRenderer for state: {state} is null!");
            return;
        }

        if (learningState == QualityLearningState.Training)
        {
            stateRenderer.color = trainingColor;
        }
        else
        {
            stateRenderer.color = trainedColor;
        }
    }