/// <summary>
/// Runs <c>maxEpochs</c> training episodes, updating <c>_qMatrix</c> after every
/// step, and then marks the learner as trained.
/// </summary>
/// <returns>
/// The value of <c>_learningState</c> after training, which is
/// <see cref="QualityLearningState.Trained"/>.
/// </returns>
private QualityLearningState Train()
{
    _learningState = QualityLearningState.Training;

    for (int episode = 0; episode < maxEpochs; episode++)
    {
        // Every episode begins with a reset maze and a randomly drawn start index
        // (presumably in [0, _stateRewards.Length) if Random is a System.Random — confirm).
        _environmentMaze.SetAllStateToUncharted();
        int fromState = Random.Next(0, _stateRewards.Length);
        _environmentMaze.GoToState(fromState, _learningState);

        // At least one step is always taken, even when the start index equals goalState.
        while (true)
        {
            int toState = GetRandNextState(fromState, _environmentMatrix);
            List<int> followUpActions = GetPossibleActions(toState, _environmentMatrix);
            double bestFollowUpQ = MaxQ(_qMatrix, followUpActions, toState);

            // Blend the previous estimate with the newly observed target:
            // Q <- (1 - learningRate) * Q + learningRate * (reward + discountRate * maxQ).
            double previousQ = _qMatrix[fromState][toState];
            double target = _stateRewards[toState] + (discountRate * bestFollowUpQ);
            _qMatrix[fromState][toState] = ((1 - learningRate) * previousQ) + (learningRate * target);

            fromState = toState;
            _environmentMaze.GoToState(fromState, _learningState);

            // The episode stops on a state whose reward is below 50, or on the goal.
            // NOTE(review): the meaning of the 50 threshold is not visible here — confirm.
            if (_stateRewards[fromState] < 50.0f || fromState == goalState)
            {
                break;
            }
        }
    }

    _learningState = QualityLearningState.Trained;
    return _learningState;
}
/// <summary>
/// Colours the sprite of the given maze state: <c>trainingColor</c> while
/// training, <c>trainedColor</c> for any other learning state.
/// </summary>
/// <param name="state">Index into <c>environmentMazeStates</c>.</param>
/// <param name="learningState">Learning phase that selects the colour.</param>
public void GoToState(int state, QualityLearningState learningState)
{
    SpriteRenderer stateSprite = environmentMazeStates[state].GetComponent<SpriteRenderer>();

    // Without a renderer there is nothing to colour; log the problem and leave.
    if (!stateSprite)
    {
        Debug.LogError($"SpriteRenderer for state: {state} is null!");
        return;
    }

    if (learningState == QualityLearningState.Training)
    {
        stateSprite.color = trainingColor;
    }
    else
    {
        stateSprite.color = trainedColor;
    }
}