/// <summary>
/// Performs one Q-learning step: raises <c>OnStep</c>, applies <c>TimeScale</c> to
/// <c>Time.timeScale</c>, decays the hyperparameters, selects an action epsilon-greedily,
/// moves the agent and folds the observed reward into the Q-table.
/// </summary>
private void FixedUpdate()
{
    OnStep?.Invoke();
    Time.timeScale = TimeScale;
    DecayHyperparameters();

    // Epsilon-greedy selection: explore with probability ExplorationFactor,
    // otherwise exploit the first action holding the highest Q-value for the current state.
    int action;
    bool exploit = !(Random.Range(0f, 1f) < ExplorationFactor);
    if (exploit)
    {
        var stateRow = QTable[currentState];
        action = stateRow.ToList().IndexOf(stateRow.Max());
    }
    else
    {
        action = Random.Range(0, 4);
    }

    var stepResult = AgentMover.Move(action);

    // A reward of exactly 1 marks the end of an episode; listeners are notified
    // before the Q-table is updated for this step.
    if (stepResult.reward == 1)
    {
        OnEpisodeComplete?.Invoke();
    }

    // Temporal-difference update:
    // Q(s, a) <- Q(s, a) + LearningRate * (reward + DiscountFactor * max Q(s', .) - Q(s, a))
    float previousEstimate = QTable[currentState][action];
    var bestNextValue = QTable[stepResult.state].Max();
    var updatedEstimate = previousEstimate
        + LearningRate * (stepResult.reward + DiscountFactor * bestNextValue - previousEstimate);
    QTable[currentState][action] = updatedEstimate;

    // Advance to the state reported by the environment.
    currentState = stepResult.state;
}
/// <summary>
/// Game-start handler: triggers the agent mover and clears the
/// "getting ready" and "finished" flags.
/// </summary>
private void HandleOnGameStart()
{
    // Move() runs before the flags are reset; the order is kept in case Move() reads them.
    _agentMover.Move();

    _isGettingReady = false;
    _isFinish = false;
}