Exemple #1
0
    /// <summary>
    /// Runs one learning step: raises <c>OnStep</c>, applies <c>TimeScale</c>,
    /// decays the hyperparameters, chooses an action, moves the agent and then
    /// updates the Q-table entry for the state/action pair that was just taken.
    /// </summary>
    /// <remarks>
    /// NOTE(review): presumably invoked by the engine on its fixed timestep — confirm.
    /// </remarks>
    private void FixedUpdate()
    {
        OnStep?.Invoke();
        Time.timeScale = TimeScale;

        DecayHyperparameters();

        // Epsilon-greedy choice: a random draw below ExplorationFactor explores,
        // anything else exploits the current estimates.
        bool shouldExplore = Random.Range(0f, 1f) < ExplorationFactor;

        int chosenAction;
        if (!shouldExplore)
        {
            // Greedy pick: the first index holding the row's largest value.
            var currentRow = QTable[currentState];
            chosenAction = currentRow.ToList().IndexOf(currentRow.Max());
        }
        else
        {
            // Random action index; assumes the upper bound 4 is exclusive (0..3) — TODO confirm.
            chosenAction = Random.Range(0, 4);
        }

        var outcome = AgentMover.Move(chosenAction);

        // A reward of exactly 1 is treated as the end of an episode.
        if (outcome.reward == 1)
        {
            OnEpisodeComplete?.Invoke();
        }

        // Q-learning update:
        //   Q(s, a) <- Q(s, a) + LearningRate * (reward + DiscountFactor * max Q(s', .) - Q(s, a))
        float previousEstimate = QTable[currentState][chosenAction];
        var bestNextValue = QTable[outcome.state].Max();

        QTable[currentState][chosenAction] =
            previousEstimate + LearningRate * (outcome.reward + DiscountFactor * bestNextValue - previousEstimate);

        // The state returned by the move becomes the starting state of the next step.
        currentState = outcome.state;
    }
Exemple #2
0
 /// <summary>
 /// Game-start handler: tells the agent mover to move, then clears the
 /// "getting ready" and "finish" flags.
 /// </summary>
 private void HandleOnGameStart()
 {
     _agentMover.Move();

     // Both state flags are reset together once the mover has been started.
     _isGettingReady = _isFinish = false;
 }