Esempio n. 1
0
    /// <summary>
    /// Finalizes the current episode: reports it to the tracker, decays the
    /// exploration rate and learning rate, updates the running statistics,
    /// decides whether the next episode is a validation run, and resets.
    /// </summary>
    /// <param name="win">Whether the episode ended in a win.</param>
    /// <param name="reward">Total reward attributed to the episode.</param>
    private void EndEpisode(bool win, double reward)
    {
        // Report the finished episode. Validation episodes are stored separately;
        // when neither validating nor learning, nothing is recorded.
        if (validationRun_)
        {
            tracker_.SaveValidationEpisode(reward, win, steps_);
        }
        else if (learning)
        {
            tracker_.SaveEpisode(reward, win, steps_, epsilon_, alpha_);
        }

        // Exponential decay from the start value towards the end value. The exponent
        // is the episode count *before* this episode is counted, so statement order
        // relative to the increment below matters.
        epsilon_ = endEpsilon + (startEpsilon - endEpsilon) * Mathf.Pow(1f - epsilonDecay, episodes_);
        alpha_   = endLearningRate + (startLearningRate - endLearningRate) * Mathf.Pow(1f - learningDecay, episodes_);
        episodes_++;

        // Running statistics. The best-reward check compares the reward of THIS
        // episode (the parameter), which is also the value that gets stored.
        lastReward_ = reward;
        if (reward > bestReward_)
        {
            bestReward_ = reward;
        }
        sumReward_ += reward;

        if (win)
        {
            wins_++;
        }
        else
        {
            fails_++;
        }

        // Store the latest result; the write index wraps around at nSaves_.
        savedRewards_[saveInd_] = reward;
        savedWins_[saveInd_]    = win;
        saveInd_ = (saveInd_ + 1) % nSaves_;

        // Learning stop logic
        if (!stopped && episodes_ >= stopAfter)
        {
            // Episode budget reached: close the run and leave learning mode.
            // validationRun_ is intentionally left untouched in this branch.
            tracker_.EndRun();
            superSpeed = 1;
            stopped    = true;
            learning   = false;
        }
        else if (!stopped)
        {
            // Every validateEvery-th episode is a validation run. A zero interval is
            // treated as "never validate" instead of dividing by zero.
            validationRun_ = validateEvery != 0 && episodes_ % validateEvery == 0;
        }
        else
        {
            validationRun_ = false;
        }

        Reset();
    }