/// <summary>
/// Finalizes the episode that just ended: reports it to the tracker, advances the
/// epsilon and learning-rate schedules, updates the running statistics and the
/// circular history buffers, decides whether the run stops or whether the next
/// episode is a validation run, and finally calls <c>Reset()</c>.
/// </summary>
/// <param name="win">True if the episode is to be counted as a win, false as a fail.</param>
/// <param name="reward">Reward value recorded for the episode.</param>
private void EndEpisode(bool win, double reward) {
    // Report the episode first so that the tracker receives the epsilon_/alpha_
    // values that were in effect during the episode, not the decayed ones below.
    if (validationRun_) {
        tracker_.SaveValidationEpisode(reward, win, steps_);
    } else if (learning) {
        tracker_.SaveEpisode(reward, win, steps_, epsilon_, alpha_);
    }

    // Exponential interpolation from the start value towards the end value.
    // The exponent is episodes_ as it stands before the increment below.
    epsilon_ = endEpsilon + (startEpsilon - endEpsilon) * Mathf.Pow(1f - epsilonDecay, episodes_);
    alpha_ = endLearningRate + (startLearningRate - endLearningRate) * Mathf.Pow(1f - learningDecay, episodes_);
    episodes_++;

    // Running statistics.
    lastReward_ = reward;
    // Compare the same value that gets stored: this episode's reward.
    if (reward > bestReward_) {
        bestReward_ = reward;
    }
    sumReward_ += reward;
    if (win) {
        wins_++;
    } else {
        fails_++;
    }

    // Circular history: write at saveInd_, then advance it, wrapping at nSaves_.
    savedRewards_[saveInd_] = reward;
    savedWins_[saveInd_] = win;
    saveInd_ = (saveInd_ + 1) % nSaves_;

    // Learning stop logic
    if (!stopped && episodes_ >= stopAfter) {
        // Episode budget reached: close the tracker run and switch learning off.
        tracker_.EndRun();
        superSpeed = 1;
        stopped = true;
        learning = false;
    } else if (!stopped) {
        // Every validateEvery-th episode is a validation run. A value of 0 is
        // treated as "never validate" so the remainder cannot divide by zero.
        validationRun_ = validateEvery != 0 && episodes_ % validateEvery == 0;
    } else {
        validationRun_ = false;
    }

    Reset();
}