/// <summary>
/// Runs one trainer step against <c>environment</c>, records the result, and adds the
/// environment's last reward to the running episode score.
/// When the record call signals a reset while <c>training</c> is on, the environment is reset,
/// the episode score is pushed into the running average (and shown on <c>scoreUI</c> when the
/// average reports it was just updated), and once enough episodes have been collected the
/// trainer is trained on all recorded data, after which that data is cleared.
/// </summary>
protected void RunStep()
{
    trainer.Step(environment);
    bool episodeEnded = trainer.Record(environment);
    episodePoint += environment.LastReward();

    // Nothing more to do unless the episode ended while training is enabled.
    // NOTE(review): when the episode ends with training disabled, neither the environment
    // nor episodePoint is reset here — confirm that is handled elsewhere.
    if (!episodeEnded || !training)
    {
        return;
    }

    // End of episode: restart the environment and update the per-episode statistics.
    environment.Reset();
    episodesThisTrain++;
    episodePointAve.AddValue(episodePoint);
    if (episodePointAve.JustUpdated)
    {
        scoreUI.text = "(PPO, Continuous) Average Reward:" + episodePointAve.Average;
    }
    episodePoint = 0;

    // Train only after the configured number of episodes has been gathered.
    bool batchReady = episodesThisTrain >= episodeToRunForEachTrain;
    if (!batchReady)
    {
        return;
    }

    trainer.TrainAllData(minibatch, iterationForEachTrain);
    // Report the loss of the training pass that just finished.
    print("Training Loss:" + trainer.LastLoss);
    trainedCount++;
    trainer.ClearData();
    episodesThisTrain = 0;
}
/// <summary>
/// Runs one trainer step against <c>environment</c>, records the result, and adds the
/// environment's last reward to the running episode score.
/// When the record call signals a reset while <c>training</c> is on, the episode counter and
/// the left/right win statistics are updated, the environment is reset, the episode score is
/// pushed into the running average, and once enough episodes have been collected the trainer
/// is trained on all recorded data, after which that data is cleared.
/// </summary>
protected void RunStep()
{
    trainer.Step(environment);
    bool episodeEnded = trainer.Record(environment);
    episodePoint += environment.LastReward();

    // Nothing more to do unless the episode ended while training is enabled.
    // NOTE(review): when the episode ends with training disabled, neither the environment
    // nor episodePoint is reset here — confirm that is handled elsewhere.
    if (!episodeEnded || !training)
    {
        return;
    }

    currentEpisode++;

    // GameWinPlayer == 0 is counted as a left-side win; any other value as a right-side win.
    bool leftWon = environment.GameWinPlayer == 0;
    if (leftWon)
    {
        leftWin++;
    }
    else
    {
        rightWin++;
    }
    // Feed 1 for a left win and 0 otherwise into the left-side winning-rate tracker.
    winningRate50Left.AddValue(leftWon ? 1 : 0);

    // End of episode: restart the environment and update the per-episode statistics.
    environment.Reset();
    episodesThisTrain++;
    episodePointAve.AddValue(episodePoint);
    episodePoint = 0;

    // Train only after the configured number of episodes has been gathered.
    bool batchReady = episodesThisTrain >= episodeToRunForEachTrain;
    if (!batchReady)
    {
        return;
    }

    trainer.TrainAllData(minibatch, iterationForEachTrain);
    // Report the loss of the training pass that just finished.
    print("Training Loss:" + trainer.LastLoss);
    trainedCount++;
    trainer.ClearData();
    episodesThisTrain = 0;
}