/// <summary>
/// Advances the trainer one step, accumulates the episode reward, and — when an
/// episode ends during training — records win statistics, resets the environment,
/// and runs a full-data training pass once enough episodes have been collected.
/// </summary>
protected void RunStep()
{
    trainer.Step(environment);
    bool episodeEnded = trainer.Record(environment);
    episodePoint += environment.LastReward();

    // Nothing further to do unless an episode just finished while training.
    if (!episodeEnded || !training)
    {
        return;
    }

    currentEpisode++;

    // Tally the winner of the finished episode into the rolling win-rate stat.
    if (environment.GameWinPlayer == 0)
    {
        leftWin++;
        winningRate50Left.AddValue(1);
    }
    else
    {
        rightWin++;
        winningRate50Left.AddValue(0);
    }

    environment.Reset();
    episodesThisTrain++;
    episodePointAve.AddValue(episodePoint);
    episodePoint = 0;

    // Train only after the configured number of episodes has been gathered.
    if (episodesThisTrain < episodeToRunForEachTrain)
    {
        return;
    }

    trainer.TrainAllData(minibatch, iterationForEachTrain);
    // Record and print the loss from the training pass.
    print("Training Loss:" + trainer.LastLoss);
    trainedCount++;
    trainer.ClearData();
    episodesThisTrain = 0;
}
/// <summary>
/// Advances the trainer one step, accumulates the left player's reward, trains a
/// random minibatch at the configured step interval, and resets the environment
/// (updating win/reward statistics) when the episode ends.
/// </summary>
protected void RunStep()
{
    trainer.Step(environment);
    rewardLeftOneEpi += environment.LastReward(0);
    bool recordedEnd = trainer.Record(environment);

    // Periodic training: only once enough steps exist, on the interval boundary.
    bool timeToTrain = trainer.Steps >= stepsBeforeTrain
        && trainer.Steps % trainingStepInterval == 0;
    if (timeToTrain)
    {
        trainer.TrainRandomBatch(batchSize);
        // Log the loss; print only when the rolling average just refreshed.
        loss.AddValue(trainer.LastLoss);
        if (loss.JustUpdated)
        {
            print("Loss:" + loss.Average);
        }
    }

    // Episode bookkeeping and reset when the game finished.
    bool episodeOver = environment.IsEnd() || (recordedEnd && training);
    if (episodeOver)
    {
        currentEpisode++;
        if (environment.GameWinPlayer == 0)
        {
            leftWin++;
            winningRate50Left.AddValue(1);
        }
        else
        {
            rightWin++;
            winningRate50Left.AddValue(0);
        }
        reward50EpiLeft.AddValue(rewardLeftOneEpi);
        rewardLeftOneEpi = 0;
        environment.Reset();
    }
}
/// <summary>Resets the underlying environment when the agent is reset.</summary>
public override void AgentReset() => environment.Reset();