/// <summary>
/// Advances the environment by one step, accumulates the per-episode reward,
/// runs a training batch when due, and resets the environment at episode end.
/// </summary>
protected void RunStep()
{
    trainer.Step(environment);
    rewardOneEpi += environment.LastReward(0);

    // Record the transition; the trainer may request an episode reset.
    bool resetRequested = trainer.Record(environment);

    // Train on a random minibatch once enough steps have been collected,
    // then only at the configured step interval.
    bool timeToTrain = trainer.Steps >= stepsBeforeTrain
        && trainer.Steps % trainingStepInterval == 0;
    if (timeToTrain)
    {
        trainer.TrainRandomBatch(batchSize);

        // Track and periodically report the training loss.
        loss.AddValue(trainer.LastLoss);
        if (loss.JustUpdated)
        {
            print("Loss:" + loss.Average);
        }
    }

    // Episode continues unless the environment finished or the trainer
    // requested a reset while in training mode.
    // NOTE(review): trainer-requested resets are honored only when
    // `training` is true — confirm that is intended.
    if (!environment.IsEnd() && !(resetRequested && training))
    {
        return;
    }

    currentEpisode++;
    rewardEpiAve.AddValue(rewardOneEpi);
    rewardOneEpi = 0;
    environment.Reset();
}
/// <summary>
/// Advances the environment by one step, accumulates the left player's
/// per-episode reward, runs a training batch when due, and — at episode
/// end — updates win counters and the rolling win rate before resetting.
/// </summary>
protected void RunStep()
{
    trainer.Step(environment);
    rewardLeftOneEpi += environment.LastReward(0);

    // Record the transition; the trainer may request an episode reset.
    bool resetRequested = trainer.Record(environment);

    // Train on a random minibatch once enough steps have been collected,
    // then only at the configured step interval.
    bool timeToTrain = trainer.Steps >= stepsBeforeTrain
        && trainer.Steps % trainingStepInterval == 0;
    if (timeToTrain)
    {
        trainer.TrainRandomBatch(batchSize);

        // Track and periodically report the training loss.
        loss.AddValue(trainer.LastLoss);
        if (loss.JustUpdated)
        {
            print("Loss:" + loss.Average);
        }
    }

    // Episode continues unless the environment finished or the trainer
    // requested a reset while in training mode.
    if (!environment.IsEnd() && !(resetRequested && training))
    {
        return;
    }

    currentEpisode++;

    // Update win statistics; player 0 is treated as the left player.
    // NOTE(review): any GameWinPlayer value other than 0 counts as a right
    // win (including draws/no-winner, if those can occur) — confirm intended.
    bool leftWon = environment.GameWinPlayer == 0;
    if (leftWon)
    {
        leftWin++;
    }
    else
    {
        rightWin++;
    }
    winningRate50Left.AddValue(leftWon ? 1 : 0);

    reward50EpiLeft.AddValue(rewardLeftOneEpi);
    rewardLeftOneEpi = 0;
    environment.Reset();
}