Example #1
0
    /// <summary>
    /// Runs one trainer step against the environment and adds the step's reward to
    /// the running episode total. When the trainer's record call requests a reset
    /// while training is enabled, the episode is closed out (environment reset,
    /// episode total fed into the average, score label refreshed) and, once enough
    /// episodes have been collected, a training pass is run over the recorded data.
    /// </summary>
    protected void RunStep()
    {
        trainer.Step(environment);
        bool shouldReset = trainer.Record(environment);

        episodePoint += environment.LastReward();

        // Nothing further to do unless a reset was requested and training is on.
        if (!shouldReset || !training)
        {
            return;
        }

        // Close out the episode: reset, count it, and record its total reward.
        environment.Reset();
        episodesThisTrain++;
        episodePointAve.AddValue(episodePoint);

        // Only refresh the label when the averager reports a fresh value.
        if (episodePointAve.JustUpdated)
        {
            scoreUI.text = "(PPO, Continuous) Average Reward:" + episodePointAve.Average;
        }

        episodePoint = 0;

        // Keep collecting until the configured number of episodes is reached.
        if (episodesThisTrain < episodeToRunForEachTrain)
        {
            return;
        }

        trainer.TrainAllData(minibatch, iterationForEachTrain);

        // Log the loss reported by the trainer after this training pass.
        print("Training Loss:" + trainer.LastLoss);

        trainedCount++;
        trainer.ClearData();
        episodesThisTrain = 0;
    }
Example #2
0
    /// <summary>
    /// Runs one trainer step against the environment and adds the step's reward to
    /// the running episode total. When the trainer's record call requests a reset
    /// while training is enabled, the winner of the finished game is tallied, the
    /// environment is reset, the episode total is fed into the average, and, once
    /// enough episodes have been collected, a training pass is run over the
    /// recorded data.
    /// </summary>
    protected void RunStep()
    {
        trainer.Step(environment);
        bool shouldReset = trainer.Record(environment);

        episodePoint += environment.LastReward();

        // Nothing further to do unless a reset was requested and training is on.
        if (!shouldReset || !training)
        {
            return;
        }

        currentEpisode++;

        // Tally the winner before the environment is reset. A GameWinPlayer of 0
        // is counted as a left win (sample value 1); anything else is counted as
        // a right win (sample value 0).
        if (environment.GameWinPlayer != 0)
        {
            rightWin++;
            winningRate50Left.AddValue(0);
        }
        else
        {
            leftWin++;
            winningRate50Left.AddValue(1);
        }

        // Close out the episode: reset, count it, and record its total reward.
        environment.Reset();
        episodesThisTrain++;
        episodePointAve.AddValue(episodePoint);
        episodePoint = 0;

        // Keep collecting until the configured number of episodes is reached.
        if (episodesThisTrain < episodeToRunForEachTrain)
        {
            return;
        }

        trainer.TrainAllData(minibatch, iterationForEachTrain);

        // Log the loss reported by the trainer after this training pass.
        print("Training Loss:" + trainer.LastLoss);

        trainedCount++;
        trainer.ClearData();
        episodesThisTrain = 0;
    }