Example #1
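    // One update cycle: step the environment, record the transition, train periodically, and reset at the end of an episode.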
    protected void RunStep()
    {
        trainer.Step(environment);                  // advance the agent and the environment by one step
        rewardOneEpi += environment.LastReward(0);  // accumulate the reward earned in the current episode
        bool reset = trainer.Record(environment);   // record the step; the return value is used below to trigger a reset during training

        // train on a random minibatch once enough steps have been collected, at the configured interval
        if (trainer.Steps >= stepsBeforeTrain && trainer.Steps % trainingStepInterval == 0)
        {
            trainer.TrainRandomBatch(batchSize);

            // log the running average of the training loss
            loss.AddValue(trainer.LastLoss);
            if (loss.JustUpdated)
            {
                print("Loss:" + loss.Average);
            }
        }
        // end of episode: record the episode reward and reset the environment
        if (environment.IsEnd() || (reset && training))
        {
            currentEpisode++;
            rewardEpiAve.AddValue(rewardOneEpi);
            rewardOneEpi = 0;
            environment.Reset();
        }
    }
Example #2
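    // Same loop as Example #1, but for a two-player game: it additionally tracks wins and a winning-rate average for the left player.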
    protected void RunStep()
    {
        trainer.Step(environment);                        // advance the agent and the environment by one step
        rewardLeftOneEpi += environment.LastReward(0);    // accumulate the left player's reward in the current episode
        bool reset = trainer.Record(environment);         // record the step; the return value is used below to trigger a reset during training

        // train on a random minibatch once enough steps have been collected, at the configured interval
        if (trainer.Steps >= stepsBeforeTrain && trainer.Steps % trainingStepInterval == 0)
        {
            trainer.TrainRandomBatch(batchSize);

            // log the running average of the training loss
            loss.AddValue(trainer.LastLoss);
            if (loss.JustUpdated)
            {
                print("Loss:" + loss.Average);
            }
        }
        // end of episode: record the outcome and the episode reward, then reset the environment
        if (environment.IsEnd() || (reset && training))
        {
            currentEpisode++;
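            // GameWinPlayer == 0 is treated as a win for the left player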
            if (environment.GameWinPlayer == 0)
            {
                leftWin++;
                winningRate50Left.AddValue(1);
            }
            else
            {
                rightWin++;
                winningRate50Left.AddValue(0);
            }
            reward50EpiLeft.AddValue(rewardLeftOneEpi);
            rewardLeftOneEpi = 0;
            environment.Reset();
        }
    }