Exemplo n.º 1
0
    // Use this for initialization
    void Start()
    {
        PPONetworkContinuousSimple network;

        if (environment.is3D)
        {
            network             = new PPONetworkContinuousSimple(8, 2, 2, 32, DeviceDescriptor.CPUDevice, 0.01f);
            model               = new PPOModel(network);
            trainer             = new TrainerPPOSimple(model, LearnerDefs.AdamLearner(learningRate), 1, 10000, 200);
            trainer.ClipEpsilon = 0.1f;
        }
        else
        {
            network = new PPONetworkContinuousSimple(5, 2, 2, 32, DeviceDescriptor.CPUDevice, 0.01f);
            model   = new PPOModel(network);
            trainer = new TrainerPPOSimple(model, LearnerDefs.AdamLearner(learningRate), 1, 10000, 200);
        }



        //test
        //trainer.RewardDiscountFactor = 0.5f;

        loss            = new AutoAverage(iterationForEachTrain);
        episodePointAve = new AutoAverage(episodeToRunForEachTrain);
    }
Exemplo n.º 2
0
    // Use this for initialization
    void Start()
    {
        var network = new PPONetworkContinuousSimple(2, 1, 4, 64, DeviceDescriptor.GPUDevice(0), 0.01f);

        model   = new PPOModel(network);
        trainer = new TrainerPPOSimple(model, LearnerDefs.AdamLearner(learningRate), 1, 10000, 500);

        //test
        //trainer.RewardDiscountFactor = 0.5f;

        loss            = new AutoAverage(iterationForEachTrain);
        episodePointAve = new AutoAverage(episodeToRunForEachTrain);
    }