// Use this for initialization
/// <summary>
/// Builds the PPO network, model and trainer on the CPU device, then creates the
/// running-average trackers for the training loss and the episode points.
/// </summary>
void Start()
{
    // The two environment variants differ only in the first network constructor
    // argument (8 when environment.is3D is true, 5 otherwise) and in the clip epsilon override.
    // NOTE(review): the first argument is presumably the observation/state size - confirm.
    bool is3D = environment.is3D;
    int firstNetworkArgument = is3D ? 8 : 5;

    var network = new PPONetworkContinuousSimple(firstNetworkArgument, 2, 2, 32, DeviceDescriptor.CPUDevice, 0.01f);
    model = new PPOModel(network);
    trainer = new TrainerPPOSimple(model, LearnerDefs.AdamLearner(learningRate), 1, 10000, 200);

    // Only the 3D variant overrides the trainer's clip epsilon; the other variant keeps the trainer's own value.
    if (is3D)
    {
        trainer.ClipEpsilon = 0.1f;
    }

    // test
    // trainer.RewardDiscountFactor = 0.5f;

    loss = new AutoAverage(iterationForEachTrain);
    episodePointAve = new AutoAverage(episodeToRunForEachTrain);
}
// Use this for initialization
/// <summary>
/// Builds the PPO network, model and trainer on GPU device 0, then creates the
/// running-average trackers for the training loss and the episode points.
/// </summary>
void Start()
{
    // NOTE(review): GPU 0 is hard-coded; presumably this requires a usable GPU on the machine - confirm.
    var device = DeviceDescriptor.GPUDevice(0);
    var policyNetwork = new PPONetworkContinuousSimple(2, 1, 4, 64, device, 0.01f);
    model = new PPOModel(policyNetwork);

    var learner = LearnerDefs.AdamLearner(learningRate);
    trainer = new TrainerPPOSimple(model, learner, 1, 10000, 500);

    // test
    // trainer.RewardDiscountFactor = 0.5f;

    loss = new AutoAverage(iterationForEachTrain);
    episodePointAve = new AutoAverage(episodeToRunForEachTrain);
}