static void UpdateLearningRate(IOptimizer optimizer, Variable step, LearningRateSchedule learningRateSchedule)
{
    // Evaluate the schedule at the current step and write the result into the
    // optimizer's "lr" variable so the next gradient update uses the decayed rate.
    Tensor learningRate = learningRateSchedule.Get(step: step);
    var optimizerLearningRate = optimizer.DynamicGet<Variable>("lr");
    optimizerLearningRate.assign(learningRate);
}
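// Usage sketch (assumed surroundings, not from this file): in a typical
// training loop the schedule is re-applied once per batch, after the step
// counter has been incremented, e.g.:
//
//   step.assign_add(1);                                       // hypothetical step-counter update
//   UpdateLearningRate(optimizer, step, learningRateSchedule);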
public Tensor create_learning_rate(LearningRateSchedule lr_schedule, float lr, RefVariable global_step, float max_step)
{
    // if (lr_schedule == LearningRateSchedule.CONSTANT)
    //     learning_rate = tf.Variable(lr);
    // else
    if (lr_schedule == LearningRateSchedule.LINEAR)
    {
        // Polynomial decay with power 1.0 is a straight-line anneal from lr
        // down to the 1e-10 floor over max_step steps.
        return tf.train.polynomial_decay(
            lr,
            global_step,
            max_step,
            1e-10f,
            power: 1.0f);
    }

    throw new NotImplementedException("create_learning_rate");
}
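// Reference sketch (not part of the original file): the scalar value the
// LINEAR branch above produces at a given step, assuming the standard
// polynomial_decay semantics (step clamped to max_step, power = 1.0,
// end learning rate = 1e-10).
static float LinearDecayReference(float lr, float step, float maxStep)
{
    // Fraction of training completed, clamped to [0, 1].
    float progress = System.Math.Min(step, maxStep) / maxStep;
    // Linear interpolation from lr down to the 1e-10 floor.
    return (lr - 1e-10f) * (1.0f - progress) + 1e-10f;
}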
/// <summary>
/// Takes a Unity environment and model-specific hyper-parameters and returns the
/// appropriate PPO agent model for the environment.
/// </summary>
/// <param name="brain">BrainParameters used to generate the specific network graph.</param>
/// <param name="lr">Learning rate.</param>
/// <param name="lr_schedule">Learning rate decay schedule.</param>
/// <param name="h_size">Size of hidden layers.</param>
/// <param name="epsilon">Value for policy-divergence threshold.</param>
/// <param name="beta">Strength of entropy regularization.</param>
/// <param name="max_step">Total number of training steps.</param>
/// <param name="normalize">Whether to normalize vector observation input.</param>
/// <param name="use_recurrent">Whether to use an LSTM layer in the network.</param>
/// <param name="num_layers">Number of hidden layers between encoded input and policy and value layers.</param>
/// <param name="m_size">Size of brain memory.</param>
/// <param name="seed">Seed to use for initialization of model.</param>
/// <param name="stream_names">List of names of value streams. Usually, a list of the Reward Signals being used.</param>
/// <param name="vis_encode_type">Type of encoder to use for visual observations.</param>
public PPOModel(
    BrainParameters brain,
    float lr = 0.0001f,
    LearningRateSchedule lr_schedule = LearningRateSchedule.LINEAR,
    int h_size = 128,
    float epsilon = 0.2f,
    float beta = 0.001f,
    float max_step = 5e6f,
    bool normalize = false,
    bool use_recurrent = false,
    int num_layers = 2,
    int? m_size = null,
    int seed = 0,
    List<string> stream_names = null,
    EncoderType vis_encode_type = EncoderType.SIMPLE)
    : base(m_size: m_size, normalize: normalize, use_recurrent: use_recurrent, brain: brain,
        seed: seed, stream_names: stream_names)
{
    // optimizer: Optional[tf.train.AdamOptimizer] = null;
    // update_batch: Optional[tf.Operation] = null;

    // Clamp to at least one hidden layer.
    if (num_layers < 1)
    {
        num_layers = 1;
    }

    if (brain.vector_action_space_type == "continuous")
    {
        // The continuous-control actor-critic path is not ported yet.
        throw new NotImplementedException("brain.vector_action_space_type");
        // create_cc_actor_critic(h_size, num_layers, vis_encode_type);
        // entropy = tf.ones_like(tf.reshape(value, [-1])) * entropy;
    }
    else
    {
        create_dc_actor_critic(h_size, num_layers, vis_encode_type);
    }

    learning_rate = create_learning_rate(lr_schedule, lr, global_step, max_step);
    create_losses(
        log_probs,
        old_log_probs,
        value_heads,
        entropy,
        beta,
        epsilon,
        lr,
        max_step);
}
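// Construction sketch (illustrative values; the BrainParameters setup is
// assumed and not shown in this file). A discrete-action brain is required,
// since the continuous path above throws NotImplementedException:
//
//   var model = new PPOModel(
//       brain,
//       lr: 3e-4f,
//       lr_schedule: LearningRateSchedule.LINEAR,
//       h_size: 128,
//       max_step: 5e6f,
//       stream_names: new List<string> { "extrinsic" });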