/// <summary>
/// Initializes a new instance of the <see cref="Optimizer" /> class.
/// </summary>
/// <param name="rescale_grad">Value stored in <c>RescaleGrad</c>.</param>
/// <param name="param_idx2name">Index-to-name map stored in <c>Idx2Name</c>; an empty dictionary is used when null.</param>
/// <param name="wd">Value stored in <c>WD</c>.</param>
/// <param name="clip_gradient">Optional value stored in <c>ClipGradient</c>.</param>
/// <param name="learning_rate">Value stored in <c>lr</c> and, when a scheduler is supplied, in its <c>BaseLearningRate</c>.</param>
/// <param name="lr_scheduler">Optional scheduler stored in <c>Scheduler</c>.</param>
/// <param name="sym">Optional symbol whose attribute dictionary and argument list populate <c>sym_info</c>; empty collections are used when null.</param>
/// <param name="begin_num_update">Initial value for both <c>BeginNumUpdate</c> and <c>NumUpdate</c>.</param>
/// <param name="multi_precision">Value stored in <c>MultiPrecision</c>.</param>
/// <param name="param_dict">Index-to-parameter map stored in <c>ParamDict</c>; an empty dictionary is used when null.</param>
public Optimizer(
    float rescale_grad = 1,
    Dictionary<int, string> param_idx2name = null,
    float wd = 0,
    float? clip_gradient = null,
    float learning_rate = 0.01f,
    LRScheduler lr_scheduler = null,
    Symbol sym = null,
    uint begin_num_update = 0,
    bool multi_precision = false,
    Dictionary<int, Parameter> param_dict = null)
{
    // Scalar hyper-parameters and the optional scheduler.
    lr = learning_rate;
    RescaleGrad = rescale_grad;
    Scheduler = lr_scheduler;
    if (Scheduler != null)
    {
        Scheduler.BaseLearningRate = learning_rate;
    }

    WD = wd;
    BeginNumUpdate = begin_num_update;
    NumUpdate = begin_num_update;

    // Register the update-count table under key 0 and make it the active one.
    var initialCounts = new Dictionary<int, int>();
    all_index_update_counts.Add(0, initialCounts);
    index_update_count = initialCounts;

    ClipGradient = clip_gradient;
    MultiPrecision = multi_precision;
    AggregateNum = 0;

    Idx2Name = param_idx2name ?? new Dictionary<int, string>();

    // Capture symbol metadata when a symbol is given; otherwise start with empty collections.
    if (sym != null)
    {
        sym_info = (sym.ListAttributeDict(), sym.ListArguments().ToList());
    }
    else
    {
        sym_info = (new Dictionary<string, Dictionary<string, string>>(), new List<string>());
    }

    ParamDict = param_dict ?? new Dictionary<int, Parameter>();

    // Both multiplier tables are initialised from empty dictionaries.
    SetLrMult(new Dictionary<string, float>());
    SetWdMult(new Dictionary<string, float>());
}
/// <summary>
/// Initializes a new instance of the <see cref="Optimizer" /> class.
/// </summary>
/// <param name="rescale_grad">Value stored in <c>_rescale_grad</c>.</param>
/// <param name="param_idx2_name">Index-to-name map; its entries are copied into <c>_idx2_name</c>. An empty map is used when null.</param>
/// <param name="wd">Value stored in <c>_wd</c>.</param>
/// <param name="clip_gradient">Optional value stored in <c>_clip_gradient</c>.</param>
/// <param name="learning_rate">Value stored in <c>_lr</c> and, when a scheduler is supplied, in its <c>base_lr</c>.</param>
/// <param name="lr_scheduler">Optional scheduler stored in <c>_lr_scheduler</c>.</param>
/// <param name="sym">Optional symbol stored in <c>_sym</c>.</param>
/// <param name="begin_num_update">Initial value for both <c>_begin_num_update</c> and <c>_num_update</c>.</param>
public Optimizer(
    float rescale_grad = 1.0f,
    Dictionary<int, string> param_idx2_name = null,
    float wd = 0f,
    float? clip_gradient = null,
    float learning_rate = 0.01f,
    LRScheduler lr_scheduler = null,
    Symbol sym = null,
    int begin_num_update = 0)
{
    _rescale_grad = rescale_grad;
    _lr = learning_rate;

    _lr_scheduler = lr_scheduler;
    if (lr_scheduler != null)
    {
        _lr_scheduler.base_lr = learning_rate;
    }

    _wd = wd;
    _lr_mult = new Dictionary<string, float>();
    _wd_mult = new Dictionary<string, float>();

    _begin_num_update = begin_num_update;
    _num_update = begin_num_update;
    _index_update_count = new Dictionary<int, int>();
    _clip_gradient = clip_gradient;

    // Keep a private copy of the index-to-name map rather than the caller's instance.
    var sourceNames = param_idx2_name ?? new Dictionary<int, string>();
    _idx2_name = sourceNames.ToDictionary(pair => pair.Key, pair => pair.Value);

    _sym = sym;

    set_lr_mult(new Dictionary<string, float>());
    set_wd_mult(new Dictionary<string, float>());
}
/// <summary>
/// Initializes a new instance of the <see cref="CcSgd" /> class.
/// </summary>
/// <param name="momentum">Value stored in <c>_momentum</c> and passed to the <c>ccsgd</c> optimizer initialisation.</param>
/// <param name="rescale_grad">Forwarded to the base constructor and passed to the <c>ccsgd</c> optimizer initialisation.</param>
/// <param name="param_idx2_name">Forwarded to the base constructor.</param>
/// <param name="wd">Forwarded to the base constructor.</param>
/// <param name="clip_gradient">Forwarded to the base constructor and passed to the <c>ccsgd</c> optimizer initialisation.</param>
/// <param name="learning_rate">Forwarded to the base constructor.</param>
/// <param name="lr_scheduler">Forwarded to the base constructor.</param>
/// <param name="sym">Forwarded to the base constructor.</param>
/// <param name="begin_num_update">Forwarded to the base constructor.</param>
public CcSgd(
    float momentum = 0.0f,
    float rescale_grad = 1,
    Dictionary<int, string> param_idx2_name = null,
    float wd = 0,
    float clip_gradient = -1,
    float learning_rate = 0.01F,
    LRScheduler lr_scheduler = null,
    Symbol sym = null,
    int begin_num_update = 0)
    : base(rescale_grad, param_idx2_name, wd, clip_gradient, learning_rate, lr_scheduler, sym, begin_num_update)
{
    _momentum = momentum;

    // Values are formatted with the invariant culture so the strings do not depend on the current locale.
    var invariant = CultureInfo.InvariantCulture;
    string[] optionNames = { "momentum", "rescale_grad", "clip_gradient" };
    string[] optionValues =
    {
        momentum.ToString(invariant),
        rescale_grad.ToString(invariant),
        clip_gradient.ToString(invariant),
    };

    _handle = Optimizer._init_cc_optimizer("ccsgd", optionNames, optionValues);
}