/// <inheritdoc />
internal override Learner ToLearner(Function model)
{
    // Schedule with minibatch size 1, i.e. the learning rate is applied per sample.
    var learningRatePerSample = new TrainingParameterScheduleDouble(LearningRate, 1);

    return CNTKLib.AdaGradLearner(
        new ParameterVector(((CNTK.Function)model).Parameters().ToArray()),
        learningRatePerSample,
        false,
        GetAdditionalLearningOptions());
}
public override Learner GetOptimizer(IList<Parameter> learningParameters)
{
    var learningOptions = new AdditionalLearningOptions()
    {
        l1RegularizationWeight = _l1RegularizationWeight,
        l2RegularizationWeight = _l2RegularizationWeight,
        gradientClippingWithTruncation = _gradientClippingThresholdPerSample != double.PositiveInfinity,
        gradientClippingThresholdPerSample = _gradientClippingThresholdPerSample
    };

    return CNTKLib.AdaGradLearner(
        new ParameterVector((ICollection)learningParameters),
        new TrainingParameterScheduleDouble(LearningRate, (uint)MinibatchSize),
        false,
        learningOptions);
}
internal static Learner GetInitializer(IList<Parameter> parameters, NeuralNetworkSettingsEntity s)
{
    var vector = new ParameterVector((ICollection)parameters);
    switch (s.Learner)
    {
        case NeuralNetworkLearner.Adam:
            return CNTKLib.AdamLearner(vector, s.LearningRate.ToTrainParam(), s.LearningMomentum?.ToTrainParam(), s.LearningUnitGain ?? false, s.LearningVarianceMomentum?.ToTrainParam());
        case NeuralNetworkLearner.AdaDelta:
            return CNTKLib.AdaDeltaLearner(vector, s.LearningRate.ToTrainParam());
        case NeuralNetworkLearner.AdaGrad:
            return CNTKLib.AdaGradLearner(vector, s.LearningRate.ToTrainParam());
        case NeuralNetworkLearner.FSAdaGrad:
            return CNTKLib.FSAdaGradLearner(vector, s.LearningRate.ToTrainParam(), s.LearningMomentum?.ToTrainParam(), s.LearningUnitGain ?? false, s.LearningVarianceMomentum?.ToTrainParam());
        case NeuralNetworkLearner.RMSProp:
            // Note: RMSProp reuses the FSAdaGrad learner here; CNTKLib.RMSPropLearner expects extra
            // hyperparameters (gamma, increment/decrement factors, min/max) that this settings entity does not define.
            return CNTKLib.FSAdaGradLearner(vector, s.LearningRate.ToTrainParam(), s.LearningMomentum?.ToTrainParam(), s.LearningUnitGain ?? false, s.LearningVarianceMomentum?.ToTrainParam());
        case NeuralNetworkLearner.MomentumSGD:
            return CNTKLib.MomentumSGDLearner(vector, s.LearningRate.ToTrainParam(), s.LearningMomentum?.ToTrainParam(), s.LearningUnitGain ?? false);
        case NeuralNetworkLearner.SGD:
            return CNTKLib.SGDLearner(vector, s.LearningRate.ToTrainParam());
        default:
            throw new InvalidOperationException("Unexpected Learner");
    }
}
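The ToTrainParam() extension used above is defined elsewhere in its code base and is not shown here. A minimal sketch of what such a helper might look like, assuming it only wraps a double in the schedule type the CNTKLib learner factories expect (the name and placement follow the call sites above; the body is an assumption):

// Hypothetical sketch of the ToTrainParam() helper referenced by GetInitializer.
// The real implementation may differ; this only mirrors how the call sites use it.
internal static class TrainParamExtensions
{
    public static TrainingParameterScheduleDouble ToTrainParam(this double value)
    {
        // Single-value schedule: the same rate/momentum applies for the whole training run.
        return new TrainingParameterScheduleDouble(value);
    }
}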
/// <summary>
/// Creates the learner based on the learning parameters.
/// TODO: not all learner parameters are passed through yet (e.g. regularization is only applied to some learners).
/// </summary>
/// <param name="network">Network model being trained.</param>
/// <param name="lrParams">Learning parameters.</param>
/// <returns>The list of learners to pass to the trainer.</returns>
private List<Learner> createLearners(Function network, LearningParameters lrParams)
{
    // Learning rate and momentum schedules.
    var lr = new TrainingParameterScheduleDouble(lrParams.LearningRate);
    var mm = CNTKLib.MomentumAsTimeConstantSchedule(lrParams.Momentum);
    var addParam = new AdditionalLearningOptions();

    if (lrParams.L1Regularizer > 0)
        addParam.l1RegularizationWeight = lrParams.L1Regularizer;
    if (lrParams.L2Regularizer > 0)
        addParam.l2RegularizationWeight = lrParams.L2Regularizer;

    // Momentum SGD learner - rate, momentum and regularizers.
    if (lrParams.LearnerType == LearnerType.MomentumSGDLearner)
    {
        var llr = new List<Learner>();
        var msgd = Learner.MomentumSGDLearner(network.Parameters(), lr, mm, true, addParam);
        llr.Add(msgd);
        return llr;
    }
    // SGD learner - rate and regularizers.
    else if (lrParams.LearnerType == LearnerType.SGDLearner)
    {
        var llr = new List<Learner>();
        var msgd = Learner.SGDLearner(network.Parameters(), lr, addParam);
        llr.Add(msgd);
        return llr;
    }
    // FSAdaGrad learner - rate and momentum (regularizers not passed yet).
    else if (lrParams.LearnerType == LearnerType.FSAdaGradLearner)
    {
        var llr = new List<Learner>();
        var msgd = CNTKLib.FSAdaGradLearner(new ParameterVector(network.Parameters().ToList()), lr, mm);
        llr.Add(msgd);
        return llr;
    }
    // Adam learner - rate and momentum (regularizers not passed yet).
    else if (lrParams.LearnerType == LearnerType.AdamLearner)
    {
        var llr = new List<Learner>();
        var msgd = CNTKLib.AdamLearner(new ParameterVector(network.Parameters().ToList()), lr, mm);
        llr.Add(msgd);
        return llr;
    }
    // AdaGrad learner - rate and regularizers.
    else if (lrParams.LearnerType == LearnerType.AdaGradLearner)
    {
        var llr = new List<Learner>();
        var msgd = CNTKLib.AdaGradLearner(new ParameterVector(network.Parameters().ToList()), lr, false, addParam);
        llr.Add(msgd);
        return llr;
    }
    else
    {
        throw new Exception("Learner type is not supported!");
    }
}
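A hedged usage sketch for createLearners, assuming LearningParameters exposes settable properties with the names read above and that network, loss and evalError are the usual CNTK model, loss and evaluation functions defined elsewhere:

// Illustrative only: the property setters and the loss/evalError functions are assumptions.
var lrParams = new LearningParameters
{
    LearnerType = LearnerType.MomentumSGDLearner,
    LearningRate = 0.005,
    Momentum = 0.9,
    L2Regularizer = 0.001
};

List<Learner> learners = createLearners(network, lrParams);

// The returned learner list plugs straight into a CNTK trainer.
var trainer = Trainer.CreateTrainer(network, loss, evalError, learners);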
/// <summary>
/// AdaGrad is a gradient-based optimization algorithm that adapts the learning rate to the parameters,
/// performing larger updates for infrequently updated parameters and smaller updates for frequently updated ones.
/// </summary>
/// <param name="modelOutput">The model output.</param>
/// <param name="learningRate">The learning rate.</param>
/// <param name="regulizer">The regularizer settings.</param>
/// <returns>Learner.</returns>
private Learner AdaGrad(Function modelOutput, double learningRate = 0.01, Regulizers regulizer = null)
{
    // Per-sample learning rate schedule (minibatch size of 1).
    var learningRatePerSample = new CNTK.TrainingParameterScheduleDouble(learningRate, 1);

    // Regularization and clipping options come from GetAdditionalLearningOptions().
    return CNTKLib.AdaGradLearner(
        new ParameterVector(modelOutput.Parameters().ToList()),
        learningRatePerSample,
        false,
        GetAdditionalLearningOptions());
}
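A minimal sketch of how a learner built this way is typically driven, assuming modelOutput, a loss function, an evaluation metric, a minibatch dictionary and a device already exist; everything except the CNTK API calls is an assumption:

// Hedged sketch: trainer wiring and one update step. 'loss', 'evalMetric', 'minibatch'
// and 'device' are assumed to be defined elsewhere.
Learner adaGrad = AdaGrad(modelOutput, learningRate: 0.01);

var trainer = Trainer.CreateTrainer(modelOutput, loss, evalMetric, new List<Learner> { adaGrad });

// One training step: feeds a minibatch and lets the AdaGrad learner update the parameters.
trainer.TrainMinibatch(minibatch, device);
double minibatchLoss = trainer.PreviousMinibatchLossAverage();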
protected override Learner GenerateLearner(IList<Parameter> parameters, TrainingParameterScheduleDouble learningRateSchedule)
{
    return CNTKLib.AdaGradLearner(
        new ParameterVector(parameters.ToArray()),
        learningRateSchedule,
        NoNeedAveMultiplier,
        Options);
}