Example #1
        /// <summary>
        /// RMSProp. Maintains a moving average of the squared gradients and scales
        /// the learning rate of each parameter accordingly.
        /// </summary>
        /// <param name="parameters">Learnable parameters of the model</param>
        /// <param name="learningRate">Learning rate. (Default is 0.001)</param>
        /// <param name="gamma">Trade-off factor between current and previous gradients. Should be in range (0.0, 1.0); a common value is 0.95. (Default is 0.9)</param>
        /// <param name="inc">Increasing factor when trying to adjust the current learning rate. Should be greater than 1. (Default is 2.0)</param>
        /// <param name="dec">Decreasing factor when trying to adjust the current learning rate. Should be in range (0.0, 1.0). (Default is 0.5)</param>
        /// <param name="max">Maximum scale allowed for the initial learning rate. Should be greater than zero and greater than min. (Default is 2.0)</param>
        /// <param name="min">Minimum scale allowed for the initial learning rate. Should be greater than zero. (Default is 0.5)</param>
        /// <param name="l1Regularization">L1 regularization term. (Default is 0, so no regularization)</param>
        /// <param name="l2Regularization">L2 regularization term. (Default is 0, so no regularization)</param>
        /// <param name="gradientClippingThresholdPerSample">Clipping threshold per sample. (Default is infinity, so no clipping)</param>
        /// <param name="gradientClippingWithTruncation">Whether to use truncation when clipping gradients. (Default is true)</param>
        /// <param name="needAveMultiplier">Whether to use an averaged multiplier for the smoothed gradients. (Default is true)</param>
        /// <returns>RMSProp Learner</returns>
        public static Learner RMSProp(IList<Parameter> parameters,
                                      double learningRate     = 0.001,
                                      double gamma            = 0.9,
                                      double inc              = 2.0,
                                      double dec              = 0.5,
                                      double max              = 2.0,
                                      double min              = 0.5,
                                      double l1Regularization = 0.0,
                                      double l2Regularization = 0.0,
                                      double gradientClippingThresholdPerSample = double.PositiveInfinity,
                                      bool gradientClippingWithTruncation       = true,
                                      bool needAveMultiplier = true)
        {
            var learningRatePerSample = new TrainingParameterScheduleDouble(learningRate, 1);

            var options = SetAdditionalOptions(l1Regularization,
                                               l2Regularization,
                                               gradientClippingThresholdPerSample,
                                               gradientClippingWithTruncation);

            return(CNTKLib.RMSPropLearner(CntkUtilities.CreateParameterVector(parameters),
                                          learningRatePerSample,
                                          gamma, inc, dec, max, min,
                                          needAveMultiplier, options));
        }
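
A minimal usage sketch: the snippet below wires the RMSProp factory into a CNTK Trainer. It assumes the factory lives on a static class (called `Learners` here as a placeholder) and that `model`, `loss`, and `metric` are already-built CNTK Functions.

        // Hypothetical usage; `Learners` is a placeholder name for the static
        // class hosting the factory methods shown in these examples.
        var learner = Learners.RMSProp(model.Parameters(), learningRate: 0.001, gamma: 0.9);
        var trainer = Trainer.CreateTrainer(model, loss, metric, new List<Learner> { learner });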
Example #2
        /// <summary>
        /// Stochastic Gradient Descent.
        /// </summary>
        /// <param name="parameters">Learnable parameters of the model</param>
        /// <param name="learningRate">Learning rate. (Default is 0.01)</param>
        /// <param name="l1Regularization">L1 regularization term. (Default is 0, so no regularization)</param>
        /// <param name="l2Regularization">L2 regularization term. (Default is 0, so no regularization)</param>
        /// <param name="gradientClippingThresholdPerSample">Clipping threshold per sample. (Default is infinity, so no clipping)</param>
        /// <param name="gradientClippingWithTruncation">Whether to use truncation when clipping gradients. (Default is true)</param>
        /// <returns>SGD Learner</returns>
        public static Learner SGD(IList<Parameter> parameters,
                                  double learningRate     = 0.01,
                                  double l1Regularization = 0.0,
                                  double l2Regularization = 0.0,
                                  double gradientClippingThresholdPerSample = double.PositiveInfinity,
                                  bool gradientClippingWithTruncation       = true)
        {
            if (parameters == null)
            {
                throw new ArgumentNullException(nameof(parameters));
            }
            if (learningRate < 0.0)
            {
                throw new ArgumentException(nameof(learningRate) + " has to be larger than or equal to 0");
            }
            if (l1Regularization < 0.0)
            {
                throw new ArgumentException(nameof(l1Regularization) + " has to be larger than or equal to 0");
            }
            if (l2Regularization < 0.0)
            {
                throw new ArgumentException(nameof(l2Regularization) + " has to be larger than or equal to 0");
            }

            var learningRatePerSample = new TrainingParameterScheduleDouble(learningRate, 1);

            var options = SetAdditionalOptions(l1Regularization,
                                               l2Regularization,
                                               gradientClippingThresholdPerSample,
                                               gradientClippingWithTruncation);

            return(CNTKLib.SGDLearner(CntkUtilities.CreateParameterVector(parameters),
                                      learningRatePerSample, options));
        }
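
The `SetAdditionalOptions` helper referenced by all four factories is not shown on this page. A plausible sketch, assuming it simply copies the four arguments onto the matching fields of CNTK's `AdditionalLearningOptions`:

        static AdditionalLearningOptions SetAdditionalOptions(double l1Regularization,
                                                              double l2Regularization,
                                                              double gradientClippingThresholdPerSample,
                                                              bool gradientClippingWithTruncation)
        {
            // The field names below exist on CNTK's AdditionalLearningOptions type;
            // the one-to-one mapping is an assumption about the helper's body.
            var options = new AdditionalLearningOptions();
            options.l1RegularizationWeight             = l1Regularization;
            options.l2RegularizationWeight             = l2Regularization;
            options.gradientClippingThresholdPerSample = gradientClippingThresholdPerSample;
            options.gradientClippingWithTruncation     = gradientClippingWithTruncation;
            return options;
        }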
Example #3
        /// <summary>
        /// Adam (Adaptive Moment Estimation) computes adaptive learning rates for each parameter.
        /// In addition to storing an exponentially decaying average of past squared gradients v_t, like Adadelta,
        /// Adam also keeps an exponentially decaying average of past gradients, similar to momentum.
        /// Essentially, Adam is RMSProp with momentum.
        /// https://arxiv.org/pdf/1412.6980.pdf.
        /// </summary>
        /// <param name="parameters">Learnable parameters of the model</param>
        /// <param name="learningRate">Learning rate. (Default is 0.001)</param>
        /// <param name="momentum">Momentum. (Default is 0.9).
        /// Note that this is the beta1 parameter in the Adam paper.</param>
        /// <param name="varianceMomentum">Variance momentum schedule. (Default is 0.999).
        /// Note that this is the beta2 parameter in the Adam paper.</param>
        /// <param name="l1Regularization">L1 regularization term. (Default is 0, so no regularization)</param>
        /// <param name="l2Regularization">L2 regularization term. (Default is 0, so no regularization)</param>
        /// <param name="gradientClippingThresholdPerSample">Clipping threshold per sample. (Default is infinity, so no clipping)</param>
        /// <param name="gradientClippingWithTruncation">Whether to use truncation when clipping gradients. (Default is true)</param>
        /// <param name="unitGain">Whether momentum is interpreted as a unit-gain filter. (Default is true)</param>
        /// <param name="epsilon">Small constant added for numerical stability. (Default is 1e-8)</param>
        /// <returns>Adam Learner</returns>
        public static Learner Adam(IList<Parameter> parameters, double learningRate = 0.001,
                                   double momentum         = 0.9, double varianceMomentum = 0.999,
                                   double l1Regularization = 0.0,
                                   double l2Regularization = 0.0,
                                   double gradientClippingThresholdPerSample = double.PositiveInfinity,
                                   bool gradientClippingWithTruncation       = true,
                                   bool unitGain  = true,
                                   double epsilon = 1e-08)
        {
            var learningRatePerSample = new TrainingParameterScheduleDouble(learningRate, 1);
            var momentumRate          = new TrainingParameterScheduleDouble(momentum, 1);
            var varianceMomentumRate  = new TrainingParameterScheduleDouble(varianceMomentum, 1);

            var options = SetAdditionalOptions(l1Regularization,
                                               l2Regularization,
                                               gradientClippingThresholdPerSample,
                                               gradientClippingWithTruncation);

            return(CNTKLib.AdamLearner(CntkUtilities.CreateParameterVector(parameters),
                                       learningRatePerSample,
                                       momentumRate,
                                       unitGain,
                                       varianceMomentumRate,
                                       epsilon,
                                       false, // adamax: false selects the standard Adam update rather than AdaMax
                                       options));
        }
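
For context, a minibatch loop around the Adam learner might look like the sketch below. `model`, `loss`, `metric`, `inputVariable`, `labelVariable`, and the `featureValue`/`labelValue` batches are assumed to exist, and `Learners` again stands in for the hosting static class; depending on the CNTK version, `TrainMinibatch` may require an extra `isSweepEndInArguments` flag.

        var device  = DeviceDescriptor.UseDefaultDevice();
        var learner = Learners.Adam(model.Parameters(), learningRate: 0.001,
                                    momentum: 0.9, varianceMomentum: 0.999);
        var trainer = Trainer.CreateTrainer(model, loss, metric, new List<Learner> { learner });

        for (int epoch = 0; epoch < 10; epoch++)
        {
            // featureValue and labelValue are CNTK Value batches; their creation is omitted.
            var arguments = new Dictionary<Variable, Value>
            {
                { inputVariable, featureValue },
                { labelVariable, labelValue }
            };
            trainer.TrainMinibatch(arguments, device);
        }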
Example #4
        /// <summary>
        /// Stochastic Gradient Descent with momentum.
        /// </summary>
        /// <param name="parameters">Learnable parameters of the model</param>
        /// <param name="learningRate">Learning rate. (Default is 0.01)</param>
        /// <param name="momentum">Momentum. (Default is 0.1)</param>
        /// <param name="l1Regularization">L1 regularization term. (Default is 0, so no regularization)</param>
        /// <param name="l2Regularization">L2 regularization term. (Default is 0, so no regularization)</param>
        /// <param name="gradientClippingThresholdPerSample">Clipping threshold per sample. (Default is infinity, so no clipping)</param>
        /// <param name="gradientClippingWithTruncation">Whether to use truncation when clipping gradients. (Default is true)</param>
        /// <param name="unitGain">Whether momentum is interpreted as a unit-gain filter. (Default is true)</param>
        /// <returns>Momentum SGD Learner</returns>
        public static Learner MomentumSGD(IList<Parameter> parameters,
                                          double learningRate     = 0.01, double momentum = 0.1,
                                          double l1Regularization = 0.0,
                                          double l2Regularization = 0.0,
                                          double gradientClippingThresholdPerSample = double.PositiveInfinity,
                                          bool gradientClippingWithTruncation       = true,
                                          bool unitGain = true)
        {
            var learningRatePerSample = new TrainingParameterScheduleDouble(learningRate, 1);
            var momentumPerSample     = new TrainingParameterScheduleDouble(momentum, 1);

            var options = SetAdditionalOptions(l1Regularization,
                                               l2Regularization,
                                               gradientClippingThresholdPerSample,
                                               gradientClippingWithTruncation);

            return(CNTKLib.MomentumSGDLearner(CntkUtilities.CreateParameterVector(parameters),
                                              learningRatePerSample, momentumPerSample, unitGain,
                                              options));
        }
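
Note that all four factories pass 1 as the second constructor argument of `TrainingParameterScheduleDouble`, which makes every schedule per-sample. A short sketch of the distinction, assuming a minibatch size of 64 for the per-minibatch case:

        // Per-sample schedule: the value applies to each individual sample.
        // This is what the factories above construct.
        var momentumPerSample = new TrainingParameterScheduleDouble(0.1, 1);

        // Per-minibatch schedule: the value applies to a whole minibatch,
        // here assumed to contain 64 samples.
        var momentumPerMinibatch = new TrainingParameterScheduleDouble(0.1, 64);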