/// <summary>
/// Constructs an AMSGrad optimizer, forwarding the Adam-style hyperparameters to the base
/// class and wiring the precision-specific update delegates for T = float or T = double.
/// </summary>
/// <param name="alpha">Step size; base-class default applies when null.</param>
/// <param name="beta1">First-moment decay rate; base-class default applies when null.</param>
/// <param name="beta2">Second-moment decay rate; base-class default applies when null.</param>
/// <param name="epsilon">Divide-by-zero guard; base-class default applies when null.</param>
/// <param name="eta">Learning-rate schedule multiplier; base-class default applies when null.</param>
public AmsGrad(T? alpha = null, T? beta1 = null, T? beta2 = null, T? epsilon = null, T? eta = null) : base(alpha: alpha, beta1: beta1, beta2: beta2, epsilon: epsilon, eta: eta)
{
    // Bind the concrete single- or double-precision implementation to this instance.
    if (this is AmsGrad<float> f)
    {
        f.Update = () => OptimizerF.Update(f);
        // vhat[i] carries the running maximum of the second moment (the AMSGrad extension over Adam).
        f.UpdateFunctionParameters = (i) => AmsGradF.UpdateFunctionParameters(f.Alpha, f.Beta1, f.Beta2, f.Epsilon, f.Eta, UpdateCount, f.FunctionParameters[i], f.m[i], f.v[i], f.vhat[i]);
    }
    else if (this is AmsGrad<double> d)
    {
        d.Update = () => OptimizerD.Update(d);
        d.UpdateFunctionParameters = (i) => AmsGradD.UpdateFunctionParameters(d.Alpha, d.Beta1, d.Beta2, d.Epsilon, d.Eta, UpdateCount, d.FunctionParameters[i], d.m[i], d.v[i], d.vhat[i]);
    }
}
/// <summary>
/// Constructs a gradient-clipping "optimizer" that rescales gradients against the given
/// threshold, binding the float or double implementation depending on T.
/// </summary>
/// <param name="threshold">Clipping threshold applied to each function parameter's gradient.</param>
public GradientClipping(T threshold)
{
    this.Threshold = threshold;

    // Bind the concrete single- or double-precision implementation to this instance.
    if (this is GradientClipping<float> f)
    {
        f.Update = () => OptimizerF.Update(f);
        f.UpdateFunctionParameters = (i) => GradientClippingF.UpdateFunctionParameters(f.Threshold, f.FunctionParameters[i]);
    }
    else if (this is GradientClipping<double> d)
    {
        d.Update = () => OptimizerD.Update(d);
        d.UpdateFunctionParameters = (i) => GradientClippingD.UpdateFunctionParameters(d.Threshold, d.FunctionParameters[i]);
    }
}
/// <summary>
/// Constructs an AdamW optimizer (Adam with decoupled weight decay), forwarding the shared
/// hyperparameters to the base class and wiring the precision-specific update delegates.
/// </summary>
/// <param name="alpha">Step size; base-class default applies when null.</param>
/// <param name="beta1">First-moment decay rate; base-class default applies when null.</param>
/// <param name="beta2">Second-moment decay rate; base-class default applies when null.</param>
/// <param name="epsilon">Divide-by-zero guard; base-class default applies when null.</param>
/// <param name="eta">Learning-rate schedule multiplier; base-class default applies when null.</param>
/// <param name="weightDecayRate">Decoupled weight-decay coefficient; defaults to zero (plain Adam).</param>
public AdamW(T? alpha = null, T? beta1 = null, T? beta2 = null, T? epsilon = null, T? eta = null, T weightDecayRate = default(T)) : base(alpha: alpha, beta1: beta1, beta2: beta2, epsilon: epsilon, eta: eta)
{
    WeightDecayRate = weightDecayRate;

    // Bind the concrete single- or double-precision implementation to this instance.
    if (this is AdamW<float> f)
    {
        f.Update = () => OptimizerF.Update(f);
        f.UpdateFunctionParameters = (i) => AdamWF.UpdateFunctionParameters(f.Alpha, f.WeightDecayRate, f.Beta1, f.Beta2, f.Epsilon, f.Eta, UpdateCount, f.FunctionParameters[i], f.m[i], f.v[i]);
    }
    else if (this is AdamW<double> d)
    {
        d.Update = () => OptimizerD.Update(d);
        d.UpdateFunctionParameters = (i) => AdamWD.UpdateFunctionParameters(d.Alpha, d.WeightDecayRate, d.Beta1, d.Beta2, d.Epsilon, d.Eta, UpdateCount, d.FunctionParameters[i], d.m[i], d.v[i]);
    }
}
/// <summary>
/// Constructs a plain stochastic-gradient-descent optimizer and wires the precision-specific
/// update delegates for T = float or T = double.
/// </summary>
/// <param name="learningRate">Step size; defaults to 0.01 when null.</param>
public SGD(T? learningRate = null)
{
    this.LearningRate = learningRate ?? (TVal<T>)0.01;

    // Bind the concrete single- or double-precision implementation to this instance.
    if (this is SGD<float> f)
    {
        f.Update = () => OptimizerF.Update(f);
        f.UpdateFunctionParameters = (i) => SGDF.UpdateFunctionParameters(f.LearningRate, f.FunctionParameters[i]);
    }
    else if (this is SGD<double> d)
    {
        d.Update = () => OptimizerD.Update(d);
        d.UpdateFunctionParameters = (i) => SGDD.UpdateFunctionParameters(d.LearningRate, d.FunctionParameters[i]);
    }
}
/// <summary>
/// Constructs an AdaGrad optimizer and wires the precision-specific update delegates
/// for T = float or T = double.
/// </summary>
/// <param name="learningRate">Step size; defaults to 0.01 when null.</param>
/// <param name="epsilon">Divide-by-zero guard; defaults to 1e-8 when null.</param>
public AdaGrad(T? learningRate = null, T? epsilon = null)
{
    this.LearningRate = learningRate ?? (TVal<T>)0.01;
    this.Epsilon = epsilon ?? (TVal<T>)1e-8;

    // Bind the concrete single- or double-precision implementation to this instance.
    if (this is AdaGrad<float> f)
    {
        f.Update = () => OptimizerF.Update(f);
        // h[i] is the per-parameter accumulated squared-gradient state.
        f.UpdateFunctionParameters = (i) => AdaGradF.UpdateFunctionParameters(f.LearningRate, f.Epsilon, f.h[i], f.FunctionParameters[i]);
    }
    else if (this is AdaGrad<double> d)
    {
        d.Update = () => OptimizerD.Update(d);
        d.UpdateFunctionParameters = (i) => AdaGradD.UpdateFunctionParameters(d.LearningRate, d.Epsilon, d.h[i], d.FunctionParameters[i]);
    }
}
/// <summary>
/// Constructs a momentum-SGD optimizer and wires the precision-specific update delegates
/// for T = float or T = double.
/// </summary>
/// <param name="learningRate">Step size; defaults to 0.01 when null.</param>
/// <param name="momentum">Velocity decay coefficient; defaults to 0.9 when null.</param>
public MomentumSGD(T? learningRate = null, T? momentum = null)
{
    this.LearningRate = learningRate ?? (TVal<T>)0.01;
    this.Momentum = momentum ?? (TVal<T>)0.9;

    // Bind the concrete single- or double-precision implementation to this instance.
    if (this is MomentumSGD<float> f)
    {
        f.Update = () => OptimizerF.Update(f);
        // v[i] is the per-parameter velocity buffer.
        f.UpdateFunctionParameters = (i) => MomentumSGDF.UpdateFunctionParameters(f.LearningRate, f.Momentum, f.v[i], f.FunctionParameters[i]);
    }
    else if (this is MomentumSGD<double> d)
    {
        d.Update = () => OptimizerD.Update(d);
        d.UpdateFunctionParameters = (i) => MomentumSGDD.UpdateFunctionParameters(d.LearningRate, d.Momentum, d.v[i], d.FunctionParameters[i]);
    }
}
/// <summary>
/// Constructs an AdaDelta optimizer and wires the precision-specific update delegates
/// for T = float or T = double.
/// </summary>
/// <param name="rho">Decay rate of the running averages; defaults to 0.95 when null.</param>
/// <param name="epsilon">Divide-by-zero guard; defaults to 1e-6 when null.</param>
public AdaDelta(T? rho = null, T? epsilon = null)
{
    this.Rho = rho ?? (TVal<T>)0.95;
    this.Epsilon = epsilon ?? (TVal<T>)1e-6;

    // Bind the concrete single- or double-precision implementation to this instance.
    if (this is AdaDelta<float> f)
    {
        f.Update = () => OptimizerF.Update(f);
        // msg[i] / msdx[i] are the per-parameter running-average state buffers.
        f.UpdateFunctionParameters = (i) => AdaDeltaF.UpdateFunctionParameters(f.msg[i], f.msdx[i], f.Rho, f.Epsilon, f.FunctionParameters[i]);
    }
    else if (this is AdaDelta<double> d)
    {
        d.Update = () => OptimizerD.Update(d);
        d.UpdateFunctionParameters = (i) => AdaDeltaD.UpdateFunctionParameters(d.msg[i], d.msdx[i], d.Rho, d.Epsilon, d.FunctionParameters[i]);
    }
}
/// <summary>
/// Constructs an AdaBound optimizer (Adam with dynamically bounded learning rate),
/// forwarding the Adam hyperparameters to the base class and wiring the precision-specific
/// update delegates for T = float or T = double.
/// </summary>
/// <param name="alpha">Step size; also recorded as InitialAlpha (default 0.001) for the bound schedule.</param>
/// <param name="beta1">First-moment decay rate; base-class default applies when null.</param>
/// <param name="beta2">Second-moment decay rate; base-class default applies when null.</param>
/// <param name="finalLr">Final (SGD-like) learning rate the bounds converge to; defaults to 0.1.</param>
/// <param name="gamma">Convergence speed of the bounding functions; defaults to 1e-3.</param>
/// <param name="epsilon">Divide-by-zero guard; base-class default applies when null.</param>
/// <param name="eta">Learning-rate schedule multiplier; base-class default applies when null.</param>
public AdaBound(T? alpha = null, T? beta1 = null, T? beta2 = null, T? finalLr = null, T? gamma = null, T? epsilon = null, T? eta = null) : base(alpha: alpha, beta1: beta1, beta2: beta2, epsilon: epsilon, eta: eta)
{
    this.InitialAlpha = alpha ?? (TVal<T>)0.001;
    this.FinalLr = finalLr ?? (TVal<T>)0.1;
    this.Gamma = gamma ?? (TVal<T>)1e-3;

    // Bind the concrete single- or double-precision implementation to this instance.
    // FinalLr is passed by ref and Lower/Upper by out: the update routine maintains
    // the evolving clipping bounds on the instance itself.
    if (this is AdaBound<float> f)
    {
        f.Update = () => OptimizerF.Update(f);
        f.UpdateFunctionParameters = (i) => AdaBoundF.UpdateFunctionParameters(f.Alpha, f.InitialAlpha, f.Gamma, f.Beta1, f.Beta2, f.Epsilon, f.Eta, UpdateCount, f.FunctionParameters[i], f.m[i], f.v[i], ref f.FinalLr, out f.Lower, out f.Upper, f.Clip);
    }
    else if (this is AdaBound<double> d)
    {
        d.Update = () => OptimizerD.Update(d);
        d.UpdateFunctionParameters = (i) => AdaBoundD.UpdateFunctionParameters(d.Alpha, d.InitialAlpha, d.Gamma, d.Beta1, d.Beta2, d.Epsilon, d.Eta, UpdateCount, d.FunctionParameters[i], d.m[i], d.v[i], ref d.FinalLr, out d.Lower, out d.Upper, d.Clip);
    }
}
/// <summary>
/// Constructs an RMSprop optimizer and wires the precision-specific update delegates
/// for T = float or T = double.
/// </summary>
/// <param name="learningRate">Step size; defaults to 0.01 when null.</param>
/// <param name="alpha">Decay rate of the squared-gradient average; defaults to 0.99 when null.</param>
/// <param name="epsilon">Divide-by-zero guard; defaults to 1e-8 when null.</param>
public RMSprop(T? learningRate = null, T? alpha = null, T? epsilon = null)
{
    this.LearningRate = learningRate ?? (TVal<T>)0.01;
    this.Alpha = alpha ?? (TVal<T>)0.99;
    this.Epsilon = epsilon ?? (TVal<T>)1e-8;

    // Bind the concrete single- or double-precision implementation to this instance.
    if (this is RMSprop<float> f)
    {
        f.Update = () => OptimizerF.Update(f);
        // ms[i] is the per-parameter mean-square state buffer.
        f.UpdateFunctionParameters = (i) => RMSpropF.UpdateFunctionParameters(f.LearningRate, f.Alpha, f.Epsilon, f.FunctionParameters[i], f.ms[i]);
    }
    else if (this is RMSprop<double> d)
    {
        d.Update = () => OptimizerD.Update(d);
        d.UpdateFunctionParameters = (i) => RMSpropD.UpdateFunctionParameters(d.LearningRate, d.Alpha, d.Epsilon, d.FunctionParameters[i], d.ms[i]);
    }
}
/// <summary>
/// Constructs an Adam optimizer, applying the standard defaults for any hyperparameter
/// left null and wiring the precision-specific update delegates for T = float or T = double.
/// </summary>
/// <param name="alpha">Step size; defaults to 0.001 when null.</param>
/// <param name="beta1">First-moment decay rate; defaults to 0.9 when null.</param>
/// <param name="beta2">Second-moment decay rate; defaults to 0.999 when null.</param>
/// <param name="epsilon">Divide-by-zero guard; defaults to 1e-8 when null.</param>
/// <param name="eta">Learning-rate schedule multiplier; defaults to 1.0 when null.</param>
public Adam(T? alpha = null, T? beta1 = null, T? beta2 = null, T? epsilon = null, T? eta = null)
{
    this.Alpha = alpha ?? (TVal<T>)0.001;
    this.Beta1 = beta1 ?? (TVal<T>)0.9;
    this.Beta2 = beta2 ?? (TVal<T>)0.999;
    this.Epsilon = epsilon ?? (TVal<T>)1e-8;
    this.Eta = eta ?? (TVal<T>)1.0;

    // Bind the concrete single- or double-precision implementation to this instance.
    switch (this)
    {
        case Adam<float> adamF:
            adamF.Update = () => OptimizerF.Update(adamF);
            // Consistency fix: capture bare UpdateCount (same instance) in both branches,
            // matching every other optimizer constructor in this file; the float branch
            // previously used adamF.UpdateCount.
            adamF.UpdateFunctionParameters = (i) => AdamF.UpdateFunctionParameters(adamF.Alpha, adamF.Beta1, adamF.Beta2, adamF.Epsilon, adamF.Eta, UpdateCount, adamF.FunctionParameters[i], adamF.m[i], adamF.v[i]);
            break;

        case Adam<double> adamD:
            adamD.Update = () => OptimizerD.Update(adamD);
            adamD.UpdateFunctionParameters = (i) => AdamD.UpdateFunctionParameters(adamD.Alpha, adamD.Beta1, adamD.Beta2, adamD.Epsilon, adamD.Eta, UpdateCount, adamD.FunctionParameters[i], adamD.m[i], adamD.v[i]);
            break;
    }
}