/// <summary>
/// Creates the model from width, height, channel count and the number of output classes.
/// </summary>
/// <param name="w">Input image width.</param>
/// <param name="h">Input image height.</param>
/// <param name="c">Number of input channels.</param>
/// <param name="outputClassNum">Number of output classes.</param>
/// <param name="deviceName">Name of the device to run on (e.g. CPU or GPU).</param>
public FCN7(int w, int h, int c, int outputClassNum, string deviceName)
{
    device = NP.CNTKHelper.GetDeviceByName(deviceName);

    // input/output variables
    int[] inputDim = new int[] { w, h, c };
    int[] outputDim = new int[] { outputClassNum };
    inputVariable = Variable.InputVariable(NDShape.CreateNDShape(inputDim), DataType.Float, "inputVariable");
    outputVariable = Variable.InputVariable(NDShape.CreateNDShape(outputDim), DataType.Float, "labelVariable");

    // build the model
    classifierOutput = CreateFullyChannelNetwork(inputVariable, c, outputClassNum);
    Function loss = CNTKLib.SquaredError(classifierOutput, outputVariable);
    Function pred = CNTKLib.ClassificationError(classifierOutput, outputVariable);

    // Adam learner
    ParameterVector parameterVector = new ParameterVector(classifierOutput.Parameters().ToList());
    TrainingParameterScheduleDouble learningRateSchedule = new TrainingParameterScheduleDouble(0.00178125, BatchSize);
    TrainingParameterScheduleDouble momentumRateSchedule = new TrainingParameterScheduleDouble(0.9, BatchSize);
    Learner learner = CNTKLib.AdamLearner(parameterVector, learningRateSchedule, momentumRateSchedule, true); // construct the learner
    trainer = Trainer.CreateTrainer(classifierOutput, loss, pred, new List<Learner>() { learner });

    // Alternative: momentum SGD learner
    //TrainingParameterScheduleDouble learningRatePerSample = new TrainingParameterScheduleDouble(0.00178125, BatchSize); //0.00178125
    //TrainingParameterScheduleDouble momentumTimeConstant = CNTKLib.MomentumAsTimeConstantSchedule(256);
    //IList<Learner> parameterLearners = new List<Learner>() { Learner.MomentumSGDLearner(classifierOutput.Parameters(), learningRatePerSample, momentumTimeConstant, true) };
    //trainer = Trainer.CreateTrainer(classifierOutput, loss, pred, parameterLearners);
}
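The NP.CNTKHelper.GetDeviceByName call above resolves the device from a plain string. The helper itself is not part of this snippet; a minimal sketch, assuming it only distinguishes the CPU from the first GPU, might look like this:

using System;
using CNTK;

public static class CNTKHelper
{
    // Maps a device name such as "CPU" or "GPU" to a CNTK DeviceDescriptor.
    public static DeviceDescriptor GetDeviceByName(string deviceName)
    {
        if (string.Equals(deviceName, "GPU", StringComparison.OrdinalIgnoreCase))
            return DeviceDescriptor.GPUDevice(0); // first GPU
        return DeviceDescriptor.CPUDevice;        // default to CPU
    }
}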
/// <summary>
/// Creates an Adamax learner. In Adam, the v_t factor scales the gradient inversely
/// proportionally to the ℓ2 norm of the past gradients (via the v_{t-1} term) and the
/// current gradient; Adamax generalizes this to the ℓ∞ norm (see the update rules below).
/// </summary>
/// <param name="modelOutput">The model output.</param>
/// <param name="learningRate">The learning rate.</param>
/// <param name="momentum">The momentum.</param>
/// <param name="varianceMomentum">The variance momentum.</param>
/// <param name="unitGain">If set to <c>true</c>, uses unit-gain momentum.</param>
/// <param name="epsilon">The epsilon added for numerical stability.</param>
/// <param name="regulizer">The regularizer.</param>
/// <returns>Learner.</returns>
private Learner Adamax(Function modelOutput, double learningRate = 0.002, double momentum = 0.9,
    double varianceMomentum = 0.999, bool unitGain = true, double epsilon = 1e-08, Regulizers regulizer = null)
{
    CNTK.TrainingParameterScheduleDouble learningRatePerSample = new CNTK.TrainingParameterScheduleDouble(learningRate, 1);
    CNTK.TrainingParameterScheduleDouble momentumRate = new CNTK.TrainingParameterScheduleDouble(momentum, 1);
    CNTK.TrainingParameterScheduleDouble varianceMomentumRate = new CNTK.TrainingParameterScheduleDouble(varianceMomentum, 1);

    // The `true` flag after epsilon switches CNTKLib.AdamLearner into Adamax mode.
    return CNTKLib.AdamLearner(new ParameterVector(modelOutput.Parameters().ToList()), learningRatePerSample,
        momentumRate, unitGain, varianceMomentumRate, epsilon, true, GetAdditionalLearningOptions());
}
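For reference, the two second-moment accumulators from Kingma & Ba (https://arxiv.org/pdf/1412.6980.pdf), where $g_t$ is the current gradient and $\beta_2$ the variance momentum:

$$v_t = \beta_2 v_{t-1} + (1 - \beta_2)\, g_t^2 \qquad \text{(Adam, } \ell_2 \text{ norm)}$$
$$u_t = \max(\beta_2\, u_{t-1},\; |g_t|) \qquad \text{(Adamax, } \ell_\infty \text{ norm)}$$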
/// <summary>
/// Adam (Adaptive Moment Estimation) is another method that computes adaptive learning rates for each parameter.
/// In addition to storing an exponentially decaying average of past squared gradients v_t like Adadelta,
/// Adam also keeps an exponentially decaying average of past gradients, similar to momentum.
/// Essentially, Adam is RMSProp with momentum.
/// https://arxiv.org/pdf/1412.6980.pdf
/// </summary>
/// <param name="parameters">Learnable parameters of the model.</param>
/// <param name="learningRate">Learning rate. (Default is 0.001)</param>
/// <param name="momentum">Momentum (default is 0.9).
/// Note that this is the beta1 parameter in the Adam paper.</param>
/// <param name="varianceMomentum">Variance momentum schedule. (Default is 0.999).
/// Note that this is the beta2 parameter in the Adam paper.</param>
/// <param name="l1Regularization">L1 regularization term. (Default is 0, so no regularization)</param>
/// <param name="l2Regularization">L2 regularization term. (Default is 0, so no regularization)</param>
/// <param name="gradientClippingThresholdPerSample">Gradient clipping threshold per sample. (Default is infinity, so no clipping)</param>
/// <param name="gradientClippingWithTruncation">Whether to clip gradients by truncation. (Default is true)</param>
/// <param name="unitGain">Whether to interpret the momentum schedule as a unit-gain filter. (Default is true)</param>
/// <param name="epsilon">Small constant added for numerical stability. (Default is 1e-08)</param>
/// <returns>The configured Adam learner.</returns>
public static Learner Adam(IList<Parameter> parameters, double learningRate = 0.001, double momentum = 0.9,
    double varianceMomentum = 0.999, double l1Regularization = 0.0, double l2Regularization = 0.0,
    double gradientClippingThresholdPerSample = double.PositiveInfinity, bool gradientClippingWithTruncation = true,
    bool unitGain = true, double epsilon = 1e-08)
{
    var learningRatePerSample = new TrainingParameterScheduleDouble(learningRate, 1);
    var momentumRate = new TrainingParameterScheduleDouble(momentum, 1);
    var varianceMomentumRate = new TrainingParameterScheduleDouble(varianceMomentum, 1);
    var options = SetAdditionalOptions(l1Regularization, l2Regularization,
        gradientClippingThresholdPerSample, gradientClippingWithTruncation);

    return CNTKLib.AdamLearner(CntkUtilities.CreateParameterVector(parameters), learningRatePerSample,
        momentumRate, unitGain, varianceMomentumRate, epsilon, false, options);
}
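A hypothetical call site for the factory above; this is a sketch, not part of the original library, and `model` and `labels` stand in for an existing network Function and label Variable:

using System.Collections.Generic;
using CNTK;

static Trainer BuildTrainer(Function model, Variable labels)
{
    var loss = CNTKLib.CrossEntropyWithSoftmax(model, labels, "loss");
    var error = CNTKLib.ClassificationError(model, labels, "error");

    // Defaults follow the Adam paper: learningRate = 0.001, beta1 = 0.9, beta2 = 0.999.
    Learner adam = Adam(model.Parameters());

    return Trainer.CreateTrainer(model, loss, error, new List<Learner> { adam });
}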
protected override Learner GenerateLearner(IList<Parameter> parameters, TrainingParameterScheduleDouble learningRateSchedule)
{
    var m = new TrainingParameterScheduleDouble(Momentum);
    var vm = new TrainingParameterScheduleDouble(VarianceMomentum);
    return CNTKLib.AdamLearner(new ParameterVector(parameters.ToArray()), learningRateSchedule,
        m, !NoUnitGrain, vm, Epsilon, Adamax, Options);
}
/// <inheritdoc />
internal override Learner ToLearner(Function model)
{
    var learningRatePerSample = new TrainingParameterScheduleDouble(LearningRate, 1);
    var momentumRate = new TrainingParameterScheduleDouble(Momentum, 1);
    var varianceMomentumRate = new TrainingParameterScheduleDouble(VarianceMomentum, 1);

    return CNTKLib.AdamLearner(new ParameterVector(((CNTK.Function)model).Parameters().ToArray()),
        learningRatePerSample, momentumRate, UnitGain, varianceMomentumRate, Epsilon, false,
        GetAdditionalLearningOptions());
}
internal static Learner GetInitializer(IList<Parameter> parameters, NeuralNetworkSettingsEntity s)
{
    var vector = new ParameterVector((ICollection)parameters);
    switch (s.Learner)
    {
        case NeuralNetworkLearner.Adam:
            return CNTKLib.AdamLearner(vector,
                s.LearningRate.ToTrainParam(),
                s.LearningMomentum?.ToTrainParam(),
                s.LearningUnitGain ?? false,
                s.LearningVarianceMomentum?.ToTrainParam());

        case NeuralNetworkLearner.AdaDelta:
            return CNTKLib.AdaDeltaLearner(vector, s.LearningRate.ToTrainParam());

        case NeuralNetworkLearner.AdaGrad:
            return CNTKLib.AdaGradLearner(vector, s.LearningRate.ToTrainParam());

        case NeuralNetworkLearner.FSAdaGrad:
            return CNTKLib.FSAdaGradLearner(vector,
                s.LearningRate.ToTrainParam(),
                s.LearningMomentum?.ToTrainParam(),
                s.LearningUnitGain ?? false,
                s.LearningVarianceMomentum?.ToTrainParam());

        case NeuralNetworkLearner.RMSProp:
            // Note: this case reuses FSAdaGradLearner rather than CNTKLib.RMSPropLearner,
            // which would require additional hyperparameters (gamma, inc, dec, max, min).
            return CNTKLib.FSAdaGradLearner(vector,
                s.LearningRate.ToTrainParam(),
                s.LearningMomentum?.ToTrainParam(),
                s.LearningUnitGain ?? false,
                s.LearningVarianceMomentum?.ToTrainParam());

        case NeuralNetworkLearner.MomentumSGD:
            return CNTKLib.MomentumSGDLearner(vector,
                s.LearningRate.ToTrainParam(),
                s.LearningMomentum?.ToTrainParam(),
                s.LearningUnitGain ?? false);

        case NeuralNetworkLearner.SGD:
            return CNTKLib.SGDLearner(vector, s.LearningRate.ToTrainParam());

        default:
            throw new InvalidOperationException("Unexpected Learner");
    }
}
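The switch relies on a ToTrainParam() extension that is not shown here. A plausible sketch of it, an assumption rather than the actual implementation, converts a scalar into a constant schedule:

using CNTK;

public static class TrainParamExtensions
{
    // Builds a constant schedule from a single scalar. This also works with the
    // `double?.ToTrainParam()` calls above, because `?.` unwraps the nullable.
    public static TrainingParameterScheduleDouble ToTrainParam(this double value)
    {
        return new TrainingParameterScheduleDouble(value);
    }
}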
public override Learner GetOptimizer(IList<Parameter> learningParameters)
{
    var learningOptions = new AdditionalLearningOptions()
    {
        l1RegularizationWeight = _l1RegularizationWeight,
        l2RegularizationWeight = _l2RegularizationWeight,
        gradientClippingWithTruncation = _gradientClippingThresholdPerSample != double.PositiveInfinity,
        gradientClippingThresholdPerSample = _gradientClippingThresholdPerSample
    };

    var learner = CNTKLib.AdamLearner(
        parameters: new ParameterVector((System.Collections.ICollection)learningParameters),
        learningRateSchedule: new TrainingParameterScheduleDouble(LearningRate, (uint)MinibatchSize),
        momentumSchedule: new TrainingParameterScheduleDouble(_momentum, (uint)MinibatchSize),
        unitGain: _unitGain,
        varianceMomentumSchedule: new TrainingParameterScheduleDouble(_varianceMomentumSchedule, (uint)MinibatchSize),
        epsilon: _epsilon,
        adamax: false,
        additionalOptions: learningOptions);

    return learner;
}
/// <summary>
/// Creates the model from a saved model buffer.
/// </summary>
/// <param name="modelBuffer">Serialized model bytes.</param>
/// <param name="deviceName">Name of the device to run on.</param>
public FCN7(byte[] modelBuffer, string deviceName)
{
    device = NP.CNTKHelper.GetDeviceByName(deviceName);
    Function model = Function.Load(modelBuffer, device);
    inputVariable = model.Inputs.First(v => v.Name == "inputVariable");
    outputVariable = Variable.InputVariable(model.Output.Shape, DataType.Float, "labelVariable");
    classifierOutput = model;

    Function loss = CNTKLib.SquaredError(classifierOutput, outputVariable);
    Function pred = CNTKLib.ClassificationError(classifierOutput, outputVariable);

    // Adam learner
    ParameterVector parameterVector = new ParameterVector(classifierOutput.Parameters().ToList());
    TrainingParameterScheduleDouble learningRateSchedule = new TrainingParameterScheduleDouble(0.00178125, BatchSize);
    TrainingParameterScheduleDouble momentumRateSchedule = new TrainingParameterScheduleDouble(0.9, BatchSize);
    Learner learner = CNTKLib.AdamLearner(parameterVector, learningRateSchedule, momentumRateSchedule, true); // construct the learner
    trainer = Trainer.CreateTrainer(classifierOutput, loss, pred, new List<Learner>() { learner });
}
/// <summary>
/// Creates the learner based on the learning parameters.
/// TODO: not all learner parameters are exposed yet.
/// </summary>
/// <param name="network">Network model being trained.</param>
/// <param name="lrParams">Learning parameters.</param>
/// <returns>The list of configured learners.</returns>
private List<Learner> createLearners(Function network, LearningParameters lrParams)
{
    // learning rate and momentum values
    var lr = new TrainingParameterScheduleDouble(lrParams.LearningRate);
    var mm = CNTKLib.MomentumAsTimeConstantSchedule(lrParams.Momentum);
    var addParam = new AdditionalLearningOptions();

    if (lrParams.L1Regularizer > 0)
        addParam.l1RegularizationWeight = lrParams.L1Regularizer;
    if (lrParams.L2Regularizer > 0)
        addParam.l2RegularizationWeight = lrParams.L2Regularizer;

    // momentum SGD learner - rate, momentum and regularizers
    if (lrParams.LearnerType == LearnerType.MomentumSGDLearner)
    {
        var llr = new List<Learner>();
        var msgd = Learner.MomentumSGDLearner(network.Parameters(), lr, mm, true, addParam);
        llr.Add(msgd);
        return llr;
    }
    // SGD learner - rate and regularizers
    else if (lrParams.LearnerType == LearnerType.SGDLearner)
    {
        var llr = new List<Learner>();
        var msgd = Learner.SGDLearner(network.Parameters(), lr, addParam);
        llr.Add(msgd);
        return llr;
    }
    // FSAdaGrad learner - rate and momentum
    // (note: the regularization options in addParam are not passed here)
    else if (lrParams.LearnerType == LearnerType.FSAdaGradLearner)
    {
        var llr = new List<Learner>();
        var msgd = CNTKLib.FSAdaGradLearner(new ParameterVector(network.Parameters().ToList()), lr, mm);
        llr.Add(msgd);
        return llr;
    }
    // Adam learner - rate and momentum
    // (note: the regularization options in addParam are not passed here)
    else if (lrParams.LearnerType == LearnerType.AdamLearner)
    {
        var llr = new List<Learner>();
        var msgd = CNTKLib.AdamLearner(new ParameterVector(network.Parameters().ToList()), lr, mm);
        llr.Add(msgd);
        return llr;
    }
    // AdaGrad learner - rate and regularizers
    else if (lrParams.LearnerType == LearnerType.AdaGradLearner)
    {
        var llr = new List<Learner>();
        var msgd = CNTKLib.AdaGradLearner(new ParameterVector(network.Parameters().ToList()), lr, false, addParam);
        llr.Add(msgd);
        return llr;
    }
    else
    {
        throw new Exception("Learner type is not supported!");
    }
}
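A hypothetical call site for createLearners; the LearningParameters field names are inferred from their use above, so treat them as assumptions:

var lrParams = new LearningParameters()
{
    LearnerType = LearnerType.AdamLearner,
    LearningRate = 0.001,
    Momentum = 0.9
};
List<Learner> learners = createLearners(network, lrParams);
var trainer = Trainer.CreateTrainer(network, trainingLoss, prediction, learners);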
public static void Run_MNIST_Test()
{
    var device = DeviceDescriptor.UseDefaultDevice();

    // dims: MNIST images are 28x28 = 784 pixels, with 10 output classes
    var inDim = 784;
    var outDim = 10;
    var input = CNTKLib.InputVariable(new NDShape(1, inDim), DataType.Float, "features");
    var labels = CNTKLib.InputVariable(new NDShape(1, outDim), DataType.Float, "labels");

    // create network
    var nnModel = createModel(input, outDim, 1, device);

    // loss and eval functions
    var trainingLoss = CNTKLib.CrossEntropyWithSoftmax(nnModel, labels, "lossFunction");
    var prediction = CNTKLib.ClassificationError(nnModel, labels, "classificationError");

    // create learners and trainer;
    // set per-sample learning rate and momentum
    var learningRatePerSample = new CNTK.TrainingParameterScheduleDouble(0.001, 1);
    var momentumPerSample = new CNTK.TrainingParameterScheduleDouble(0.9, 1);
    var nnParams = nnModel.Parameters();
    var parameterLearners = new List<Learner>()
    {
        CNTKLib.AdamLearner(new ParameterVector(nnModel.Parameters().ToList()), learningRatePerSample, momentumPerSample)
    };
    var trainer = Trainer.CreateTrainer(nnModel, trainingLoss, prediction, parameterLearners);

    // create minibatch source
    var sConfigs = new StreamConfiguration[]
    {
        new StreamConfiguration("features", inDim),
        new StreamConfiguration("labels", outDim)
    };
    // This file is huge and cannot be uploaded to GitHub.
    // It can be downloaded from: https://github.com/Microsoft/CNTK/tree/987b22a8350211cb4c44278951857af1289c3666/Examples/Image/DataSets/MNIST
    var minibatchSource = MinibatchSource.TextFormatMinibatchSource("..\\..\\..\\Data\\MNIST-TrainData.txt", sConfigs, MinibatchSource.InfinitelyRepeat);
    var minibatchSize = (uint)754;
    var featureStreamInfo = minibatchSource.StreamInfo("features");
    var labelStreamInfo = minibatchSource.StreamInfo("labels");

    var maxIt = 250;
    var curIt = 1;
    while (true)
    {
        var minibatchData = minibatchSource.GetNextMinibatch(minibatchSize, device);
        var arguments = new Dictionary<Variable, MinibatchData>
        {
            { input, minibatchData[featureStreamInfo] },
            { labels, minibatchData[labelStreamInfo] }
        };
        trainer.TrainMinibatch(arguments, device);

        // count an iteration each time a full sweep over the data ends
        if (minibatchData[featureStreamInfo].sweepEnd)
        {
            if (curIt % 50 == 0 || curIt == 1)
                printProgress(trainer, curIt);
            curIt++;
        }
        if (maxIt <= curIt)
            break;
    }

    // save the trained model
    nnModel.Save("mnist_classifier");

    // validate the model
    var minibatchSourceNewModel = MinibatchSource.TextFormatMinibatchSource("../../../data/MNIST-TestData.txt", sConfigs, MinibatchSource.InfinitelyRepeat);

    // prepare vars to accept results
    List<List<float>> X = new List<List<float>>();
    List<float> Y = new List<float>();

    // model validation
    ValidateModel("mnist_classifier", minibatchSourceNewModel, new int[] { 28, 28 }, 10, "features", "labels", device, 1000, X, Y, false);

    // show image classification result
    showResult(X, Y);
}
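printProgress is referenced but not defined in this example. A minimal sketch, assuming it just reports the running loss and error of the last trained minibatch:

private static void printProgress(Trainer trainer, int iteration)
{
    // Averages are taken over the most recently trained minibatch.
    var loss = trainer.PreviousMinibatchLossAverage();
    var error = trainer.PreviousMinibatchEvaluationAverage();
    Console.WriteLine($"Sweep {iteration}: loss={loss:F4}, error={error:F4}");
}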
public override Learner Create(IList<Parameter> parameters)
{
    // lr, b1, b2 and eps map to the learning rate and the beta1, beta2 and
    // epsilon parameters of the Adam paper; unit gain is fixed to true.
    return CNTKLib.AdamLearner(new ParameterVector(parameters.ToArray()),
        new TrainingParameterScheduleDouble(lr), new TrainingParameterScheduleDouble(b1),
        true, new TrainingParameterScheduleDouble(b2), eps);
}
/// <summary>
/// Train and evaluate an image classifier with CIFAR-10 data.
/// The classification model is saved after training.
/// For repeated runs, the caller may choose whether to retrain a model or
/// just validate an existing one.
/// </summary>
/// <param name="device">CPU or GPU device to run on.</param>
/// <param name="forceRetrain">Whether to override an existing model.
/// If true, any existing model is overridden and the new one evaluated.
/// If false and there is an existing model, the existing model is evaluated.</param>
public static void TrainAndEvaluate(DeviceDescriptor device, bool forceRetrain)
{
    string modelFile = Path.Combine(CifarDataFolder, "CNTK-CSharp.model");

    // If a model already exists and retraining is not forced, validate the model and return.
    if (File.Exists(modelFile) && !forceRetrain)
    {
        ValidateModel(device, modelFile);
        return;
    }

    // prepare training data
    var minibatchSource = CreateMinibatchSource(Path.Combine(CifarDataFolder, "train_map.txt"),
        Path.Combine(CifarDataFolder, "CIFAR-10_mean.xml"), imageDim, numClasses, MaxEpochs);
    var imageStreamInfo = minibatchSource.StreamInfo("features");
    var labelStreamInfo = minibatchSource.StreamInfo("labels");

    // build a model
    var imageInput = CNTKLib.InputVariable(imageDim, imageStreamInfo.m_elementType, "Images");
    var labelsVar = CNTKLib.InputVariable(new int[] { numClasses }, labelStreamInfo.m_elementType, "Labels");
    var classifierOutput = ResNetClassifier(imageInput, numClasses, device, "classifierOutput");

    // prepare for training
    var trainingLoss = CNTKLib.CrossEntropyWithSoftmax(classifierOutput, labelsVar, "lossFunction");
    var prediction = CNTKLib.ClassificationError(classifierOutput, labelsVar, 3, "predictionError");

    // learning rate policy
    double[] lrs = { 3e-2, 3e-3, 3e-4, 3e-4, 5e-5 };  // learning rates
    int[] check_point = { 80, 120, 160, 180 };        // epochs at which the learning rate changes
    uint minibatchSize = 32;
    // Each pair is (number of epochs, learning rate): 80 epochs at lrs[0], then
    // 40 more (through epoch 120), 40 (160), 20 (180) and 20 final epochs.
    PairSizeTDouble p1 = new PairSizeTDouble(80, lrs[0]);
    PairSizeTDouble p2 = new PairSizeTDouble(40, lrs[1]);
    PairSizeTDouble p3 = new PairSizeTDouble(40, lrs[2]);
    PairSizeTDouble p4 = new PairSizeTDouble(20, lrs[3]);
    PairSizeTDouble p5 = new PairSizeTDouble(20, lrs[4]);
    VectorPairSizeTDouble vp = new VectorPairSizeTDouble() { p1, p2, p3, p4, p5 };
    int sample_num_in_a_epoch = 50000;
    TrainingParameterScheduleDouble learningRateSchedule = new TrainingParameterScheduleDouble(vp, (uint)sample_num_in_a_epoch);

    // momentum
    var momentum = new TrainingParameterScheduleDouble(0.9, 1);

    // SGD learner (alternative)
    //var sgdLearner = Learner.SGDLearner(classifierOutput.Parameters(), learningRateSchedule);

    // Adam learner
    ParameterVector parameterVector = new ParameterVector();
    foreach (var parameter in classifierOutput.Parameters())
    {
        parameterVector.Add(parameter);
    }
    var adamLearner = CNTKLib.AdamLearner(parameterVector, learningRateSchedule, momentum);

    // trainer
    var trainer = Trainer.CreateTrainer(classifierOutput, trainingLoss, prediction, new List<Learner> { adamLearner });

    int outputFrequencyInMinibatches = 20, miniBatchCount = 0;
    Stopwatch sw = new Stopwatch();
    sw.Start();

    // Feed data to the trainer for the configured number of epochs.
    Console.WriteLine("*****************Train Start*****************");
    while (true)
    {
        var minibatchData = minibatchSource.GetNextMinibatch(minibatchSize, device);

        // Stop training once the max number of epochs is reached.
        if (minibatchData.empty())
        {
            break;
        }

        trainer.TrainMinibatch(new Dictionary<Variable, MinibatchData>()
        {
            { imageInput, minibatchData[imageStreamInfo] },
            { labelsVar, minibatchData[labelStreamInfo] }
        }, device);

        TestHelper.PrintTrainingProgress(trainer, adamLearner, miniBatchCount++, outputFrequencyInMinibatches);
    }

    // save the model
    var imageClassifier = Function.Combine(new List<Variable>() { trainingLoss, prediction, classifierOutput }, "ImageClassifier");
    imageClassifier.Save(modelFile);
    Console.WriteLine("*****************Train Stop*****************");

    // validate the model
    float acc = ValidateModel(device, modelFile);
    sw.Stop();
    TimeSpan ts2 = sw.Elapsed;
    Console.WriteLine("*****************Validate Stop*****************");
    string logstr = "Total time :" + ts2.TotalSeconds + "s. acc:" + acc;
    Console.WriteLine(logstr);

    // write the log to a new file, disposing the stream when done
    int i = 1;
    while (System.IO.File.Exists("../../../../log_" + i.ToString() + ".txt"))
    {
        i++;
    }
    using (var file = System.IO.File.Create("../../../../log_" + i.ToString() + ".txt"))
    {
        byte[] data = System.Text.Encoding.Default.GetBytes(logstr);
        file.Write(data, 0, data.Length);
    }
}