/// <summary>
/// Reshapes <paramref name="x"/> so that the given axes are dropped from its shape.
/// </summary>
/// <param name="x">The function whose output is reshaped.</param>
/// <param name="axis">Axes to drop; a negative value is offset by the number of dimensions.</param>
/// <returns>The reshaped function.</returns>
private CNTK.Function _reshape_dummy_dim(CNTK.Function x, params int[] axis)
{
    // https://github.com/fchollet/keras/blob/f65a56fb65062c8d14d215c9f4b1015b97cc5bf3/keras/backend/cntk_backend.py#L680
    List<int> dims = In(x.Output.Shape).ToList();
    int rank = dims.Count;
    var normalized = axis.Select(a => a >= 0 ? a : (a + rank)).ToArray();
    int inferredCount = dims.Count(d => d == NDShape.InferredDimension);

    if (inferredCount <= 1)
    {
        // At most one inferred dimension: edit the dimension list (highest index
        // first, so earlier removals do not shift later ones) and reshape once.
        foreach (int position in normalized.Sorted().Reverse())
        {
            dims.RemoveAt(position);
        }

        return C.Reshape(x, NDShape.CreateNDShape(dims));
    }

    // More than one inferred dimension: collapse each axis individually with a
    // ranged reshape, again from the highest index down.
    CNTK.Function reshaped = x;
    foreach (int position in normalized.Sorted().Reverse())
    {
        reshaped = C.Reshape(
            reshaped,
            replacementShape: NDShape.CreateNDShape(new int[] { }),
            beginAxis: new Axis(position),
            endAxis: new Axis(position + 1));
    }

    return reshaped;
}
/// <summary>
/// Builds the network (three convolutions with two max-poolings, then two dense
/// layers), and creates the loss, evaluation function, Adam learner, trainer and
/// evaluator, storing all of them in the corresponding members.
/// </summary>
void create_network()
{
    Console.WriteLine($"Compute Device: {computeDevice.AsString()}");

    // input placeholders
    imageVariable = Util.inputVariable(new int[] { 28, 28, 1 }, "image_tensor");
    categoricalVariable = Util.inputVariable(new int[] { 10 }, "label_tensor");

    // convolutional part
    network = imageVariable;
    network = Layers.Convolution2D(network, 32, new int[] { 3, 3 }, computeDevice, CC.ReLU);
    network = CC.Pooling(network, C.PoolingType.Max, new int[] { 2, 2 }, new int[] { 2 });
    network = Layers.Convolution2D(network, 64, new int[] { 3, 3 }, computeDevice, CC.ReLU);
    network = CC.Pooling(network, C.PoolingType.Max, new int[] { 2, 2 }, new int[] { 2 });
    network = Layers.Convolution2D(network, 64, new int[] { 3, 3 }, computeDevice, CC.ReLU);

    // dense part
    network = Layers.Dense(network, 64, computeDevice, activation: CC.ReLU);
    network = Layers.Dense(network, 10, computeDevice);

    Logging.detailed_summary(network);
    Logging.log_number_of_parameters(network);

    // loss and evaluation functions
    loss_function = CC.CrossEntropyWithSoftmax(network, categoricalVariable);
    eval_function = CC.ClassificationError(network, categoricalVariable);

    // Adam learner; arguments are built in the same order as they are passed
    var parameters = new C.ParameterVector(network.Parameters().ToArray());
    var learningRate = new C.TrainingParameterScheduleDouble(0.001 * batch_size, (uint)batch_size);
    var momentum = new C.TrainingParameterScheduleDouble(0.9);
    var varianceMomentum = new C.TrainingParameterScheduleDouble(0.99);
    learner = CC.AdamLearner(parameters, learningRate, momentum, true, varianceMomentum);

    trainer = CC.CreateTrainer(network, loss_function, eval_function, new C.LearnerVector(new C.Learner[] { learner }));
    evaluator = CC.CreateEvaluator(eval_function);
}
/// <summary>
/// Evaluate the model.
/// </summary>
/// <param name="featureData">The data to evaluate the model on</param>
/// <param name="model">The model to use (defaults to trained model)</param>
/// <returns>The output of the model</returns>
public IList<IList<float>> Evaluate(float[][] featureData, CNTK.Function model = null)
{
    // fall back to the current model when none is supplied
    model = model ?? this.Model;

    // build one batch covering all of the feature data
    CNTK.Value featureBatch;
    if (SequenceLength == 1)
    {
        featureBatch = features.GetBatch(featureData, 0, featureData.Length);
    }
    else
    {
        featureBatch = features.GetSequenceBatch(SequenceLength, featureData, 0, featureData.Length);
    }

    // run the model; the null output value is filled in by Evaluate
    var inputMap = new Dictionary<CNTK.Variable, CNTK.Value>();
    inputMap.Add(features, featureBatch);
    var outputMap = new Dictionary<CNTK.Variable, CNTK.Value>();
    outputMap.Add(model.Output, null);
    model.Evaluate(inputMap, outputMap, NetUtil.CurrentDevice);

    // unpack the dense result
    return outputMap[model.Output].GetDenseData<float>(model.Output);
}
/// <summary>
/// Categorical cross-entropy between <paramref name="target"/> and <paramref name="output"/>.
/// </summary>
/// <param name="target">The target tensor.</param>
/// <param name="output">The prediction tensor (logits when <paramref name="from_logits"/> is true).</param>
/// <param name="from_logits">
/// When true, <paramref name="output"/> is passed to CNTK's cross-entropy-with-softmax;
/// otherwise it is normalised, clipped and the cross-entropy is computed explicitly.
/// </param>
/// <returns>The cross-entropy tensor.</returns>
public Tensor categorical_crossentropy(Tensor target, Tensor output, bool from_logits = false)
{
    // https://github.com/fchollet/keras/blob/f65a56fb65062c8d14d215c9f4b1015b97cc5bf3/keras/backend/cntk_backend.py#L1480
    var _output = In(output);
    var _target = In(target);

    if (from_logits)
    {
        var result = C.CrossEntropyWithSoftmax(_output, _target);
        // cntk's result shape is (batch, 1), while keras expect (batch, )
        CNTK.Function reshaped = C.Reshape(result, NDShape.CreateNDShape(new int[] { }));
        return Out(reshaped);
    }

    // scale preds so that the class probas of each sample sum to 1
    var o = C.ElementDivide(_output.function, C.ReduceSum(_output, Axis.EndStaticAxis()));

    // avoid numerical instability with _EPSILON clipping
    var eps = Constant.Scalar(epsilon(), DeviceDescriptor.CPUDevice);
    var omeps = Constant.Scalar(1.0 - epsilon(), DeviceDescriptor.CPUDevice);
    o = C.Clip(o, eps, omeps);

    // Take the log of the normalised, clipped tensor `o` (not the raw output);
    // otherwise the two steps above have no effect and log(0) can occur.
    CNTK.Function r = C.Negate(C.ReduceSum(C.ElementTimes(_target, C.Log(o)), Axis.EndStaticAxis()));
    return Out(r);
}
/// <summary>
/// Loads the model: downloads the selected pre-trained CIFAR-10 model and loads
/// it from the SiaNet models folder under the application-data directory.
/// </summary>
/// <exception cref="Exception">Invalid model selected!</exception>
public void LoadModel()
{
    try
    {
        string baseFolder = string.Format("{0}\\SiaNet\\models", Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData));

        // Resolve the pre-trained model path once; the download and the local
        // file name are both derived from it.
        string modelPath;
        switch (model)
        {
            case Cifar10Model.ResNet110:
                modelPath = PreTrainedModelPath.Cifar10Path.ResNet110;
                break;
            case Cifar10Model.ResNet20:
                modelPath = PreTrainedModelPath.Cifar10Path.ResNet20;
                break;
            default:
                throw new Exception("Invalid model selected!");
        }

        Downloader.DownloadModel(modelPath);
        string modelFile = baseFolder + "\\" + Path.GetFileName(modelPath);

        modelFunc = Function.Load(modelFile, GlobalParameters.Device);
        Logging.WriteTrace("Model loaded.");
    }
    catch (Exception ex)
    {
        Logging.WriteTrace(ex);
        // Rethrow with `throw;` so the original stack trace is preserved.
        throw;
    }
}
/// <summary>
/// Loads the ".dnn" model that sits next to <paramref name="path"/> (same directory
/// and base name), then runs one prediction on a matrix sized from the third line
/// of the accompanying ".txt" file. Does nothing if that model is already loaded.
/// </summary>
/// <param name="path">Path whose directory and base name identify the model files.</param>
/// <exception cref="FileNotFoundException">The ".dnn" file does not exist.</exception>
public void LoadModel(string path)
{
    path = Path.GetDirectoryName(path) + "/" + Path.GetFileNameWithoutExtension(path) + ".dnn";

    if (!File.Exists(path))
    {
        throw new FileNotFoundException($"Model file not found for prediction: {path}");
    }

    if (path.Equals(LastModelLoaded))
    {
        return;
    }

    try
    {
        Model = CNTKHelper.LoadModel(path);

        var info = Path.GetDirectoryName(path) + "/" + Path.GetFileNameWithoutExtension(path) + ".txt";
        List<string> lines = new List<string>();
        using (StreamReader sr = new StreamReader(info))
        {
            while (sr.Peek() >= 0)
            {
                lines.Add(sr.ReadLine());
            }
        }

        // The third line is parsed as a parenthesised, comma-separated shape;
        // its second and third entries size the matrix passed to Predict.
        var shape = lines[2].Replace(" ", "").Replace("(", "").Replace(")", "").Split(',');
        Predict(new DenseMatrix(Int32.Parse(shape[1]), Int32.Parse(shape[2])));

        // Only remember the model once the load and the prediction succeeded.
        LastModelLoaded = path;
    }
    catch (Exception ex)
    {
        Console.WriteLine("Error: {0}\nCallStack: {1}\n Inner Exception: {2}", ex.Message, ex.StackTrace, ex.InnerException != null ? ex.InnerException.Message : "No Inner Exception");
        // Rethrow with `throw;` so the original stack trace is preserved.
        throw;
    }
}
/// <summary>
/// Drops the given axes from <paramref name="x"/> unless <paramref name="keepdims"/> is set.
/// </summary>
/// <param name="x">The function to process.</param>
/// <param name="axis">Axes handed to <c>_reshape_dummy_dim</c>.</param>
/// <param name="keepdims">When true, <paramref name="x"/> is returned unchanged.</param>
/// <returns>Either <paramref name="x"/> or its reshaped version.</returns>
private CNTK.Function _remove_dims(CNTK.Function x, int[] axis, bool keepdims = false)
{
    return keepdims ? x : _reshape_dummy_dim(x, axis);
}
/// <summary>
/// Stores the model and walker, starts the walk at the model with the value 0
/// as second argument, and finally notifies the walker that the walk is complete.
/// </summary>
/// <param name="model">The function the walk starts from.</param>
/// <param name="walker">The walker whose <c>Complete</c> is called after the walk.</param>
public NodeWalk(CNTK.Function model, INodeWalker walker)
{
    // state used during the walk
    _model = model;
    _walker = walker;
    _visited = new HashSet<Variable>();

    // the whole traversal happens during construction
    WalkToFunction(_model, 0);

    walker.Complete();
}
/// <summary>
/// Applies CNTK's argmax along <paramref name="axis"/> and drops that axis from the result.
/// </summary>
/// <param name="x">The input tensor.</param>
/// <param name="axis">The axis passed to argmax (defaults to -1).</param>
/// <returns>The argmax result with the axis removed.</returns>
public Tensor argmax(Tensor x, int axis = -1)
{
    // https://github.com/fchollet/keras/blob/f65a56fb65062c8d14d215c9f4b1015b97cc5bf3/keras/backend/cntk_backend.py#L745
    var reductionAxis = new Axis(axis);
    var input = In(x);

    CNTK.Function indices = C.Argmax(input.function, reductionAxis);
    return Out(_reshape_dummy_dim(indices, axis));
}
/// <summary>
/// Computes sqrt(sum(x^2) + 1e-7), summing over the axis with index
/// (number of dimensions - 1), and squeezes the result.
/// </summary>
/// <param name="x">The input function.</param>
/// <param name="name">Not used by this method.</param>
/// <returns>The squeezed result.</returns>
C.Function get_length_and_remove_last_dimension(C.Function x, string name)
{
    int reduceAxisIndex = x.Output.Shape.Dimensions.Count - 1;

    var squared = CC.Square(x);
    var summed = CC.ReduceSum(squared, new C.Axis(reduceAxisIndex));

    // small constant added before the square root
    var tiny = C.Constant.Scalar(C.DataType.Float, 1e-7, computeDevice);
    var length = CC.Sqrt(CC.Plus(summed, tiny));

    return CC.Squeeze(length);
}
/// <summary>
/// Enumerates the outputs of <paramref name="v"/> without acting on them.
/// </summary>
/// <param name="v">The function whose outputs are enumerated.</param>
private static void OnFunction(CNTK.Function v)
{
    // The loop body is intentionally empty. It previously held (disabled) checks
    // that threw when an output Uid contained "Reshape6", "142" or "143".
    foreach (var output in v.Outputs)
    {
    }
}
/// <summary>
/// Logs its arguments and, inside the "_remove_dims" name scope, drops the given
/// axes from <paramref name="x"/> unless <paramref name="keepdims"/> is set.
/// </summary>
/// <param name="x">The function to process.</param>
/// <param name="axis">Axes handed to <c>_reshape_dummy_dim</c>.</param>
/// <param name="keepdims">When true, <paramref name="x"/> is returned unchanged.</param>
/// <returns>Either <paramref name="x"/> or its reshaped version.</returns>
private CNTK.Function _remove_dims(CNTK.Function x, int[] axis, bool keepdims = false)
{
    log(new { x, axis, keepdims });

    using (this.name_scope("_remove_dims"))
    {
        return keepdims ? x : _reshape_dummy_dim(x, axis);
    }
}
/// <summary>
/// Scales <paramref name="vectors"/> by (s / (1 + s)) / (sqrt(s) + 1e-7), where s is
/// the sum of squares of <paramref name="vectors"/> along <paramref name="axis"/>
/// (kept as a dimension).
/// </summary>
/// <param name="vectors">The input function.</param>
/// <param name="name">Name given to the final element-wise product.</param>
/// <param name="axis">Index of the axis the squares are summed over.</param>
/// <returns>The scaled vectors.</returns>
C.Function squash(C.Function vectors, string name, int axis)
{
    var squares = CC.Square(vectors);
    var reductionAxes = new C.AxisVector(new C.Axis[] { new C.Axis(axis) });
    var squaredNorm = CC.ReduceSum(squares, reductionAxes, keepDims: true);

    var tiny = C.Constant.Scalar(C.DataType.Float, 1e-7, computeDevice);
    var unit = C.Constant.Scalar(C.DataType.Float, 1.0, computeDevice);

    // sqrt(s) + epsilon, used as the final divisor
    var normPlusEps = CC.Plus(CC.Sqrt(squaredNorm), tiny);

    // s / (1 + s), then divided by the norm
    var factor = CC.ElementDivide(squaredNorm, CC.Plus(squaredNorm, unit));
    factor = CC.ElementDivide(factor, normPlusEps);

    return CC.ElementTimes(factor, vectors, name);
}
/// <summary>
/// Builds a capsule layer: multiplies the inputs by a learned parameter "W",
/// then runs <paramref name="routings"/> iterations of a softmax-weighted sum
/// followed by <c>squash</c>, updating the softmax inputs between iterations.
/// </summary>
/// <param name="inputs">Input function; its first two dimensions are read as the input capsule count and size.</param>
/// <param name="num_capsule">Number of output capsules.</param>
/// <param name="dim_capsule">Size of each output capsule.</param>
/// <param name="routings">Number of loop iterations; with a value below 1 the loop never runs.</param>
/// <param name="name">Name given to the final combined output.</param>
/// <returns>The combined output of the last iteration.</returns>
C.Function create_capsule_layer(C.Function inputs, int num_capsule, int dim_capsule, int routings, string name)
{
    var inputDims = inputs.Output.Shape.Dimensions;
    var input_num_capsule = inputDims[0];
    var input_dim_capsule = inputDims[1];

    var weights = new C.Parameter(
        new int[] { num_capsule, dim_capsule, input_num_capsule, input_dim_capsule },
        C.DataType.Float,
        CC.GlorotUniformInitializer(),
        computeDevice,
        name: "W");

    // original note: [1, 1, 1152, 8]
    inputs = CC.Reshape(inputs, new int[] { 1, 1, input_num_capsule, input_dim_capsule });

    // weights * inputs, summed over axis 3 and squeezed
    var projected = CC.Squeeze(CC.ReduceSum(CC.ElementTimes(weights, inputs), new C.Axis(3)));

    C.Function outputs = null;
    var zeros = new C.Constant(new int[] { num_capsule, 1, input_num_capsule }, C.DataType.Float, 0, computeDevice);
    var routingInput = CC.Combine(new C.VariableVector() { zeros });

    int lastRound = routings - 1;
    for (int round = 0; round < routings; round++)
    {
        var softmaxed = CC.Softmax(routingInput, new C.Axis(0));
        var weightedSum = CC.ElementTimes(softmaxed, projected);
        weightedSum = CC.ReduceSum(weightedSum, new C.Axis(2));
        weightedSum = CC.Squeeze(weightedSum);
        outputs = squash(weightedSum, name: $"squashed_{round}", axis: 1);

        if (round >= lastRound)
        {
            // no update after the final iteration
            continue;
        }

        outputs = CC.Reshape(outputs, new int[] { num_capsule, dim_capsule, 1 });
        var product = CC.ElementTimes(outputs, projected);
        product = CC.ReduceSum(product, new C.Axis(1));
        routingInput = CC.Plus(routingInput, product);
    }

    outputs = CC.Combine(new C.VariableVector() { outputs }, name);
    return outputs;
}
/// <summary>
/// Multiplies <paramref name="inputs"/> by a mask and returns the flattened, squeezed
/// product. When <paramref name="mask"/> is null, a one-hot mask is derived from the
/// argmax of the inputs' root-sum-of-squares.
/// </summary>
/// <param name="inputs">The function to mask.</param>
/// <param name="mask">The mask to apply, or null to derive one from <paramref name="inputs"/>.</param>
/// <returns>The masked, flattened and squeezed result.</returns>
C.Function get_mask_and_infer_from_last_dimension(C.Function inputs, C.Function mask)
{
    if (mask == null)
    {
        var dims = inputs.Output.Shape.Dimensions.ToArray();
        var ndims = dims.Length - 1;

        // root of the sum of squares over axis (ndims - 1), squeezed
        var norms = CC.Sqrt(CC.ReduceSum(CC.Square(inputs), new C.Axis(ndims - 1)));
        norms = CC.Squeeze(norms);
        System.Diagnostics.Debug.Assert(norms.Output.Shape.Dimensions.Count == 1);

        // one-hot encoding of the position of the largest value
        var winner = CC.Argmax(norms, new C.Axis(0));
        mask = CC.OneHotOp(winner, numClass: (uint)dims[0], outputSparse: false, axis: new C.Axis(0));
    }

    // append a dimension of size 1 to the mask's shape before multiplying
    var maskShape = mask.Output.Shape.AppendShape(new int[] { 1 });
    mask = CC.Reshape(mask, maskShape);

    var product = CC.ElementTimes(inputs, mask);
    return CC.Squeeze(CC.Flatten(product));
}
/// <summary>
/// Reshapes the dataset to new specified shape.
/// </summary>
/// <param name="shape">The new shape on the dataset.</param>
/// <exception cref="System.ArgumentException">
/// <paramref name="shape"/> is null or empty, or its element count differs from
/// the element count of the current shape.
/// </exception>
public void Reshape(params int[] shape)
{
    if (shape == null || shape.Length == 0)
    {
        throw new ArgumentException("The new shape must contain at least one dimension.", nameof(shape));
    }

    int total = Shape.Aggregate((d1, d2) => d1 * d2);
    if (shape.Aggregate((d1, d2) => d1 * d2) != total)
    {
        // Join the dimensions with a separator so the message reads "(2, 8)" rather than "28".
        throw new ArgumentException(string.Format("Cannot reshape array of size {0} into shape {1}", total, "(" + string.Join(", ", shape) + ")"));
    }

    CNTK.Variable features = CNTK.Variable.InputVariable(new int[] { Shape[1], Shape[0] }, DataType.Float);
    CNTK.Function reshapeFunc = CNTKLib.Reshape(features, shape);

    // Request the output of the reshape function itself. A separately created
    // input variable is not part of the function's graph and cannot be asked for
    // as one of its outputs.
    CNTK.Variable outfeatures = reshapeFunc.Output;

    // flatten the current data into a single buffer
    List<float> vectorData = new List<float>();
    foreach (var item in Data)
    {
        vectorData.AddRange(item);
    }

    Value v = Value.CreateBatch<float>(Shape, vectorData, GlobalParameters.Device);
    Dictionary<CNTK.Variable, Value> inputs = new Dictionary<CNTK.Variable, Value>() { { features, v } };
    Dictionary<CNTK.Variable, Value> outputs = new Dictionary<CNTK.Variable, Value>() { { outfeatures, null } };
    reshapeFunc.Evaluate(inputs, outputs, GlobalParameters.Device);

    // replace the data with the reshaped rows
    var res = outputs[outfeatures].GetDenseData<float>(outfeatures);
    Data = new List<List<float>>();
    foreach (var item in res)
    {
        Data.Add(item.ToList());
    }
}
/// <summary>
/// Applies a 2D convolution with (dim_capsule * n_channels) filters, reshapes the
/// result into rows of <paramref name="dim_capsule"/> values and squashes along axis 1.
/// </summary>
/// <param name="inputs">The input function.</param>
/// <param name="dim_capsule">Number of values per row after the reshape.</param>
/// <param name="n_channels">Multiplied by <paramref name="dim_capsule"/> to get the filter count.</param>
/// <param name="kernel_size">Kernel size passed to the convolution.</param>
/// <param name="strides">Strides passed to the convolution.</param>
/// <param name="pad">Whether the convolution uses padding.</param>
/// <returns>The squashed, reshaped convolution output.</returns>
C.Function create_primary_cap(C.Function inputs, int dim_capsule, int n_channels, int[] kernel_size, int[] strides, bool pad)
{
    int filterCount = dim_capsule * n_channels;
    var conv = Layers.Convolution2D(
        inputs,
        filterCount,
        kernel_size,
        computeDevice,
        strides: strides,
        use_padding: pad,
        name: "primarycap_conv2d");

    // debug-only check: the convolution output is asserted to be 6 x 6 x 256
    var convDims = conv.Output.Shape.Dimensions;
    System.Diagnostics.Debug.Assert((convDims[2] == 256) && (convDims[1] == 6) && (convDims[0] == 6));

    // total element count divided into rows of dim_capsule values
    var num_rows = (int)(Util.np_prod(convDims.ToArray()) / dim_capsule);
    var reshaped = CC.Reshape(conv, new int[] { num_rows, dim_capsule }, name: "primarycap_reshape");

    return squash(reshaped, name: "primarycap_squash", axis: 1);
}
/// <summary>
/// Applies the reduction <paramref name="func"/> to <paramref name="x"/> over the given axes.
/// </summary>
/// <param name="x">The tensor to reduce.</param>
/// <param name="axis">
/// Axes to reduce over. Null reduces over all axes; an empty array skips the
/// reduction and only runs the dimension-removal step.
/// </param>
/// <param name="keepdims">When false, the reduced axes are dropped from the result.</param>
/// <param name="func">The CNTK reduction to apply, given the input and the axes.</param>
/// <returns>The reduced tensor.</returns>
private Tensor _reduce(Tensor x, int[] axis, bool keepdims, Func<Variable, AxisVector, CNTK.Function> func)
{
    var _x = In(x);

    if (axis == null)
    {
        // No explicit axes: reduce over everything. There is no axis list to drop
        // afterwards, so the reduced function is returned as is. The previous code
        // fell through and dereferenced the null array.
        CNTK.Function reducedAll = func(_x, new AxisVector(new[] { Axis.AllAxes() }));
        return Out(reducedAll);
    }

    CNTK.Function f = _x;
    if (axis.Length > 0)
    {
        // Original note: axes in reduce operations are 1-based (?) — unconfirmed.
        Axis[] _axis = axis.Select(a => new Axis(a)).ToArray();
        f = func(_x, new AxisVector(_axis));
    }

    f = _remove_dims(f, axis, keepdims);
    return Out(f);
}
/// <summary>
/// Creates a tensor that wraps the given CNTK function.
/// </summary>
/// <param name="function">The function stored in this tensor.</param>
public CNTKTensor(CNTK.Function function)
{
    this.function = function;
}
/// <summary>
/// Loads the model: downloads the selected pre-trained ImageNet model and loads
/// it from the SiaNet models folder under the application-data directory.
/// </summary>
/// <exception cref="Exception">Invalid model selected!</exception>
public void LoadModel()
{
    try
    {
        string baseFolder = string.Format("{0}\\SiaNet\\models", Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData));

        // Resolve the pre-trained model path once; the download and the local
        // file name are both derived from it.
        string modelPath;
        switch (model)
        {
            case ImageNetModel.AlexNet:
                modelPath = PreTrainedModelPath.ImageNetPath.AlexNet;
                break;
            case ImageNetModel.InceptionV3:
                modelPath = PreTrainedModelPath.ImageNetPath.InceptionV3;
                break;
            case ImageNetModel.ResNet18:
                modelPath = PreTrainedModelPath.ImageNetPath.ResNet18;
                break;
            case ImageNetModel.ResNet34:
                modelPath = PreTrainedModelPath.ImageNetPath.ResNet34;
                break;
            case ImageNetModel.ResNet50:
                modelPath = PreTrainedModelPath.ImageNetPath.ResNet50;
                break;
            case ImageNetModel.ResNet101:
                modelPath = PreTrainedModelPath.ImageNetPath.ResNet101;
                break;
            case ImageNetModel.ResNet152:
                modelPath = PreTrainedModelPath.ImageNetPath.ResNet152;
                break;
            case ImageNetModel.VGG16:
                modelPath = PreTrainedModelPath.ImageNetPath.VGG16;
                break;
            case ImageNetModel.VGG19:
                modelPath = PreTrainedModelPath.ImageNetPath.VGG19;
                break;
            default:
                throw new Exception("Invalid model selected!");
        }

        Downloader.DownloadModel(modelPath);
        string modelFile = baseFolder + "\\" + Path.GetFileName(modelPath);

        modelFunc = Function.Load(modelFile, GlobalParameters.Device);
        Logging.WriteTrace("Model loaded.");
    }
    catch (Exception ex)
    {
        Logging.WriteTrace(ex);
        // Rethrow with `throw;` so the original stack trace is preserved.
        throw;
    }
}
/// <summary>
/// Builds a backend function from placeholders, outputs and update operations.
/// When updates are given, the first output is taken as the loss, a trainer is
/// created for updates tied to gradient placeholders, and the remaining updates
/// are combined into <c>unrelated_updates</c>. Outputs not handled by the trainer
/// are combined into <c>metrics_func</c>.
/// </summary>
/// <param name="c">The backend used to unwrap tensors and look up gradient parameters.</param>
/// <param name="inputs">The placeholder variables.</param>
/// <param name="outputs">The output functions; the first is the loss when updates are given.</param>
/// <param name="updates">Update operations: one tensor (used as is) or two tensors (assigned).</param>
/// <param name="name">Not used by this constructor.</param>
/// <exception cref="Exception">Updates were given without outputs, or a gradient node has no registered parameter.</exception>
/// <exception cref="NotImplementedException">An update has neither one nor two tensors.</exception>
public CNTKFunction(CNTKBackend c, List<Variable> inputs, CNTK.Function[] outputs, List<List<Tensor>> updates, string name)
{
    this.c = c;
    this.placeholders = inputs;
    this.trainer = null;
    this.unrelated_updates = null;
    this.updates = updates;

    if (updates.Count > 0)
    {
        if (len(outputs) <= 0)
        {
            throw new Exception();
        }

        this.loss = outputs[0];

        // need group update by gradient place holder
        var u_ops = new List<CNTK.Function>();
        var unrelated_updates = new List<CNTK.Function>();
        foreach (List<Tensor> update in updates)
        {
            CNTK.Function u;
            if (update.Count == 1)
            {
                u = c.In(update[0]);
            }
            else if (update.Count == 2)
            {
                u = C.Assign(c.In(update[0]), c.In(update[1]));
            }
            else
            {
                throw new NotImplementedException();
            }

            // Classify by Arguments, as the array-based overload and the Keras
            // reference do. Inputs also counts the operands of an assign, so it
            // is never empty for one.
            if (u.Arguments.Count == 0)
            {
                u_ops.Add(u);
            }
            else
            {
                unrelated_updates.Add(u);
            }
        }

        var update_func = C.Combine(new VariableVector(u_ops.Select(u => u.Output).ToArray()));
        CNTK.Function[] grads = update_func.FindAllWithName("keras_grad_placeholder").ToArray();

        var u_list = new List<CNTK.Function>();
        var p_list = new List<CNTK.Parameter>();
        foreach (CNTK.Function g in grads)
        {
            if (c.grad_parameter_dict.ContainsKey(g))
            {
                p_list.Add(c.grad_parameter_dict[g]);
                u_list.Add(g);
            }
            else
            {
                throw new Exception($"CNTK backend: when constructing trainer, found gradient node {g} which is not related to any parameters in the model. Please double check how the gradient node is constructed.");
            }
        }

        if (len(u_list) > 0)
        {
            Learner learner = Learner.SGDLearner(p_list, new TrainingParameterScheduleDouble(0));
            var criterion = (len(outputs) > 1)
                ? C.Combine(new VariableVector(new[] { outputs[0], outputs[1] }))
                : outputs[0];
            this.trainer = Trainer.CreateTrainer(model: outputs[0], lossFunction: criterion, evaluationFunction: null, parameterLearners: new[] { learner });

            this.trainer_output = new UnorderedMapVariableValuePtr();
            foreach (CNTK.Function f in outputs)
            {
                this.trainer_output.Add(f, null);
            }
        }
        else if (len(u_ops) > 0)
        {
            unrelated_updates.AddRange(u_ops);
        }

        if (len(unrelated_updates) > 0)
        {
            this.unrelated_updates = C.Combine(new VariableVector(unrelated_updates.Select(_ => _.Output).ToArray()));
        }
    }

    if (this.trainer == null)
    {
        this.metrics_outputs = outputs.Select(f => f.Output).ToArray();
        this.metrics_func = C.Combine(new VariableVector(this.metrics_outputs));
    }
    else if (len(outputs) > 2)
    {
        // cntk only could handle loss and 1 metric in trainer, for metrics more
        // than 2, need manual eval
        this.metrics_outputs = Matrix.Get(outputs, 2, 0).Select(f => f.Output).ToArray();
        this.metrics_func = C.Combine(new VariableVector(this.metrics_outputs));
    }
    else
    {
        this.metrics_func = null;
    }
}
/// <summary>
/// Wraps a model, creating a float label variable sized by the first dimension of
/// the model output and picking the model's first input that is flagged as an
/// input as the feature variable (null when there is none).
/// </summary>
/// <param name="model">The model to wrap.</param>
public CompiledModel(Function model)
{
    Model = model;

    var firstOutputDim = Model.Output.Shape[0];
    LabelVariable = Variable.InputVariable(new[] { firstOutputDim }, DataType.Float);

    FeatureVariable = Model.Inputs.FirstOrDefault(candidate => candidate.IsInput);
}
static void Main(string[] args) { // unzip archive if (!Directory.Exists("cat")) { Console.WriteLine("Unpacking data...."); DataUtil.Unzip(@"..\..\..\..\..\catsanddogs.zip", "."); } // create mapping files if (!File.Exists("train_map.txt")) { Console.WriteLine("Creating mapping files..."); CreateMapFiles(); } // get a training and validation image reader var trainingReader = DataUtil.GetImageReader("train_map.txt", imageWidth, imageHeight, numChannels, 2, randomizeData: true, augmentData: true); var validationReader = DataUtil.GetImageReader("validation_map.txt", imageWidth, imageHeight, numChannels, 2, randomizeData: false, augmentData: false); // build features and labels var features = NetUtil.Var(new int[] { imageHeight, imageWidth, numChannels }, DataType.Float); var labels = NetUtil.Var(new int[] { 2 }, DataType.Float); // ****************** // ADD YOUR CODE HERE // ****************** CNTK.Function network = null; // fix this line! // print the network to the console Console.WriteLine("Neural Network architecture: "); Console.WriteLine(network.ToSummary()); // set up the loss function and the classification error function var lossFunc = CNTKLib.CrossEntropyWithSoftmax(network.Output, labels); var errorFunc = CNTKLib.ClassificationError(network.Output, labels); // use the Adam learning algorithm var learner = network.GetAdamLearner( learningRateSchedule: (0.0001, 1), momentumSchedule: (0.99, 1)); // set up a trainer and an evaluator var trainer = network.GetTrainer(learner, lossFunc, errorFunc); var evaluator = network.GetEvaluator(errorFunc); // declare some variables var result = 0.0; var sampleCount = 0; var batchCount = 0; var lines = new List <List <double> >() { new List <double>(), new List <double>() }; // train the network during several epochs Console.WriteLine("Training the neural network...."); for (int epoch = 0; epoch < maxEpochs; epoch++) { Console.Write($"[{DateTime.Now:HH:mm:ss}] Training epoch {epoch+1}/{maxEpochs}... 
"); // train the network using random batches result = 0.0; sampleCount = 0; batchCount = 0; while (sampleCount < 2 * trainingSetSize) { // get the current batch var batch = trainingReader.GetBatch(batchSize); var featuresBatch = batch[trainingReader.StreamInfo("features")]; var labelsBatch = batch[trainingReader.StreamInfo("labels")]; // train the network on the batch var(Loss, Evaluation) = trainer.TrainBatch( new[] {
/// <summary>
/// Train the model.
/// </summary>
/// <param name="threshold">
/// When non-zero, training stops after the first epoch whose training metric is
/// below this value.
/// </param>
public void Train(double threshold = 0)
{
    // build the variables and the model
    features = CreateFeatureVariable();
    labels = CreateLabelVariable();
    Model = CreateModel(features);
    AssertSequenceLength();

    // pick the loss function (stays null for an unhandled type)
    CNTK.Function lossFunction = null;
    var lossKind = lossFunctionType;
    if (lossKind == LossFunctionType.BinaryCrossEntropy)
    {
        lossFunction = CNTK.CNTKLib.BinaryCrossEntropy(Model, labels);
    }
    else if (lossKind == LossFunctionType.MSE)
    {
        lossFunction = CNTK.CNTKLib.SquaredError(Model, labels);
    }
    else if (lossKind == LossFunctionType.CrossEntropyWithSoftmax)
    {
        lossFunction = CNTK.CNTKLib.CrossEntropyWithSoftmax(Model, labels);
    }
    else if (lossKind == LossFunctionType.Custom)
    {
        lossFunction = CustomLossFunction();
    }

    // pick the accuracy function (stays null for an unhandled type)
    CNTK.Function accuracy_function = null;
    var accuracyKind = accuracyFunctionType;
    if (accuracyKind == AccuracyFunctionType.SameAsLoss)
    {
        accuracy_function = lossFunction;
    }
    else if (accuracyKind == AccuracyFunctionType.BinaryAccuracy)
    {
        accuracy_function = NetUtil.BinaryAccuracy(Model, labels);
    }

    // Adam learner; original note on both schedules: "remove batch_size?"
    var adam = Model.GetAdamLearner(
        (LearningRate, (uint)BatchSize),
        (0.9, (uint)BatchSize),
        unitGain: false);

    // trainer, learning-rate scheduler and (only with validation data) evaluator
    trainer = CNTK.CNTKLib.CreateTrainer(Model, lossFunction, accuracy_function, new CNTK.LearnerVector() { adam });
    scheduler = new ReduceLROnPlateau(adam, LearningRate);
    if (validationFeatures != null)
    {
        evaluator = CNTK.CNTKLib.CreateEvaluator(accuracy_function);
    }

    // print the model summary
    Console.WriteLine(" Model architecture:");
    Console.WriteLine(Model.ToSummary());

    // start from empty training curves
    TrainingCurves[0].Clear();
    TrainingCurves[1].Clear();

    for (int epoch = 0; epoch < NumberOfEpochs; epoch++)
    {
        var startedAt = DateTime.Now;

        // one pass of training followed by one pass of evaluation
        var trainingMetric = TrainBatches();
        var validationAccuracy = EvaluateBatches();

        TrainingCurves[0].Add(trainingMetric);
        TrainingCurves[1].Add(validationAccuracy);

        // report progress
        var elapsedTime = DateTime.Now.Subtract(startedAt);
        if (metricType != MetricType.Accuracy)
        {
            Console.WriteLine($"Epoch {epoch + 1:D2}/{NumberOfEpochs}, Elapsed time: {elapsedTime.TotalSeconds:F3} seconds, Training Loss: {trainingMetric:F3}");
        }
        else
        {
            Console.WriteLine($"Epoch {epoch + 1:D2}/{NumberOfEpochs}, Elapsed time: {elapsedTime.TotalSeconds:F3} seconds. " + $"Training Accuracy: {trainingMetric:F3}. Validation Accuracy: {validationAccuracy:F3}.");
        }

        // stop when the scheduler says so (it is always updated first),
        // or when the metric has dropped below a non-zero threshold
        bool schedulerStops = scheduler.Update(trainingMetric);
        if (schedulerStops || ((threshold != 0) && (trainingMetric < threshold)))
        {
            break;
        }
    }
}
/// <summary>
/// Wraps a CNTK function in a new <c>CNTKTensor</c>.
/// </summary>
/// <param name="function">The function to wrap.</param>
/// <returns>A new tensor holding <paramref name="function"/>.</returns>
private CNTKTensor tensor(CNTK.Function function)
{
    var wrapped = new CNTKTensor(function);
    return wrapped;
}
/// <summary>
/// Builds a backend function from placeholders, outputs and update operations.
/// When updates are given, the first output is taken as the loss, a trainer is
/// created for updates tied to gradient placeholders, and the remaining updates
/// are combined into <c>unrelated_updates</c>. Outputs not handled by the trainer
/// are combined into <c>metrics_func</c>.
/// </summary>
/// <param name="c">The backend used to unwrap tensors and look up gradient parameters.</param>
/// <param name="inputs">The placeholder variables.</param>
/// <param name="outputs">The output variables; the first is the loss, the second (if any) the trainer's evaluation function.</param>
/// <param name="updates">Update operations: one tensor (used as is) or two tensors (assigned).</param>
/// <param name="name">Not used by this constructor.</param>
/// <exception cref="Exception">Updates were given without outputs, or a gradient node has no registered parameter.</exception>
/// <exception cref="NotImplementedException">An update has neither one nor two tensors.</exception>
public CNTKFunction(CNTKBackend c, Variable[] inputs, CNTK.Variable[] outputs, List<List<Tensor>> updates, string name)
{
    // https://github.com/fchollet/keras/blob/f65a56fb65062c8d14d215c9f4b1015b97cc5bf3/keras/backend/cntk_backend.py#L1501
    this.c = c;
    this.placeholders = inputs;
    this.trainer = null;
    this.unrelated_updates = null;
    this.updates = updates;

    if (updates.Count > 0)
    {
        if (len(outputs) <= 0)
        {
            throw new Exception();
        }

        this.loss = outputs[0];

        // need group update by gradient place holder
        var u_ops = new List<CNTK.Function>();
        var unrelated_updates = new List<CNTK.Function>();
        foreach (List<Tensor> update in updates)
        {
            CNTK.Function u;
            if (update.Count == 1)
            {
                u = c.In(update[0]);
            }
            else if (update.Count == 2)
            {
                u = C.Assign(c.In(update[0]), c.In(update[1]));
            }
            else
            {
                throw new NotImplementedException();
            }

            if (u.Arguments.Count == 0)
            {
                u_ops.Add(u);
            }
            else
            {
                unrelated_updates.Add(u);
            }
        }

        var update_func = C.Combine(new VariableVector(u_ops.Select(u => u.Output).ToArray()));
        CNTK.Constant[] grads = update_func.Inputs.Where(x => x.Name == "keras_grad_placeholder").Select(x => new Constant(x)).ToArray();

        var u_list = new List<CNTK.Constant>();
        var p_list = new List<CNTK.Parameter>();
        foreach (CNTK.Constant g in grads)
        {
            if (c.grad_parameter_dict.ContainsKey(g.Uid))
            {
                p_list.Add(c.grad_parameter_dict[g.Uid]);
                u_list.Add(g);
            }
            else
            {
                throw new Exception($"CNTK backend: when constructing trainer, found gradient node {g} which is not related to any parameters in the model. Please double check how the gradient node is constructed.");
            }
        }

        if (len(u_list) > 0)
        {
            Learner learner = Learner.SGDLearner(p_list, new TrainingParameterScheduleDouble(1));

            // Only use a second output as the evaluation function when it exists;
            // indexing outputs[1] unconditionally throws when only a loss is given.
            var evaluation = (len(outputs) > 1) ? outputs[1] : null;
            this.trainer = Trainer.CreateTrainer(model: outputs[0], lossFunction: outputs[0], evaluationFunction: evaluation, parameterLearners: new[] { learner });
        }
        else if (len(u_ops) > 0)
        {
            unrelated_updates.AddRange(u_ops);
        }

        if (len(unrelated_updates) > 0)
        {
            this.unrelated_updates = C.Combine(new VariableVector(unrelated_updates.Select(_ => _.Output).ToArray()));
        }
    }

    if (this.trainer == null)
    {
        this.metrics_outputs = outputs;
        this.metrics_func = C.Combine(new VariableVector(this.metrics_outputs));
    }
    else if (len(outputs) > 2)
    {
        // cntk only could handle loss and 1 metric in trainer, for metrics more
        // than 2, need manual eval
        this.metrics_outputs = Matrix.Get(outputs, 2, 0);
        this.metrics_func = C.Combine(new VariableVector(this.metrics_outputs));
    }
    else
    {
        this.metrics_func = null;
    }
}