/// <summary>
/// Creates a new Function from the specified operands by combining their outputs.
/// </summary>
/// <param name="operands">The variables (typically outputs of other Functions) to combine; must not be null.</param>
/// <returns>A Function whose 'Outputs' are the union of the outputs of the specified operands.</returns>
public static Function Combine(System.Collections.Generic.IEnumerable<Variable> operands)
{
    // Fail fast with a descriptive exception instead of an NRE inside the foreach.
    if (operands == null)
    {
        throw new System.ArgumentNullException(nameof(operands));
    }

    var varVect = new VariableVector();
    foreach (var v in operands)
    {
        varVect.Add(v);
    }
    return CNTKLib.Combine(varVect);
}
// Builds a capsule layer with dynamic routing-by-agreement (CapsNet-style).
// inputs:       primary-capsule output; assumes its static shape is
//               [input_num_capsule, input_dim_capsule] — TODO confirm against create_primary_cap.
// num_capsule:  number of output capsules; dim_capsule: size of each output capsule vector.
// routings:     number of routing iterations; name: name given to the combined output Function.
C.Function create_capsule_layer(C.Function inputs, int num_capsule, int dim_capsule, int routings, string name) {
    var inputs_shape = inputs.Output.Shape.Dimensions;
    var input_num_capsule = inputs_shape[0];
    var input_dim_capsule = inputs_shape[1];
    // Transformation matrix W: one [dim_capsule x input_dim_capsule] map per
    // (output capsule, input capsule) pair, Glorot-initialized.
    var W = new C.Parameter(
        new int[] { num_capsule, dim_capsule, input_num_capsule, input_dim_capsule },
        C.DataType.Float,
        CC.GlorotUniformInitializer(),
        computeDevice, name: "W");
    // Insert two leading singleton axes so element-times against W broadcasts
    // over num_capsule and dim_capsule. (e.g. [1, 1, 1152, 8] for MNIST CapsNet)
    inputs = CC.Reshape(inputs, new int[] { 1, 1, input_num_capsule, input_dim_capsule });
    // inputs_hat = W * inputs summed over the input capsule dimension (axis 3),
    // i.e. the "prediction vectors" u_hat of the routing algorithm.
    var inputs_hat = CC.ElementTimes(W, inputs);
    inputs_hat = CC.ReduceSum(inputs_hat, new C.Axis(3));
    inputs_hat = CC.Squeeze(inputs_hat);
    C.Function outputs = null;
    // Routing logits b start at zero; wrapped in Combine so 'b' is a Function
    // that can be re-bound via Plus inside the loop.
    var zeros = new C.Constant(new int[] { num_capsule, 1, input_num_capsule }, C.DataType.Float, 0, computeDevice);
    var b = CC.Combine(new C.VariableVector() { zeros });
    for (int i = 0; i < routings; i++) {
        // Coupling coefficients c = softmax of the logits over the output-capsule axis.
        var c = CC.Softmax(b, new C.Axis(0));
        // Weighted sum of prediction vectors over input capsules (axis 2)...
        var batch_dot_result = CC.ElementTimes(c, inputs_hat);
        batch_dot_result = CC.ReduceSum(batch_dot_result, new C.Axis(2));
        batch_dot_result = CC.Squeeze(batch_dot_result);
        // ...then the non-linear squash keeps vector length in [0, 1).
        outputs = squash(batch_dot_result, name: $"squashed_{i}", axis: 1);
        if (i < (routings - 1)) {
            // Agreement step: update logits b by the dot product between the
            // current outputs and the prediction vectors (reduced over axis 1).
            outputs = CC.Reshape(outputs, new int[] { num_capsule, dim_capsule, 1 });
            batch_dot_result = CC.ElementTimes(outputs, inputs_hat);
            batch_dot_result = CC.ReduceSum(batch_dot_result, new C.Axis(1));
            b = CC.Plus(b, batch_dot_result);
        }
    }
    // Wrap the final iteration's output in a named Combine node.
    outputs = CC.Combine(new C.VariableVector() { outputs }, name);
    return (outputs);
}
// Wraps a set of CNTK output Functions plus optional state updates into a callable
// backend function; when updates reference "keras_grad_placeholder" gradient nodes,
// a CNTK Trainer is built so those gradients can drive parameter learning.
public CNTKFunction(CNTKBackend c, List<Variable> inputs, CNTK.Function[] outputs, List<List<Tensor>> updates, string name)
{
    this.c = c;
    this.placeholders = inputs;
    this.trainer = null;
    this.unrelated_updates = null;
    this.updates = updates;
    if (updates.Count > 0)
    {
        // With updates present, the first output is treated as the loss.
        if (len(outputs) <= 0)
        {
            throw new Exception();
        }
        this.loss = outputs[0];
        // need group update by gradient place holder
        var u_ops = new List<CNTK.Function>();
        var unrelated_updates = new List<CNTK.Function>();
        foreach (List<Tensor> update in updates)
        {
            CNTK.Function u;
            // A 1-element update is already an op; a 2-element update is (target, value).
            if (update.Count == 1)
            {
                u = c.In(update[0]);
            }
            else if (update.Count == 2)
            {
                u = C.Assign(c.In(update[0]), c.In(update[1]));
            }
            else
            {
                throw new NotImplementedException();
            }
            // NOTE(review): the sibling overload tests u.Arguments.Count here, not
            // u.Inputs.Count — confirm which predicate is intended for "self-contained" ops.
            if (u.Inputs.Count == 0)
            {
                u_ops.Add(u);
            }
            else
            {
                unrelated_updates.Add(u);
            }
        }
        // Combine all self-contained update ops and look for gradient placeholders by name.
        var update_func = C.Combine(new VariableVector(u_ops.Select(u => u.Output).ToArray()));
        CNTK.Function[] grads = update_func.FindAllWithName("keras_grad_placeholder").ToArray();
        var u_list = new List<CNTK.Function>();
        var p_list = new List<CNTK.Parameter>();
        foreach (CNTK.Function g in grads)
        {
            // Each gradient placeholder must map back to a known model parameter.
            if (c.grad_parameter_dict.ContainsKey(g))
            {
                p_list.Add(c.grad_parameter_dict[g]);
                u_list.Add(g);
            }
            else
            {
                throw new Exception($"CNTK backend: when constructing trainer, found gradient node {g} which is not related to any parameters in the model. Please double check how the gradient node is constructed.");
            }
        }
        if (len(u_list) > 0)
        {
            // Learning-rate schedule of 0 — presumably a placeholder learner whose
            // real updates come from the Assign ops above; TODO confirm (the sibling
            // overload uses 1 here).
            Learner learner = Learner.SGDLearner(p_list, new TrainingParameterScheduleDouble(0));
            // With 2+ outputs, combine loss and first metric into the criterion.
            var criterion = (len(outputs) > 1) ?
                C.Combine(new VariableVector(new[] { outputs[0], outputs[1] })) : outputs[0];
            this.trainer = Trainer.CreateTrainer(model: outputs[0], lossFunction: criterion, evaluationFunction: null, parameterLearners: new[] { learner });
            this.trainer_output = new UnorderedMapVariableValuePtr();
            foreach (CNTK.Function f in outputs)
            {
                this.trainer_output.Add(f, null);
            }
        }
        else if (len(u_ops) > 0)
        {
            // No gradient-driven updates: treat the remaining ops as unrelated updates.
            unrelated_updates.AddRange(u_ops);
        }
        if (len(unrelated_updates) > 0)
        {
            this.unrelated_updates = C.Combine(new VariableVector(unrelated_updates.Select(_ => _.Output).ToArray()));
        }
    }
    if (this.trainer == null)
    {
        // No trainer: expose all outputs through a single combined eval Function.
        this.metrics_outputs = outputs.Select(f => f.Output).ToArray();
        this.metrics_func = C.Combine(new VariableVector(this.metrics_outputs));
        // cntk only could handle loss and 1 metric in trainer, for metrics more
        // than 2, need manual eval
    }
    else if (len(outputs) > 2)
    {
        // Outputs beyond the first two (loss + metric) are evaluated manually.
        this.metrics_outputs = Matrix.Get(outputs, 2, 0).Select(f => f.Output).ToArray();
        this.metrics_func = C.Combine(new VariableVector(this.metrics_outputs));
    }
    else
    {
        this.metrics_func = null;
    }
}
/// <summary>
/// Combines the outputs of the specified 'operands' Functions into a single new
/// Function whose 'Outputs' are the union of the 'Outputs' of each operand.
/// E.g. when building a classification model, the CrossEntropy loss Function and the
/// ClassificationError Function — the two roots of the computation graph — can be
/// "Combine"d into one Function with 2 outputs: the loss and the error.
/// </summary>
/// <param name="operands">variables whose function are to be combined</param>
/// <param name="name">optional name for the resulting Function</param>
/// <returns>the combined Function</returns>
public static Function Combine(IList<Variable> operands, string name = "")
{
    return CNTKLib.Combine(Helper.AsVariableVector(operands), name);
}
// Variable[]-based overload: wraps CNTK output Variables plus optional state updates
// into a callable backend function. Port of the Keras CNTK backend; see
// https://github.com/fchollet/keras/blob/f65a56fb65062c8d14d215c9f4b1015b97cc5bf3/keras/backend/cntk_backend.py#L1501
public CNTKFunction(CNTKBackend c, Variable[] inputs, CNTK.Variable[] outputs, List<List<Tensor>> updates, string name)
{
    this.c = c;
    this.placeholders = inputs;
    this.trainer = null;
    this.unrelated_updates = null;
    this.updates = updates;
    if (updates.Count > 0)
    {
        // With updates present, the first output is treated as the loss.
        if (len(outputs) <= 0)
        {
            throw new Exception();
        }
        this.loss = outputs[0];
        // need group update by gradient place holder
        var u_ops = new List<CNTK.Function>();
        var unrelated_updates = new List<CNTK.Function>();
        foreach (List<Tensor> update in updates)
        {
            CNTK.Function u;
            // A 1-element update is already an op; a 2-element update is (target, value).
            if (update.Count == 1)
            {
                u = c.In(update[0]);
            }
            else if (update.Count == 2)
            {
                u = C.Assign(c.In(update[0]), c.In(update[1]));
            }
            else
            {
                throw new NotImplementedException();
            }
            // NOTE(review): the Function[]-based overload tests u.Inputs.Count here —
            // confirm which predicate is intended for "self-contained" ops.
            if (u.Arguments.Count == 0)
            {
                u_ops.Add(u);
            }
            else
            {
                unrelated_updates.Add(u);
            }
        }
        // Combine all self-contained update ops, then find gradient placeholder
        // constants by name among the combined graph's inputs.
        var update_func = C.Combine(new VariableVector(u_ops.Select(u => u.Output).ToArray()));
        CNTK.Constant[] grads = update_func.Inputs.Where(x => x.Name == "keras_grad_placeholder").Select(x => new Constant(x)).ToArray();
        var u_list = new List<CNTK.Constant>();
        var p_list = new List<CNTK.Parameter>();
        foreach (CNTK.Constant g in grads)
        {
            // Each gradient placeholder (looked up by Uid) must map to a known parameter.
            if (c.grad_parameter_dict.ContainsKey(g.Uid))
            {
                p_list.Add(c.grad_parameter_dict[g.Uid]);
                u_list.Add(g);
            }
            else
            {
                throw new Exception($"CNTK backend: when constructing trainer, found gradient node {g} which is not related to any parameters in the model. Please double check how the gradient node is constructed.");
            }
        }
        if (len(u_list) > 0)
        {
            // Learning-rate schedule of 1 — NOTE(review): the sibling overload uses 0;
            // confirm which value is intended.
            Learner learner = Learner.SGDLearner(p_list, new TrainingParameterScheduleDouble(1));
            // outputs[1] is assumed to be the evaluation metric — TODO confirm callers
            // always supply at least two outputs when updates are present.
            this.trainer = Trainer.CreateTrainer(model: outputs[0], lossFunction: outputs[0], evaluationFunction: outputs[1], parameterLearners: new[] { learner });
        }
        else if (len(u_ops) > 0)
        {
            // No gradient-driven updates: treat the remaining ops as unrelated updates.
            unrelated_updates.AddRange(u_ops);
        }
        if (len(unrelated_updates) > 0)
        {
            this.unrelated_updates = C.Combine(new VariableVector(unrelated_updates.Select(_ => _.Output).ToArray()));
        }
    }
    if (this.trainer == null)
    {
        // No trainer: expose all outputs through a single combined eval Function.
        this.metrics_outputs = outputs;
        this.metrics_func = C.Combine(new VariableVector(this.metrics_outputs));
        // cntk only could handle loss and 1 metric in trainer, for metrics more
        // than 2, need manual eval
    }
    else if (len(outputs) > 2)
    {
        // Outputs beyond the first two (loss + metric) are evaluated manually.
        this.metrics_outputs = Matrix.Get(outputs, 2, 0);
        this.metrics_func = C.Combine(new VariableVector(this.metrics_outputs));
    }
    else
    {
        this.metrics_func = null;
    }
}
// Builds the full CapsNet training graph: conv feature extractor -> primary capsules
// -> digit capsules -> (length output + masked decoder reconstruction), plus the
// margin loss + scaled reconstruction MSE, the Adam learner, trainer and evaluator.
void create_network()
{
    imageVariable = Util.inputVariable(input_shape, "image");
    // Initial 9x9 conv with 256 filters, ReLU, no padding.
    var conv1 = Layers.Convolution2D(
        imageVariable, 256, new int[] { 9, 9 }, computeDevice,
        use_padding: false, activation: CC.ReLU, name: "conv1");
    // Primary capsules: 32 channels of 8-D capsules via strided 9x9 conv.
    var primarycaps = create_primary_cap(
        conv1, dim_capsule: 8, n_channels: 32,
        kernel_size: new int[] { 9, 9 }, strides: new int[] { 2, 2 }, pad: false);
    // Digit capsules: 10 capsules of 16 dims, with dynamic routing.
    var digitcaps = create_capsule_layer(
        primarycaps, num_capsule: 10, dim_capsule: 16,
        routings: routings, name: "digitcaps");
    // Capsule lengths act as class scores.
    var out_caps = get_length_and_remove_last_dimension(digitcaps, name: "capsnet");
    categoricalLabel = Util.inputVariable(new int[] { 10 }, "label");
    // Training decoder input is masked by the true label; evaluation decoder input
    // is masked by the predicted capsule (null -> infer from capsule lengths).
    var masked_by_y = get_mask_and_infer_from_last_dimension(digitcaps, CC.Combine(new C.VariableVector() { categoricalLabel }));
    var masked = get_mask_and_infer_from_last_dimension(digitcaps, null);
    var decoder = create_decoder(masked.Output.Shape.Dimensions.ToArray());
    var decoder_output_training = Model.invoke_model(decoder, new C.Variable[] { masked_by_y });
    var decoder_output_evaluation = Model.invoke_model(decoder, new C.Variable[] { masked });
    network = CC.Combine(new C.VariableVector() { out_caps, decoder_output_training }, "overall_training_network");
    Logging.log_number_of_parameters(network);
    // first component of the loss: CapsNet margin loss
    //   y_true * max(0, 0.9 - y_pred)^2 + 0.5 * (1 - y_true) * max(0, y_pred - 0.1)^2
    var y_true = categoricalLabel;
    var y_pred = out_caps;
    var digit_loss = CC.Plus(
        CC.ElementTimes(y_true, CC.Square(CC.ElementMax(DC(0), CC.Minus(DC(0.9), y_pred), ""))),
        CC.ElementTimes(DC(0.5), CC.ElementTimes(CC.Minus(DC(1), y_true), CC.Square(CC.ElementMax(DC(0), CC.Minus(y_pred, DC(0.1)), "")))));
    digit_loss = CC.ReduceSum(digit_loss, C.Axis.AllStaticAxes());
    // second component of the loss: reconstruction MSE, normalized per pixel
    var num_pixels_at_output = Util.np_prod(decoder_output_training.Output.Shape.Dimensions.ToArray());
    var squared_error = CC.SquaredError(decoder_output_training, imageVariable);
    var image_mse = CC.ElementDivide(squared_error, DC(num_pixels_at_output));
    // Total loss: margin loss + 0.35 * reconstruction MSE.
    loss_function = CC.Plus(digit_loss, CC.ElementTimes(DC(0.35), image_mse));
    eval_function = CC.ClassificationError(y_pred, y_true);
    // Adam: lr scaled by batch size (per-minibatch schedule), beta1=0.9, beta2=0.99.
    learner = CC.AdamLearner(
        new C.ParameterVector(network.Parameters().ToArray()),
        new C.TrainingParameterScheduleDouble(0.001 * batch_size, (uint)batch_size),
        new C.TrainingParameterScheduleDouble(0.9),
        true,
        new C.TrainingParameterScheduleDouble(0.99));
    trainer = CC.CreateTrainer(network, loss_function, eval_function, new C.LearnerVector(new C.Learner[] { learner }));
    evaluator = CC.CreateEvaluator(eval_function);
}