/// <summary>
/// Builds the element-wise binary cross-entropy between predictions and targets:
/// <c>-target * log(output) - (1 - target) * log(1 - output)</c>.
/// </summary>
/// <param name="output">Predictions; passed through a sigmoid first when <paramref name="from_logits"/> is true.</param>
/// <param name="target">Target values, combined element-wise with the predictions.</param>
/// <param name="from_logits">When true, <paramref name="output"/> is treated as logits and a sigmoid is applied.</param>
/// <returns>The loss expression, wrapped by <c>Out</c>.</returns>
public Tensor binary_crossentropy(Tensor output, Tensor target, bool from_logits = false)
{
    log(new { output, target, from_logits });

    var _output = new Variable(In(output).function);
    var _target = new Variable(In(target).function);

    if (from_logits)
    {
        _output = C.Sigmoid(_output);
    }

    // Clip predictions to [epsilon, 1 - epsilon] so that neither log(output)
    // nor log(1 - output) is ever evaluated at zero.
    var eps = InConstant(epsilon());
    var omeps = InConstant(1.0 - epsilon());
    _output = C.Clip(_output, eps, omeps);

    // Positive-class term: -target * log(output)
    var a = new Variable(C.Negate(C.ElementTimes(_target, C.Log(_output))));

    // Negative-class term: -(1 - target) * log(1 - output)
    var b = new Variable(C.Negate(
        C.ElementTimes(
            C.Minus(InConstant(1.0), _target),
            C.Log(C.Minus(InConstant(1.0), _output)))));

    _output = a + b;
    return Out(_output);
}
/// <summary>
/// Assembles the transforming-autoencoder graph together with its loss,
/// learner, trainer and evaluator, storing each of them in the corresponding field.
/// </summary>
void create_network()
{
    // Graph inputs: the source image, the extra transformation input and the expected output image.
    imageVariable = Util.inputVariable(input_shape, "image");
    transformationVariable = Util.inputVariable(extra_input_shape, "transformation");
    transformedImageVariable = Util.inputVariable(input_shape, "transformed_image");

    network = create_transforming_autoencoder(
        num_capsules,
        input_shape,
        extra_input_shape,
        recognizer_dim,
        generator_dim);
    Logging.log_number_of_parameters(network, show_filters: false);

    // Squared error against the transformed image, scaled by 1 / (total size of the network output).
    var inverse_output_size = C.Constant.Scalar(
        C.DataType.Float,
        1.0 / network.Output.Shape.TotalSize,
        computeDevice);
    var error_squared = CC.SquaredError(network.Output, transformedImageVariable);
    var scaled_error = CC.ElementTimes(error_squared, inverse_output_size);

    // The same expression serves as both the training loss and the evaluation metric.
    loss_function = scaled_error;
    eval_function = scaled_error;

    var trainable_parameters = new C.ParameterVector(network.Parameters().ToArray());
    var learning_rate_schedule = new C.TrainingParameterScheduleDouble(learning_rate * batch_size, (uint)batch_size);
    var momentum_schedule = new C.TrainingParameterScheduleDouble(0.9);
    var variance_momentum_schedule = new C.TrainingParameterScheduleDouble(0.99);
    learner = CC.AdamLearner(
        trainable_parameters,
        learning_rate_schedule,
        momentum_schedule,
        true,
        variance_momentum_schedule);

    var learners = new C.LearnerVector(new C.Learner[] { learner });
    trainer = CC.CreateTrainer(network, loss_function, learners);
    evaluator = CC.CreateEvaluator(eval_function);
}
/// <summary>
/// Builds the categorical cross-entropy between <paramref name="target"/> and <paramref name="output"/>.
/// </summary>
/// <param name="target">Target distribution, multiplied element-wise with the log-probabilities.</param>
/// <param name="output">Predictions; treated as logits when <paramref name="from_logits"/> is true.</param>
/// <param name="from_logits">
/// When true, the loss is delegated to <c>C.CrossEntropyWithSoftmax</c>; otherwise the predictions are
/// normalised over the last static axis, clipped, and the loss is <c>-sum(target * log(output))</c>.
/// </param>
/// <returns>The loss expression, wrapped by <c>Out</c>.</returns>
public Tensor categorical_crossentropy(Tensor target, Tensor output, bool from_logits = false)
{
    // https://github.com/fchollet/keras/blob/f65a56fb65062c8d14d215c9f4b1015b97cc5bf3/keras/backend/cntk_backend.py#L1480
    var _output = In(output);
    var _target = In(target);

    if (from_logits)
    {
        var result = C.CrossEntropyWithSoftmax(_output, _target);
        // cntk's result shape is (batch, 1), while keras expect (batch, )
        CNTK.Function reshaped = C.Reshape(result, NDShape.CreateNDShape(new int[] { }));
        return Out(reshaped);
    }

    // scale preds so that the class probas of each sample sum to 1
    var o = C.ElementDivide(_output.function, C.ReduceSum(_output, Axis.EndStaticAxis()));

    // avoid numerical instability with _EPSILON clipping
    var eps = Constant.Scalar(epsilon(), DeviceDescriptor.CPUDevice);
    var omeps = Constant.Scalar(1.0 - epsilon(), DeviceDescriptor.CPUDevice);
    o = C.Clip(o, eps, omeps);

    // The logarithm must be taken of the normalised, clipped probabilities `o`;
    // using the raw predictions here would discard both steps above.
    CNTK.Function loss = C.Negate(C.ReduceSum(C.ElementTimes(_target, C.Log(o)), Axis.EndStaticAxis()));
    return Out(loss);
}
/// <summary>
/// Element-wise product of a tensor and a generic operand, optionally naming the resulting node.
/// </summary>
/// <param name="a">Left operand.</param>
/// <param name="b">Right operand, converted through <c>InGeneric</c>.</param>
/// <param name="name">Name for the product node; when null the unnamed overload is used.</param>
/// <returns>The product, wrapped by <c>Out</c>.</returns>
public Tensor mul<T>(Tensor a, T b, string name = null)
{
    var left = In(a);
    var right = InGeneric(b);

    if (name != null)
    {
        return Out(C.ElementTimes(left, right, name: name));
    }

    return Out(C.ElementTimes(left, right));
}
/// <summary>
/// Rescales <paramref name="vectors"/> by <c>(s / (1 + s)) / (sqrt(s) + 1e-7)</c>, where <c>s</c> is the
/// sum of squares of the input along <paramref name="axis"/> (dimension kept).
/// </summary>
/// <param name="vectors">Input to rescale.</param>
/// <param name="name">Name given to the final element-wise product node.</param>
/// <param name="axis">Axis along which the squares are summed.</param>
/// <returns>The rescaled vectors.</returns>
C.Function squash(C.Function vectors, string name, int axis)
{
    // s = sum(vectors^2) along the requested axis, keeping that axis in the result
    var squares = CC.Square(vectors);
    var reduction_axes = new C.AxisVector(new C.Axis[] { new C.Axis(axis) });
    var sum_of_squares = CC.ReduceSum(squares, reduction_axes, keepDims: true);

    var tiny = C.Constant.Scalar(C.DataType.Float, 1e-7, computeDevice);
    var unity = C.Constant.Scalar(C.DataType.Float, 1.0, computeDevice);

    // sqrt(s) + tiny: the small constant keeps the division below away from zero
    var norm = CC.Plus(CC.Sqrt(sum_of_squares), tiny);
    var denominator = CC.Plus(sum_of_squares, unity);

    // factor = (s / (1 + s)) / (sqrt(s) + tiny)
    var factor = CC.ElementDivide(CC.ElementDivide(sum_of_squares, denominator), norm);

    return CC.ElementTimes(factor, vectors, name);
}
/// <summary>
/// Builds a capsule layer: a learned weight tensor maps the input capsules to per-output-capsule
/// predictions, which are then combined over <paramref name="routings"/> iterations of
/// softmax-weighted summation followed by <c>squash</c>.
/// </summary>
/// <param name="inputs">Input function; the first two dimensions of its output shape are read as
/// (number of input capsules, input capsule dimension).</param>
/// <param name="num_capsule">Number of output capsules.</param>
/// <param name="dim_capsule">Dimension of each output capsule.</param>
/// <param name="routings">Number of routing iterations.</param>
/// <param name="name">Name given to the returned combined function.</param>
/// <returns>The output of the last routing iteration, wrapped in a named Combine.</returns>
C.Function create_capsule_layer(C.Function inputs, int num_capsule, int dim_capsule, int routings, string name)
{
    var in_dims = inputs.Output.Shape.Dimensions;
    var in_capsules = in_dims[0];
    var in_capsule_dim = in_dims[1];

    var weights = new C.Parameter(
        new int[] { num_capsule, dim_capsule, in_capsules, in_capsule_dim },
        C.DataType.Float,
        CC.GlorotUniformInitializer(),
        computeDevice,
        name: "W");

    // Add two leading singleton dimensions so the inputs broadcast against the weights,
    // e.g. [1, 1, 1152, 8]
    inputs = CC.Reshape(inputs, new int[] { 1, 1, in_capsules, in_capsule_dim });

    // Predictions: multiply by the weights, then sum out axis 3
    var predictions = CC.ElementTimes(weights, inputs);
    predictions = CC.ReduceSum(predictions, new C.Axis(3));
    predictions = CC.Squeeze(predictions);

    C.Function outputs = null;

    // Routing logits start at zero
    var initial_logits = new C.Constant(
        new int[] { num_capsule, 1, in_capsules },
        C.DataType.Float,
        0,
        computeDevice);
    var logits = CC.Combine(new C.VariableVector() { initial_logits });

    for (int step = 0; step < routings; step++)
    {
        // Coupling coefficients: softmax of the logits along axis 0
        var coupling = CC.Softmax(logits, new C.Axis(0));

        var weighted = CC.ElementTimes(coupling, predictions);
        weighted = CC.ReduceSum(weighted, new C.Axis(2));
        weighted = CC.Squeeze(weighted);
        outputs = squash(weighted, $"squashed_{step}", 1);

        // Every iteration except the last one updates the logits with the
        // product of the current outputs and the predictions.
        bool is_last_step = (step == routings - 1);
        if (!is_last_step)
        {
            outputs = CC.Reshape(outputs, new int[] { num_capsule, dim_capsule, 1 });
            weighted = CC.ElementTimes(outputs, predictions);
            weighted = CC.ReduceSum(weighted, new C.Axis(1));
            logits = CC.Plus(logits, weighted);
        }
    }

    outputs = CC.Combine(new C.VariableVector() { outputs }, name);
    return outputs;
}
/// <summary>
/// Multiplies <paramref name="inputs"/> by a mask and flattens the result. When no mask is supplied,
/// one is derived from the inputs: a one-hot encoding of the argmax of their vector norms.
/// </summary>
/// <param name="inputs">Function whose output is masked.</param>
/// <param name="mask">Mask to apply, or null to derive it from <paramref name="inputs"/>.</param>
/// <returns>The masked inputs after Flatten and Squeeze.</returns>
C.Function get_mask_and_infer_from_last_dimension(C.Function inputs, C.Function mask)
{
    if (mask == null)
    {
        var dims = inputs.Output.Shape.Dimensions.ToArray();
        var last_index = dims.Length - 1;

        // Norm of each vector: sqrt of the sum of squares along axis (last_index - 1)
        var norms = CC.Sqrt(CC.ReduceSum(CC.Square(inputs), new C.Axis(last_index - 1)));
        norms = CC.Squeeze(norms);
        System.Diagnostics.Debug.Assert(norms.Output.Shape.Dimensions.Count == 1);

        // One-hot encode the position of the largest norm, with dims[0] classes
        norms = CC.Argmax(norms, new C.Axis(0));
        mask = CC.OneHotOp(norms, numClass: (uint)dims[0], outputSparse: false, axis: new C.Axis(0));
    }

    // Append a trailing singleton dimension to the mask before multiplying it with the inputs
    var expanded_shape = mask.Output.Shape.AppendShape(new int[] { 1 });
    mask = CC.Reshape(mask, expanded_shape);

    var selected = CC.ElementTimes(inputs, mask);
    selected = CC.Flatten(selected);
    selected = CC.Squeeze(selected);
    return selected;
}
/// <summary>
/// Builds a single capsule over placeholder inputs: a recognition layer feeds a probability unit and a
/// 2-unit prediction; the prediction plus the extra input drives a generator whose output is scaled
/// by the probability and reshaped to <paramref name="input_shape"/>.
/// </summary>
/// <param name="name">Not referenced inside this method.</param>
/// <param name="input_shape">Shape of the image placeholder and of the returned output.</param>
/// <param name="extra_input_shape">Shape of the extra-input placeholder.</param>
/// <param name="recognizer_dim">Width of the recognition layer.</param>
/// <param name="generator_dim">Width of the generator layer.</param>
/// <returns>The capsule output reshaped to <paramref name="input_shape"/>.</returns>
C.Function create_capsule(string name, int[] input_shape, int[] extra_input_shape, int recognizer_dim, int generator_dim)
{
    var flat_size = Util.np_prod(input_shape);

    var image = Util.placeholderVariable(input_shape, "x");
    var extra = Util.placeholderVariable(extra_input_shape, "extra_input");

    // Recognition side
    var flat_image = CC.Flatten(image);
    var recognized = Layers.Dense(flat_image, recognizer_dim, computeDevice, "recognition_layer", CC.Sigmoid);
    var presence = Layers.Dense(recognized, 1, computeDevice, "probability", CC.Sigmoid);
    var predicted_xy = Layers.Dense(recognized, 2, computeDevice, "xy_prediction");

    // Add the externally supplied input to the learnt prediction
    var shifted_xy = CC.Plus(predicted_xy, extra, "learnt_transformation_extended");

    // Generation side
    var generated = Layers.Dense(shifted_xy, generator_dim, computeDevice, "generator_layer", CC.Sigmoid);
    var flat_output = Layers.Dense(generated, flat_size, computeDevice, "output");

    // Scale the generated output by the capsule's probability unit
    flat_output = CC.ElementTimes(flat_output, presence);

    return CC.Reshape(flat_output, input_shape);
}
/// <summary>
/// Element-wise product of a tensor and a generic operand.
/// </summary>
/// <param name="a">Left operand.</param>
/// <param name="b">Right operand, converted through <c>InGeneric</c>.</param>
/// <returns>The product, wrapped by <c>Out</c>.</returns>
public Tensor mul<T>(Tensor a, T b)
{
    var left = In(a);
    var right = InGeneric(b);
    return Out(C.ElementTimes(left, right));
}
/// <summary>
/// Element-wise product of two tensors.
/// </summary>
/// <param name="a">Left operand.</param>
/// <param name="b">Right operand.</param>
/// <returns>The product, wrapped by <c>Out</c>.</returns>
public Tensor mul(Tensor a, Tensor b)
{
    var left = In(a);
    var right = In(b);
    return Out(C.ElementTimes(left, right));
}
/// <summary>
/// Element-wise product of a scalar and a tensor; the scalar is converted using the tensor's shape.
/// </summary>
/// <param name="a">Scalar left operand.</param>
/// <param name="b">Tensor right operand; its <c>Shape</c> is passed along when converting the scalar.</param>
/// <returns>The product, wrapped by <c>Out</c>.</returns>
public Tensor Mul(float a, Tensor b)
{
    var scalar_operand = In(a, b.Shape);
    var tensor_operand = In(b);
    return Out(C.ElementTimes(scalar_operand, tensor_operand, ""));
}
/// <summary>
/// Element-wise product of a tensor and a scalar; the scalar is converted using the tensor's shape.
/// </summary>
/// <param name="a">Tensor left operand; its <c>Shape</c> is passed along when converting the scalar.</param>
/// <param name="b">Scalar right operand.</param>
/// <returns>The product, wrapped by <c>Out</c>.</returns>
public Tensor Mul(Tensor a, float b)
{
    var tensor_operand = In(a);
    var scalar_operand = In(b, a.Shape);
    return Out(C.ElementTimes(tensor_operand, scalar_operand, ""));
}
/// <summary>
/// Assembles the capsule-network graph (convolution, primary capsules, digit capsules and decoder),
/// its combined loss, the classification-error metric, and the learner, trainer and evaluator,
/// storing each of them in the corresponding field.
/// </summary>
void create_network()
{
    imageVariable = Util.inputVariable(input_shape, "image");

    // Feature extraction followed by the two capsule stages
    var conv1 = Layers.Convolution2D(
        imageVariable,
        256,
        new int[] { 9, 9 },
        computeDevice,
        use_padding: false,
        activation: CC.ReLU,
        name: "conv1");

    var primarycaps = create_primary_cap(
        conv1,
        dim_capsule: 8,
        n_channels: 32,
        kernel_size: new int[] { 9, 9 },
        strides: new int[] { 2, 2 },
        pad: false);

    var digitcaps = create_capsule_layer(
        primarycaps,
        num_capsule: 10,
        dim_capsule: 16,
        routings: routings,
        name: "digitcaps");

    var out_caps = get_length_and_remove_last_dimension(digitcaps, name: "capsnet");

    categoricalLabel = Util.inputVariable(new int[] { 10 }, "label");

    // Two masked views of the digit capsules: one masked by the label input,
    // one with the mask derived from the capsules themselves.
    var label_mask = CC.Combine(new C.VariableVector() { categoricalLabel });
    var masked_by_y = get_mask_and_infer_from_last_dimension(digitcaps, label_mask);
    var masked = get_mask_and_infer_from_last_dimension(digitcaps, null);

    var decoder = create_decoder(masked.Output.Shape.Dimensions.ToArray());
    var decoder_output_training = Model.invoke_model(decoder, new C.Variable[] { masked_by_y });
    // Built here but not referenced again within this method.
    var decoder_output_evaluation = Model.invoke_model(decoder, new C.Variable[] { masked });

    network = CC.Combine(
        new C.VariableVector() { out_caps, decoder_output_training },
        "overall_training_network");
    Logging.log_number_of_parameters(network);

    // first component of the loss:
    //   y_true * max(0, 0.9 - y_pred)^2 + 0.5 * (1 - y_true) * max(0, y_pred - 0.1)^2
    var y_true = categoricalLabel;
    var y_pred = out_caps;

    var present_margin = CC.ElementMax(DC(0), CC.Minus(DC(0.9), y_pred), "");
    var present_term = CC.ElementTimes(y_true, CC.Square(present_margin));

    var absent_weight = DC(0.5);
    var absent_indicator = CC.Minus(DC(1), y_true);
    var absent_margin = CC.ElementMax(DC(0), CC.Minus(y_pred, DC(0.1)), "");
    var absent_term = CC.ElementTimes(
        absent_weight,
        CC.ElementTimes(absent_indicator, CC.Square(absent_margin)));

    var digit_loss = CC.Plus(present_term, absent_term);
    digit_loss = CC.ReduceSum(digit_loss, C.Axis.AllStaticAxes());

    // second component of the loss: squared error of the decoder output against the
    // input image, divided by the number of elements in the decoder output
    var num_pixels_at_output = Util.np_prod(decoder_output_training.Output.Shape.Dimensions.ToArray());
    var squared_error = CC.SquaredError(decoder_output_training, imageVariable);
    var image_mse = CC.ElementDivide(squared_error, DC(num_pixels_at_output));

    // Total loss: digit loss plus the reconstruction term weighted by 0.35
    loss_function = CC.Plus(digit_loss, CC.ElementTimes(DC(0.35), image_mse));
    eval_function = CC.ClassificationError(y_pred, y_true);

    var trainable_parameters = new C.ParameterVector(network.Parameters().ToArray());
    var learning_rate_schedule = new C.TrainingParameterScheduleDouble(0.001 * batch_size, (uint)batch_size);
    var momentum_schedule = new C.TrainingParameterScheduleDouble(0.9);
    var variance_momentum_schedule = new C.TrainingParameterScheduleDouble(0.99);
    learner = CC.AdamLearner(
        trainable_parameters,
        learning_rate_schedule,
        momentum_schedule,
        true,
        variance_momentum_schedule);

    var learners = new C.LearnerVector(new C.Learner[] { learner });
    trainer = CC.CreateTrainer(network, loss_function, eval_function, learners);
    evaluator = CC.CreateEvaluator(eval_function);
}