        public Tensor binary_crossentropy(Tensor output, Tensor target, bool from_logits = false)
        {
            log(new { output, target, from_logits });

            var _output = new Variable(In(output).function);
            var _target = new Variable(In(target).function);

            if (from_logits)
            {
                _output = C.Sigmoid(_output);
            }

            // clip bounds: keep predictions inside [epsilon, 1 - epsilon]
            var eps   = InConstant(epsilon());
            var omeps = InConstant(1.0 - epsilon());

            // avoid numerical instability with _EPSILON clipping
            _output = C.Clip(_output, eps, omeps);
            // loss = -target * log(output) - (1 - target) * log(1 - output)
            var a = new Variable(C.Negate(C.ElementTimes(_target, C.Log(_output))));
            var b = new Variable(C.Negate(
                                     C.ElementTimes(C.Minus(InConstant(1.0), _target),
                                                    C.Log(C.Minus(InConstant(1.0), _output)))));

            _output = a + b;
            return(Out(_output));
        }
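For reference, the quantity built from a and b above is the element-wise binary cross-entropy, with the prediction clipped away from 0 and 1 so the logarithms stay finite; when from_logits is true the raw output is first squashed through the sigmoid:

    loss(t, o) = -( t * log(o') + (1 - t) * log(1 - o') ),   o' = clip(o, epsilon, 1 - epsilon)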
Example #2
        void create_network()
        {
            imageVariable            = Util.inputVariable(input_shape, "image");
            transformationVariable   = Util.inputVariable(extra_input_shape, "transformation");
            transformedImageVariable = Util.inputVariable(input_shape, "transformed_image");
            network = create_transforming_autoencoder(num_capsules, input_shape, extra_input_shape, recognizer_dim, generator_dim);
            Logging.log_number_of_parameters(network, show_filters: false);

            var mse_normalizing_factor = C.Constant.Scalar(C.DataType.Float, 1.0 / network.Output.Shape.TotalSize, computeDevice);
            var squared_error          = CC.SquaredError(network.Output, transformedImageVariable);
            var mse = CC.ElementTimes(squared_error, mse_normalizing_factor);

            loss_function = mse;
            eval_function = mse;

            learner = CC.AdamLearner(
                new C.ParameterVector(network.Parameters().ToArray()),
                new C.TrainingParameterScheduleDouble(learning_rate * batch_size, (uint)batch_size),
                new C.TrainingParameterScheduleDouble(0.9),
                true,
                new C.TrainingParameterScheduleDouble(0.99));

            trainer = CC.CreateTrainer(network, loss_function, new C.LearnerVector(new C.Learner[] { learner }));

            evaluator = CC.CreateEvaluator(eval_function);
        }
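The loss here is a normalized squared error: CNTK's SquaredError reduces the squared differences over the whole output tensor, so multiplying by mse_normalizing_factor turns it into a per-element mean,

    mse = (1 / N) * sum_i (network_i - transformed_image_i)^2,   N = network.Output.Shape.TotalSize

which keeps the loss scale independent of the image resolution.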
Example #3
        public Tensor categorical_crossentropy(Tensor target, Tensor output, bool from_logits = false)
        {
            // https://github.com/fchollet/keras/blob/f65a56fb65062c8d14d215c9f4b1015b97cc5bf3/keras/backend/cntk_backend.py#L1480

            var _output = In(output);
            var _target = In(target);

            if (from_logits)
            {
                var result = C.CrossEntropyWithSoftmax(_output, _target);
                // cntk's result shape is (batch, 1), while keras expects (batch, )
                CNTK.Function r = C.Reshape(result, NDShape.CreateNDShape(new int[] { }));
                return(Out(r));
            }
            else
            {
                // scale preds so that the class probas of each sample sum to 1
                var o     = C.ElementDivide(_output.function, C.ReduceSum(_output, Axis.EndStaticAxis()));
                var eps   = Constant.Scalar(epsilon(), DeviceDescriptor.CPUDevice);
                var omeps = Constant.Scalar(1.0 - epsilon(), DeviceDescriptor.CPUDevice);
                // avoid numerical instability with _EPSILON clipping
                o = C.Clip(o, eps, omeps);
                CNTK.Function r = C.Negate(C.ReduceSum(C.ElementTimes(_target, C.Log(o)), Axis.EndStaticAxis()));
                return(Out(r));
            }
        }
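Both branches compute the same quantity. With logits, CrossEntropyWithSoftmax fuses the softmax and the negative log-likelihood; otherwise the predictions are renormalized along the last static axis, clipped for stability (as in the Keras backend linked above), and reduced:

    CE(t, o) = -sum_k t_k * log( clip( o_k / sum_j o_j, epsilon, 1 - epsilon ) )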
 public Tensor mul <T>(Tensor a, T b, string name = null)
 {
     if (name == null)
     {
         return(Out(C.ElementTimes(In(a), InGeneric(b))));
     }
     return(Out(C.ElementTimes(In(a), InGeneric(b), name: name)));
 }
Example #5
        C.Function squash(C.Function vectors, string name, int axis)
        {
            var squared_values         = CC.Square(vectors);
            var s_squared_sum          = CC.ReduceSum(squared_values, new C.AxisVector(new C.Axis[] { new C.Axis(axis) }), keepDims: true);
            var epsilon                = C.Constant.Scalar(C.DataType.Float, 1e-7, computeDevice);
            var one                    = C.Constant.Scalar(C.DataType.Float, 1.0, computeDevice);
            var normalize_factor       = CC.Plus(CC.Sqrt(s_squared_sum), epsilon);
            var one_plus_s_squared_sum = CC.Plus(s_squared_sum, one);
            var scale                  = CC.ElementDivide(s_squared_sum, one_plus_s_squared_sum);

            scale = CC.ElementDivide(scale, normalize_factor);
            var result = CC.ElementTimes(scale, vectors, name);

            return(result);
        }
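This is the squash non-linearity from the capsule-network papers: short vectors are shrunk towards zero, long vectors approach unit length, and the 1e-7 term keeps the division stable near the zero vector. With s_squared_sum written as ||s||^2, the function computes

    squash(s) = ( ||s||^2 / (1 + ||s||^2) ) * ( s / (||s|| + 1e-7) )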
Example #6
        C.Function create_capsule_layer(C.Function inputs, int num_capsule, int dim_capsule, int routings, string name)
        {
            var inputs_shape      = inputs.Output.Shape.Dimensions;
            var input_num_capsule = inputs_shape[0];
            var input_dim_capsule = inputs_shape[1];
            var W = new C.Parameter(
                new int[] { num_capsule, dim_capsule, input_num_capsule, input_dim_capsule },
                C.DataType.Float,
                CC.GlorotUniformInitializer(),
                computeDevice,
                name: "W");

            inputs = CC.Reshape(inputs, new int[] { 1, 1, input_num_capsule, input_dim_capsule }); // shape: [1, 1, input_num_capsule, input_dim_capsule], e.g. [1, 1, 1152, 8]
            var inputs_hat = CC.ElementTimes(W, inputs);

            inputs_hat = CC.ReduceSum(inputs_hat, new C.Axis(3));
            inputs_hat = CC.Squeeze(inputs_hat);

            C.Function outputs = null;
            var        zeros   = new C.Constant(new int[] { num_capsule, 1, input_num_capsule }, C.DataType.Float, 0, computeDevice);
            var        b       = CC.Combine(new C.VariableVector()
            {
                zeros
            });

            for (int i = 0; i < routings; i++)
            {
                var c = CC.Softmax(b, new C.Axis(0));
                var batch_dot_result = CC.ElementTimes(c, inputs_hat);
                batch_dot_result = CC.ReduceSum(batch_dot_result, new C.Axis(2));
                batch_dot_result = CC.Squeeze(batch_dot_result);
                outputs          = squash(batch_dot_result, name: $"squashed_{i}", axis: 1);
                if (i < (routings - 1))
                {
                    outputs          = CC.Reshape(outputs, new int[] { num_capsule, dim_capsule, 1 });
                    batch_dot_result = CC.ElementTimes(outputs, inputs_hat);
                    batch_dot_result = CC.ReduceSum(batch_dot_result, new C.Axis(1));
                    b = CC.Plus(b, batch_dot_result);
                }
            }
            outputs = CC.Combine(new C.VariableVector()
            {
                outputs
            }, name);
            return(outputs);
        }
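The loop is routing by agreement: the coupling logits b start at zero and are nudged, on every iteration, towards the prediction vectors that agree with the current capsule outputs. Per iteration, with u_hat = W * u being the prediction vectors computed once before the loop (inputs_hat):

    c = softmax(b)               over the output-capsule axis
    s = sum_i c_i * u_hat_i      weighted sum over the input capsules
    v = squash(s)
    b = b + <u_hat_i, v>         agreement update, skipped after the last iteration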
Example #7
        C.Function get_mask_and_infer_from_last_dimension(C.Function inputs, C.Function mask)
        {
            if (mask == null)
            {
                var inputs_shape = inputs.Output.Shape.Dimensions.ToArray();
                var ndims        = inputs_shape.Length - 1;
                var x            = CC.Sqrt(CC.ReduceSum(CC.Square(inputs), new C.Axis(ndims - 1)));
                x = CC.Squeeze(x);
                System.Diagnostics.Debug.Assert(x.Output.Shape.Dimensions.Count == 1);
                x    = CC.Argmax(x, new C.Axis(0));
                mask = CC.OneHotOp(x, numClass: (uint)inputs_shape[0], outputSparse: false, axis: new C.Axis(0));
            }
            mask = CC.Reshape(mask, mask.Output.Shape.AppendShape(new int[] { 1 }));
            var masked = CC.ElementTimes(inputs, mask);

            masked = CC.Flatten(masked);
            masked = CC.Squeeze(masked);
            return(masked);
        }
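When no mask is supplied (inference), the capsule with the largest length is selected and converted to a one-hot mask; during training the true label is passed in as the mask instead. Concretely:

    mask   = one_hot( argmax_j ||v_j|| )    when mask == null
    masked = flatten( v * mask )

so only the selected capsule's pose vector survives and is handed on to the decoder.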
Example #8
        C.Function create_capsule(string name, int[] input_shape, int[] extra_input_shape, int recognizer_dim, int generator_dim)
        {
            var input_dim   = Util.np_prod(input_shape);
            var x           = Util.placeholderVariable(input_shape, "x");
            var extra_input = Util.placeholderVariable(extra_input_shape, "extra_input");

            var x_flat                         = CC.Flatten(x);
            var recognition                    = Layers.Dense(x_flat, recognizer_dim, computeDevice, "recognition_layer", CC.Sigmoid);
            var probability                    = Layers.Dense(recognition, 1, computeDevice, "probability", CC.Sigmoid);
            var learnt_transformation          = Layers.Dense(recognition, 2, computeDevice, "xy_prediction");
            var learnt_transformation_extended = CC.Plus(learnt_transformation, extra_input, "learnt_transformation_extended");

            var generation = Layers.Dense(learnt_transformation_extended, generator_dim, computeDevice, "generator_layer", CC.Sigmoid);
            var out_flat   = Layers.Dense(generation, input_dim, computeDevice, "output");

            out_flat = CC.ElementTimes(out_flat, probability);
            var output = CC.Reshape(out_flat, input_shape);

            return(output);
        }
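Each capsule is a small recognize-transform-generate pipeline. Writing the Dense layers as affine maps (sigma is the sigmoid; xy_prediction and output are linear since no activation is passed), the data flow is roughly

    r   = sigma(W_r * flatten(x) + b_r)      recognition_layer
    p   = sigma(w_p * r + b_p)               probability the entity is present
    d   = (W_t * r + b_t) + extra_input      learnt (x, y) shift plus the requested transformation
    g   = sigma(W_g * d + b_g)               generator_layer
    out = reshape(p * (W_o * g + b_o), input_shape)

The W and b symbols are only shorthand for the parameters created inside Layers.Dense; they are not named that way in the code.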
Example #9
 public Tensor mul <T>(Tensor a, T b)
 {
     return(Out(C.ElementTimes(In(a), InGeneric(b))));
 }
Example #10
 public Tensor mul(Tensor a, Tensor b)
 {
     return(Out(C.ElementTimes(In(a), In(b))));
 }
Example #11
 public Tensor Mul(float a, Tensor b)
 {
     return(Out(C.ElementTimes(In(a, b.Shape), In(b), "")));
 }
Example #12
 public Tensor Mul(Tensor a, float b)
 {
     return(Out(C.ElementTimes(In(a), In(b, a.Shape), "")));
 }
Example #13
        void create_network()
        {
            imageVariable = Util.inputVariable(input_shape, "image");
            var conv1 = Layers.Convolution2D(
                imageVariable, 256, new int[] { 9, 9 }, computeDevice,
                use_padding: false, activation: CC.ReLU, name: "conv1");

            var primarycaps = create_primary_cap(
                conv1, dim_capsule: 8, n_channels: 32,
                kernel_size: new int[] { 9, 9 }, strides: new int[] { 2, 2 }, pad: false);

            var digitcaps = create_capsule_layer(
                primarycaps, num_capsule: 10, dim_capsule: 16,
                routings: routings, name: "digitcaps");

            var out_caps = get_length_and_remove_last_dimension(digitcaps, name: "capsnet");

            categoricalLabel = Util.inputVariable(new int[] { 10 }, "label");
            var masked_by_y = get_mask_and_infer_from_last_dimension(digitcaps, CC.Combine(new C.VariableVector()
            {
                categoricalLabel
            }));
            var masked = get_mask_and_infer_from_last_dimension(digitcaps, null);

            var decoder = create_decoder(masked.Output.Shape.Dimensions.ToArray());
            var decoder_output_training   = Model.invoke_model(decoder, new C.Variable[] { masked_by_y });
            var decoder_output_evaluation = Model.invoke_model(decoder, new C.Variable[] { masked });

            network = CC.Combine(new C.VariableVector()
            {
                out_caps, decoder_output_training
            }, "overall_training_network");
            Logging.log_number_of_parameters(network);

            // first component of the loss
            var y_true     = categoricalLabel;
            var y_pred     = out_caps;
            var digit_loss = CC.Plus(
                CC.ElementTimes(y_true, CC.Square(CC.ElementMax(DC(0), CC.Minus(DC(0.9), y_pred), ""))),
                CC.ElementTimes(DC(0.5),
                                CC.ElementTimes(CC.Minus(DC(1), y_true), CC.Square(CC.ElementMax(DC(0), CC.Minus(y_pred, DC(0.1)), "")))));

            digit_loss = CC.ReduceSum(digit_loss, C.Axis.AllStaticAxes());

            // second component of the loss
            var num_pixels_at_output = Util.np_prod(decoder_output_training.Output.Shape.Dimensions.ToArray());
            var squared_error        = CC.SquaredError(decoder_output_training, imageVariable);
            var image_mse            = CC.ElementDivide(squared_error, DC(num_pixels_at_output));

            loss_function = CC.Plus(digit_loss, CC.ElementTimes(DC(0.35), image_mse));
            eval_function = CC.ClassificationError(y_pred, y_true);

            learner = CC.AdamLearner(
                new C.ParameterVector(network.Parameters().ToArray()),
                new C.TrainingParameterScheduleDouble(0.001 * batch_size, (uint)batch_size),
                new C.TrainingParameterScheduleDouble(0.9),
                true,
                new C.TrainingParameterScheduleDouble(0.99));

            trainer   = CC.CreateTrainer(network, loss_function, eval_function, new C.LearnerVector(new C.Learner[] { learner }));
            evaluator = CC.CreateEvaluator(eval_function);
        }
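The digit_loss expression is the capsule-network margin loss, summed over the ten classes and then combined with a down-weighted reconstruction error. A minimal sketch that factors it into a helper; it assumes the same C/CC aliases and that DC(x) wraps a double into a scalar constant on computeDevice, as the code above suggests:

        C.Function margin_loss(C.Variable y_true, C.Variable y_pred,
                               double m_plus = 0.9, double m_minus = 0.1, double lambda_down_weight = 0.5)
        {
            // present classes: penalize capsule lengths that fall below m_plus
            var present_term = CC.ElementTimes(
                y_true,
                CC.Square(CC.ElementMax(DC(0), CC.Minus(DC(m_plus), y_pred), "")));

            // absent classes: penalize capsule lengths that exceed m_minus, down-weighted
            var absent_term = CC.ElementTimes(
                DC(lambda_down_weight),
                CC.ElementTimes(CC.Minus(DC(1), y_true),
                                CC.Square(CC.ElementMax(DC(0), CC.Minus(y_pred, DC(m_minus)), ""))));

            // reduce over the class axis so the loss is a scalar per sample
            return CC.ReduceSum(CC.Plus(present_term, absent_term), C.Axis.AllStaticAxes());
        }

With it, the loss above becomes loss_function = CC.Plus(margin_loss(y_true, y_pred), CC.ElementTimes(DC(0.35), image_mse)).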