Example #1
        /// <summary>
        /// Creates a new Function from specified operands.
        /// </summary>
        /// <param name="operands">Variables whose outputs are to be combined.</param>
        /// <returns>A new Function whose outputs are the union of the operands' outputs.</returns>
        public static Function Combine(System.Collections.Generic.IEnumerable<Variable> operands)
        {
            var varVect = new VariableVector();

            foreach (var v in operands)
            {
                varVect.Add(v);
            }
            return CNTKLib.Combine(varVect);
        }
Example #2
        C.Function create_capsule_layer(C.Function inputs, int num_capsule, int dim_capsule, int routings, string name)
        {
            var inputs_shape      = inputs.Output.Shape.Dimensions;
            var input_num_capsule = inputs_shape[0];
            var input_dim_capsule = inputs_shape[1];
            var W = new C.Parameter(
                new int[] { num_capsule, dim_capsule, input_num_capsule, input_dim_capsule },
                C.DataType.Float,
                CC.GlorotUniformInitializer(),
                computeDevice,
                name: "W");

            inputs = CC.Reshape(inputs, new int[] { 1, 1, input_num_capsule, input_dim_capsule }); // shape: [1, 1, 1152, 8]
            var inputs_hat = CC.ElementTimes(W, inputs);

            inputs_hat = CC.ReduceSum(inputs_hat, new C.Axis(3));
            inputs_hat = CC.Squeeze(inputs_hat);

            C.Function outputs = null;
            var        zeros   = new C.Constant(new int[] { num_capsule, 1, input_num_capsule }, C.DataType.Float, 0, computeDevice);
            var        b       = CC.Combine(new C.VariableVector()
            {
                zeros
            });

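            // Dynamic routing by agreement: each iteration turns the coupling
            // logits b into coupling coefficients via a softmax, forms a weighted
            // sum of the prediction vectors, squashes it, and then increases b
            // where the lower-level predictions agree with the resulting output.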
            for (int i = 0; i < routings; i++)
            {
                var c = CC.Softmax(b, new C.Axis(0));
                var batch_dot_result = CC.ElementTimes(c, inputs_hat);
                batch_dot_result = CC.ReduceSum(batch_dot_result, new C.Axis(2));
                batch_dot_result = CC.Squeeze(batch_dot_result);
                outputs          = squash(batch_dot_result, name: $"squashed_{i}", axis: 1);
                if (i < (routings - 1))
                {
                    outputs          = CC.Reshape(outputs, new int[] { num_capsule, dim_capsule, 1 });
                    batch_dot_result = CC.ElementTimes(outputs, inputs_hat);
                    batch_dot_result = CC.ReduceSum(batch_dot_result, new C.Axis(1));
                    b = CC.Plus(b, batch_dot_result);
                }
            }
            outputs = CC.Combine(new C.VariableVector()
            {
                outputs
            }, name);
            return outputs;
        }
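
The method above calls a squash helper that is not shown in this snippet. For reference, here is a minimal sketch of the standard capsule squashing nonlinearity, v = (|s|^2 / (1 + |s|^2)) * (s / |s|), written against the call signature used above; the actual helper in the source may differ:

        // Hypothetical sketch of squash: scales each capsule vector to a length
        // in [0, 1) while preserving its direction; eps guards the norm against
        // division by zero.
        C.Function squash(C.Function input, string name, int axis)
        {
            var squared_norm = CC.ReduceSum(CC.Square(input), new C.Axis(axis));
            var norm         = CC.Sqrt(squared_norm);
            var one          = C.Constant.Scalar(C.DataType.Float, 1.0);
            var eps          = C.Constant.Scalar(C.DataType.Float, 1e-7);
            var scale        = CC.ElementDivide(squared_norm,
                                                CC.ElementTimes(CC.Plus(one, squared_norm), CC.Plus(norm, eps)));
            return CC.ElementTimes(scale, input, name);
        }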
Example #3
        public CNTKFunction(CNTKBackend c, List<Variable> inputs, CNTK.Function[] outputs, List<List<Tensor>> updates, string name)
        {
            this.c                 = c;
            this.placeholders      = inputs;
            this.trainer           = null;
            this.unrelated_updates = null;
            this.updates           = updates;
            if (updates.Count > 0)
            {
                if (len(outputs) <= 0)
                {
                    throw new Exception("CNTK backend: updates were specified but no outputs were given.");
                }

                this.loss = outputs[0];
                // group the updates by gradient placeholder
                var u_ops             = new List<CNTK.Function>();
                var unrelated_updates = new List<CNTK.Function>();
                foreach (List<Tensor> update in updates)
                {
                    CNTK.Function u;

                    if (update.Count == 1)
                    {
                        u = c.In(update[0]);
                    }
                    else if (update.Count == 2)
                    {
                        u = C.Assign(c.In(update[0]), c.In(update[1]));
                    }
                    else
                    {
                        throw new NotImplementedException();
                    }

                    if (u.Inputs.Count == 0)
                    {
                        u_ops.Add(u);
                    }
                    else
                    {
                        unrelated_updates.Add(u);
                    }
                }

                var update_func = C.Combine(new VariableVector(u_ops.Select(u => u.Output).ToArray()));

                CNTK.Function[] grads = update_func.FindAllWithName("keras_grad_placeholder").ToArray();

                var u_list = new List<CNTK.Function>();
                var p_list = new List<CNTK.Parameter>();
                foreach (CNTK.Function g in grads)
                {
                    if (c.grad_parameter_dict.ContainsKey(g))
                    {
                        p_list.Add(c.grad_parameter_dict[g]);
                        u_list.Add(g);
                    }
                    else
                    {
                        throw new Exception($"CNTK backend: when constructing trainer, found gradient node {g} which is not related to any parameters in the model. Please double check how the gradient node is constructed.");
                    }
                }

                if (len(u_list) > 0)
                {
                    Learner learner = Learner.SGDLearner(p_list, new TrainingParameterScheduleDouble(0));

                    var criterion = (len(outputs) > 1) ?
                                    C.Combine(new VariableVector(new[] { outputs[0], outputs[1] })) :
                                    outputs[0];

                    this.trainer = Trainer.CreateTrainer(model: outputs[0], lossFunction: criterion, evaluationFunction: null, parameterLearners: new[] { learner });

                    this.trainer_output = new UnorderedMapVariableValuePtr();
                    foreach (CNTK.Function f in outputs)
                    {
                        this.trainer_output.Add(f, null);
                    }
                }
                else if (len(u_ops) > 0)
                {
                    unrelated_updates.AddRange(u_ops);
                }

                if (len(unrelated_updates) > 0)
                {
                    this.unrelated_updates = C.Combine(new VariableVector(unrelated_updates.Select(_ => _.Output).ToArray()));
                }
            }

            if (this.trainer == null)
            {
                this.metrics_outputs = outputs.Select(f => f.Output).ToArray();

                this.metrics_func = C.Combine(new VariableVector(this.metrics_outputs));
                // CNTK's Trainer can handle only a loss plus one metric; with more
                // than two outputs, the extra metrics must be evaluated manually.
            }
            else if (len(outputs) > 2)
            {
                this.metrics_outputs = Matrix.Get(outputs, 2, 0).Select(f => f.Output).ToArray();

                this.metrics_func = C.Combine(new VariableVector(this.metrics_outputs));
            }
            else
            {
                this.metrics_func = null;
            }
        }
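
This constructor (and the near-identical one in Example #5 below) relies on a Python-style len helper that is not part of the snippet; presumably it is a thin wrapper over Count, along these lines (arrays also satisfy ICollection<T>, so one overload covers both the array and List arguments seen above):

        // Hypothetical helper assumed by the snippets on this page.
        static int len<T>(System.Collections.Generic.ICollection<T> items)
        {
            return items.Count;
        }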
Example #4
        /// <summary>
        /// Creates a new Function instance which combines the outputs of the specified list of 'operands' Functions, such that the 'Outputs' of the
        /// new 'Function' are the union of the 'Outputs' of each of the specified 'operands' Functions.
        /// E.g., when creating a classification model, the CrossEntropy loss Function and the ClassificationError Function typically form the two roots
        /// of the computation graph, which can be "Combine"d to create a single Function with two outputs; viz. the CrossEntropy loss and the ClassificationError output.
        /// </summary>
        /// <param name="operands">Variables whose functions' outputs are to be combined.</param>
        /// <param name="name">Name of the new combined Function.</param>
        /// <returns>A new Function whose outputs are the union of the operands' outputs.</returns>
        public static Function Combine(IList<Variable> operands, string name = "")
        {
            VariableVector operandVector = Helper.AsVariableVector(operands);

            return CNTKLib.Combine(operandVector, name);
        }
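
As the summary above describes, a typical use is to merge a loss root and an evaluation root of the same graph into one Function. A minimal sketch, where model and labels stand for a network output Variable and a label Variable built elsewhere:

        // Hypothetical usage: combine two roots of the computation graph so a
        // single Function exposes both the loss and the error as outputs.
        var loss  = CNTKLib.CrossEntropyWithSoftmax(model, labels);
        var error = CNTKLib.ClassificationError(model, labels);
        Function combined = Combine(new Variable[] { loss, error }, "loss_and_error");
        // combined.Outputs[0] is the loss; combined.Outputs[1] is the error.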
Example #5
        public CNTKFunction(CNTKBackend c, Variable[] inputs, CNTK.Variable[] outputs, List<List<Tensor>> updates, string name)
        {
            // https://github.com/fchollet/keras/blob/f65a56fb65062c8d14d215c9f4b1015b97cc5bf3/keras/backend/cntk_backend.py#L1501
            this.c                 = c;
            this.placeholders      = inputs;
            this.trainer           = null;
            this.unrelated_updates = null;
            this.updates           = updates;
            if (updates.Count > 0)
            {
                if (len(outputs) <= 0)
                {
                    throw new Exception("CNTK backend: updates were specified but no outputs were given.");
                }

                this.loss = outputs[0];
                // group the updates by gradient placeholder
                var u_ops             = new List<CNTK.Function>();
                var unrelated_updates = new List<CNTK.Function>();
                foreach (List<Tensor> update in updates)
                {
                    CNTK.Function u;

                    if (update.Count == 1)
                    {
                        u = c.In(update[0]);
                    }
                    else if (update.Count == 2)
                    {
                        u = C.Assign(c.In(update[0]), c.In(update[1]));
                    }
                    else
                    {
                        throw new NotImplementedException();
                    }

                    if (u.Arguments.Count == 0)
                    {
                        u_ops.Add(u);
                    }
                    else
                    {
                        unrelated_updates.Add(u);
                    }
                }

                var update_func = C.Combine(new VariableVector(u_ops.Select(u => u.Output).ToArray()));

                CNTK.Constant[] grads = update_func.Inputs.Where(x => x.Name == "keras_grad_placeholder").Select(x => new Constant(x)).ToArray();

                var u_list = new List<CNTK.Constant>();
                var p_list = new List<CNTK.Parameter>();
                foreach (CNTK.Constant g in grads)
                {
                    if (c.grad_parameter_dict.ContainsKey(g.Uid))
                    {
                        p_list.Add(c.grad_parameter_dict[g.Uid]);
                        u_list.Add(g);
                    }
                    else
                    {
                        throw new Exception($"CNTK backend: when constructing trainer, found gradient node {g} which is not related to any parameters in the model. Please double check how the gradient node is constructed.");
                    }
                }

                if (len(u_list) > 0)
                {
                    Learner learner = Learner.SGDLearner(p_list, new TrainingParameterScheduleDouble(1));

                    this.trainer = Trainer.CreateTrainer(model: outputs[0],
                                                         lossFunction: outputs[0],
                                                         evaluationFunction: outputs[1],
                                                         parameterLearners: new[] { learner });
                }
                else if (len(u_ops) > 0)
                {
                    unrelated_updates.AddRange(u_ops);
                }

                if (len(unrelated_updates) > 0)
                {
                    this.unrelated_updates = C.Combine(new VariableVector(unrelated_updates.Select(_ => _.Output).ToArray()));
                }
            }

            if (this.trainer == null)
            {
                this.metrics_outputs = outputs;

                this.metrics_func = C.Combine(new VariableVector(this.metrics_outputs));
                // CNTK's Trainer can handle only a loss plus one metric; with more
                // than two outputs, the extra metrics must be evaluated manually.
            }
            else if (len(outputs) > 2)
            {
                this.metrics_outputs = Matrix.Get(outputs, 2, 0);

                this.metrics_func = C.Combine(new VariableVector(this.metrics_outputs));
            }
            else
            {
                this.metrics_func = null;
            }
        }
Example #6
        void create_network()
        {
            imageVariable = Util.inputVariable(input_shape, "image");
            var conv1 = Layers.Convolution2D(
                imageVariable, 256, new int[] { 9, 9 }, computeDevice,
                use_padding: false, activation: CC.ReLU, name: "conv1");

            var primarycaps = create_primary_cap(
                conv1, dim_capsule: 8, n_channels: 32,
                kernel_size: new int[] { 9, 9 }, strides: new int[] { 2, 2 }, pad: false);

            var digitcaps = create_capsule_layer(
                primarycaps, num_capsule: 10, dim_capsule: 16,
                routings: routings, name: "digitcaps");

            var out_caps = get_length_and_remove_last_dimension(digitcaps, name: "capsnet");

            categoricalLabel = Util.inputVariable(new int[] { 10 }, "label");
            var masked_by_y = get_mask_and_infer_from_last_dimension(digitcaps, CC.Combine(new C.VariableVector()
            {
                categoricalLabel
            }));
            var masked = get_mask_and_infer_from_last_dimension(digitcaps, null);

            var decoder = create_decoder(masked.Output.Shape.Dimensions.ToArray());
            var decoder_output_training   = Model.invoke_model(decoder, new C.Variable[] { masked_by_y });
            var decoder_output_evaluation = Model.invoke_model(decoder, new C.Variable[] { masked });

            network = CC.Combine(new C.VariableVector()
            {
                out_caps, decoder_output_training
            }, "overall_training_network");
            Logging.log_number_of_parameters(network);

            // first component of the loss
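            // This is the CapsNet margin loss, written out per class k:
            //   L_k = T_k * max(0, 0.9 - |v_k|)^2 + 0.5 * (1 - T_k) * max(0, |v_k| - 0.1)^2
            // where T_k = 1 iff class k is present and y_pred holds the capsule lengths |v_k|.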
            var y_true     = categoricalLabel;
            var y_pred     = out_caps;
            var digit_loss = CC.Plus(
                CC.ElementTimes(y_true, CC.Square(CC.ElementMax(DC(0), CC.Minus(DC(0.9), y_pred), ""))),
                CC.ElementTimes(DC(0.5),
                                CC.ElementTimes(CC.Minus(DC(1), y_true), CC.Square(CC.ElementMax(DC(0), CC.Minus(y_pred, DC(0.1)), "")))));

            digit_loss = CC.ReduceSum(digit_loss, C.Axis.AllStaticAxes());

            // second component of the loss
            var num_pixels_at_output = Util.np_prod(decoder_output_training.Output.Shape.Dimensions.ToArray());
            var squared_error        = CC.SquaredError(decoder_output_training, imageVariable);
            var image_mse            = CC.ElementDivide(squared_error, DC(num_pixels_at_output));

            loss_function = CC.Plus(digit_loss, CC.ElementTimes(DC(0.35), image_mse));
            eval_function = CC.ClassificationError(y_pred, y_true);

            learner = CC.AdamLearner(
                new C.ParameterVector(network.Parameters().ToArray()),
                new C.TrainingParameterScheduleDouble(0.001 * batch_size, (uint)batch_size),
                new C.TrainingParameterScheduleDouble(0.9),
                true,
                new C.TrainingParameterScheduleDouble(0.99));

            trainer   = CC.CreateTrainer(network, loss_function, eval_function, new C.LearnerVector(new C.Learner[] { learner }));
            evaluator = CC.CreateEvaluator(eval_function);
        }
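
create_network also uses a DC helper (e.g. DC(0.9)) that is not defined in this snippet; presumably it wraps a double in a scalar Constant on the compute device, roughly:

        // Hypothetical sketch of the DC ("double constant") helper used above.
        C.Constant DC(double value)
        {
            return C.Constant.Scalar(C.DataType.Float, value, computeDevice);
        }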