예제 #1
0
        public Tensor categorical_crossentropy(Tensor target, Tensor output, bool from_logits = false)
        {
            // https://github.com/fchollet/keras/blob/f65a56fb65062c8d14d215c9f4b1015b97cc5bf3/keras/backend/cntk_backend.py#L1480

            var _output = In(output);
            var _target = In(target);

            if (from_logits)
            {
                var result = C.CrossEntropyWithSoftmax(_output, _target);
                // cntk's result shape is (batch, 1), while keras expect (batch, )
                CNTK.Function r = C.Reshape(result, NDShape.CreateNDShape(new int[] { }));
                return(Out(r));
            }
            else
            {
                // scale preds so that the class probas of each sample sum to 1
                var o     = C.ElementDivide(_output.function, C.ReduceSum(_output, Axis.EndStaticAxis()));
                var eps   = Constant.Scalar(epsilon(), DeviceDescriptor.CPUDevice);
                var omeps = Constant.Scalar(1.0 - epsilon(), DeviceDescriptor.CPUDevice);
                // avoid numerical instability with _EPSILON clipping
                o = C.Clip(o, eps, omeps);
                CNTK.Function r = C.Negate(C.ReduceSum(C.ElementTimes(_target, C.Log(_output)), Axis.EndStaticAxis()));
                return(Out(r));
            }
        }
예제 #2
0
        void create_network()
        {
            Console.WriteLine("Compute Device: " + computeDevice.AsString());
            imageVariable       = Util.inputVariable(new int[] { 28, 28, 1 }, "image_tensor");
            categoricalVariable = Util.inputVariable(new int[] { 10 }, "label_tensor");

            network = imageVariable;
            network = Layers.Convolution2D(network, 32, new int[] { 3, 3 }, computeDevice, CC.ReLU);
            network = CC.Pooling(network, C.PoolingType.Max, new int[] { 2, 2 }, new int[] { 2 });
            network = Layers.Convolution2D(network, 64, new int[] { 3, 3 }, computeDevice, CC.ReLU);
            network = CC.Pooling(network, C.PoolingType.Max, new int[] { 2, 2 }, new int[] { 2 });
            network = Layers.Convolution2D(network, 64, new int[] { 3, 3 }, computeDevice, CC.ReLU);
            network = Layers.Dense(network, 64, computeDevice, activation: CC.ReLU);
            network = Layers.Dense(network, 10, computeDevice);

            Logging.detailed_summary(network);
            Logging.log_number_of_parameters(network);

            loss_function = CC.CrossEntropyWithSoftmax(network, categoricalVariable);
            eval_function = CC.ClassificationError(network, categoricalVariable);

            learner = CC.AdamLearner(
                new C.ParameterVector(network.Parameters().ToArray()),
                new C.TrainingParameterScheduleDouble(0.001 * batch_size, (uint)batch_size),
                new C.TrainingParameterScheduleDouble(0.9),
                true,
                new C.TrainingParameterScheduleDouble(0.99));

            trainer   = CC.CreateTrainer(network, loss_function, eval_function, new C.LearnerVector(new C.Learner[] { learner }));
            evaluator = CC.CreateEvaluator(eval_function);
        }