C.Function create_capsule_layer(C.Function inputs, int num_capsule, int dim_capsule, int routings, string name) {
  // Expected input shape: [input_num_capsule, input_dim_capsule], e.g. [1152, 8].
  var inputs_shape = inputs.Output.Shape.Dimensions;
  var input_num_capsule = inputs_shape[0];
  var input_dim_capsule = inputs_shape[1];

  // Transformation matrix W maps every input capsule to every output capsule.
  var W = new C.Parameter(
    new int[] { num_capsule, dim_capsule, input_num_capsule, input_dim_capsule },
    C.DataType.Float,
    CC.GlorotUniformInitializer(),
    computeDevice,
    name: "W");

  // Add broadcast dimensions so the inputs line up with W, e.g. [1, 1, 1152, 8].
  inputs = CC.Reshape(inputs, new int[] { 1, 1, input_num_capsule, input_dim_capsule });

  // u_hat = W * u: element-wise multiply, then contract over the input capsule dimension.
  var inputs_hat = CC.ElementTimes(W, inputs);
  inputs_hat = CC.ReduceSum(inputs_hat, new C.Axis(3));
  inputs_hat = CC.Squeeze(inputs_hat);

  C.Function outputs = null;

  // Routing logits b start at zero, so the initial coupling is uniform.
  var zeros = new C.Constant(new int[] { num_capsule, 1, input_num_capsule }, C.DataType.Float, 0, computeDevice);
  var b = CC.Combine(new C.VariableVector() { zeros });

  // Dynamic routing (routing-by-agreement).
  for (int i = 0; i < routings; i++) {
    // Coupling coefficients: c = softmax(b) over the output capsules.
    var c = CC.Softmax(b, new C.Axis(0));

    // s_j = sum_i c_ij * u_hat_j|i
    var batch_dot_result = CC.ElementTimes(c, inputs_hat);
    batch_dot_result = CC.ReduceSum(batch_dot_result, new C.Axis(2));
    batch_dot_result = CC.Squeeze(batch_dot_result);

    // v_j = squash(s_j): keeps each capsule vector's length in [0, 1).
    outputs = squash(batch_dot_result, name: $"squashed_{i}", axis: 1);

    if (i < (routings - 1)) {
      // Agreement update: b_ij += v_j . u_hat_j|i
      outputs = CC.Reshape(outputs, new int[] { num_capsule, dim_capsule, 1 });
      batch_dot_result = CC.ElementTimes(outputs, inputs_hat);
      batch_dot_result = CC.ReduceSum(batch_dot_result, new C.Axis(1));
      b = CC.Plus(b, batch_dot_result);
    }
  }
  outputs = CC.Combine(new C.VariableVector() { outputs }, name);
  return outputs;
}
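// The routing loop above calls a `squash` helper that is not part of this listing.
// A minimal sketch, assuming the same `C` (CNTK namespace) and `CC` (CNTKLib) aliases,
// of the squashing non-linearity from Sabour et al. (2017):
// v = (||s||^2 / (1 + ||s||^2)) * (s / ||s||).
C.Function squash(C.Function input, string name, int axis) {
  var squared_norm = CC.ReduceSum(CC.ElementTimes(input, input), new C.Axis(axis));
  var norm = CC.Sqrt(squared_norm);
  var one = C.Constant.Scalar(C.DataType.Float, 1.0);
  // scale = ||s|| / (1 + ||s||^2), so scale * s equals squash(s) without dividing by ||s||.
  var scale = CC.ElementDivide(norm, CC.Plus(one, squared_norm));
  return CC.ElementTimes(scale, input, name);
}

// Hypothetical usage for the MNIST shapes noted above ([1152, 8] primary capsules):
// 10 digit capsules of dimension 16 with 3 routing iterations.
// var digit_caps = create_capsule_layer(primary_caps, 10, 16, 3, "digitcaps");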
/// <summary>
/// Batch normalization layer (Ioffe and Szegedy, 2015). Normalizes the activations of the previous layer at each batch,
/// i.e. applies a transformation that keeps the mean activation close to 0 and the activation standard deviation close to 1.
/// </summary>
/// <param name="layer">The output of the previous layer.</param>
/// <param name="epsilon">Small float added to the variance to avoid division by zero.</param>
/// <param name="betaInitializer">Initializer for the beta (shift) weight.</param>
/// <param name="gammaInitializers">Initializer for the gamma (scale) weight.</param>
/// <param name="runningMeanInitializer">Initializer for the running mean.</param>
/// <param name="runningStdInvInitializer">Initializer for the running inverse standard deviation.</param>
/// <param name="spatial">If true, the input is treated as spatial data (e.g. feature maps) and normalized per channel; otherwise each activation is normalized independently (1D).</param>
/// <param name="normalizationTimeConstant">The time constant, in samples, of the first-order low-pass filter used to compute the mean/variance statistics for inference.</param>
/// <param name="blendTimeConst">The blend time constant, in samples.</param>
/// <returns>The batch-normalized output.</returns>
public static Function BatchNorm(Variable layer, float epsilon = 0.001f, Initializer betaInitializer = null,
    Initializer gammaInitializers = null, Initializer runningMeanInitializer = null,
    Initializer runningStdInvInitializer = null, bool spatial = true,
    float normalizationTimeConstant = 4096f, float blendTimeConst = 0.0f)
{
    betaInitializer = betaInitializer ?? new Zeros();
    // Gamma scales the normalized activations, so it must default to ones, not zeros
    // (a zero scale would zero out the layer's output).
    gammaInitializers = gammaInitializers ?? new Ones();
    runningMeanInitializer = runningMeanInitializer ?? new Zeros();
    runningStdInvInitializer = runningStdInvInitializer ?? new Zeros();

    var biasParams = new Parameter(new int[] { NDShape.InferredDimension }, DataType.Float, betaInitializer.Get(), GlobalParameters.Device, "");
    var scaleParams = new Parameter(new int[] { NDShape.InferredDimension }, DataType.Float, gammaInitializers.Get(), GlobalParameters.Device, "");
    var runningMean = new Parameter(new int[] { NDShape.InferredDimension }, DataType.Float, runningMeanInitializer.Get(), GlobalParameters.Device, "");
    var runningInvStd = new CNTK.Constant(new int[] { NDShape.InferredDimension }, 0.0f, GlobalParameters.Device);
    var runningCount = CNTK.Constant.Scalar(0.0f, GlobalParameters.Device);

    // cuDNN has a fast batch-normalization engine; use it whenever we run on a GPU.
    bool useCudnn = GlobalParameters.Device.Type == DeviceKind.GPU;

    return CNTKLib.BatchNormalization(layer, scaleParams, biasParams, runningMean, runningInvStd, runningCount,
        spatial, normalizationTimeConstant, blendTimeConst, epsilon, useCudnn);
}
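// Hedged usage sketch (not part of the library): batch normalization typically sits
// between a dense/convolutional layer and its activation. The helper below is an
// illustrative assumption, not an existing API; it builds a small dense -> BN -> ReLU block.
public static Function DenseBnRelu(Variable input, int outputDim)
{
    // Fully-connected weights and bias; GlorotUniform is a common default.
    var w = new Parameter(new int[] { outputDim, NDShape.InferredDimension }, DataType.Float,
        CNTKLib.GlorotUniformInitializer(), GlobalParameters.Device, "w");
    var b = new Parameter(new int[] { outputDim }, DataType.Float, 0.0f, GlobalParameters.Device, "b");
    var dense = CNTKLib.Plus(CNTKLib.Times(w, input), b);

    // Dense outputs are per-activation, so spatial batch norm is turned off here.
    return CNTKLib.ReLU(BatchNorm(dense, spatial: false));
}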
/// <summary>
/// Wraps an existing native CNTK constant in this library's <see cref="Constant"/> type,
/// keeping a reference to it in <see cref="UnderlyingConstant"/>.
/// </summary>
/// <param name="constant">The native CNTK constant to wrap.</param>
internal Constant(CNTK.Constant constant) : base(constant)
{
    UnderlyingConstant = constant;
}
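// Hypothetical sketch: because the constructor is internal, this wrapping can only
// happen inside the library itself, e.g. when surfacing a native CNTK constant.
// The Wrap helper is an illustrative assumption, not an existing member.
internal static Constant Wrap(CNTK.Constant native)
{
    // `native` could come from e.g. CNTK.Constant.Scalar(CNTK.DataType.Float, 1.0).
    return new Constant(native);
}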