/// <summary>
/// Pre-activation bottleneck residual block (ResNet v2): BN -> 1x1 conv -> BN -> 3x3 conv -> BN -> 1x1 conv.
/// </summary>
/// <param name="channels">Number of output channels of the block.</param>
/// <param name="stride">Stride applied by the middle 3x3 convolution.</param>
/// <param name="downsample">When true, a 1x1 strided convolution is created to match the shortcut's shape.</param>
/// <param name="in_channels">Number of input channels (used only by the downsample convolution).</param>
/// <param name="prefix">Name prefix forwarded to the base block.</param>
/// <param name="params">Shared parameter dictionary forwarded to the base block.</param>
public BottleneckV2(int channels, int stride, bool downsample = false, int in_channels = 0, string prefix = "", ParameterDict @params = null)
    : base(prefix, @params)
{
    // The two 1x1 convolutions squeeze to / expand from a quarter of the output width.
    int quarterChannels = channels / 4;

    bn1 = new BatchNorm();
    conv1 = new Conv2D(quarterChannels, (1, 1), (1, 1), use_bias: false);
    bn2 = new BatchNorm();
    conv2 = ResNet.Conv3x3(quarterChannels, stride, quarterChannels);
    bn3 = new BatchNorm();
    conv3 = new Conv2D(channels, (1, 1), (1, 1), use_bias: false);

    // Register every sub-layer so its parameters participate in the block hierarchy.
    RegisterChild(bn1, "bn1");
    RegisterChild(conv1, "conv1");
    RegisterChild(bn2, "bn2");
    RegisterChild(conv2, "conv2");
    RegisterChild(bn3, "bn3");
    RegisterChild(conv3, "conv3");

    if (!downsample)
    {
        ds = null;
        return;
    }

    // Shortcut projection: 1x1 strided conv aligning channel count and spatial size.
    ds = new Conv2D(channels, (1, 1), (stride, stride), use_bias: false, in_channels: in_channels);
    RegisterChild(ds, "downsample");
}
/// <summary>
/// Appends a batch-normalization operation to <paramref name="input"/>, creating fresh scale/bias
/// parameters (with inferred dimension) and zero-initialized running statistics.
/// </summary>
/// <param name="input">The function/variable to normalize.</param>
/// <param name="batchNorm">Whether to use spatial (per-channel) or regular (per-activation) normalization.</param>
/// <param name="device">Device on which parameters and constants are allocated.</param>
/// <param name="dataType">Element data type of the created parameters/constants.</param>
/// <param name="initialScaleValue">Initial value for the learned scale parameter.</param>
/// <param name="initialBiasValue">Initial value for the learned bias parameter.</param>
/// <param name="normalizationTimeConstant">Time constant (in samples) for the running-statistics EMA.</param>
/// <returns>The batch-normalized CNTK <see cref="Function"/>.</returns>
public static Function BatchNorm(this Function input, BatchNorm batchNorm, DeviceDescriptor device, DataType dataType, double initialScaleValue = 1, double initialBiasValue = 0, int normalizationTimeConstant = 5000)
{
    var inferredDimension1D = NDShape.CreateNDShape(new int[] { NDShape.InferredDimension });

    var scaleInitializer = CNTKLib.ConstantInitializer(initialScaleValue);
    var scaleParams = new Parameter(inferredDimension1D, dataType, scaleInitializer, device);

    var biasInitializer = CNTKLib.ConstantInitializer(initialBiasValue);
    var biasParams = new Parameter(inferredDimension1D, dataType, biasInitializer, device);

    // Batch normalization initial state are constants. Running statistics start at zero
    // (fix: this was previously 1.0, which mis-initialized mean/inv-std/count; CNTK's
    // reference examples initialize all running statistics to 0).
    const double zeroInit = 0.0;
    var runningMean = new Constant(inferredDimension1D, dataType, zeroInit, device);
    var runningInvStd = new Constant(inferredDimension1D, dataType, zeroInit, device);
    var runningCount = new Constant(NDShape.CreateNDShape(new[] { 1 }), dataType, zeroInit, device);

    bool spatial = batchNorm == LayerFunctions.BatchNorm.Spatial;

    // Allows to smooth batch estimates with the running statistics.
    // However, this has not been found useful so far in our experiments (from CNTK team).
    const double blendTimeConstant = 0.0;

    // Epsilon is added to the variance to avoid division by 0.
    const double epsilon = 0.00001;

    // cuDNN's batch-norm path is only available on GPU devices.
    bool useCudnn = device.Type == DeviceKind.GPU;
    const bool disableRegularization = false;

    // TODO: Consider if we want to surface the additional options for BatchNorm:
    // - blendTimeConstant
    // - epsilon
    // - useCudnn
    // - disableRegularization
    // - name
    return CNTKLib.BatchNormalization(input, scaleParams, biasParams,
        runningMean, runningInvStd, runningCount, spatial,
        normalizationTimeConstant, blendTimeConstant, epsilon,
        useCudnn, disableRegularization);
}
/// <summary>
/// Pre-activation basic residual block (ResNet v2): BN -> 3x3 conv -> BN -> 3x3 conv.
/// </summary>
/// <param name="channels">Number of output channels of the block.</param>
/// <param name="stride">Stride applied by the first 3x3 convolution.</param>
/// <param name="downsample">When true, a 1x1 strided convolution is created to match the shortcut's shape.</param>
/// <param name="in_channels">Number of input channels of the block.</param>
/// <param name="prefix">Name prefix forwarded to the base block.</param>
/// <param name="params">Shared parameter dictionary forwarded to the base block.</param>
public BasicBlockV2(int channels, int stride, bool downsample = false, int in_channels = 0, string prefix = null, ParameterDict @params = null)
    : base(prefix, @params)
{
    bn1 = new BatchNorm();
    conv1 = ResNet.Conv3x3(channels, stride, in_channels);
    bn2 = new BatchNorm();
    // Fix: conv1 outputs `channels` feature maps, so the second conv must consume
    // `channels`, not the block's external `in_channels` (Gluon reference:
    // _conv3x3(channels, 1, channels)).
    conv2 = ResNet.Conv3x3(channels, 1, channels);

    // Fix: register sub-layers so their parameters participate in the block hierarchy,
    // consistent with BottleneckV2.
    RegisterChild(bn1, "bn1");
    RegisterChild(conv1, "conv1");
    RegisterChild(bn2, "bn2");
    RegisterChild(conv2, "conv2");

    if (downsample)
    {
        // Shortcut projection: 1x1 strided conv aligning channel count and spatial size.
        ds = new Conv2D(channels, (1, 1), (stride, stride), use_bias: false, in_channels: in_channels);
        RegisterChild(ds, "downsample");
    }
    else
    {
        ds = null;
    }
}