////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Gets the kernel size. </summary>
///
/// <param name="param"> The convolution parameter. </param>
///
/// <returns> The kernel size as (width, height). </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
static Size GetKernelSize(ConvolutionParameter param)
{
    // Explicit kernel_h/kernel_w take precedence over the repeated kernel_size field.
    if (param.KernelH > 0)
    {
        return new Size((int)param.KernelW, (int)param.KernelH);
    }

    // A single kernel_size entry means a square kernel.
    if (param.KernelSizes.Length == 1)
    {
        return new Size((int)param.KernelSizes[0], (int)param.KernelSizes[0]);
    }

    // Two entries are stored height-first, so swap into (width, height).
    return new Size((int)param.KernelSizes[1], (int)param.KernelSizes[0]);
}
static int[] GetKernelSize(ConvolutionParameter param)
{
    // Same precedence rule as the Size-based overload, returned as { width, height }.
    if (param.KernelH > 0)
    {
        return new[] { (int)param.KernelW, (int)param.KernelH };
    }

    if (param.KernelSizes.Length == 1)
    {
        return new[] { (int)param.KernelSizes[0], (int)param.KernelSizes[0] };
    }

    return new[] { (int)param.KernelSizes[1], (int)param.KernelSizes[0] };
}
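Both GetKernelSize overloads encode the same precedence rule: explicit KernelH/KernelW win, a single KernelSizes entry means a square kernel, and two entries are stored height-first. A minimal stand-alone sketch of that rule; the kernelH/kernelW/kernelSizes parameters are hypothetical stand-ins for the protobuf fields:

// Hypothetical stand-alone version of the precedence rule above;
// kernelH/kernelW/kernelSizes stand in for the protobuf fields.
static (int Width, int Height) ResolveKernel(uint kernelH, uint kernelW, uint[] kernelSizes)
{
    if (kernelH > 0)
        return ((int)kernelW, (int)kernelH);               // explicit h/w wins

    if (kernelSizes.Length == 1)
        return ((int)kernelSizes[0], (int)kernelSizes[0]); // square kernel

    return ((int)kernelSizes[1], (int)kernelSizes[0]);     // entries are (h, w)
}

// ResolveKernel(0, 0, new uint[] { 3 })    -> (3, 3)
// ResolveKernel(5, 3, new uint[0])         -> (3, 5)
// ResolveKernel(0, 0, new uint[] { 5, 3 }) -> (3, 5)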
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Gets the kernel pad. </summary>
///
/// <param name="param"> The convolution parameter. </param>
///
/// <returns> The kernel pad as (width, height). </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
static Size GetKernelPad(ConvolutionParameter param)
{
    // Explicit pad_h/pad_w take precedence over the repeated pad field.
    if (param.PadH > 0)
    {
        return new Size((int)param.PadW, (int)param.PadH);
    }

    // NOTE: this loader falls back to a pad of (1, 1) when none is given,
    // whereas the Caffe setup code later in this section defaults pad to 0.
    if (param.Pads == null || param.Pads.Length == 0)
    {
        return new Size(1, 1);
    }

    if (param.Pads.Length == 1)
    {
        return new Size((int)param.Pads[0], (int)param.Pads[0]);
    }

    return new Size((int)param.Pads[1], (int)param.Pads[0]);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Gets the kernel stride. </summary>
///
/// <param name="param"> The convolution parameter. </param>
///
/// <returns> The kernel stride as (width, height). </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
static Size GetKernelStride(ConvolutionParameter param)
{
    // Explicit stride_h/stride_w take precedence over the repeated stride field.
    if (param.StrideH > 0)
    {
        return new Size((int)param.StrideW, (int)param.StrideH);
    }

    // Default stride is 1 on both axes.
    if (param.Strides == null || param.Strides.Length == 0)
    {
        return new Size(1, 1);
    }

    if (param.Strides.Length == 1)
    {
        return new Size((int)param.Strides[0], (int)param.Strides[0]);
    }

    return new Size((int)param.Strides[1], (int)param.Strides[0]);
}
static int[] GetKernelPad(ConvolutionParameter param)
{
    // Array-based overload of GetKernelPad, returned as { width, height }.
    if (param.PadH > 0)
    {
        return new[] { (int)param.PadW, (int)param.PadH };
    }

    if (param.Pads == null || param.Pads.Length == 0)
    {
        return new[] { 1, 1 };
    }

    if (param.Pads.Length == 1)
    {
        return new[] { (int)param.Pads[0], (int)param.Pads[0] };
    }

    return new[] { (int)param.Pads[1], (int)param.Pads[0] };
}
static int[] GetKernelStride(ConvolutionParameter param)
{
    // Array-based overload of GetKernelStride, returned as { width, height }.
    if (param.StrideH > 0)
    {
        return new[] { (int)param.StrideW, (int)param.StrideH };
    }

    if (param.Strides == null || param.Strides.Length == 0)
    {
        return new[] { 1, 1 };
    }

    if (param.Strides.Length == 1)
    {
        return new[] { (int)param.Strides[0], (int)param.Strides[0] };
    }

    return new[] { (int)param.Strides[1], (int)param.Strides[0] };
}
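The pad and stride helpers repeat the kernel-size pattern with one addition: a fallback default when the repeated field is absent. The four overloads could be collapsed into a single hypothetical helper, sketched here under that assumption:

// Hypothetical consolidation of the pad/stride pattern above: explicit h/w first,
// then the repeated field (one entry = both axes, two entries = (h, w)),
// then a fallback default when the field is absent.
static int[] ResolveWH(uint h, uint w, uint[] values, int defaultValue)
{
    if (h > 0)
        return new[] { (int)w, (int)h };

    if (values == null || values.Length == 0)
        return new[] { defaultValue, defaultValue };

    if (values.Length == 1)
        return new[] { (int)values[0], (int)values[0] };

    return new[] { (int)values[1], (int)values[0] };
}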
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Sets up the convolution. </summary>
///
/// <param name="param">       The convolution parameter. </param>
/// <param name="blobs">       The blobs (blobs[0] = weights, blobs[1] = optional bias). </param>
/// <param name="name">        The layer name. </param>
/// <param name="inputNames">  List of names of the inputs. </param>
/// <param name="outputNames"> List of names of the outputs. </param>
///
/// <returns> A Convolution2D. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
static Convolution2D SetupConvolution(ConvolutionParameter param, List<BlobProto> blobs, string name, string[] inputNames, string[] outputNames)
{
    Size ksize = GetKernelSize(param);
    Size stride = GetKernelStride(param);
    Size pad = GetKernelPad(param);
    int num = GetNum(blobs[0]);
    int channels = GetChannels(blobs[0]);
    int nIn = channels * (int)param.Group;
    int nOut = num;
    float[] w = blobs[0].Datas;

    // BiasTerm is negated because the constructor flag turns the bias off.
    if (param.BiasTerm)
    {
        float[] b = blobs[1].Datas;
        return new Convolution2D(nIn, nOut, ksize, stride, pad, !param.BiasTerm, w, b, name: name, inputNames: inputNames, outputNames: outputNames);
    }

    return new Convolution2D(nIn, nOut, ksize, stride, pad, !param.BiasTerm, w, name: name, inputNames: inputNames, outputNames: outputNames);
}
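For context, a hedged usage sketch of SetupConvolution; the LayerParameter property names used here (ConvolutionParam, Blobs, Name, Bottoms, Tops) are assumptions about the protobuf-generated model classes, not confirmed API:

// Hedged usage sketch -- the property names on LayerParameter are assumptions.
static Convolution2D LoadConvolution(LayerParameter layer)
{
    return SetupConvolution(
        layer.ConvolutionParam,   // parsed convolution settings
        layer.Blobs,              // blobs[0] = weights, blobs[1] = optional bias
        layer.Name,
        layer.Bottoms.ToArray(),  // input blob names
        layer.Tops.ToArray());    // output blob names
}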
/// <summary>
/// Setup the layer.
/// </summary>
/// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
/// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
{
    if (!reshapeNeeded(colBottom, colTop))
    {
        return;
    }

    // Configure the kernel size, padding, stride and inputs.
    ConvolutionParameter p = m_param.convolution_param;

    m_bForceNDim2col = p.force_nd_im2col;
    m_nChannelAxis = colBottom[0].CanonicalAxisIndex(p.axis);

    int nFirstSpatialAxis = m_nChannelAxis + 1;
    int nNumAxes = colBottom[0].num_axes;

    m_nNumSpatialAxes = nNumAxes - nFirstSpatialAxis;
    m_log.CHECK_GE(m_nNumSpatialAxes, 0, "The number of spatial axes must be zero or greater.");

    List<int> rgBottomDimBlobShape = new List<int>() { m_nNumSpatialAxes + 1 };
    List<int> rgSpatialDimBlobShape = new List<int>() { Math.Max(m_nNumSpatialAxes, 1) };

    // Setup filter kernel dimensions (blobKernelShape)
    m_blobKernelShape.Reshape(rgSpatialDimBlobShape);
    T[] rgKernelShape = m_blobKernelShape.mutable_cpu_data;

    if (p.kernel_h.HasValue || p.kernel_w.HasValue)
    {
        m_log.CHECK_EQ(m_nNumSpatialAxes, 2, "kernel_h & kernel_w can only be used in 2D convolution.");
        m_log.CHECK_EQ(0, p.kernel_size.Count, "Either kernel_size or kernel_h/w should be specified; not both.");
        rgKernelShape[0] = (T)Convert.ChangeType(p.kernel_h.Value, typeof(T));
        rgKernelShape[1] = (T)Convert.ChangeType(p.kernel_w.Value, typeof(T));
    }
    else
    {
        int nNumKernelDims = p.kernel_size.Count;
        m_log.CHECK(nNumKernelDims == 1 || nNumKernelDims == m_nNumSpatialAxes, "Kernel size must be specified once, or once per spatial dimension (kernel_size specified " + nNumKernelDims.ToString() + " times; " + m_nNumSpatialAxes.ToString() + " spatial dims).");

        for (int i = 0; i < m_nNumSpatialAxes; i++)
        {
            int nIdx = (nNumKernelDims == 1) ? 0 : i;
            rgKernelShape[i] = (T)Convert.ChangeType(p.kernel_size[nIdx], typeof(T));
        }
    }

    for (int i = 0; i < m_nNumSpatialAxes; i++)
    {
        m_log.CHECK_GT((int)Convert.ChangeType(rgKernelShape[i], typeof(int)), 0, "Filter dimension must be non-zero.");
    }

    m_blobKernelShape.mutable_cpu_data = rgKernelShape;

    // Setup stride dimensions (blobStride)
    m_blobStride.Reshape(rgSpatialDimBlobShape);
    T[] rgStrideData = m_blobStride.mutable_cpu_data;

    if (p.stride_h.HasValue || p.stride_w.HasValue)
    {
        m_log.CHECK_EQ(m_nNumSpatialAxes, 2, "stride_h & stride_w can only be used in 2D convolution.");
        m_log.CHECK_EQ(0, p.stride.Count, "Either stride or stride_h/w should be specified; not both.");
        rgStrideData[0] = (T)Convert.ChangeType(p.stride_h.Value, typeof(T));
        rgStrideData[1] = (T)Convert.ChangeType(p.stride_w.Value, typeof(T));
    }
    else
    {
        int nNumStrideDims = p.stride.Count;
        m_log.CHECK(nNumStrideDims == 0 || nNumStrideDims == 1 || nNumStrideDims == m_nNumSpatialAxes, "Stride must be specified once, or once per spatial dimension (stride specified " + nNumStrideDims.ToString() + " times; " + m_nNumSpatialAxes.ToString() + " spatial dims).");
        int nDefaultStride = 1;

        for (int i = 0; i < m_nNumSpatialAxes; i++)
        {
            if (nNumStrideDims == 0)
            {
                rgStrideData[i] = (T)Convert.ChangeType(nDefaultStride, typeof(T));
            }
            else
            {
                int nIdx = (nNumStrideDims == 1) ? 0 : i;
                rgStrideData[i] = (T)Convert.ChangeType(p.stride[nIdx], typeof(T));
            }

            m_log.CHECK_GT((int)Convert.ChangeType(rgStrideData[i], typeof(int)), 0, "Stride dimension must be non-zero.");
        }
    }

    m_blobStride.mutable_cpu_data = rgStrideData;

    // Setup pad dimensions (blobPad)
    m_blobPad.Reshape(rgSpatialDimBlobShape);
    T[] rgPadData = m_blobPad.mutable_cpu_data;

    if (p.pad_h.HasValue || p.pad_w.HasValue)
    {
        m_log.CHECK_EQ(m_nNumSpatialAxes, 2, "pad_h & pad_w can only be used in 2D convolution.");
        m_log.CHECK_EQ(0, p.pad.Count, "Either pad or pad_h/w should be specified; not both.");
        rgPadData[0] = (T)Convert.ChangeType(p.pad_h.Value, typeof(T));
        rgPadData[1] = (T)Convert.ChangeType(p.pad_w.Value, typeof(T));
    }
    else
    {
        int nNumPadDims = p.pad.Count;
        m_log.CHECK(nNumPadDims == 0 || nNumPadDims == 1 || nNumPadDims == m_nNumSpatialAxes, "Pad must be specified once, or once per spatial dimension (pad specified " + nNumPadDims.ToString() + " times; " + m_nNumSpatialAxes.ToString() + " spatial dims).");
        int nDefaultPad = 0;

        for (int i = 0; i < m_nNumSpatialAxes; i++)
        {
            if (nNumPadDims == 0)
            {
                rgPadData[i] = (T)Convert.ChangeType(nDefaultPad, typeof(T));
            }
            else
            {
                int nIdx = (nNumPadDims == 1) ? 0 : i;
                rgPadData[i] = (T)Convert.ChangeType(p.pad[nIdx], typeof(T));
            }
        }
    }

    m_blobPad.mutable_cpu_data = rgPadData;

    // Setup dilation dimensions (blobDilation)
    m_blobDilation.Reshape(rgSpatialDimBlobShape);
    T[] rgDilationData = m_blobDilation.mutable_cpu_data;
    int nNumDilationDims = p.dilation.Count;

    m_log.CHECK(nNumDilationDims == 0 || nNumDilationDims == 1 || nNumDilationDims == m_nNumSpatialAxes, "Dilation must be specified once, or once per spatial dimension (dilation specified " + nNumDilationDims.ToString() + " times; " + m_nNumSpatialAxes.ToString() + " spatial dims).");
    int nDefaultDilation = 1;

    for (int i = 0; i < m_nNumSpatialAxes; i++)
    {
        if (nNumDilationDims == 0)
        {
            rgDilationData[i] = (T)Convert.ChangeType(nDefaultDilation, typeof(T));
        }
        else
        {
            int nIdx = (nNumDilationDims == 1) ? 0 : i;
            rgDilationData[i] = (T)Convert.ChangeType(p.dilation[nIdx], typeof(T));
        }
    }

    m_blobDilation.mutable_cpu_data = rgDilationData;

    // Special case: im2col is the identity for 1x1 convolution with stride 1
    // and no padding, so flag for skipping the buffer and transformation.
    m_bIs1x1 = true;

    for (int i = 0; i < m_nNumSpatialAxes; i++)
    {
        if (!(val_at(rgKernelShape, i) == 1 && val_at(rgStrideData, i) == 1 && val_at(rgPadData, i) == 0))
        {
            m_bIs1x1 = false;
            break;
        }
    }

    // Configure output channels and groups.
    m_nChannels = colBottom[0].shape(m_nChannelAxis);
    m_nNumOutput = (int)p.num_output;
    m_log.CHECK_GT(m_nNumOutput, 0, "Output count must be greater than zero.");
    m_nGroup = (int)p.group;
    m_log.CHECK_EQ(m_nChannels % m_nGroup, 0, "The channels must span evenly across the groups.");
    m_log.CHECK_EQ(m_nNumOutput % m_nGroup, 0, "The number of outputs must be a multiple of the group count.");

    if (reverse_dimensions())
    {
        m_nConvOutChannels = m_nChannels;
        m_nConvInChannels = m_nNumOutput;
    }
    else
    {
        m_nConvOutChannels = m_nNumOutput;
        m_nConvInChannels = m_nChannels;
    }

    // Handle the parameters: weights and biases
    // - blobs[0] holds the filter weights.
    // - blobs[1] holds the biases (optional)
    List<int> rgWeightShape = new List<int>();
    rgWeightShape.Add(m_nConvOutChannels);
    rgWeightShape.Add(m_nConvInChannels / m_nGroup);

    for (int i = 0; i < m_nNumSpatialAxes; i++)
    {
        rgWeightShape.Add(val_at(rgKernelShape, i));
    }

    m_bBiasTerm = p.bias_term;

    List<int> rgBiasShape = new List<int>() { m_nNumOutput };

    // Setup the convert-to-half flags used by the Layer just before calling forward and backward.
    if (p.useCudnn(m_nNumSpatialAxes))
    {
        m_bUseHalfSize = m_param.use_halfsize;
    }

    if (m_colBlobs.Count > 0)
    {
        m_log.CHECK_EQ(1 + ((m_bBiasTerm) ? 1 : 0), m_colBlobs.Count, "Incorrect number of weight blobs.");

        if (!Utility.Compare<int>(rgWeightShape, m_colBlobs[0].shape()))
        {
            Blob<T> b = new Blob<T>(m_cuda, m_log, rgWeightShape);
            m_log.FAIL("Incorrect weight shape: expected shape " + b.shape_string + "; instead, shape was " + m_colBlobs[0].shape_string);
        }

        if (m_bBiasTerm && !Utility.Compare<int>(rgBiasShape, m_colBlobs[1].shape()))
        {
            Blob<T> b = new Blob<T>(m_cuda, m_log, rgBiasShape);
            m_log.FAIL("Incorrect bias shape: expected shape " + b.shape_string + "; instead, shape was " + m_colBlobs[1].shape_string);
        }

        m_log.WriteLine("Skipping parameter initialization.");
    }
    else
    {
        m_colBlobs.Clear();

        // Initialize and fill the weights:
        // output channels x input channels per-group x kernel height x kernel width.
        Blob<T> blobWts = new Blob<T>(m_cuda, m_log, true, m_bUseHalfSize);
        blobWts.Name = colTop[0].Name + " weights";
        blobWts.type = BLOB_TYPE.WEIGHT;

        if (m_bUseHalfSize || !shareParameter(blobWts, rgWeightShape))
        {
            blobWts.Reshape(rgWeightShape, m_bUseHalfSize);
            Filler<T> wtFiller = Filler<T>.Create(m_cuda, m_log, p.weight_filler);

            // Fill a full-precision blob first when using half size, then copy over.
            Blob<T> blobWts1 = blobWts;

            if (m_bUseHalfSize)
            {
                blobWts1 = new Blob<T>(m_cuda, m_log, false, false);
                blobWts1.ReshapeLike(blobWts);
            }

            wtFiller.Fill(blobWts1);

            if (m_bUseHalfSize)
            {
                blobWts.CopyFrom(blobWts1);
                blobWts1.Dispose();
            }
        }

        m_colBlobs.Add(blobWts);

        // If necessary, initialize and fill the biases:
        if (m_bBiasTerm)
        {
            Blob<T> blobBias = new Blob<T>(m_cuda, m_log, true, m_bUseHalfSize);
            blobBias.Name = colTop[0].Name + " bias";
            blobBias.type = BLOB_TYPE.WEIGHT;

            if (m_bUseHalfSize || !shareParameter(blobBias, rgBiasShape))
            {
                blobBias.Reshape(rgBiasShape, m_bUseHalfSize);
                Filler<T> biasFiller = Filler<T>.Create(m_cuda, m_log, p.bias_filler);

                Blob<T> blobBias1 = blobBias;

                if (m_bUseHalfSize)
                {
                    blobBias1 = new Blob<T>(m_cuda, m_log, false, false);
                    blobBias1.ReshapeLike(blobBias);
                }

                biasFiller.Fill(blobBias1);

                if (m_bUseHalfSize)
                {
                    blobBias.CopyFrom(blobBias1);
                    blobBias1.Dispose();
                }
            }

            m_colBlobs.Add(blobBias);
        }
    }

    m_nKernelDim = m_colBlobs[0].count(1);
    m_nWeightOffset = m_nConvOutChannels * m_nKernelDim / m_nGroup;

    // Propagate gradients to the parameters (as directed by backward pass).
    m_rgbParamPropagateDown = new DictionaryMap<bool>(m_colBlobs.Count, true);
}
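To make the shape bookkeeping at the end of LayerSetUp concrete, a small worked example with illustrative values:

// Worked example of the weight-shape bookkeeping above (illustrative values):
// bottom of N x 32 x H x W, num_output = 64, group = 2, 3x3 kernel.
int nNumOutput = 64, nChannels = 32, nGroup = 2;

// rgWeightShape = { convOutChannels, convInChannels / group, kH, kW } = { 64, 16, 3, 3 }
int[] rgWeightShape = { nNumOutput, nChannels / nGroup, 3, 3 };

// m_nKernelDim = count(1) over the weight blob = 16 * 3 * 3 = 144
int nKernelDim = rgWeightShape[1] * rgWeightShape[2] * rgWeightShape[3];

// m_nWeightOffset = convOutChannels * kernelDim / group = 64 * 144 / 2 = 4608
int nWeightOffset = nNumOutput * nKernelDim / nGroup;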
/// <summary>
/// Setup the layer.
/// </summary>
/// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
/// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
{
    ConvolutionParameter p = m_param.convolution_param;

    m_bForceNDIm2Col = p.force_nd_im2col;

    int nInputNumDims = colBottom[0].shape().Count;
    m_nChannelAxis = colBottom[0].CanonicalAxisIndex(p.axis);
    int nFirstSpatialDim = m_nChannelAxis + 1;

    m_nNumSpatialAxes = nInputNumDims - nFirstSpatialDim;
    m_log.CHECK_GE(m_nNumSpatialAxes, 1, "The spatial axis count must be >= 1.");

    List<int> rgDimBlobShape = new List<int>() { m_nNumSpatialAxes };

    // Setup filter kernel dimensions (kernel_shape_).
    m_blobKernelShape.Reshape(rgDimBlobShape);
    T[] rgKernelShape = m_blobKernelShape.mutable_cpu_data;

    if (p.kernel_h.HasValue || p.kernel_w.HasValue)
    {
        m_log.CHECK_EQ(m_nNumSpatialAxes, 2, "kernel_h & kernel_w can only be used for 2D convolution.");
        m_log.CHECK_EQ(0, p.kernel_size.Count, "Either kernel_size or kernel_h/w should be specified; not both.");
        rgKernelShape[0] = (T)Convert.ChangeType(p.kernel_h.Value, typeof(T));
        rgKernelShape[1] = (T)Convert.ChangeType(p.kernel_w.Value, typeof(T));
    }
    else
    {
        int nNumKernelDims = p.kernel_size.Count;
        m_log.CHECK(nNumKernelDims == 1 || nNumKernelDims == m_nNumSpatialAxes, "kernel_size must be specified once, or once per spatial dimension (kernel_size specified " + nNumKernelDims.ToString() + " times; " + m_nNumSpatialAxes.ToString() + " spatial dims).");

        for (int i = 0; i < m_nNumSpatialAxes; i++)
        {
            uint nKernel = p.kernel_size[(nNumKernelDims == 1) ? 0 : i];
            rgKernelShape[i] = (T)Convert.ChangeType(nKernel, typeof(T));
        }
    }

    for (int i = 0; i < m_nNumSpatialAxes; i++)
    {
        int nVal = (int)Convert.ChangeType(rgKernelShape[i], typeof(int));
        m_log.CHECK_GT(nVal, 0, "Filter dimensions must be nonzero.");
    }

    m_blobKernelShape.mutable_cpu_data = rgKernelShape;

    // Setup stride dimensions (stride_).
    m_blobStride.Reshape(rgDimBlobShape);
    T[] rgStrideData = m_blobStride.mutable_cpu_data;

    if (p.stride_h.HasValue || p.stride_w.HasValue)
    {
        m_log.CHECK_EQ(m_nNumSpatialAxes, 2, "stride_h & stride_w can only be used for 2D convolution.");
        m_log.CHECK_EQ(0, p.stride.Count, "Either stride or stride_h/w should be specified; not both.");
        rgStrideData[0] = (T)Convert.ChangeType(p.stride_h.Value, typeof(T));
        rgStrideData[1] = (T)Convert.ChangeType(p.stride_w.Value, typeof(T));
    }
    else
    {
        int nNumStrideDims = p.stride.Count;
        m_log.CHECK(nNumStrideDims == 0 || nNumStrideDims == 1 || nNumStrideDims == m_nNumSpatialAxes, "stride must be specified once, or once per spatial dimension (stride specified " + nNumStrideDims.ToString() + " times; " + m_nNumSpatialAxes.ToString() + " spatial dims).");
        uint nDefaultStride = 1;

        for (int i = 0; i < m_nNumSpatialAxes; i++)
        {
            uint nStride = (nNumStrideDims == 0) ? nDefaultStride : p.stride[(nNumStrideDims == 1) ? 0 : i];
            rgStrideData[i] = (T)Convert.ChangeType(nStride, typeof(T));
        }
    }

    m_blobStride.mutable_cpu_data = rgStrideData;

    // Setup pad dimensions (pad_).
    m_blobPad.Reshape(rgDimBlobShape);
    T[] rgPadData = m_blobPad.mutable_cpu_data;

    if (p.pad_h.HasValue || p.pad_w.HasValue)
    {
        m_log.CHECK_EQ(m_nNumSpatialAxes, 2, "pad_h & pad_w can only be used for 2D convolution.");
        m_log.CHECK_EQ(0, p.pad.Count, "Either pad or pad_h/w should be specified; not both.");
        rgPadData[0] = (T)Convert.ChangeType(p.pad_h.Value, typeof(T));
        rgPadData[1] = (T)Convert.ChangeType(p.pad_w.Value, typeof(T));
    }
    else
    {
        int nNumPadDims = p.pad.Count;
        m_log.CHECK(nNumPadDims == 0 || nNumPadDims == 1 || nNumPadDims == m_nNumSpatialAxes, "pad must be specified once, or once per spatial dimension (pad specified " + nNumPadDims.ToString() + " times; " + m_nNumSpatialAxes.ToString() + " spatial dims).");
        uint nDefaultPad = 0;

        for (int i = 0; i < m_nNumSpatialAxes; i++)
        {
            uint nPad = (nNumPadDims == 0) ? nDefaultPad : p.pad[(nNumPadDims == 1) ? 0 : i];
            rgPadData[i] = (T)Convert.ChangeType(nPad, typeof(T));
        }
    }

    m_blobPad.mutable_cpu_data = rgPadData;

    // Setup dilation dimensions (dilation_).
    m_blobDilation.Reshape(rgDimBlobShape);
    T[] rgDilationData = m_blobDilation.mutable_cpu_data;
    int nNumDilationDims = p.dilation.Count;

    m_log.CHECK(nNumDilationDims == 0 || nNumDilationDims == 1 || nNumDilationDims == m_nNumSpatialAxes, "dilation must be specified once, or once per spatial dimension (dilation specified " + nNumDilationDims.ToString() + " times; " + m_nNumSpatialAxes.ToString() + " spatial dims).");
    uint nDefaultDilation = 1;

    for (int i = 0; i < m_nNumSpatialAxes; i++)
    {
        uint nDilation = (nNumDilationDims == 0) ? nDefaultDilation : p.dilation[(nNumDilationDims == 1) ? 0 : i];
        rgDilationData[i] = (T)Convert.ChangeType(nDilation, typeof(T));
    }

    m_blobDilation.mutable_cpu_data = rgDilationData;
}
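The kernel, stride, pad, and dilation values resolved here feed Caffe's standard output-extent computation, shown below as a stand-alone sketch rather than the layer's own Reshape code:

// Standard Caffe output extent per spatial axis (stand-alone sketch).
static int OutputExtent(int nInput, int nKernel, int nStride, int nPad, int nDilation)
{
    int nKernelExtent = nDilation * (nKernel - 1) + 1; // effective dilated kernel size
    return (nInput + 2 * nPad - nKernelExtent) / nStride + 1;
}

// OutputExtent(224, 3, 1, 1, 1) -> 224 (3x3 "same" convolution)
// OutputExtent(224, 3, 2, 1, 1) -> 112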