/// <summary> /// Generates prior boxes for a layer with specified parameters. /// </summary> /// <param name="colBottom">bottom input Blob vector (Length - at least 2) /// -# @f$ (N \times C \times H_i \times W_i) @f$ the input layer @f$ x_i @f$. /// -# @f$ (N \times C \times H_0 \times W_0) @f$ the data layer @f$ x_0 @f$. /// </param> /// <param name="colTop">top output Blob vector (Length 1) /// -# @f$ (N \times 2 \times K*4) @f$ where @f$ K @f$ is the number of priors. /// By default, a box of aspect ratio 1 and size min_size, and a box of aspect ratio 1 /// and size sqrt(min_size * max_size), are created. /// </param> protected override void forward(BlobCollection <T> colBottom, BlobCollection <T> colTop) { int nLayerW = colBottom[0].width; int nLayerH = colBottom[0].height; int nImgW; int nImgH; if (m_nImgW == 0 || m_nImgH == 0) { nImgW = colBottom[1].width; nImgH = colBottom[1].height; } else { nImgW = m_nImgW; nImgH = m_nImgH; } float fStepW; float fStepH; if (m_fStepW == 0 || m_fStepH == 0) { fStepW = (float)nImgW / (float)nLayerW; fStepH = (float)nImgH / (float)nLayerH; } else { fStepW = m_fStepW; fStepH = m_fStepH; } float[] rgfTopData = Utility.ConvertVecF <T>(colTop[0].mutable_cpu_data); int nDim = nLayerH * nLayerW * m_nNumPriors * 4; int nIdx = 0; for (int h = 0; h < nLayerH; h++) { for (int w = 0; w < nLayerW; w++) { float fCenterX = (w + m_fOffset) * fStepW; float fCenterY = (h + m_fOffset) * fStepH; float fBoxWidth; float fBoxHeight; for (int s = 0; s < m_rgfMinSizes.Count; s++) { int nMinSize = (int)m_rgfMinSizes[s]; // first prior; aspect_ratio = 1, size = min_size fBoxHeight = nMinSize; fBoxWidth = nMinSize; // xmin rgfTopData[nIdx] = (fCenterX - fBoxWidth / 2.0f) / nImgW; nIdx++; // ymin rgfTopData[nIdx] = (fCenterY - fBoxHeight / 2.0f) / nImgH; nIdx++; // xmax rgfTopData[nIdx] = (fCenterX + fBoxWidth / 2.0f) / nImgW; nIdx++; // ymax rgfTopData[nIdx] = (fCenterY + fBoxHeight / 2.0f) / nImgH; nIdx++; if (m_rgfMaxSizes.Count > 0) { m_log.CHECK_EQ(m_rgfMinSizes.Count, m_rgfMaxSizes.Count, "The max_sizes and min_sizes must have the same count."); int nMaxSize = (int)m_rgfMaxSizes[s]; // second prior; aspect_ratio = 1, size = sqrt(min_size * max_size) fBoxWidth = (float)Math.Sqrt(nMinSize * nMaxSize); fBoxHeight = fBoxWidth; // xmin rgfTopData[nIdx] = (fCenterX - fBoxWidth / 2.0f) / nImgW; nIdx++; // ymin rgfTopData[nIdx] = (fCenterY - fBoxHeight / 2.0f) / nImgH; nIdx++; // xmax rgfTopData[nIdx] = (fCenterX + fBoxWidth / 2.0f) / nImgW; nIdx++; // ymax rgfTopData[nIdx] = (fCenterY + fBoxHeight / 2.0f) / nImgH; nIdx++; } // rest of priors for (int r = 0; r < m_rgfAspectRatios.Count; r++) { float fAr = m_rgfAspectRatios[r]; if (Math.Abs(fAr - 1.0f) < 1e-6f) { continue; } fBoxWidth = (float)(nMinSize * Math.Sqrt(fAr)); fBoxHeight = (float)(nMinSize / Math.Sqrt(fAr)); // xmin rgfTopData[nIdx] = (fCenterX - fBoxWidth / 2.0f) / nImgW; nIdx++; // ymin rgfTopData[nIdx] = (fCenterY - fBoxHeight / 2.0f) / nImgH; nIdx++; // xmax rgfTopData[nIdx] = (fCenterX + fBoxWidth / 2.0f) / nImgW; nIdx++; // ymax rgfTopData[nIdx] = (fCenterY + fBoxHeight / 2.0f) / nImgH; nIdx++; } } } } // Clip the prior's coordinates such that they are within [0,1] if (m_bClip) { for (int d = 0; d < nDim; d++) { rgfTopData[d] = Math.Min(Math.Max(rgfTopData[d], 0.0f), 1.0f); } } // Set the variance. 
int nTopOffset = colTop[0].offset(0, 1); if (m_rgfVariance.Count > 1) { int nCount = 0; for (int h = 0; h < nLayerH; h++) { for (int w = 0; w < nLayerW; w++) { for (int i = 0; i < m_nNumPriors; i++) { for (int j = 0; j < 4; j++) { rgfTopData[nTopOffset + nCount] = m_rgfVariance[j]; nCount++; } } } } } colTop[0].mutable_cpu_data = Utility.ConvertVec <T>(rgfTopData); if (m_rgfVariance.Count == 1) { colTop[0].SetData(m_rgfVariance[0], nTopOffset, nDim); } }
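As a worked example of the coordinate math in forward above (the numbers are assumptions for illustration, not values from a real configuration), consider a 300x300 input image, an effective step of 8, an offset of 0.5 and a single min_size of 30; the first prior for the top-left feature-map cell works out as follows:

// Hypothetical values: nImgW = nImgH = 300, fStepW = fStepH = 8, m_fOffset = 0.5f,
// min_size = 30, cell (h = 0, w = 0).
float fCenterX = (0 + 0.5f) * 8.0f;                 // 4.0
float fCenterY = (0 + 0.5f) * 8.0f;                 // 4.0
float fXmin = (fCenterX - 30.0f / 2.0f) / 300.0f;   // -0.0367
float fYmin = (fCenterY - 30.0f / 2.0f) / 300.0f;   // -0.0367
float fXmax = (fCenterX + 30.0f / 2.0f) / 300.0f;   //  0.0633
float fYmax = (fCenterY + 30.0f / 2.0f) / 300.0f;   //  0.0633
// With m_bClip = true, the negative fXmin and fYmin are clamped to 0.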
/// <summary> /// Setup the layer for use with both Engine.CAFFE and Engine.CUDNN modes. /// </summary> /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param> /// <param name="colTop">Specifies the collection of top (output) Blobs.</param> public override void LayerSetUp(BlobCollection <T> colBottom, BlobCollection <T> colTop) { base.LayerSetUp(colBottom, colTop); if (!m_param.convolution_param.useCudnn(m_nNumSpatialAxes)) { return; } // Initialize CUDA streams and cuDNN. m_rghStream = new long[m_nGroup * CUDNN_STREAMS_PER_GROUP]; m_rghCudnn = new long[m_nGroup * CUDNN_STREAMS_PER_GROUP]; // Initialize algorithm arrays. m_rgfwdAlgo = new CONV_FWD_ALGO[colBottom.Count]; m_rgbwdFilterAlgo = new CONV_BWD_FILTER_ALGO[colBottom.Count]; m_rgbwdDataAlgo = new CONV_BWD_DATA_ALGO[colBottom.Count]; // Initialize the size arrays. m_rglWorkspaceFwdSizes = new ulong[colBottom.Count]; m_rglWorkspaceBwdFilterSizes = new ulong[colBottom.Count]; m_rglWorkspaceBwdDataSizes = new ulong[colBottom.Count]; m_rglWorkspaceFwdOffsets = new ulong[m_nGroup * CUDNN_STREAMS_PER_GROUP]; m_rglWorkspaceBwdFilterOffsets = new ulong[m_nGroup * CUDNN_STREAMS_PER_GROUP]; m_rglWorkspaceBwdDataOffsets = new ulong[m_nGroup * CUDNN_STREAMS_PER_GROUP]; for (int i = 0; i < colBottom.Count; i++) { // initialize all to default algorithms. m_rgfwdAlgo[i] = (CONV_FWD_ALGO)0; m_rgbwdFilterAlgo[i] = (CONV_BWD_FILTER_ALGO)0; m_rgbwdDataAlgo[i] = (CONV_BWD_DATA_ALGO)0; // default algorithms don't require workspace. m_rglWorkspaceFwdSizes[i] = 0; m_rglWorkspaceBwdFilterSizes[i] = 0; m_rglWorkspaceBwdDataSizes[i] = 0; } for (int g = 0; g < m_nGroup * CUDNN_STREAMS_PER_GROUP; g++) { m_rghStream[g] = m_cuda.CreateStream(); m_rghCudnn[g] = m_cuda.CreateCuDNN(m_rghStream[g]); m_rglWorkspaceFwdOffsets[g] = 0; m_rglWorkspaceBwdFilterOffsets[g] = 0; m_rglWorkspaceBwdDataOffsets[g] = 0; } // Set the indexing parameters. m_nBiasOffset = m_nNumOutput / m_nGroup; // Create filter descriptor. Size szKernel = size_at(m_blobKernelShape); m_hFilterDesc = m_cuda.CreateFilterDesc(); m_cuda.SetFilterDesc(m_hFilterDesc, m_nNumOutput / m_nGroup, m_nChannels / m_nGroup, szKernel.Height, szKernel.Width, m_bUseHalfSize); // Create tensor descriptor(s) for data and corresponding convolution(s). for (int i = 0; i < colBottom.Count; i++) { m_rghBottomDesc.Add(m_cuda.CreateTensorDesc()); m_rghTopDesc.Add(m_cuda.CreateTensorDesc()); m_rghConvDesc.Add(m_cuda.CreateConvolutionDesc()); } // Tensor descriptor for bias. if (m_bBiasTerm) { m_hBiasDesc = m_cuda.CreateTensorDesc(); } }
/// <summary> /// Run the Backward computation using either the Engine.CAFFE or Engine.CUDNN mode as specified in the LayerParameter. /// </summary> /// <param name="colTop">top output Blob vector (length 1).</param> /// <param name="rgbPropagateDown">see Layer::Backward</param> /// <param name="colBottom">bottom input Blob vector (length 1).</param> protected override void backward(BlobCollection <T> colTop, List <bool> rgbPropagateDown, BlobCollection <T> colBottom) { if (!m_param.convolution_param.useCudnn(m_nNumSpatialAxes)) { backward_cuda(colTop, rgbPropagateDown, colBottom); } else { backward_cudnn(colTop, rgbPropagateDown, colBottom); } }
/// <summary> /// Setup the layer. /// </summary> /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param> /// <param name="colTop">Specifies the collection of top (output) Blobs.</param> public override void LayerSetUp(BlobCollection <T> colBottom, BlobCollection <T> colTop) { ScaleParameter p = m_param.scale_param; if (colBottom.Count == 1 && blobs.Count > 0) { m_log.WriteLine("Skipping parameter initialization."); } else if (colBottom.Count == 1) { // scale is a learned parameter; initialize it. m_nAxis = colBottom[0].CanonicalAxisIndex(p.axis); int nNumAxes = p.num_axes; m_log.CHECK_GE(nNumAxes, -1, "num_axes must be non-negative, or -1 to extend to the end of bottom[0]."); if (nNumAxes >= 0) { m_log.CHECK_GE(colBottom[0].num_axes, m_nAxis + nNumAxes, "scale blob's shape extends past bottom[0]'s shape when applied starting with bottom[0] axis = " + m_nAxis.ToString()); } m_colBlobs = new BlobCollection <T>(); List <int> rgShape = new List <int>(); int nStart = m_nAxis; int nEnd = (nNumAxes == -1) ? colBottom[0].shape().Count : nStart + nNumAxes; for (int i = nStart; i < nEnd; i++) { rgShape.Add(colBottom[0].shape(i)); } Blob <T> blobScale = new Blob <T>(m_cuda, m_log); blobScale.Name = m_param.name + " scale"; blobScale.type = BLOB_TYPE.INTERNAL; if (!shareParameter(blobScale, rgShape)) { blobScale.Reshape(rgShape); FillerParameter fp = p.filler; // Default to unit (1) filler for identity operation. if (fp == null) { fp = new FillerParameter("constant", 1.0); } Filler <T> filler = Filler <T> .Create(m_cuda, m_log, fp); filler.Fill(blobScale); } m_colBlobs.Add(blobScale); } if (p.bias_term) { LayerParameter pb = new LayerParameter(LayerParameter.LayerType.BIAS); pb.bias_param.axis = p.axis; pb.bias_param.num_axes = (colBottom.Count > 1) ? colBottom[1].num_axes : p.num_axes; pb.bias_param.filler = p.bias_filler; m_colBiasBottomVec = new BlobCollection <T>(); m_colBiasBottomVec.Add(colBottom[0]); m_biasLayer = new BiasLayer <T>(m_cuda, m_log, pb); m_biasLayer.Setup(m_colBiasBottomVec, colTop); shareLayerBlobs(m_biasLayer); m_nBiasParamId = m_colBlobs.Count; m_colBlobs.Add(m_biasLayer.blobs[0]); m_rgbBiasPropagateDown = Utility.Create <bool>(1, false); } m_rgbParamPropagateDown = new DictionaryMap <bool>(m_colBlobs.Count(), true); }
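A brief shape sketch of the scale blob computation above (assumed values, illustration only): with bottom[0] of shape (N, C, H, W), axis = 1 and num_axes = 1, the loop collects a single axis, so the learned scale holds one factor per channel:

// Hypothetical bottom[0] shape (N, C, H, W) = (32, 64, 28, 28), p.axis = 1, p.num_axes = 1.
// nStart = 1, nEnd = 1 + 1 = 2  ->  rgShape = { 64 }
// With p.num_axes = -1 instead, nEnd = 4  ->  rgShape = { 64, 28, 28 }.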
/// <summary> /// Setup the layer. /// </summary> /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param> /// <param name="colTop">Specifies the collection of top (output) Blobs.</param> public override void LayerSetUp(BlobCollection <T> colBottom, BlobCollection <T> colTop) { base.LayerSetUp(colBottom, colTop); }
/// <summary> /// Forward computation. /// </summary> /// <param name="colBottom">bottom input blob vector (length 1) /// -# @f$ (N \times C \times H \times W) @f$ the inputs.</param> /// <param name="colTop">top output blob vector (length 1) /// -# @f$ (N \times CHW \times 1 \times 1) @f$ the outputs -- i.e., the (virtually) copied, flattened inputs /// </param> protected override void forward(BlobCollection <T> colBottom, BlobCollection <T> colTop) { m_cuda.copy(colTop[0].count(), colBottom[0].gpu_data, colTop[0].mutable_gpu_data); // colTop[0].ShareData(colBottom[0]); }
/// <summary> /// Setup the layer for use with both Engine.CAFFE and Engine.CUDNN modes. /// </summary> /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param> /// <param name="colTop">Specifies the collection of top (output) Blobs.</param> public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop) { UnPoolingParameter p = m_param.unpooling_param; if (p.global_pooling) { m_log.CHECK(!(p.kernel_size.Count > 0 || p.kernel_h.HasValue || p.kernel_w.HasValue), "With global pooling = true, Filter size cannot be specified."); } else { m_log.CHECK(!(p.kernel_size.Count > 0) != !(p.kernel_h.HasValue && p.kernel_w.HasValue), "Filter size is kernel_size OR kernel_h and kernel_w; not both."); m_log.CHECK(p.kernel_size.Count > 0 || (p.kernel_h.HasValue && p.kernel_w.HasValue), "For non-square filters, both kernel_h and kernel_w are required."); } m_log.CHECK((!(p.pad.Count > 0) && p.pad_h.HasValue && p.pad_w.HasValue) || (!p.pad_h.HasValue && !p.pad_w.HasValue), "Pad is pad OR pad_h and pad_w are required; not both."); m_log.CHECK((!(p.stride.Count > 0) && p.stride_h.HasValue && p.stride_w.HasValue) || (!p.stride_h.HasValue && !p.stride_w.HasValue), "Stride is stride OR stride_h and stride_w are required; not both."); m_bGlobalPooling = p.global_pooling; //---- Kernel Size ---- if (m_bGlobalPooling) { m_nKernelH = colBottom[0].height; m_nKernelW = colBottom[0].width; } else { if (p.kernel_size.Count > 0) { m_nKernelH = (int)p.kernel_size[0]; m_nKernelW = (int)p.kernel_size[0]; } else { m_nKernelH = (int)p.kernel_h.Value; m_nKernelW = (int)p.kernel_w.Value; } } m_log.CHECK_GT(m_nKernelH, 0, "Filter dimensions cannot be zero."); m_log.CHECK_GT(m_nKernelW, 0, "Filter dimensions cannot be zero."); //---- Pad ---- if (p.pad.Count > 0) { m_nPadH = (int)p.pad[0]; m_nPadW = (int)p.pad[0]; } else { m_nPadH = (p.pad_h.HasValue) ? (int)p.pad_h.Value : 0; m_nPadW = (p.pad_w.HasValue) ? (int)p.pad_w.Value : 0; } //---- Stride ---- if (p.stride.Count > 0) { m_nStrideH = (int)p.stride[0]; m_nStrideW = (int)p.stride[0]; } else { m_nStrideH = (p.stride_h.HasValue) ? (int)p.stride_h.Value : 1; m_nStrideW = (p.stride_w.HasValue) ? (int)p.stride_w.Value : 1; } if (m_bGlobalPooling) m_log.CHECK(m_nPadH == 0 && m_nPadW == 0 && m_nStrideH == 1 && m_nStrideW == 1, "With global pooling = true, only pad = 0 and stride = 1 allowed."); if (m_nPadH != 0 || m_nPadW != 0) { m_log.CHECK(m_param.unpooling_param.pool == PoolingParameter.PoolingMethod.AVE || m_param.unpooling_param.pool == PoolingParameter.PoolingMethod.MAX, "Padding implemented for AVE and MAX pooling only."); m_log.CHECK_LT(m_nPadH, m_nKernelH, "The pad_h must be less than the kernel_h."); m_log.CHECK_LT(m_nPadW, m_nKernelW, "The pad_w must be less than the kernel_w."); } }
/// <summary> /// Computes the infogain loss error gradient w.r.t. the predictions. /// </summary> /// <remarks> /// Gradients cannot be computed with respect to the label inputs (bottom[1]), /// so this method ignores bottom[1] and requires !propagate_down[1], crashing /// if propagate_down[1] == true. /// </remarks> /// <param name="colTop">top output blob vector (length 1), providing the error gradient with /// respect to the outputs. /// -# @f$ (1 \times 1 \times 1 \times 1) @f$ /// This blob's diff will simply contain the loss_weight * @f$ \lambda @f$ as /// @f$ \lambda @f$ is the coefficient of this layer's output /// @f$ \ell_i @f$ in the overall Net loss. /// @f$ E = \lambda_i \ell_i + \mbox{other loss terms} @f$; hence @f$ /// \frac{\partial E}{\partial \ell_i} = \lambda_i @f$ /// (*Assuming that this top blob is not used as a bottom (input) by any /// other layer of the Net.) /// </param> /// <param name="rgbPropagateDown">see Layer::Backward. propagate_down[1] must be false as /// we can't compute gradients with respect to the labels (similarly for propagate_down[2] and /// the infogain matrix, if provided as bottom[2]).</param> /// <param name="colBottom">bottom input blob vector (length 2-3) /// -# @f$ (N \times C \times H \times W) @f$ /// the predictions @f$ \hat{p} @f$; backward computes diff /// @f$ /// \frac{\partial E}{\partial \hat{p}} /// @f$ /// -# @f$ (N \times 1 \times 1 \times 1) @f$ /// the labels -- ignored as we can't compute their error gradients. /// </param> protected override void backward(BlobCollection <T> colTop, List <bool> rgbPropagateDown, BlobCollection <T> colBottom) { if (rgbPropagateDown[1]) { m_log.FAIL(type.ToString() + " Layer cannot backpropagate to label inputs."); } if (rgbPropagateDown[0]) { int nNum = colBottom[0].num; int nDim = colBottom[0].count() / nNum; double dfScale = -1 * convertD(colTop[0].GetDiff(0)) / nNum; colBottom[0].SetDiff(0); if (typeof(T) == typeof(double)) { double[] rgBottomData = (double[])Convert.ChangeType(colBottom[0].update_cpu_data(), typeof(double[])); double[] rgBottomLabel = (double[])Convert.ChangeType(colBottom[1].update_cpu_data(), typeof(double[])); double[] rgBottomDiff = (double[])Convert.ChangeType(colBottom[0].mutable_cpu_diff, typeof(double[])); for (int i = 0; i < nNum; i++) { int nLabel = (int)rgBottomLabel[i]; double dfProb = Math.Max(rgBottomData[i * nDim + nLabel], kLOG_THRESHOLD); rgBottomDiff[i * nDim + nLabel] = dfScale / dfProb; } colBottom[0].mutable_cpu_diff = (T[])Convert.ChangeType(rgBottomDiff, typeof(T[])); } else { float[] rgBottomData = (float[])Convert.ChangeType(colBottom[0].update_cpu_data(), typeof(float[])); float[] rgBottomLabel = (float[])Convert.ChangeType(colBottom[1].update_cpu_data(), typeof(float[])); float[] rgBottomDiff = (float[])Convert.ChangeType(colBottom[0].mutable_cpu_diff, typeof(float[])); for (int i = 0; i < nNum; i++) { int nLabel = (int)rgBottomLabel[i]; double dfProb = Math.Max(rgBottomData[i * nDim + nLabel], kLOG_THRESHOLD); rgBottomDiff[i * nDim + nLabel] = (float)(dfScale / dfProb); } colBottom[0].mutable_cpu_diff = (T[])Convert.ChangeType(rgBottomDiff, typeof(T[])); } } }
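As a quick numeric check of the gradient above (assumed values): with loss weight @f$ \lambda = 1 @f$, @f$ N = 4 @f$ and a predicted probability of 0.25 at the true label, dfScale = @f$ -1/4 @f$ and the diff written at the label index is @f$ -0.25 / 0.25 = -1 @f$; every other index keeps the zero written by SetDiff(0), and kLOG_THRESHOLD merely guards the division against a vanishing probability.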
/// <summary> /// Computes the error gradient w.r.t. the input. /// </summary> /// <param name="colTop">top output Blob vector (length 1).</param> /// <param name="rgbPropagateDown">see Layer::Backward</param> /// <param name="colBottom">bottom input Blob vector (length 1).</param> protected override void backward(BlobCollection <T> colTop, List <bool> rgbPropagateDown, BlobCollection <T> colBottom) { int nItemCount = colTop[0].count(m_nAxis); m_log.CHECK_EQ(nItemCount, m_colBuckets.Count, "The count at the top[axis] is incorrect!"); int nCount1 = colTop[0].count(0, m_nAxis); int nCount2 = colBottom[0].count(0, m_nAxis); m_log.CHECK_EQ(nCount1, nCount2, "The top and bottom have incompatible sizes."); // Convert top one-hot vectors to softmax indexes. float[] rgBottomDiff = convertF(colBottom[0].mutable_cpu_diff); float[] rgTopData = convertF(colTop[0].mutable_cpu_data); float[] rgTopDiff = convertF(colTop[0].mutable_cpu_diff); for (int i = 0; i < nCount1; i++) { int nItemIdx = i * nItemCount; float fDiff = 0; float fDiffSum = 0; for (int j = 0; j < nItemCount; j++) { fDiff = rgTopDiff[nItemIdx + j]; if (rgTopData[nItemIdx + j] == 0) { fDiff *= -1; } fDiffSum += fDiff; } rgBottomDiff[i] = fDiffSum / nItemCount; } colBottom[0].mutable_cpu_diff = convert(rgBottomDiff); }
/// <summary> /// Forward computation. /// </summary> /// <param name="colBottom">bottom input blob vector (length 1) /// -# @f$ (N \times C \times H \times W) @f$ the inputs.</param> /// <param name="colTop">top output blob vector (length 1) /// -# @f$ (N \times CHW \times 1 \times 1) @f$ the outputs -- i.e., the (virtually) copied, flattened inputs /// </param> protected override void forward(BlobCollection <T> colBottom, BlobCollection <T> colTop) { colTop[0].ShareData(colBottom[0]); }
/// <summary> /// Computes the error gradient w.r.t. the concatenated inputs. /// </summary> /// <param name="colTop">top output Blob vector (length 1), /// providing the error gradient with respect to the outputs.</param> /// <param name="rgbPropagateDown">see Layer::Backward</param> /// <param name="colBottom">input Blob vector (length @f$ k @f$), into which the top error /// gradient is (virtually) copied.</param> protected override void backward(BlobCollection <T> colTop, List <bool> rgbPropagateDown, BlobCollection <T> colBottom) { colBottom[0].ShareDiff(colTop[0]); }
/// <summary> /// Setup the DataLayer by starting up the pre-fetching. /// </summary> /// <param name="colBottom">Not used.</param> /// <param name="colTop">Specifies the collection of top (output) Blobs.</param> protected override void DataLayerSetUp(BlobCollection <T> colBottom, BlobCollection <T> colTop) { int nBatchSize = (int)m_param.data_param.batch_size; m_cursor = m_db.NewCursor(m_transformer); foreach (BatchSampler sampler in m_param.annotated_data_param.batch_sampler) { m_rgBatchSamplers.Add(sampler); } m_strLabelMapFile = m_param.annotated_data_param.label_map_file; // Make sure dimension is consistent within batch. if (m_param.transform_param.resize_param != null && m_param.transform_param.resize_param.Active) { if (m_param.transform_param.resize_param.resize_mode == ResizeParameter.ResizeMode.FIT_SMALL_SIZE) { m_log.CHECK_EQ(nBatchSize, 1, "The FIT_SMALL_SIZE resize mode only supports a batch size of 1."); } } // Read a data point, and use it to initialize the top blob. SimpleDatum anno_datum = m_cursor.GetValue(null, true); // Use data_transformer to infer the expected blob shape from anno_datum. List <int> rgTopShape = m_transformer.InferBlobShape(anno_datum); // Reshape top[0] and prefetch_data according to the batch_size. rgTopShape[0] = nBatchSize; colTop[0].Reshape(rgTopShape); for (int i = 0; i < m_rgPrefetch.Length; i++) { m_rgPrefetch[i].Data.Reshape(rgTopShape); } m_log.WriteLine("Output data size: " + colTop[0].ToSizeString()); // Label if (m_bOutputLabels) { bool bHasAnnoType = (anno_datum.annotation_type != SimpleDatum.ANNOTATION_TYPE.NONE) || (m_param.annotated_data_param.anno_type != SimpleDatum.ANNOTATION_TYPE.NONE); List <int> rgLabelShape = Utility.Create <int>(4, 1); if (bHasAnnoType) { m_AnnoType = anno_datum.annotation_type; // If anno_type is provided in AnnotatedDataParameter, replace the type stored // in each individual AnnotatedDatum. if (m_param.annotated_data_param.anno_type != SimpleDatum.ANNOTATION_TYPE.NONE) { m_log.WriteLine("WARNING: Annotation type stored in AnnotatedDatum is shadowed."); m_AnnoType = m_param.annotated_data_param.anno_type; } // Infer the label shape from anno_datum.AnnotationGroup(). int nNumBboxes = 0; // Since the number of bboxes can be different for each image, // we store the bbox information in a specific format. Specifically: // All bboxes are stored in one spatial plane (num and channels are 1) // and each row contains one and only one box in the following format: // [item_id, group_label, instance_id, xmin, ymin, xmax, ymax, diff] // Note: Refer to caffe.proto for details about group_label and // instance_id. if (m_AnnoType == SimpleDatum.ANNOTATION_TYPE.BBOX) { if (anno_datum.annotation_group != null) { for (int g = 0; g < anno_datum.annotation_group.Count; g++) { nNumBboxes += anno_datum.annotation_group[g].annotations.Count; } } rgLabelShape[0] = 1; rgLabelShape[1] = 1; // BasePrefetchingDataLayer.LayerSetup() requires calling // cpu_data and gpu_data to keep the prefetch thread consistent, thus // we must make sure there is at least one bbox. rgLabelShape[2] = Math.Max(nNumBboxes, 1); rgLabelShape[3] = 8; } else { m_log.FAIL("Unknown annotation type."); } } else { rgLabelShape[0] = nBatchSize; } colTop[1].Reshape(rgLabelShape); for (int i = 0; i < m_rgPrefetch.Length; i++) { m_rgPrefetch[i].Label.Reshape(rgLabelShape); } } }
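For reference, a single row of the bbox label blob described above decodes as follows (a sketch with hypothetical values; the field order is the one documented in the comment):

// One row of colTop[1] (shape 1 x 1 x num_bboxes x 8), hypothetical values:
// [item_id, group_label, instance_id, xmin, ymin, xmax, ymax, diff]
float[] rgRow = new float[] { 0, 15, 0, 0.10f, 0.20f, 0.55f, 0.80f, 0 };
int nItemId = (int)rgRow[0];        // index of the image within the batch
int nGroupLabel = (int)rgRow[1];    // class id (see caffe.proto for group_label)
int nInstanceId = (int)rgRow[2];    // instance within the group
float fXmin = rgRow[3];             // normalized coordinates in [0, 1]
bool bDifficult = rgRow[7] != 0;    // 'diff' flag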
/// <summary> /// Backward passthrough /// </summary> /// <param name="colTop">Specifies the collection of top (output) Blobs.</param> /// <param name="rgbPropagateDown">Specifies whether or not to propagate each blob back.</param> /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param> protected override void backward(BlobCollection <T> colTop, List <bool> rgbPropagateDown, BlobCollection <T> colBottom) { // Copy the diff into the batch storage. m_cuda.copy(colTop[0].count(), colTop[0].gpu_diff, m_rgBatchData[m_nLastBatchIdx].mutable_gpu_diff); m_cuda.copy(colTop[0].count(), colTop[0].gpu_diff, colBottom[0].mutable_gpu_diff); }
/// <summary> /// Setup the layer. /// </summary> /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param> /// <param name="colTop">Specifies the collection of top (output) Blobs.</param> public override void LayerSetUp(BlobCollection <T> colBottom, BlobCollection <T> colTop) { PriorBoxParameter p = m_param.prior_box_param; m_log.CHECK_GT(p.min_size.Count, 0, "Must provide at least one min_size!"); for (int i = 0; i < p.min_size.Count; i++) { float fMin = p.min_size[i]; m_log.CHECK_GT(fMin, 0, "min_size must be greater than zero."); m_rgfMinSizes.Add(fMin); } m_rgfAspectRatios = PriorBoxParameter.GetAspectRatios(p); m_bFlip = p.flip; m_nNumPriors = m_rgfAspectRatios.Count * m_rgfMinSizes.Count; if (p.max_size.Count > 0) { m_log.CHECK_EQ(p.min_size.Count, p.max_size.Count, "The max_size count must equal the min_size count!"); for (int i = 0; i < p.max_size.Count; i++) { float fMax = p.max_size[i]; m_log.CHECK_GT(fMax, m_rgfMinSizes[i], "The max_size must be greater than the min_size."); m_rgfMaxSizes.Add(fMax); m_nNumPriors++; } } m_bClip = p.clip; if (p.variance.Count > 1) { // Must provide exactly 4 variance values. m_log.CHECK_EQ(p.variance.Count, 4, "Exactly 4 variance values must be provided."); for (int i = 0; i < p.variance.Count; i++) { float fVar = p.variance[i]; m_log.CHECK_GT(fVar, 0, "The variance values must be greater than zero."); m_rgfVariance.Add(fVar); } } else if (p.variance.Count == 1) { float fVar = p.variance[0]; m_log.CHECK_GT(fVar, 0, "The variance value must be greater than zero."); m_rgfVariance.Add(fVar); } else { // Set default to 0.1. m_rgfVariance.Add(0.1f); } if (p.img_h.HasValue || p.img_w.HasValue) { m_log.CHECK(!p.img_size.HasValue, "Either img_size or img_h/img_w should be specified; but not both."); m_nImgH = (int)p.img_h.Value; m_log.CHECK_GT(m_nImgH, 0, "The img_h should be greater than 0."); m_nImgW = (int)p.img_w.Value; m_log.CHECK_GT(m_nImgW, 0, "The img_w should be greater than 0."); } else if (p.img_size.HasValue) { int nImgSize = (int)p.img_size.Value; m_log.CHECK_GT(nImgSize, 0, "The img_size should be greater than 0."); m_nImgH = nImgSize; m_nImgW = nImgSize; } else { m_nImgH = 0; m_nImgW = 0; } if (p.step_h.HasValue || p.step_w.HasValue) { m_log.CHECK(!p.step.HasValue, "Either step or step_h/step_w should be specified; but not both."); m_fStepH = p.step_h.Value; m_log.CHECK_GT(m_fStepH, 0, "The step_h should be greater than 0."); m_fStepW = p.step_w.Value; m_log.CHECK_GT(m_fStepW, 0, "The step_w should be greater than 0."); } else if (p.step.HasValue) { float fStep = p.step.Value; m_log.CHECK_GT(fStep, 0, "The step should be greater than 0."); m_fStepH = fStep; m_fStepW = fStep; } else { m_fStepH = 0; m_fStepW = 0; } m_fOffset = p.offset; }
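To make the prior count concrete (an SSD-style configuration assumed for illustration): with min_size = { 30 }, max_size = { 60 } and one aspect_ratio of 2 with flip = true, GetAspectRatios returns { 1, 2, 0.5 }:

// m_nNumPriors = aspect ratios x min sizes = 3 * 1 = 3
// plus one sqrt(min_size * max_size) box per max_size entry -> 3 + 1 = 4
// i.e. four prior boxes are generated per feature-map cell.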
/// <summary> /// Setup the layer. /// </summary> /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param> /// <param name="colTop">Specifies the collection of top (output) Blobs.</param> public override void LayerSetUp(BlobCollection <T> colBottom, BlobCollection <T> colTop) { // Configure the kernel size, padding, stride and inputs. ConvolutionParameter p = m_param.convolution_param; m_bForceNDim2col = p.force_nd_im2col; m_nChannelAxis = colBottom[0].CanonicalAxisIndex(p.axis); int nFirstSpatialAxis = m_nChannelAxis + 1; int nNumAxes = colBottom[0].num_axes; m_nNumSpatialAxes = nNumAxes - nFirstSpatialAxis; m_log.CHECK_GE(m_nNumSpatialAxes, 0, "The number of spatial axes must be zero or greater."); List <int> rgBottomDimBlobShape = new List <int>() { m_nNumSpatialAxes + 1 }; List <int> rgSpatialDimBlobShape = new List <int>() { Math.Max(m_nNumSpatialAxes, 1) }; // Setup filter kernel dimensions (blobKernelShape) m_blobKernelShape.Reshape(rgSpatialDimBlobShape); T[] rgKernelShape = m_blobKernelShape.mutable_cpu_data; if (p.kernel_h.HasValue || p.kernel_w.HasValue) { m_log.CHECK_EQ(m_nNumSpatialAxes, 2, "kernel_h & kernel_w can only be used in 2D convolution."); m_log.CHECK_EQ(0, p.kernel_size.Count, "Either kernel_size or kernel_h/w should be specified; not both."); rgKernelShape[0] = (T)Convert.ChangeType(p.kernel_h.Value, typeof(T)); rgKernelShape[1] = (T)Convert.ChangeType(p.kernel_w.Value, typeof(T)); } else { int nNumKernelDims = p.kernel_size.Count; m_log.CHECK(nNumKernelDims == 1 || nNumKernelDims == m_nNumSpatialAxes, "Kernel size must be specified once, or once per spatial dimension (kernel_size specified " + nNumKernelDims.ToString() + " times; " + m_nNumSpatialAxes.ToString() + " spatial dims)."); for (int i = 0; i < m_nNumSpatialAxes; i++) { int nIdx = (nNumKernelDims == 1) ? 0 : i; rgKernelShape[i] = (T)Convert.ChangeType(p.kernel_size[nIdx], typeof(T)); } } for (int i = 0; i < m_nNumSpatialAxes; i++) { m_log.CHECK_GT((int)Convert.ChangeType(rgKernelShape[i], typeof(int)), 0, "Filter dimension must be non-zero."); } m_blobKernelShape.mutable_cpu_data = rgKernelShape; // Setup stride dimensions (blobStride) m_blobStride.Reshape(rgSpatialDimBlobShape); T[] rgStrideData = m_blobStride.mutable_cpu_data; if (p.stride_h.HasValue || p.stride_w.HasValue) { m_log.CHECK_EQ(m_nNumSpatialAxes, 2, "stride_h & stride_w can only be used in 2D convolution."); m_log.CHECK_EQ(0, p.stride.Count, "Either stride or stride_h/w should be specified; not both."); rgStrideData[0] = (T)Convert.ChangeType(p.stride_h.Value, typeof(T)); rgStrideData[1] = (T)Convert.ChangeType(p.stride_w.Value, typeof(T)); } else { int nNumStrideDims = p.stride.Count; m_log.CHECK(nNumStrideDims == 0 || nNumStrideDims == 1 || nNumStrideDims == m_nNumSpatialAxes, "Stride size must be specified once, or once per spatial dimension (stride specified " + nNumStrideDims.ToString() + " times; " + m_nNumSpatialAxes.ToString() + " spatial dims)."); int nDefaultStride = 1; for (int i = 0; i < m_nNumSpatialAxes; i++) { if (nNumStrideDims == 0) { rgStrideData[i] = (T)Convert.ChangeType(nDefaultStride, typeof(T)); } else { int nIdx = (nNumStrideDims == 1) ? 
0 : i; rgStrideData[i] = (T)Convert.ChangeType(p.stride[nIdx], typeof(T)); } m_log.CHECK_GT((int)Convert.ChangeType(rgStrideData[i], typeof(int)), 0, "Stride dimension must be non-zero."); } } m_blobStride.mutable_cpu_data = rgStrideData; // Setup pad dimensions (blobPad) m_blobPad.Reshape(rgSpatialDimBlobShape); T[] rgPadData = m_blobPad.mutable_cpu_data; if (p.pad_h.HasValue || p.pad_w.HasValue) { m_log.CHECK_EQ(m_nNumSpatialAxes, 2, "pad_h & pad_w can only be used in 2D convolution."); m_log.CHECK_EQ(0, p.pad.Count, "Either pad or pad_h/w should be specified; not both."); rgPadData[0] = (T)Convert.ChangeType(p.pad_h.Value, typeof(T)); rgPadData[1] = (T)Convert.ChangeType(p.pad_w.Value, typeof(T)); } else { int nNumPadDims = p.pad.Count; m_log.CHECK(nNumPadDims == 0 || nNumPadDims == 1 || nNumPadDims == m_nNumSpatialAxes, "Pad size must be specified once, or once per spatial dimension (pad specified " + nNumPadDims.ToString() + " times; " + m_nNumSpatialAxes.ToString() + " spatial dims)."); int nDefaultPad = 0; for (int i = 0; i < m_nNumSpatialAxes; i++) { if (nNumPadDims == 0) { rgPadData[i] = (T)Convert.ChangeType(nDefaultPad, typeof(T)); } else { int nIdx = (nNumPadDims == 1) ? 0 : i; rgPadData[i] = (T)Convert.ChangeType(p.pad[nIdx], typeof(T)); } } } m_blobPad.mutable_cpu_data = rgPadData; // Setup dilation dimensions (blobDilation) m_blobDilation.Reshape(rgSpatialDimBlobShape); T[] rgDilationData = m_blobDilation.mutable_cpu_data; int nNumDilationDims = p.dilation.Count; m_log.CHECK(nNumDilationDims == 0 || nNumDilationDims == 1 || nNumDilationDims == m_nNumSpatialAxes, "Dilation size must be specified once, or once per spatial dimension (dilation specified " + nNumDilationDims.ToString() + " times; " + m_nNumSpatialAxes.ToString() + " spatial dims)."); int nDefaultDilation = 1; for (int i = 0; i < m_nNumSpatialAxes; i++) { if (nNumDilationDims == 0) { rgDilationData[i] = (T)Convert.ChangeType(nDefaultDilation, typeof(T)); } else { int nIdx = (nNumDilationDims == 1) ? 0 : i; rgDilationData[i] = (T)Convert.ChangeType(p.dilation[nIdx], typeof(T)); } } m_blobDilation.mutable_cpu_data = rgDilationData; // Special case: im2col is the identity for 1x1 convolution with stride 1 // and no padding, so flag for skipping the buffer and transformation. m_bIs1x1 = true; for (int i = 0; i < m_nNumSpatialAxes; i++) { if (!(val_at(rgKernelShape, i) == 1 && val_at(rgStrideData, i) == 1 && val_at(rgPadData, i) == 0)) { m_bIs1x1 = false; break; } } // Configure output channels and groups. m_nChannels = colBottom[0].shape(m_nChannelAxis); m_nNumOutput = (int)p.num_output; m_log.CHECK_GT(m_nNumOutput, 0, "Output count must be greater than zero."); m_nGroup = (int)p.group; m_log.CHECK_EQ(m_nChannels % m_nGroup, 0, "The channels must span evenly across the groups."); m_log.CHECK_EQ(m_nNumOutput % m_nGroup, 0, "The number of outputs must be a multiple of the group count."); if (reverse_dimensions()) { m_nConvOutChannels = m_nChannels; m_nConvInChannels = m_nNumOutput; } else { m_nConvOutChannels = m_nNumOutput; m_nConvInChannels = m_nChannels; } // Handle the parameters: weights and biases // - blobs[0] holds the filter weights. 
// - blobs[1] holds the biases (optional) List <int> rgWeightShape = new List <int>(); rgWeightShape.Add(m_nConvOutChannels); rgWeightShape.Add(m_nConvInChannels / m_nGroup); for (int i = 0; i < m_nNumSpatialAxes; i++) { rgWeightShape.Add(val_at(rgKernelShape, i)); } m_bBiasTerm = p.bias_term; List <int> rgBiasShape = new List <int>() { m_nNumOutput }; if (m_colBlobs.Count > 0) { m_log.CHECK_EQ(1 + ((m_bBiasTerm) ? 1 : 0), m_colBlobs.Count, "Incorrect number of weight blobs."); if (!Utility.Compare <int>(rgWeightShape, m_colBlobs[0].shape())) { Blob <T> b = new Blob <T>(m_cuda, m_log, rgWeightShape); m_log.FAIL("Incorrect weight shape: expected shape " + b.shape_string + "; instead, shape was " + m_colBlobs[0].shape_string); } if (m_bBiasTerm && !Utility.Compare <int>(rgBiasShape, m_colBlobs[1].shape())) { Blob <T> b = new Blob <T>(m_cuda, m_log, rgBiasShape); m_log.FAIL("Incorrect bias shape: expected shape " + b.shape_string + "; instead, shape was " + m_colBlobs[1].shape_string); } m_log.WriteLine("Skipping parameter initialization."); } else { m_colBlobs.Clear(); // Initialize and fill the weights: // output channels x input channels per-group x kernel height x kernel width. Blob <T> blobWts = new Blob <T>(m_cuda, m_log); blobWts.Name = colTop[0].Name + " weights"; if (!shareParameter(blobWts, rgWeightShape)) { blobWts.Reshape(rgWeightShape); Filler <T> wtFiller = Filler <T> .Create(m_cuda, m_log, p.weight_filler); wtFiller.Fill(blobWts); } m_colBlobs.Add(blobWts); // If necessary, initialize and fill the biases: if (m_bBiasTerm) { Blob <T> blobBias = new Blob <T>(m_cuda, m_log); blobBias.Name = colTop[0].Name + " bias"; if (!shareParameter(blobBias, rgBiasShape)) { blobBias.Reshape(rgBiasShape); Filler <T> biasFiller = Filler <T> .Create(m_cuda, m_log, p.bias_filler); biasFiller.Fill(blobBias); } m_colBlobs.Add(blobBias); } } m_nKernelDim = m_colBlobs[0].count(1); m_nWeightOffset = m_nConvOutChannels * m_nKernelDim / m_nGroup; // Propagate gradients to the parameters (as directed by backward pass). m_rgbParamPropagateDown = new DictionaryMap <bool>(m_colBlobs.Count, true); }
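A shape sketch for the weight blob set up above (assumed values): with num_output = 64, channels = 32, group = 2 and a 3x3 kernel in the non-reversed case:

// rgWeightShape = { m_nConvOutChannels, m_nConvInChannels / m_nGroup, kH, kW }
//               = { 64, 32 / 2, 3, 3 } = { 64, 16, 3, 3 }
// m_nKernelDim   = m_colBlobs[0].count(1) = 16 * 3 * 3 = 144
// m_nWeightOffset = 64 * 144 / 2 = 4608 weights per group
// Each output channel only sees channels / group = 16 of the input channels.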
/// <summary> /// Setup the layer. /// </summary> /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param> /// <param name="colTop">Specifies the collection of top (output) Blobs.</param> public override void LayerSetUp(BlobCollection <T> colBottom, BlobCollection <T> colTop) { m_rgOneHotVector = new float[m_param.onehot_param.num_output]; m_colBuckets = new BucketCollection(m_param.onehot_param.min, m_param.onehot_param.max, (int)m_param.onehot_param.num_output); m_nAxis = colBottom[0].CanonicalAxisIndex(m_param.onehot_param.axis); }
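A minimal sketch of the bucketized one-hot encoding this layer performs (plain C# for illustration; it does not use the BucketCollection API, and the range values are assumptions):

// Map a scalar in [fMin, fMax) onto nNumOutput equal-width buckets, then
// emit a one-hot vector with a 1 at the bucket index.
float fMin = -1.0f, fMax = 1.0f, fVal = 0.3f;
int nNumOutput = 4;
int nIdx = (int)((fVal - fMin) / (fMax - fMin) * nNumOutput);
nIdx = Math.Min(Math.Max(nIdx, 0), nNumOutput - 1);   // clamp edge values
float[] rgOneHot = new float[nNumOutput];
rgOneHot[nIdx] = 1;                                   // { 0, 0, 1, 0 } for 0.3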
/// <summary> /// Reshape the bottom (input) and top (output) blobs. /// </summary> /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param> /// <param name="colTop">Specifies the collection of top (output) Blobs.</param> public override void Reshape(BlobCollection <T> colBottom, BlobCollection <T> colTop) { int nFirstSpatialAxis = m_nChannelAxis + 1; m_log.CHECK_EQ(colBottom[0].num_axes, nFirstSpatialAxis + m_nNumSpatialAxes, "bottom num_axes may not change."); m_nNum = colBottom[0].count(0, m_nChannelAxis); m_log.CHECK_EQ(colBottom[0].shape(m_nChannelAxis), m_nChannels, "Input size incompatible with convolution kernel."); // TODO: generalize to handle inputs of different shapes. for (int i = 1; i < colBottom.Count; i++) { m_log.CHECK(Utility.Compare <int>(colBottom[0].shape(), colBottom[i].shape()), "Shape mismatch - bottom[0]: '" + colBottom[0].shape_string + "' vs. bottom[" + i.ToString() + "]: '" + colBottom[i].shape_string + "'"); } // Shape the tops. m_rgBottomShape = Utility.Clone <int>(colBottom[0].shape()); compute_output_shape(); List <int> rgTopShape = new List <int>(); for (int i = 0; i < m_nChannelAxis; i++) { rgTopShape.Add(colBottom[0].shape(i)); } rgTopShape.Add(m_nNumOutput); for (int i = 0; i < m_nNumSpatialAxes; i++) { rgTopShape.Add(m_rgOutputShape[i]); } for (int i = 0; i < colTop.Count; i++) { colTop[i].Reshape(rgTopShape); } if (reverse_dimensions()) { m_nConvOutSpatialDim = colBottom[0].count(nFirstSpatialAxis); } else { m_nConvOutSpatialDim = colTop[0].count(nFirstSpatialAxis); } m_nColOffset = m_nKernelDim * m_nConvOutSpatialDim; m_nOutputOffset = m_nConvOutChannels * m_nConvOutSpatialDim / m_nGroup; if (!m_param.convolution_param.useCudnn(m_nNumSpatialAxes) || reverse_dimensions()) { // Setup input dimensions (blobConvInputShape) List <int> rgBottomDimBlobShape = new List <int>() { m_nNumSpatialAxes + 1 }; m_blobConvInputShape.Reshape(rgBottomDimBlobShape); T[] rgConvInputShapeData = m_blobConvInputShape.mutable_cpu_data; for (int i = 0; i < m_nNumSpatialAxes + 1; i++) { if (reverse_dimensions()) { rgConvInputShapeData[i] = (T)Convert.ChangeType(colTop[0].shape(m_nChannelAxis + i), typeof(T)); } else { rgConvInputShapeData[i] = (T)Convert.ChangeType(colBottom[0].shape(m_nChannelAxis + i), typeof(T)); } } m_blobConvInputShape.mutable_cpu_data = rgConvInputShapeData; // The im2col result buffer will only hold one image at a time to avoid // overly large memory usage. In the special case of 1x1 convolution // it goes lazily unused to save memory. m_rgColBufferShape = new List <int>(); m_rgColBufferShape.Add(m_nKernelDim * m_nGroup); for (int i = 0; i < m_nNumSpatialAxes; i++) { if (reverse_dimensions()) { m_rgColBufferShape.Add(input_shape(i + 1)); } else { m_rgColBufferShape.Add(m_rgOutputShape[i]); } } shareLayerBlob(m_blobColBuffer, m_rgColBufferShape); m_blobColBuffer.Reshape(m_rgColBufferShape); } m_nBottomDim = colBottom[0].count(m_nChannelAxis); m_nTopDim = colTop[0].count(m_nChannelAxis); m_nNumKernelsIm2col = m_nConvInChannels * m_nConvOutSpatialDim; m_nNumKernelsCol2im = (reverse_dimensions()) ? 
m_nTopDim : m_nBottomDim; // Set up the all-ones 'bias_multiplier' for adding biases by BLAS m_nOutSpatialDim = colTop[0].count(nFirstSpatialAxis); if (m_bBiasTerm) { if (!m_param.convolution_param.useCudnn(m_nNumSpatialAxes) || reverse_dimensions()) { List <int> rgBiasMultShape = new List <int>() { m_nOutSpatialDim }; shareLayerBlob(m_blobBiasMultiplier, rgBiasMultShape); m_blobBiasMultiplier.Reshape(rgBiasMultShape); m_blobBiasMultiplier.SetData(1.0); } } }
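compute_output_shape() is not shown in this section, but it follows the standard convolution arithmetic, worked here for assumed values:

// Per spatial axis: out = (in + 2 * pad - dilation * (kernel - 1) - 1) / stride + 1
// e.g. in = 224, kernel = 3, pad = 1, stride = 2, dilation = 1:
int nOut = (224 + 2 * 1 - 1 * (3 - 1) - 1) / 2 + 1;   // = 112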
/// <summary> /// Computes the error gradient w.r.t. the inputs. /// </summary> /// <param name="colTop">top output blob vector (length 1), providing the error gradient /// with respect to outputs /// -# @f$ (N \times C \times H \times W) @f$ /// containing error gradients @f$ \frac{\partial E}{\partial y} @f$ with /// respect to computed outputs.</param> /// <param name="rgbPropagateDown">propagate_down see Layer::Backward.</param> /// <param name="colBottom">bottom input blob vector (length 1+) /// -# @f$ (N \times C \times H \times W) @f$ /// the inputs @f$ x @f$; Backward fills their diff with gradients, /// if propagate_down[0] == true.</param> protected override void backward(BlobCollection <T> colTop, List <bool> rgbPropagateDown, BlobCollection <T> colBottom) { if (OnBackward != null) { OnBackward(this, new BackwardArgs <T>(colTop, rgbPropagateDown, colBottom)); return; } for (int i = 0; i < colBottom.Count && i < colTop.Count; i++) { if (rgbPropagateDown[i]) { int nCount = colTop[i].count(); int nCountB = colBottom[i].count(); m_log.CHECK_EQ(nCount, nCountB, "The top and bottom at " + i.ToString() + " must have the same number of items."); long hBottomDiff = colBottom[i].mutable_gpu_diff; long hTopDiff = colTop[i].gpu_diff; m_cuda.copy(nCount, hTopDiff, hBottomDiff); } } }
/// <summary> /// Computes the error gradient w.r.t. the concatenated inputs. /// </summary> /// <param name="colTop">top output Blob vector (length 1), /// providing the error gradient with respect to the outputs.</param> /// <param name="rgbPropagateDown">see Layer::Backward</param> /// <param name="colBottom">input Blob vector (length @f$ k @f$), into which the top error /// gradient is (virtually) copied.</param> protected override void backward(BlobCollection <T> colTop, List <bool> rgbPropagateDown, BlobCollection <T> colBottom) { m_cuda.copy(colBottom[0].count(), colTop[0].gpu_diff, colBottom[0].mutable_gpu_diff); // colBottom[0].ShareDiff(colTop[0]); }
/// <summary> /// Computes the error gradient w.r.t. the reordered input. /// </summary> /// <param name="colTop">top output Blob vector (length 1), /// providing the error gradient with respect to the outputs /// -# @f$ (M \times ...) @f$: /// containing error gradients @f$ \frac{\partial E}{\partial y} @f$ /// with respect to concatenated outputs @f$ y @f$.</param> /// <param name="rgbPropagateDown">see Layer::Backward</param> /// <param name="colBottom">bottom input Blob vector (length 2): /// -# @f$ \frac{\partial E}{\partial y} @f$ is de-indexed (summing where /// required) back to the input @f$ x_1 @f$. /// -# This layer cannot backprop to @f$ x_2 @f$, i.e. propagate_down[1] must be /// false.</param> protected override void backward(BlobCollection <T> colTop, List <bool> rgbPropagateDown, BlobCollection <T> colBottom) { m_log.CHECK(!rgbPropagateDown[1], "Cannot backprop to index."); if (!rgbPropagateDown[0]) { return; } List <KeyValuePair <int, int> > rgMapping = new List <KeyValuePair <int, int> >(); T[] rgData = colBottom[1].update_cpu_data(); if (typeof(T) == typeof(double)) { double[] rgPerm = (double[])Convert.ChangeType(rgData, typeof(double[])); for (int i = 0; i < colBottom[1].count(); i++) { rgMapping.Add(new KeyValuePair <int, int>((int)rgPerm[i], i)); } } else { float[] rgPerm = (float[])Convert.ChangeType(rgData, typeof(float[])); for (int i = 0; i < colBottom[1].count(); i++) { rgMapping.Add(new KeyValuePair <int, int>((int)rgPerm[i], i)); } } rgMapping.Sort(new Comparison <KeyValuePair <int, int> >(sort)); // Each element of the bottom diff is potentially the sum of many top diffs. // However, we'd like each CUDA thread to handle exactly one output. Hence, // we first pre-compute a list of lists of indices that need to be summed for // each output. 'top_indexes' holds the data of this list of lists. The // k'th element of 'begins' points to the location in 'top_indexes' where the // list for the k'th example begins, and the k'th element of 'counts' is the // length of that list. m_blobBegins.SetData(-1); m_blobCounts.SetData(0); T[] rgTopIndexes = m_blobTopIndexes.mutable_cpu_data; T[] rgCounts = m_blobCounts.mutable_cpu_data; T[] rgBegins = m_blobBegins.mutable_cpu_data; if (typeof(T) == typeof(double)) { double[] t_i_data = (double[])Convert.ChangeType(rgTopIndexes, typeof(double[])); double[] c_data = (double[])Convert.ChangeType(rgCounts, typeof(double[])); double[] b_data = (double[])Convert.ChangeType(rgBegins, typeof(double[])); for (int i = 0; i < rgMapping.Count; i++) { t_i_data[i] = rgMapping[i].Value; if (b_data[rgMapping[i].Key] == -1) { b_data[rgMapping[i].Key] = i; } c_data[rgMapping[i].Key] += 1; } } else { float[] t_i_data = (float[])Convert.ChangeType(rgTopIndexes, typeof(float[])); float[] c_data = (float[])Convert.ChangeType(rgCounts, typeof(float[])); float[] b_data = (float[])Convert.ChangeType(rgBegins, typeof(float[])); for (int i = 0; i < rgMapping.Count; i++) { t_i_data[i] = rgMapping[i].Value; if (b_data[rgMapping[i].Key] == -1) { b_data[rgMapping[i].Key] = i; } c_data[rgMapping[i].Key] += 1; } } m_blobTopIndexes.mutable_cpu_data = rgTopIndexes; m_blobCounts.mutable_cpu_data = rgCounts; m_blobBegins.mutable_cpu_data = rgBegins; int nCount = colBottom[0].count(); m_cuda.batchreidx_bwd(nCount, colBottom[0].count() / colBottom[0].shape(0), colTop[0].gpu_diff, m_blobTopIndexes.gpu_data, m_blobBegins.gpu_data, m_blobCounts.gpu_data, colBottom[0].mutable_gpu_diff); }
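A small worked example of the begins/counts/top_indexes bookkeeping above (hypothetical permutation):

// Hypothetical index blob bottom[1] = { 2, 0, 2 } with colBottom[0].shape(0) = 3,
// i.e. rgMapping before sorting = (2,0), (0,1), (2,2), and after sorting by
// bottom index = (0,1), (2,0), (2,2), giving:
// top_indexes = { 1, 0, 2 }   // top rows feeding the bottom rows, grouped
// begins      = { 0, -1, 1 }  // start of each bottom row's list (-1 = no list)
// counts      = { 1, 0, 2 }   // bottom row 2 accumulates two top diffs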
/// @brief Currently, not implemented. protected override void backward(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom) { throw new NotImplementedException("UnPooling does not support the backward operation."); }
/// <summary> /// Computes the error gradient w.r.t. the input. /// </summary> /// <param name="colTop">top output Blob vector (length 1).</param> /// <param name="rgbPropagateDown">see Layer::Backward</param> /// <param name="colBottom">bottom input Blob vector (length 1-2).</param> protected override void backward(BlobCollection <T> colTop, List <bool> rgbPropagateDown, BlobCollection <T> colBottom) { if (rgbPropagateDown[0] && colBottom[0] != colTop[0]) { long hTopDiff = colTop[0].gpu_diff; long hBottomDiff = colBottom[0].mutable_gpu_diff; int nCount = colBottom[0].count(); m_cuda.copy(nCount, hTopDiff, hBottomDiff); } // in-place, we don't need to do anything with the data diff. bool bBiasParam = (colBottom.Count == 1) ? true : false; if ((!bBiasParam && rgbPropagateDown[1]) || (bBiasParam && m_rgbParamPropagateDown[0])) { long hTopDiff = colTop[0].gpu_diff; long hBiasDiff = (bBiasParam) ? m_colBlobs[0].mutable_gpu_diff : colBottom[1].mutable_gpu_diff; double dfAccum = (bBiasParam) ? 1.0 : 0.0; int nTopDiffOffset = 0; for (int n = 0; n < m_nOuterDim; n++) { m_cuda.gemv(false, m_nBiasDim, m_nInnerDim, m_tOne, hTopDiff, m_blobBiasMultiplier.gpu_data, convert(dfAccum), hBiasDiff, nTopDiffOffset); nTopDiffOffset += m_nDim; dfAccum = 1.0; } } }
/// <summary> /// Computes the error gradient w.r.t. the inputs. /// </summary> /// <param name="colTop">top output Blob vector (Length 1), providing the error gradient /// with respect to computed outputs.</param> /// <param name="rgbPropagateDown">propagate down see Layer::Backward</param> /// <param name="colBottom">bottom input Blob vector (Length 1-2)</param> protected override void backward(BlobCollection <T> colTop, List <bool> rgbPropagateDown, BlobCollection <T> colBottom) { if (m_biasLayer != null && m_rgbParamPropagateDown[m_rgbParamPropagateDown.Count - 1]) { m_biasLayer.Backward(colTop, m_rgbBiasPropagateDown, m_colBiasBottomVec); } bool bScaleParam = (colBottom.Count == 1) ? true : false; Blob <T> blobScale = (bScaleParam) ? m_colBlobs[0] : colBottom[1]; if ((!bScaleParam && rgbPropagateDown[1]) || (bScaleParam && m_rgbParamPropagateDown[0])) { long hTopDiff = colTop[0].gpu_diff; bool bInPlace = (colBottom[0] == colTop[0]) ? true : false; long hBottomData = (bInPlace) ? m_blobTemp.gpu_data : colBottom[0].gpu_data; // Hack: store big eltwise product in bottom[0].diff, except in the special // case where this layer itself does the eltwise product, in which case we // can store it directly in the scale diff, and we're done. // If we're computing in-place (and not doing eltwise computation), this // hack doesn't work and we store the product in temp_. bool bIsEltwise = (colBottom[0].count() == blobScale.count()) ? true : false; long hProduct = (bIsEltwise) ? blobScale.mutable_gpu_diff : ((bInPlace) ? m_blobTemp.mutable_gpu_data : colBottom[0].mutable_gpu_diff); long hSumMult = m_blobSumMultiplier.gpu_data; m_cuda.mul(colTop[0].count(), hTopDiff, hBottomData, hProduct); if (!bIsEltwise) { long hSumResult = 0; if (m_nInnerDim == 1) { hSumResult = hProduct; } else if (m_blobSumResult.count() == 1) { double dfScaleDiff = convertD(blobScale.GetDiff(0)); if (bScaleParam) { T fDot = m_cuda.dot(m_nInnerDim, hProduct, hSumMult); dfScaleDiff += convertD(fDot); blobScale.SetDiff(dfScaleDiff, 0); } else { T fDot = m_cuda.dot(m_nInnerDim, hProduct, hSumMult); blobScale.SetDiff(convertD(fDot), 0); } } else { hSumResult = (m_nOuterDim == 1) ? blobScale.mutable_gpu_diff : m_blobSumResult.mutable_gpu_data; m_cuda.gemv(false, m_blobSumResult.count(), m_nInnerDim, m_tOne, hProduct, hSumMult, m_tZero, hSumResult); } if (m_nOuterDim != 1) { if (m_nScaleDim == 1) { double dfScaleDiff = convertD(blobScale.GetDiff(0)); if (bScaleParam) { T fDot = m_cuda.dot(m_nOuterDim, hSumMult, hSumResult); dfScaleDiff += convertD(fDot); blobScale.SetDiff(dfScaleDiff, 0); } else { T fDot = m_cuda.dot(m_nOuterDim, hSumMult, hSumResult); blobScale.SetDiff(convertD(fDot), 0); } } else { long hScaleDiff = blobScale.mutable_gpu_diff; m_cuda.gemv(true, m_nOuterDim, m_nScaleDim, m_tOne, hSumResult, hSumMult, (bScaleParam) ? m_tOne : m_tZero, hScaleDiff); } } } } if (rgbPropagateDown[0]) { int nCount = colTop[0].count(); long hTopDiff = colTop[0].gpu_diff; long hScaleData = blobScale.gpu_data; long hBottomDiff = colBottom[0].mutable_gpu_diff; m_cuda.scale_fwd(nCount, hTopDiff, hScaleData, m_nScaleDim, m_nInnerDim, hBottomDiff); } }
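In equation form, for a broadcast scale @f$ y_{n,c,i} = \gamma_c \, x_{n,c,i} @f$ the reduction above computes @f$ \frac{\partial E}{\partial \gamma_c} = \sum_{n} \sum_{i} \frac{\partial E}{\partial y_{n,c,i}} \, x_{n,c,i} @f$: the mul call forms the elementwise product, the first gemv (or dot) sums over the m_nInnerDim elements, and the second gemv (or dot) sums over the m_nOuterDim elements, accumulating into the existing diff when the scale is a learned parameter.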
/// @brief Not implemented -- AccuracyDecodeLayer cannot be used as a loss. protected override void backward(BlobCollection <T> colTop, List <bool> rgbPropagateDown, BlobCollection <T> colBottom) { if (rgbPropagateDown[0]) { throw new NotImplementedException(); } }
/// <summary> /// Computes the error gradient w.r.t. the MATH function value inputs. /// </summary> /// <param name="colTop">top output blob vector (length 1), providing the error gradient /// with respect to outputs /// -# @f$ (N \times C \times H \times W) @f$ /// </param> /// <param name="rgbPropagateDown">propagate_down see Layer::Backward.</param> /// <param name="colBottom">bottom input blob vector (length 1) /// -# @f$ (N \times C \times H \times W) @f$ /// </param> protected override void backward(BlobCollection <T> colTop, List <bool> rgbPropagateDown, BlobCollection <T> colBottom) { if (!rgbPropagateDown[0]) { return; } int nCount = colBottom[0].count(); long hTopData = colTop[0].gpu_data; long hTopDiff = colTop[0].gpu_diff; long hBottomDiff = colBottom[0].mutable_gpu_diff; long hBottomData = colBottom[0].gpu_data; m_cuda.math_bwd(nCount, hTopDiff, hTopData, hBottomDiff, hBottomData, m_param.math_param.function); }
/// <summary> /// Computes the error gradient w.r.t the inputs. /// </summary> /// <param name="colTop">top output Blob vector (Length 1+), providing the error gradient /// with respect to computed outputs.</param> /// <param name="rgbPropagateDown">propagate down see Layer::Backward</param> /// <param name="colBottom">bottom input Blob vector (Length 1) /// </param> protected override void backward(BlobCollection <T> colTop, List <bool> rgbPropagateDown, BlobCollection <T> colBottom) { if (!rgbPropagateDown[0]) { return; } if (colTop.Count == 1) { m_cuda.copy(m_nCount, colTop[0].gpu_diff, colBottom[0].mutable_gpu_diff); return; } m_cuda.add(m_nCount, colTop[0].gpu_diff, colTop[1].gpu_diff, colBottom[0].mutable_gpu_diff); // Add remaining top blob diffs. for (int i = 2; i < colTop.Count; i++) { long hTopDiff = colTop[i].gpu_diff; long hBottomDiff = colBottom[0].mutable_gpu_diff; m_cuda.axpy(m_nCount, m_tOne, hTopDiff, hBottomDiff); } }
/// <summary> /// Reshape the bottom (input) and top (output) blobs. /// </summary> /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param> /// <param name="colTop">Specifies the collection of top (output) Blobs.</param> public override void Reshape(BlobCollection <T> colBottom, BlobCollection <T> colTop) { base.Reshape(colBottom, colTop); if (!m_param.convolution_param.useCudnn(m_nNumSpatialAxes)) { return; } m_log.CHECK_EQ(2, m_nNumSpatialAxes, "cuDNN Convolution input must have 2 spatial axes (e.g., height and width). Use 'engine: CAFFE' for general ND convolution."); m_nBottomOffset = m_nBottomDim / m_nGroup; m_nTopOffset = m_nTopDim / m_nGroup; int nHeight = colBottom[0].shape(m_nChannelAxis + 1); int nWidth = colBottom[0].shape(m_nChannelAxis + 2); int nHeightOut = colTop[0].shape(m_nChannelAxis + 1); int nWidthOut = colTop[0].shape(m_nChannelAxis + 2); Size szPad = size_at(m_blobPad); Size szStride = size_at(m_blobStride); ulong lWorkspaceLimitBytes = getWorkspaceLimitInBytes(); for (int i = 0; i < colBottom.Count; i++) { m_cuda.SetTensorDesc(m_rghBottomDesc[i], m_nNum, m_nChannels / m_nGroup, nHeight, nWidth, m_nChannels * nHeight * nWidth, nHeight * nWidth, nWidth, 1, m_bUseHalfSize); m_cuda.SetTensorDesc(m_rghTopDesc[i], m_nNum, m_nNumOutput / m_nGroup, nHeightOut, nWidthOut, m_nNumOutput * m_nOutSpatialDim, m_nOutSpatialDim, nWidthOut, 1, m_bUseHalfSize); m_cuda.SetConvolutionDesc(m_rghConvDesc[i], szPad.Height, szPad.Width, szStride.Height, szStride.Width, m_bUseHalfSize); // Get the algorithms and workspace sizes needed. CONV_FWD_ALGO algoFwd = (CONV_FWD_ALGO)0; CONV_BWD_FILTER_ALGO algoBwdFilter = (CONV_BWD_FILTER_ALGO)0; CONV_BWD_DATA_ALGO algoBwdData = (CONV_BWD_DATA_ALGO)0; ulong lWsSizeFwd = 0; ulong lWsSizeBwdFilter = 0; ulong lWsSizeBwdData = 0; m_cuda.GetConvolutionInfo(m_rghCudnn[0], m_rghBottomDesc[i], m_hFilterDesc, m_rghConvDesc[i], m_rghTopDesc[i], lWorkspaceLimitBytes, out algoFwd, out lWsSizeFwd, out algoBwdFilter, out lWsSizeBwdFilter, out algoBwdData, out lWsSizeBwdData); m_rgfwdAlgo[i] = algoFwd; m_rglWorkspaceFwdSizes[i] = lWsSizeFwd; m_rgbwdFilterAlgo[i] = algoBwdFilter; m_rglWorkspaceBwdFilterSizes[i] = lWsSizeBwdFilter; m_rgbwdDataAlgo[i] = algoBwdData; m_rglWorkspaceBwdDataSizes[i] = lWsSizeBwdData; } // reduce over all workspace sizes to get a maximum to allocate / reallocate ulong lTotalWsFwd = 0; ulong lTotalWsBwdFilter = 0; ulong lTotalWsBwdData = 0; for (int i = 0; i < colBottom.Count; i++) { lTotalWsFwd = Math.Max(lTotalWsFwd, m_rglWorkspaceFwdSizes[i]); lTotalWsBwdFilter = Math.Max(lTotalWsBwdFilter, m_rglWorkspaceBwdFilterSizes[i]); lTotalWsBwdData = Math.Max(lTotalWsBwdData, m_rglWorkspaceBwdDataSizes[i]); } // Get max over all operations. ulong lMaxWorkspace = Math.Max(lTotalWsFwd, Math.Max(lTotalWsBwdFilter, lTotalWsBwdData)); // Ensure all groups have enough workspace. ulong lTotalMaxWorkspace = (ulong)lMaxWorkspace * (ulong)m_nGroup * (ulong)CUDNN_STREAMS_PER_GROUP; // Initialize the workspace data. WorkspaceArgs wsArgs = getWorkspace(); // This is the total amount of storage needed over all groups + streams. if (lTotalMaxWorkspace > wsArgs.Size) { setWorkspace(lTotalMaxWorkspace); } // If we succeed in the allocation, set the offsets for the workspaces. 
for (int g = 0; g < (m_nGroup * CUDNN_STREAMS_PER_GROUP); g++) { m_rglWorkspaceFwdOffsets[g] = (ulong)g * lTotalWsFwd; m_rglWorkspaceBwdFilterOffsets[g] = (ulong)g * lTotalWsBwdFilter; m_rglWorkspaceBwdDataOffsets[g] = (ulong)g * lTotalWsBwdData; } // Tensor descriptor for bias. if (m_bBiasTerm) { m_cuda.SetTensorDesc(m_hBiasDesc, 1, m_nNumOutput / m_nGroup, 1, 1, m_bUseHalfSize); } }
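A quick sizing example for the workspace logic above (all sizes are assumptions): suppose the per-bottom maxima come out to lTotalWsFwd = 8 MB, lTotalWsBwdFilter = 4 MB and lTotalWsBwdData = 6 MB; then:

// lMaxWorkspace = max(8 MB, max(4 MB, 6 MB)) = 8 MB
// With m_nGroup = 2 and, say, CUDNN_STREAMS_PER_GROUP = 3:
// lTotalMaxWorkspace = 8 MB * 2 * 3 = 48 MB, allocated once and then carved up
// per group/stream slot via the g * lTotalWs* offsets set above.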
/// <summary> /// Setup the layer. /// </summary> /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param> /// <param name="colTop">Specifies the collection of top (output) Blobs.</param> public override void LayerSetUp(BlobCollection <T> colBottom, BlobCollection <T> colTop) { m_bConvertBottom = false; }
/// <summary> /// Run the Backward computation using the Engine.CAFFE mode as specified in the LayerParameter. /// </summary> /// <param name="colTop">top output Blob vector (length 1).</param> /// <param name="rgbPropagateDown">see Layer::Backward</param> /// <param name="colBottom">bottom input Blob vector (length 1).</param> protected void backward_cuda(BlobCollection <T> colTop, List <bool> rgbPropagateDown, BlobCollection <T> colBottom) { long hWeight = m_colBlobs[0].gpu_data; long hWeightDiff = m_colBlobs[0].mutable_gpu_diff; for (int i = 0; i < colTop.Count; i++) { long hTopDiff = colTop[i].gpu_diff; // Bias gradient, if necessary. if (m_bBiasTerm && m_rgbParamPropagateDown[1]) { long hBiasDiff = m_colBlobs[1].mutable_gpu_diff; for (int n = 0; n < m_nNum; n++) { backward_bias(hBiasDiff, hTopDiff, n * m_nTopDim); } } if (m_rgbParamPropagateDown[0] || rgbPropagateDown[i]) { long hBottomData = colBottom[i].gpu_data; long hBottomDiff = colBottom[i].mutable_gpu_diff; for (int n = 0; n < m_nNum; n++) { // gradient w.r.t. weight. Note that we will accumulate diffs. if (m_rgbParamPropagateDown[0]) { weight_gemm(hBottomData, n * m_nBottomDim, hTopDiff, n * m_nTopDim, hWeightDiff); } // gradient w.r.t. bottom data, if necessary. if (rgbPropagateDown[i]) { backward_gemm(hTopDiff, n * m_nTopDim, hWeight, hBottomDiff, n * m_nBottomDim); } } } } }
/// <summary> /// Computes the error gradient w.r.t. the LSTMUnit inputs. /// </summary> /// <param name="colTop">output Blob vector (length 2), providing the error gradient /// w.r.t. the outputs. /// -# @f$ (1 \times N \times D) @f$ /// containing error gradients @f$ \frac{\partial E}{\partial c_t} @f$ /// w.r.t. the updated cell state @f$ c_t @f$. /// -# @f$ (1 \times N \times D) @f$ /// containing error gradients @f$ \frac{\partial E}{\partial h_t} @f$ /// w.r.t. the updated hidden state @f$ h_t @f$.</param> /// <param name="rgbPropagateDown">See Layer::Backward.</param> /// <param name="colBottom">input Blob vector (length 3), into which the error gradients /// w.r.t. the LSTMUnit inputs @f$ c_{t-1} @f$, and the gate inputs are computed. Computation /// of the error gradients w.r.t. the sequence indicators is not implemented. /// -# @f$ (1 \times N \times D) @f$ /// the error gradient w.r.t. the previous timestep cell state @f$ c_{t-1} @f$ /// -# @f$ (1 \times N \times 4D) @f$ /// the error gradient w.r.t. the 'gate inputs' @f$ /// [ /// \frac{\partial E}{\partial 'i_t'} /// \frac{\partial E}{\partial 'f_t'} /// \frac{\partial E}{\partial 'o_t'} /// \frac{\partial E}{\partial 'g_t'} /// ] /// @f$ /// -# @f$(1 \times 1 \times N) @f$ /// the gradient w.r.t. the sequence continuation indicators @f$ \delta_t @f$ /// is currently not implemented. /// </param> protected override void backward(BlobCollection <T> colTop, List <bool> rgbPropagateDown, BlobCollection <T> colBottom) { m_log.CHECK(!rgbPropagateDown[2], "Cannot backpropagate to sequence indicators."); if (!rgbPropagateDown[0] && !rgbPropagateDown[1]) { return; } int nCount = colTop[1].count(); long hC_prev = colBottom[0].gpu_data; long hX_acts = m_blobXActs.gpu_data; long hCont = colBottom[2].gpu_data; long hC = colTop[0].gpu_data; long hH = colTop[1].gpu_data; long hC_diff = colTop[0].gpu_diff; long hH_diff = colTop[1].gpu_diff; long hC_prev_diff = colBottom[0].mutable_gpu_diff; long hX_acts_diff = m_blobXActs.mutable_gpu_diff; int nXCount = colBottom[1].count(); long hX_diff = colBottom[1].mutable_gpu_diff; m_cuda.lstm_unit_bwd(nCount, m_nHiddenDim, nXCount, hC_prev, hX_acts, hC, hH, hCont, hC_diff, hH_diff, hC_prev_diff, hX_acts_diff, hX_diff); }
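For reference, lstm_unit_bwd applies the standard LSTM unit chain rule; with @f$ c_t = f_t \odot c_{t-1} + i_t \odot g_t @f$ and @f$ h_t = o_t \odot \tanh(c_t) @f$ (both gated by the continuation indicator @f$ \delta_t @f$), the gradients are @f$ dc = \frac{\partial E}{\partial c_t} + \frac{\partial E}{\partial h_t} \odot o_t \odot (1 - \tanh^2(c_t)) @f$, @f$ \frac{\partial E}{\partial o_t} = \frac{\partial E}{\partial h_t} \odot \tanh(c_t) @f$, @f$ \frac{\partial E}{\partial i_t} = dc \odot g_t @f$, @f$ \frac{\partial E}{\partial f_t} = dc \odot c_{t-1} @f$, @f$ \frac{\partial E}{\partial g_t} = dc \odot i_t @f$ and @f$ \frac{\partial E}{\partial c_{t-1}} = dc \odot f_t @f$; the gate gradients are then scaled by the sigmoid/tanh derivatives to produce the pre-activation diffs written to colBottom[1].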