/// <summary>
/// Invokes the <c>col2im_kernel</c> CUDA kernel, handing it the buffer start pointers of
/// <paramref name="col"/> and <paramref name="im"/> together with the convolution geometry.
/// </summary>
/// <remarks>
/// The launch is sized to <c>channels * height * width</c> work items. The column-grid
/// dimensions are derived here from the image size, padding, dilated patch extent and stride,
/// and are forwarded to the kernel. The CUDA context is resolved from <paramref name="im"/> and
/// the null stream is used. Neither tensor is made contiguous by this method; the buffer start
/// pointers are taken from the tensors as given.
/// NOTE(review): following the Torch convention this code is ported from, <paramref name="col"/>
/// is presumably the source and <paramref name="im"/> the destination - confirm against the kernel.
/// </remarks>
/// <param name="col">Tensor whose buffer start is passed to the kernel as the column data.</param>
/// <param name="im">Tensor whose buffer start is passed to the kernel as the image data; also selects the CUDA context.</param>
/// <param name="channels">Channel count; a factor of the launch size.</param>
/// <param name="height">Image height; a factor of the launch size.</param>
/// <param name="width">Image width; a factor of the launch size.</param>
/// <param name="patch_h">Patch (kernel window) height.</param>
/// <param name="patch_w">Patch (kernel window) width.</param>
/// <param name="pad_h">Padding along the height, counted twice when deriving the column-grid height.</param>
/// <param name="pad_w">Padding along the width, counted twice when deriving the column-grid width.</param>
/// <param name="stride_h">Stride along the height; used as an integer divisor.</param>
/// <param name="stride_w">Stride along the width; used as an integer divisor.</param>
/// <param name="dilation_h">Dilation along the height.</param>
/// <param name="dilation_w">Dilation along the width.</param>
public void Col2Im(NDArray col, NDArray im, int channels, int height, int width, int patch_h, int patch_w, int pad_h, int pad_w, int stride_h, int stride_w, int dilation_h, int dilation_w)
{
    var tsContext = CudaHelpers.TSContextForTensor(im);
    var deviceContext = tsContext.CudaContextForTensor(im);

    // Extent covered by one patch once the dilation gaps are included.
    int patchExtentH = dilation_h * (patch_h - 1) + 1;
    int patchExtentW = dilation_w * (patch_w - 1) + 1;

    // Size of the column grid (integer division, as in the Torch source).
    int colGridH = (height + 2 * pad_h - patchExtentH) / stride_h + 1;
    int colGridW = (width + 2 * pad_w - patchExtentW) / stride_w + 1;

    int workItems = channels * height * width;

    var imagePtr = CudaHelpers.GetBufferStart(im);
    var columnPtr = CudaHelpers.GetBufferStart(col);

    // From Torch source:
    // To avoid involving atomic operations, we will launch one kernel per
    // bottom dimension, and then in the kernel add up the top dimensions.
    var gridDim = new dim3(NNThreads.NumBlocks(workItems));
    var blockDim = new dim3(NNThreads.NumThreads);

    Invoke(
        tsContext, deviceContext, "col2im_kernel",
        gridDim, blockDim, 0, CUstream.NullStream,
        workItems, columnPtr,
        height, width, channels,
        patch_h, patch_w,
        pad_h, pad_w,
        stride_h, stride_w,
        dilation_h, dilation_w,
        colGridH, colGridW,
        imagePtr);
}
/// <summary>
/// Invokes the <c>MaxPoolBackward</c> CUDA kernel for a 2-D max-pooling layer.
/// </summary>
/// <remarks>
/// <paramref name="input"/> is consulted only for its shape and element count: shape indices
/// 0..3 are read as batch, slices, height and width. The output height and width are read from
/// indices 2 and 3 of <paramref name="gradOutput"/>. A contiguous version of
/// <paramref name="gradOutput"/> is obtained via <c>Ops.AsContiguous</c> and disposed once
/// <c>Invoke</c> returns; <paramref name="indices"/> and <paramref name="gradInput"/> are used
/// as given. The launch is sized to the element count of <paramref name="input"/>, and the CUDA
/// context is resolved from <paramref name="gradOutput"/>.
/// </remarks>
/// <param name="input">Forward-pass input; only its shape and element count are read.</param>
/// <param name="gradOutput">Gradient with respect to the pooling output; also selects the CUDA context.</param>
/// <param name="gradInput">Tensor whose buffer start is passed as the kernel's last argument.</param>
/// <param name="indices">Tensor whose buffer start is passed to the kernel alongside the gradient.</param>
/// <param name="cd">Pooling descriptor supplying kH/kW, dH/dW and padH/padW.</param>
/// <param name="ceilMode">Not read by this method.</param>
public void SpatialMaxPoolingBackward(NDArray input, NDArray gradOutput, NDArray gradInput, NDArray indices, ConvolutionDesc2d cd, bool ceilMode)
{
    var tsContext = CudaHelpers.TSContextForTensor(gradOutput);
    var deviceContext = tsContext.CudaContextForTensor(gradOutput);

    // Shape layout used below: [0] batch, [1] slices, [2] height, [3] width.
    var batchCount = input.Shape[0];
    var sliceCount = input.Shape[1];
    var inputH = input.Shape[2];
    var inputW = input.Shape[3];
    var outputW = gradOutput.Shape[3];
    var outputH = gradOutput.Shape[2];

    using (var contiguousGradOutput = Ops.AsContiguous(gradOutput))
    {
        var gradOutputStart = CudaHelpers.GetBufferStart(contiguousGradOutput);
        var indicesStart = CudaHelpers.GetBufferStart(indices);
        var gradInputStart = CudaHelpers.GetBufferStart(gradInput);

        // One work item per element of the forward input.
        var workItems = (int)input.ElementCount();

        var gridDim = new dim3(NNThreads.NumBlocks(workItems));
        var blockDim = new dim3(NNThreads.NumThreads);

        Invoke(
            tsContext, deviceContext, "MaxPoolBackward",
            gridDim, blockDim, 0, CUstream.NullStream,
            workItems, gradOutputStart, indicesStart,
            batchCount, sliceCount,
            inputH, inputW,
            outputH, outputW,
            cd.kH, cd.kW,
            cd.dH, cd.dW,
            cd.padH, cd.padW,
            gradInputStart);
    }
}
/// <summary>
/// Invokes the <c>im2col_kernel</c> CUDA kernel, handing it the buffer start pointers of
/// <paramref name="im"/> and <paramref name="col"/> together with the convolution geometry.
/// </summary>
/// <remarks>
/// The column-grid dimensions are derived here from the image size, padding, dilated kernel
/// extent and stride. The launch is sized to <c>channels * height_col * width_col</c> work
/// items. The CUDA context is resolved from <paramref name="im"/> and the null stream is used.
/// Neither tensor is made contiguous by this method; the buffer start pointers are taken from
/// the tensors as given.
/// NOTE(review): following the Torch convention this code is ported from, <paramref name="im"/>
/// is presumably the source and <paramref name="col"/> the destination - confirm against the kernel.
/// </remarks>
/// <param name="im">Tensor whose buffer start is passed to the kernel as the image data; also selects the CUDA context.</param>
/// <param name="col">Tensor whose buffer start is passed to the kernel as the column data.</param>
/// <param name="channels">Channel count; a factor of the launch size.</param>
/// <param name="height">Image height.</param>
/// <param name="width">Image width.</param>
/// <param name="ksize_h">Kernel window height.</param>
/// <param name="ksize_w">Kernel window width.</param>
/// <param name="pad_h">Padding along the height, counted twice when deriving the column-grid height.</param>
/// <param name="pad_w">Padding along the width, counted twice when deriving the column-grid width.</param>
/// <param name="stride_h">Stride along the height; used as an integer divisor.</param>
/// <param name="stride_w">Stride along the width; used as an integer divisor.</param>
/// <param name="dilation_h">Dilation along the height.</param>
/// <param name="dilation_w">Dilation along the width.</param>
public void Im2Col(NDArray im, NDArray col, int channels, int height, int width, int ksize_h, int ksize_w, int pad_h, int pad_w, int stride_h, int stride_w, int dilation_h, int dilation_w)
{
    var tsContext = CudaHelpers.TSContextForTensor(im);
    var deviceContext = tsContext.CudaContextForTensor(im);

    // Extent covered by one kernel window once the dilation gaps are included.
    int kernelExtentH = dilation_h * (ksize_h - 1) + 1;
    int kernelExtentW = dilation_w * (ksize_w - 1) + 1;

    // Size of the column grid (integer division, as in the Torch source).
    int colGridH = (height + 2 * pad_h - kernelExtentH) / stride_h + 1;
    int colGridW = (width + 2 * pad_w - kernelExtentW) / stride_w + 1;

    // From Torch source:
    // We are going to launch channels * height_col * width_col kernels, each
    // kernel responsible for copying a single-channel grid.
    int workItems = channels * colGridH * colGridW;

    var imagePtr = CudaHelpers.GetBufferStart(im);
    var columnPtr = CudaHelpers.GetBufferStart(col);

    var gridDim = new dim3(NNThreads.NumBlocks(workItems));
    var blockDim = new dim3(NNThreads.NumThreads);

    Invoke(
        tsContext, deviceContext, "im2col_kernel",
        gridDim, blockDim, 0, CUstream.NullStream,
        workItems, imagePtr,
        height, width, channels,
        ksize_h, ksize_w,
        pad_h, pad_w,
        stride_h, stride_w,
        dilation_h, dilation_w,
        colGridH, colGridW,
        columnPtr);
}
/// <summary>
/// Invokes the <c>MaxPoolForward</c> CUDA kernel for a 2-D max-pooling layer.
/// </summary>
/// <remarks>
/// Shape indices 0..3 of <paramref name="input"/> are read as batch size, input planes, height
/// and width. The output height and width are computed here as
/// <c>round((in - k + 2 * pad) / stride) + 1</c>, where the rounding is a ceiling or a floor
/// depending on <paramref name="ceilMode"/>. When any padding is set, each output dimension is
/// reduced by one if its last window would start at or beyond <c>in + pad</c>. A contiguous
/// version of <paramref name="input"/> is obtained via <c>Ops.AsContiguous</c> and disposed once
/// <c>Invoke</c> returns. <paramref name="output"/> and <paramref name="indices"/> are used as
/// given; this method does not allocate or resize them. The launch is sized to the element
/// count of <paramref name="output"/>.
/// </remarks>
/// <param name="input">Tensor to pool; also selects the CUDA context.</param>
/// <param name="output">Tensor whose buffer start is passed to the kernel as the output data; its element count sizes the launch.</param>
/// <param name="indices">Tensor whose buffer start is passed as the kernel's last argument.</param>
/// <param name="cd">Pooling descriptor supplying kH/kW, dH/dW and padH/padW.</param>
/// <param name="ceilMode">If <c>true</c>, output sizes are rounded up; otherwise they are rounded down.</param>
public void SpatialMaxPoolingForward(NDArray input, NDArray output, NDArray indices, ConvolutionDesc2d cd, bool ceilMode)
{
    var tsContext = CudaHelpers.TSContextForTensor(input);
    var deviceContext = tsContext.CudaContextForTensor(input);

    var inputW = input.Shape[3];
    var inputH = input.Shape[2];
    var planeCount = input.Shape[1];
    var batchCount = input.Shape[0];

    // Unrounded number of stride steps that fit along each axis.
    var stepsH = (float)(inputH - cd.kH + 2 * cd.padH) / cd.dH;
    var stepsW = (float)(inputW - cd.kW + 2 * cd.padW) / cd.dW;

    long outputH = (long)(ceilMode ? Math.Ceiling(stepsH) : Math.Floor(stepsH)) + 1;
    long outputW = (long)(ceilMode ? Math.Ceiling(stepsW) : Math.Floor(stepsW)) + 1;

    var hasPadding = cd.padW != 0 || cd.padH != 0;
    if (hasPadding)
    {
        // ensure that the last pooling starts inside the image
        if ((outputH - 1) * cd.dH >= inputH + cd.padH)
        {
            outputH -= 1;
        }

        if ((outputW - 1) * cd.dW >= inputW + cd.padW)
        {
            outputW -= 1;
        }
    }

    using (var contiguousInput = Ops.AsContiguous(input))
    {
        var inputStart = CudaHelpers.GetBufferStart(contiguousInput);
        var outputStart = CudaHelpers.GetBufferStart(output);
        var indicesStart = CudaHelpers.GetBufferStart(indices);

        // One work item per element of the output tensor.
        var workItems = (int)output.ElementCount();

        var gridDim = new dim3(NNThreads.NumBlocks(workItems));
        var blockDim = new dim3(NNThreads.NumThreads);

        Invoke(
            tsContext, deviceContext, "MaxPoolForward",
            gridDim, blockDim, 0, CUstream.NullStream,
            workItems, inputStart,
            batchCount, planeCount,
            inputH, inputW,
            outputH, outputW,
            cd.kH, cd.kW,
            cd.dH, cd.dW,
            cd.padH, cd.padW,
            outputStart, indicesStart);
    }
}