Beispiel #1
0
        /// <summary>
        /// Launches the <c>col2im_kernel</c> CUDA kernel on the null stream, handing it the
        /// buffer starts of <paramref name="col"/> and <paramref name="im"/> together with
        /// the geometry values computed below.
        /// </summary>
        /// <remarks>
        /// One work item is requested per image element (channels * height * width).
        /// The column-grid extents are computed per axis as
        /// (size + 2 * pad - (dilation * (patch - 1) + 1)) / stride + 1 using integer division.
        /// Neither tensor is made contiguous here; their buffers are used as they are.
        /// </remarks>
        /// <param name="col">Tensor whose buffer start is passed to the kernel as the column data.</param>
        /// <param name="im">Tensor whose buffer start is passed to the kernel as the image data; it also selects the CUDA context.</param>
        /// <param name="channels">Channel count; a factor of the launch size and forwarded to the kernel.</param>
        /// <param name="height">Height value used for the launch size and the column-extent formula.</param>
        /// <param name="width">Width value used for the launch size and the column-extent formula.</param>
        /// <param name="patch_h">Patch height used in the column-extent formula.</param>
        /// <param name="patch_w">Patch width used in the column-extent formula.</param>
        /// <param name="pad_h">Padding along the height axis.</param>
        /// <param name="pad_w">Padding along the width axis.</param>
        /// <param name="stride_h">Stride along the height axis; used as an integer divisor.</param>
        /// <param name="stride_w">Stride along the width axis; used as an integer divisor.</param>
        /// <param name="dilation_h">Dilation along the height axis.</param>
        /// <param name="dilation_w">Dilation along the width axis.</param>
        public void Col2Im(NDArray col, NDArray im, int channels, int height, int width,
                           int patch_h, int patch_w, int pad_h,
                           int pad_w, int stride_h, int stride_w,
                           int dilation_h, int dilation_w)
        {
            var tsContext = CudaHelpers.TSContextForTensor(im);
            var cuda      = tsContext.CudaContextForTensor(im);

            // Extent covered by one dilated patch along each axis.
            int patchSpanH = dilation_h * (patch_h - 1) + 1;
            int colRows    = (height + 2 * pad_h - patchSpanH) / stride_h + 1;

            int patchSpanW = dilation_w * (patch_w - 1) + 1;
            int colCols    = (width + 2 * pad_w - patchSpanW) / stride_w + 1;

            // From Torch source:
            // To avoid involving atomic operations, we will launch one kernel per
            // bottom dimension, and then in the kernel add up the top dimensions.
            int workItems = channels * height * width;

            var imageStart  = CudaHelpers.GetBufferStart(im);
            var columnStart = CudaHelpers.GetBufferStart(col);

            var grid  = new dim3(NNThreads.NumBlocks(workItems));
            var block = new dim3(NNThreads.NumThreads);

            Invoke(tsContext, cuda, "col2im_kernel", grid, block, 0, CUstream.NullStream,
                   workItems, columnStart,
                   height, width, channels,
                   patch_h, patch_w,
                   pad_h, pad_w,
                   stride_h, stride_w,
                   dilation_h, dilation_w,
                   colRows, colCols,
                   imageStart);
        }
Beispiel #2
0
        /// <summary>
        /// Launches the <c>MaxPoolBackward</c> CUDA kernel on the null stream with one work
        /// item per element of <paramref name="input"/>.
        /// </summary>
        /// <remarks>
        /// <paramref name="gradOutput"/> is passed through <c>Ops.AsContiguous</c> before its
        /// buffer is read, and that result is disposed once the launch call returns. The buffers of
        /// <paramref name="indices"/> and <paramref name="gradInput"/> are used as they are.
        /// Shapes are indexed as [0] batch, [1] slices, [2] height, [3] width.
        /// </remarks>
        /// <param name="input">Tensor supplying the batch, slice, height and width sizes and the element count for the launch.</param>
        /// <param name="gradOutput">Tensor supplying the output height/width and the gradient buffer; it also selects the CUDA context.</param>
        /// <param name="gradInput">Tensor whose buffer start is passed as the kernel's last argument.</param>
        /// <param name="indices">Tensor whose buffer start is passed to the kernel alongside the gradient buffer.</param>
        /// <param name="cd">Descriptor supplying kH, kW, dH, dW, padH and padW for the kernel.</param>
        /// <param name="ceilMode">Not read by this method.</param>
        public void SpatialMaxPoolingBackward(NDArray input, NDArray gradOutput, NDArray gradInput, NDArray indices, ConvolutionDesc2d cd, bool ceilMode)
        {
            var tsContext = CudaHelpers.TSContextForTensor(gradOutput);
            var cuda      = tsContext.CudaContextForTensor(gradOutput);

            // Axis positions used when reading the shapes.
            const int BatchAxis  = 0;
            const int SliceAxis  = 1;
            const int HeightAxis = 2;
            const int WidthAxis  = 3;

            var batchCount = input.Shape[BatchAxis];
            var sliceCount = input.Shape[SliceAxis];
            var inRows     = input.Shape[HeightAxis];
            var inCols     = input.Shape[WidthAxis];
            var outCols    = gradOutput.Shape[WidthAxis];
            var outRows    = gradOutput.Shape[HeightAxis];

            using (var contiguousGrad = Ops.AsContiguous(gradOutput))
            {
                var gradOutStart = CudaHelpers.GetBufferStart(contiguousGrad);
                var indexStart   = CudaHelpers.GetBufferStart(indices);
                var gradInStart  = CudaHelpers.GetBufferStart(gradInput);

                // One work item per input element.
                var workItems = (int)input.ElementCount();

                var grid  = new dim3(NNThreads.NumBlocks(workItems));
                var block = new dim3(NNThreads.NumThreads);

                Invoke(tsContext, cuda, "MaxPoolBackward", grid, block, 0, CUstream.NullStream,
                       workItems, gradOutStart, indexStart,
                       batchCount, sliceCount, inRows, inCols, outRows, outCols,
                       cd.kH, cd.kW, cd.dH, cd.dW, cd.padH, cd.padW,
                       gradInStart);
            }
        }
Beispiel #3
0
        /// <summary>
        /// Launches the <c>im2col_kernel</c> CUDA kernel on the null stream, handing it the
        /// buffer starts of <paramref name="im"/> and <paramref name="col"/> together with
        /// the geometry values computed below.
        /// </summary>
        /// <remarks>
        /// One work item is requested per (channel, column row, column column) triple,
        /// i.e. channels * height_col * width_col. The column-grid extents are computed per axis as
        /// (size + 2 * pad - (dilation * (ksize - 1) + 1)) / stride + 1 using integer division.
        /// Neither tensor is made contiguous here; their buffers are used as they are.
        /// </remarks>
        /// <param name="im">Tensor whose buffer start is passed to the kernel as the image data; it also selects the CUDA context.</param>
        /// <param name="col">Tensor whose buffer start is passed to the kernel as the column data.</param>
        /// <param name="channels">Channel count; a factor of the launch size and forwarded to the kernel.</param>
        /// <param name="height">Height value used in the column-extent formula.</param>
        /// <param name="width">Width value used in the column-extent formula.</param>
        /// <param name="ksize_h">Kernel-window height used in the column-extent formula.</param>
        /// <param name="ksize_w">Kernel-window width used in the column-extent formula.</param>
        /// <param name="pad_h">Padding along the height axis.</param>
        /// <param name="pad_w">Padding along the width axis.</param>
        /// <param name="stride_h">Stride along the height axis; used as an integer divisor.</param>
        /// <param name="stride_w">Stride along the width axis; used as an integer divisor.</param>
        /// <param name="dilation_h">Dilation along the height axis.</param>
        /// <param name="dilation_w">Dilation along the width axis.</param>
        public void Im2Col(NDArray im, NDArray col, int channels,
                           int height, int width,
                           int ksize_h, int ksize_w, int pad_h,
                           int pad_w, int stride_h, int stride_w,
                           int dilation_h, int dilation_w)
        {
            var tsContext = CudaHelpers.TSContextForTensor(im);
            var cuda      = tsContext.CudaContextForTensor(im);

            // Extent covered by one dilated kernel window along each axis.
            int windowSpanH = dilation_h * (ksize_h - 1) + 1;
            int colRows     = (height + 2 * pad_h - windowSpanH) / stride_h + 1;

            int windowSpanW = dilation_w * (ksize_w - 1) + 1;
            int colCols     = (width + 2 * pad_w - windowSpanW) / stride_w + 1;

            // From Torch source:
            // We are going to launch channels * height_col * width_col kernels, each
            // kernel responsible for copying a single-channel grid.
            int workItems = channels * colRows * colCols;

            var imageStart  = CudaHelpers.GetBufferStart(im);
            var columnStart = CudaHelpers.GetBufferStart(col);

            var grid  = new dim3(NNThreads.NumBlocks(workItems));
            var block = new dim3(NNThreads.NumThreads);

            Invoke(tsContext, cuda, "im2col_kernel", grid, block, 0, CUstream.NullStream,
                   workItems, imageStart,
                   height, width, channels,
                   ksize_h, ksize_w,
                   pad_h, pad_w,
                   stride_h, stride_w,
                   dilation_h, dilation_w,
                   colRows, colCols,
                   columnStart);
        }
Beispiel #4
0
        /// <summary>
        /// Computes the pooled output height and width from the input shape and
        /// <paramref name="cd"/>, then launches the <c>MaxPoolForward</c> CUDA kernel on the
        /// null stream with one work item per element of <paramref name="output"/>.
        /// </summary>
        /// <remarks>
        /// Per axis the extent is round((in - k + 2 * pad) / stride) + 1, where the division is
        /// carried out on a <c>float</c> numerator and the rounding is a ceiling when
        /// <paramref name="ceilMode"/> is set and a floor otherwise. When either padding is
        /// non-zero, an extent is reduced by one if (extent - 1) * stride is not smaller than
        /// in + pad. The computed extents are handed to the kernel; the shape of
        /// <paramref name="output"/> is not compared against them here.
        /// <paramref name="input"/> is passed through <c>Ops.AsContiguous</c> before its buffer
        /// is read; the buffers of <paramref name="output"/> and <paramref name="indices"/>
        /// are used as they are. Shapes are indexed as [0] batch, [1] planes, [2] height, [3] width.
        /// </remarks>
        /// <param name="input">Tensor supplying the batch, plane, height and width sizes and the input buffer; it also selects the CUDA context.</param>
        /// <param name="output">Tensor whose element count sizes the launch and whose buffer start is passed to the kernel.</param>
        /// <param name="indices">Tensor whose buffer start is passed as the kernel's last argument.</param>
        /// <param name="cd">Descriptor supplying kH, kW, dH, dW, padH and padW.</param>
        /// <param name="ceilMode">If set to <c>true</c>, extents are rounded up instead of down.</param>
        public void SpatialMaxPoolingForward(NDArray input, NDArray output, NDArray indices, ConvolutionDesc2d cd, bool ceilMode)
        {
            var tsContext = CudaHelpers.TSContextForTensor(input);
            var cuda      = tsContext.CudaContextForTensor(input);

            var inCols     = input.Shape[3];
            var inRows     = input.Shape[2];
            var planeCount = input.Shape[1];
            var batchCount = input.Shape[0];

            // Choose the rounding rule once; both axes share it.
            Func<double, double> round = ceilMode ? (Func<double, double>)Math.Ceiling : Math.Floor;

            long outRows = (long)round((float)(inRows - cd.kH + 2 * cd.padH) / cd.dH) + 1;
            long outCols = (long)round((float)(inCols - cd.kW + 2 * cd.padW) / cd.dW) + 1;

            bool unpadded = cd.padW == 0 && cd.padH == 0;
            if (!unpadded)
            {
                // ensure that the last pooling starts inside the image
                var lastRowStart = (outRows - 1) * cd.dH;
                if (lastRowStart >= inRows + cd.padH)
                {
                    outRows -= 1;
                }

                var lastColStart = (outCols - 1) * cd.dW;
                if (lastColStart >= inCols + cd.padW)
                {
                    outCols -= 1;
                }
            }

            using (var contiguousInput = Ops.AsContiguous(input))
            {
                var inputStart  = CudaHelpers.GetBufferStart(contiguousInput);
                var outputStart = CudaHelpers.GetBufferStart(output);
                var indexStart  = CudaHelpers.GetBufferStart(indices);

                // One work item per output element.
                var workItems = (int)output.ElementCount();

                var grid  = new dim3(NNThreads.NumBlocks(workItems));
                var block = new dim3(NNThreads.NumThreads);

                Invoke(tsContext, cuda, "MaxPoolForward", grid, block, 0, CUstream.NullStream,
                       workItems, inputStart,
                       batchCount, planeCount, inRows, inCols, outRows, outCols,
                       cd.kH, cd.kW, cd.dH, cd.dW, cd.padH, cd.padW,
                       outputStart, indexStart);
            }
        }