Пример #1
0
        /// <summary>
        /// Computes the gradient of a 2D convolution with respect to its kernels through cuDNN
        /// and stores it in <paramref name="kernelsGradient"/>.
        /// </summary>
        /// <param name="input">Input tensor of the convolution.</param>
        /// <param name="gradient">Gradient with respect to the convolution output.</param>
        /// <param name="stride">Stride applied to both spatial axes.</param>
        /// <param name="padding">Padding mode, resolved to explicit padding by <c>Tensor.GetPaddingParams</c>.</param>
        /// <param name="kernelsGradient">
        /// Destination tensor. Its previous content is not blended in (beta is 0), and its
        /// <c>GpuData</c> holds the cached cuDNN workspace.
        /// </param>
        public override void Conv2DKernelsGradient(Tensor input, Tensor gradient, int stride, Tensor.PaddingType padding, Tensor kernelsGradient)
        {
            int outputWidth = 0, outputHeight = 0, paddingX = 0, paddingY = 0;

            // Only paddingX / paddingY are consumed below; the output size is taken from the gradient's shape.
            Tensor.GetPaddingParams(padding, input.Width, input.Height, kernelsGradient.Width, kernelsGradient.Height, stride, out outputHeight, out outputWidth, out paddingX, out paddingY);

            gradient.CopyToDevice();
            input.CopyToDevice();
            kernelsGradient.CopyToDevice();

            using (var convolutionDesc = new ConvolutionDescriptor())
            using (var gradientDesc = new TensorDescriptor())
            using (var inputDesc = new TensorDescriptor())
            using (var kernelsGradientsDesc = new FilterDescriptor())
            {
                convolutionDesc.SetConvolution2dDescriptor(paddingY, paddingX, stride, stride, 1, 1, cudnnConvolutionMode.CrossCorrelation, cudnnDataType.Float);

                // Shape dimensions are passed in reverse order (index 3 first) to match the descriptor argument order.
                gradientDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, gradient.Shape.Dimensions[3], gradient.Shape.Dimensions[2], gradient.Shape.Dimensions[1], gradient.Shape.Dimensions[0]);
                inputDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, input.Shape.Dimensions[3], input.Shape.Dimensions[2], input.Shape.Dimensions[1], input.Shape.Dimensions[0]);
                kernelsGradientsDesc.SetFilter4dDescriptor(cudnnDataType.Float, cudnnTensorFormat.NCHW, kernelsGradient.Shape.Dimensions[3], kernelsGradient.Shape.Dimensions[2], kernelsGradient.Shape.Dimensions[1], kernelsGradient.Shape.Dimensions[0]);

                var algo          = _CudnnContext.GetConvolutionBackwardFilterAlgorithm(inputDesc, gradientDesc, convolutionDesc, kernelsGradientsDesc, cudnnConvolutionBwdFilterPreference.PreferFastest, IntPtr.Zero);
                var workspaceSize = _CudnnContext.GetConvolutionBackwardFilterWorkspaceSize(inputDesc, gradientDesc, convolutionDesc, kernelsGradientsDesc, algo);

                // A zero-byte device allocation is avoided by always requesting at least one byte.
                workspaceSize = workspaceSize == 0 ? new SizeT(1) : workspaceSize;

                var workspace = kernelsGradient.GpuData.ConvBackKernelWorkspace;
                if (workspace == null || workspace.Size != workspaceSize)
                {
                    if (workspace != null)
                    {
                        // Release the stale device buffer deterministically instead of waiting for its finalizer,
                        // and clear the reference so a failed allocation cannot leave a disposed buffer cached.
                        workspace.Dispose();
                        kernelsGradient.GpuData.ConvBackKernelWorkspace = null;
                    }

                    workspace = new CudaDeviceVariable<byte>(workspaceSize);
                    kernelsGradient.GpuData.ConvBackKernelWorkspace = workspace;
                }

                _CudnnContext.ConvolutionBackwardFilter(1.0f, inputDesc, input.GpuData.DeviceVar, gradientDesc, gradient.GpuData.DeviceVar, convolutionDesc, algo, workspace, 0.0f, kernelsGradientsDesc, kernelsGradient.GpuData.DeviceVar);
            }
        }
Пример #2
0
        /// <summary>
        /// Runs a 2D cross-correlation of <paramref name="t"/> with <paramref name="kernels"/> through cuDNN
        /// and stores the output in <paramref name="result"/>.
        /// </summary>
        /// <param name="t">Input tensor.</param>
        /// <param name="kernels">Convolution kernels.</param>
        /// <param name="stride">Stride applied to both spatial axes.</param>
        /// <param name="padding">Padding mode, resolved to explicit padding by <c>Tensor.GetPaddingParams</c>.</param>
        /// <param name="result">
        /// Destination tensor. Its previous content is not blended in (beta is 0), and its
        /// <c>GpuData</c> holds the cached cuDNN workspace.
        /// </param>
        public override void Conv2D(Tensor t, Tensor kernels, int stride, Tensor.PaddingType padding, Tensor result)
        {
            int outputWidth = 0, outputHeight = 0, paddingX = 0, paddingY = 0;

            // Only paddingX / paddingY are consumed below; the output size is taken from result's shape.
            Tensor.GetPaddingParams(padding, t.Width, t.Height, kernels.Width, kernels.Height, stride, out outputHeight, out outputWidth, out paddingX, out paddingY);

            t.CopyToDevice();
            kernels.CopyToDevice();
            result.CopyToDevice();

            using (var convolutionDesc = new ConvolutionDescriptor())
            using (var tDesc = new TensorDescriptor())
            using (var kernelsDesc = new FilterDescriptor())
            using (var resultDesc = new TensorDescriptor())
            {
                convolutionDesc.SetConvolution2dDescriptor(paddingY, paddingX, stride, stride, 1, 1, cudnnConvolutionMode.CrossCorrelation, cudnnDataType.Float);

                // Shape dimensions are passed in reverse order (index 3 first) to match the descriptor argument order.
                tDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, t.Shape.Dimensions[3], t.Shape.Dimensions[2], t.Shape.Dimensions[1], t.Shape.Dimensions[0]);
                kernelsDesc.SetFilter4dDescriptor(cudnnDataType.Float, cudnnTensorFormat.NCHW, kernels.Shape.Dimensions[3], kernels.Shape.Dimensions[2], kernels.Shape.Dimensions[1], kernels.Shape.Dimensions[0]);
                resultDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, result.Shape.Dimensions[3], result.Shape.Dimensions[2], result.Shape.Dimensions[1], result.Shape.Dimensions[0]);

                var algo = _CudnnContext.GetConvolutionForwardAlgorithm(tDesc, kernelsDesc, convolutionDesc, resultDesc, cudnnConvolutionFwdPreference.PreferFastest, IntPtr.Zero);

                var workspaceSize = _CudnnContext.GetConvolutionForwardWorkspaceSize(tDesc, kernelsDesc, convolutionDesc, resultDesc, algo);

                // A zero-byte device allocation is avoided by always requesting at least one byte.
                workspaceSize = workspaceSize == 0 ? new SizeT(1) : workspaceSize;

                var workspace = result.GpuData.ConvWorkspace;
                if (workspace == null || workspace.Size != workspaceSize)
                {
                    if (workspace != null)
                    {
                        // Release the stale device buffer deterministically instead of waiting for its finalizer,
                        // and clear the reference so a failed allocation cannot leave a disposed buffer cached.
                        workspace.Dispose();
                        result.GpuData.ConvWorkspace = null;
                    }

                    workspace = new CudaDeviceVariable<byte>(workspaceSize);
                    result.GpuData.ConvWorkspace = workspace;
                }

                _CudnnContext.ConvolutionForward(1.0f, tDesc, t.GpuData.DeviceVar, kernelsDesc, kernels.GpuData.DeviceVar, convolutionDesc, algo, workspace, 0.0f, resultDesc, result.GpuData.DeviceVar);
            }
        }
Пример #3
0
        /// <summary>
        /// Computes both gradients of a 2D convolution through cuDNN, using this volume as the convolution input:
        /// the filter gradient is written to <paramref name="filterGradient"/> and the input gradient to
        /// <paramref name="inputGradient"/>.
        /// </summary>
        /// <param name="filters">Filters used by the convolution.</param>
        /// <param name="outputGradients">Gradient with respect to the convolution output.</param>
        /// <param name="inputGradient">Destination for the gradient with respect to this volume (overwritten, beta is 0).</param>
        /// <param name="filterGradient">Destination for the gradient with respect to the filters (overwritten, beta is 0).</param>
        /// <param name="pad">Padding applied to both spatial axes.</param>
        /// <param name="stride">Stride applied to both spatial axes.</param>
        /// <exception cref="ArgumentException">Thrown when any volume argument is not backed by a <c>VolumeStorage</c>.</exception>
        public override void DoConvolutionGradient(Volume <float> filters, Volume <float> outputGradients,
                                                   Volume <float> inputGradient, Volume <float> filterGradient, int pad,
                                                   int stride)
        {
            var inputStorage          = this._volumeStorage;
            var outputGradientStorage = outputGradients.Storage as VolumeStorage;
            var filterStorage         = filters.Storage as VolumeStorage;
            var inputGradientStorage  = inputGradient.Storage as VolumeStorage;
            var filterGradientStorage = filterGradient.Storage as VolumeStorage;

            // Fail with a descriptive error instead of a NullReferenceException when a volume uses another storage type.
            if (outputGradientStorage == null)
            {
                throw new ArgumentException($"{nameof(outputGradients)} storage should be VolumeStorage", nameof(outputGradients));
            }

            if (filterStorage == null)
            {
                throw new ArgumentException($"{nameof(filters)} storage should be VolumeStorage", nameof(filters));
            }

            if (inputGradientStorage == null)
            {
                throw new ArgumentException($"{nameof(inputGradient)} storage should be VolumeStorage", nameof(inputGradient));
            }

            if (filterGradientStorage == null)
            {
                throw new ArgumentException($"{nameof(filterGradient)} storage should be VolumeStorage", nameof(filterGradient));
            }

            // Copy to device if not already done
            inputStorage.CopyToDevice();
            outputGradientStorage.CopyToDevice();
            filterStorage.CopyToDevice();
            inputGradientStorage.CopyToDevice();
            filterGradientStorage.CopyToDevice();

            using (var dataDesc = new TensorDescriptor())
            using (var filterDesc = new FilterDescriptor())
            using (var dDataDesc = new TensorDescriptor())
            using (var dOutputDesc = new TensorDescriptor())
            using (var dfilterDesc = new FilterDescriptor())
            using (var convolutionDesc = new ConvolutionDescriptor())
            {
                convolutionDesc.SetConvolution2dDescriptor(pad, pad, stride, stride, 1, 1,
                                                           cudnnConvolutionMode.CrossCorrelation, cudnnDataType.Float);

                // Shape dimensions are passed in reverse order (index 3 first) to match the descriptor argument order.
                dataDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float,
                                               this.Shape.GetDimension(3),
                                               this.Shape.GetDimension(2),
                                               this.Shape.GetDimension(1),
                                               this.Shape.GetDimension(0));

                // The input gradient is described with the same shape as the input itself.
                dDataDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float,
                                                this.Shape.GetDimension(3),
                                                this.Shape.GetDimension(2),
                                                this.Shape.GetDimension(1),
                                                this.Shape.GetDimension(0));

                dOutputDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float,
                                                  outputGradients.Shape.GetDimension(3),
                                                  outputGradients.Shape.GetDimension(2),
                                                  outputGradients.Shape.GetDimension(1),
                                                  outputGradients.Shape.GetDimension(0));

                filterDesc.SetFilter4dDescriptor(cudnnDataType.Float, cudnnTensorFormat.NCHW,
                                                 filters.Shape.GetDimension(3),
                                                 filters.Shape.GetDimension(2),
                                                 filters.Shape.GetDimension(1),
                                                 filters.Shape.GetDimension(0));

                // The filter gradient is described with the same shape as the filters.
                dfilterDesc.SetFilter4dDescriptor(cudnnDataType.Float, cudnnTensorFormat.NCHW,
                                                  filters.Shape.GetDimension(3),
                                                  filters.Shape.GetDimension(2),
                                                  filters.Shape.GetDimension(1),
                                                  filters.Shape.GetDimension(0));

                var cudnn = this._context.CudnnContext;

                var filterAlgo = cudnn.GetConvolutionBackwardFilterAlgorithm(dataDesc, dOutputDesc,
                                                                             convolutionDesc, dfilterDesc, cudnnConvolutionBwdFilterPreference.PreferFastest, IntPtr.Zero);
                var filterWorkspaceSize = cudnn.GetConvolutionBackwardFilterWorkspaceSize(dataDesc,
                                                                                          dOutputDesc, convolutionDesc, dfilterDesc, filterAlgo);

                // A zero-byte device allocation is avoided by always requesting at least one byte.
                filterWorkspaceSize = filterWorkspaceSize == 0 ? new SizeT(1) : filterWorkspaceSize;

                var dataAlgo = cudnn.GetConvolutionBackwardDataAlgorithm(filterDesc, dOutputDesc,
                                                                         convolutionDesc, dDataDesc, cudnnConvolutionBwdDataPreference.PreferFastest, IntPtr.Zero);

                // Query the workspace with the same filter descriptor that was used to choose the algorithm.
                var dataWorkspaceSize = cudnn.GetConvolutionBackwardDataWorkspaceSize(filterDesc,
                                                                                      dOutputDesc, convolutionDesc, dDataDesc, dataAlgo);
                dataWorkspaceSize = dataWorkspaceSize == 0 ? new SizeT(1) : dataWorkspaceSize;

                // filter
                var filterWorkspace = inputStorage.ConvolutionBackwardFilterStorage;
                if (filterWorkspace == null || filterWorkspace.Size != filterWorkspaceSize)
                {
                    if (filterWorkspace != null)
                    {
                        // Release the stale device buffer deterministically instead of waiting for its finalizer,
                        // and clear the reference so a failed allocation cannot leave a disposed buffer cached.
                        filterWorkspace.Dispose();
                        inputStorage.ConvolutionBackwardFilterStorage = null;
                    }

                    filterWorkspace = new CudaDeviceVariable<byte>(filterWorkspaceSize);
                    inputStorage.ConvolutionBackwardFilterStorage = filterWorkspace;
                }

                cudnn.ConvolutionBackwardFilter(1.0f, dataDesc, inputStorage.DeviceBuffer, dOutputDesc,
                                                outputGradientStorage.DeviceBuffer, convolutionDesc, filterAlgo,
                                                filterWorkspace, 0.0f, dfilterDesc,
                                                filterGradientStorage.DeviceBuffer);

                // data
                var dataWorkspace = inputStorage.ConvolutionBackwardStorage;
                if (dataWorkspace == null || dataWorkspace.Size != dataWorkspaceSize)
                {
                    if (dataWorkspace != null)
                    {
                        dataWorkspace.Dispose();
                        inputStorage.ConvolutionBackwardStorage = null;
                    }

                    dataWorkspace = new CudaDeviceVariable<byte>(dataWorkspaceSize);
                    inputStorage.ConvolutionBackwardStorage = dataWorkspace;
                }

                cudnn.ConvolutionBackwardData(1.0f,
                                              filterDesc, filterStorage.DeviceBuffer,
                                              dOutputDesc, outputGradientStorage.DeviceBuffer,
                                              convolutionDesc, dataAlgo,
                                              dataWorkspace, 0.0f,
                                              dDataDesc, inputGradientStorage.DeviceBuffer);
            }
        }
Пример #4
0
        /// <summary>
        /// Runs a 2D cross-correlation of this volume with <paramref name="filters"/> through cuDNN
        /// and writes the output to <paramref name="result"/>.
        /// </summary>
        /// <param name="filters">Convolution filters.</param>
        /// <param name="pad">Padding applied to both spatial axes.</param>
        /// <param name="stride">Stride applied to both spatial axes.</param>
        /// <param name="result">Destination volume (overwritten, beta is 0).</param>
        /// <exception cref="ArgumentException">
        /// Thrown when <paramref name="result"/> or <paramref name="filters"/> is not backed by a <c>VolumeStorage</c>.
        /// </exception>
        public override void DoConvolution(Volume <float> filters, int pad, int stride, Volume <float> result)
        {
            var resultStorage = result.Storage as VolumeStorage;

            if (resultStorage == null)
            {
                throw new ArgumentException($"{nameof(result)} storage should be VolumeStorage", nameof(result));
            }

            var inputStorage  = this._volumeStorage;
            var filterStorage = filters.Storage as VolumeStorage;

            // Same guard as for result: avoid a NullReferenceException when the filters use another storage type.
            if (filterStorage == null)
            {
                throw new ArgumentException($"{nameof(filters)} storage should be VolumeStorage", nameof(filters));
            }

            // Copy to device if not already done
            inputStorage.CopyToDevice();
            filterStorage.CopyToDevice();
            resultStorage.CopyToDevice();

            // Synchro
            this._context.DefaultStream.Synchronize();

            using (var dataDesc = new TensorDescriptor())
            using (var filterDesc = new FilterDescriptor())
            using (var outputDesc = new TensorDescriptor())
            using (var convolutionDesc = new ConvolutionDescriptor())
            {
                convolutionDesc.SetConvolution2dDescriptor(pad, pad, stride, stride, 1, 1,
                                                           cudnnConvolutionMode.CrossCorrelation, cudnnDataType.Float);

                // Shape dimensions are passed in reverse order (index 3 first) to match the descriptor argument order.
                dataDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float,
                                               this.Shape.GetDimension(3),
                                               this.Shape.GetDimension(2),
                                               this.Shape.GetDimension(1),
                                               this.Shape.GetDimension(0));

                filterDesc.SetFilter4dDescriptor(cudnnDataType.Float, cudnnTensorFormat.NCHW,
                                                 filters.Shape.GetDimension(3),
                                                 filters.Shape.GetDimension(2),
                                                 filters.Shape.GetDimension(1),
                                                 filters.Shape.GetDimension(0));

                outputDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float,
                                                 result.Shape.GetDimension(3),
                                                 result.Shape.GetDimension(2),
                                                 result.Shape.GetDimension(1),
                                                 result.Shape.GetDimension(0));

                var cudnn = this._context.CudnnContext;

                var algo = cudnn.GetConvolutionForwardAlgorithm(
                    dataDesc, filterDesc,
                    convolutionDesc, outputDesc,
                    cudnnConvolutionFwdPreference.PreferFastest, IntPtr.Zero);

                var workspaceSize = cudnn.GetConvolutionForwardWorkspaceSize(
                    dataDesc, filterDesc,
                    convolutionDesc, outputDesc, algo);

                // A zero-byte device allocation is avoided by always requesting at least one byte.
                workspaceSize = workspaceSize == 0 ? new SizeT(1) : workspaceSize;

                var workspace = inputStorage.ConvolutionStorage;
                if (workspace == null || workspace.Size != workspaceSize)
                {
                    if (workspace != null)
                    {
                        // Release the stale device buffer deterministically instead of waiting for its finalizer,
                        // and clear the reference so a failed allocation cannot leave a disposed buffer cached.
                        workspace.Dispose();
                        inputStorage.ConvolutionStorage = null;
                    }

                    workspace = new CudaDeviceVariable<byte>(workspaceSize);
                    inputStorage.ConvolutionStorage = workspace;
                }

                cudnn.ConvolutionForward(1.0f,
                                         dataDesc, inputStorage.DeviceBuffer,
                                         filterDesc, filterStorage.DeviceBuffer,
                                         convolutionDesc, algo, workspace, 0.0f,
                                         outputDesc, resultStorage.DeviceBuffer);
            }
        }
        /// <summary>
        /// Creates a convolutional layer: allocates its device buffers, configures the cuDNN descriptors,
        /// selects the forward / backward algorithms and allocates one workspace sized for the largest of them.
        /// </summary>
        /// <param name="widthIn">Width of the (unpadded) input.</param>
        /// <param name="heightIn">Height of the (unpadded) input.</param>
        /// <param name="channelsIn">Number of input channels.</param>
        /// <param name="widthOut">Width of the output.</param>
        /// <param name="heightOut">Height of the output.</param>
        /// <param name="channelsOut">Number of output channels (one bias value per channel).</param>
        /// <param name="batch">Batch size.</param>
        /// <param name="filterWidth">Filter width.</param>
        /// <param name="filterHeight">Filter height.</param>
        /// <param name="activation">Activation type; PRelu and LeakyRelu allocate extra buffers and kernels.</param>
        /// <param name="blasCtx">cuBLAS handle stored for later use.</param>
        /// <param name="cudnnCtx">cuDNN context used for the algorithm and workspace queries.</param>
        /// <param name="ctx">CUDA context passed to the custom kernels.</param>
        /// <param name="moduleBorder">Module containing the add-border / crop-border kernels.</param>
        /// <param name="modulePrelu">Module containing the PReLU kernels.</param>
        public ConvolutionalLayer(int widthIn, int heightIn, int channelsIn, int widthOut, int heightOut, int channelsOut, int batch, int filterWidth, int filterHeight, Activation activation, CudaBlas blasCtx, CudaDNNContext cudnnCtx, CudaContext ctx, CUmodule moduleBorder, CUmodule modulePrelu)
            : base(widthIn, heightIn, channelsIn, widthOut, heightOut, channelsOut, batch)
        {
            _activation      = activation;
            _filterX         = filterWidth;
            _filterY         = filterHeight;
            _weights         = new CudaDeviceVariable <float>(filterWidth * filterHeight * channelsIn * channelsOut);
            _d_weights       = new CudaDeviceVariable <float>(filterWidth * filterHeight * channelsIn * channelsOut);
            _bias            = new CudaDeviceVariable <float>(channelsOut);
            _d_bias          = new CudaDeviceVariable <float>(channelsOut);
            _dx              = new CudaDeviceVariable <float>(widthIn * heightIn * channelsIn * batch);
            _y               = new CudaDeviceVariable <float>(widthOut * heightOut * channelsOut * batch);
            _dy              = new CudaDeviceVariable <float>(widthOut * heightOut * channelsOut * batch);
            _z               = new CudaDeviceVariable <float>(widthOut * heightOut * channelsOut * batch);
            _ones            = new CudaDeviceVariable <float>(batch);

            // The bordered buffers hold the input enlarged by (filter size - 1) along each spatial axis.
            _withBorderInput = new CudaDeviceVariable <float>((widthIn + filterWidth - 1) * (heightIn + filterHeight - 1) * channelsIn * batch);
            _withBorderDx    = new CudaDeviceVariable <float>((widthIn + filterWidth - 1) * (heightIn + filterHeight - 1) * channelsIn * batch);
            _cudnn           = cudnnCtx;
            _blas            = blasCtx;
            _descActivation  = new ActivationDescriptor();
            _descActivation.SetActivationDescriptor(cudnnActivationMode.Relu, cudnnNanPropagation.NotPropagateNan, 0);
            _descBias = new TensorDescriptor();
            _descBias.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, 1, channelsOut, 1, 1);

            // NOTE(review): _descDataInBorder (and _descConvBorder below) are created but not configured here;
            // presumably they are set up elsewhere in the class - confirm.
            _descDataInBorder = new TensorDescriptor();
            _descDataIn       = new TensorDescriptor();
            _descDataIn.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, batch, channelsIn, heightIn + filterHeight - 1, widthIn + filterWidth - 1);
            _descDataOut = new TensorDescriptor();
            _descDataOut.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, batch, channelsOut, heightOut, widthOut);
            _descFilter = new FilterDescriptor();

            // The filter descriptor takes (k, c, h, w): height comes before width, matching the
            // (height, width) order used for _descDataIn above. Passing width first would only be
            // correct for square filters.
            _descFilter.SetFilter4dDescriptor(cudnnDataType.Float, cudnnTensorFormat.NCHW, channelsOut, channelsIn, filterHeight, filterWidth);
            _descConv       = new ConvolutionDescriptor();
            _descConvBorder = new ConvolutionDescriptor();

            // No padding, unit stride: the border is added explicitly by the add-border kernel instead.
            _descConv.SetConvolution2dDescriptor(0, 0, 1, 1, 1, 1, cudnnConvolutionMode.Convolution, cudnnDataType.Float);

            int n = 0;
            int c = 0;
            int h = 0;
            int w = 0;

            // NOTE(review): the queried output dimensions are not used afterwards in this constructor.
            _descConv.GetConvolution2dForwardOutputDim(_descDataIn, _descFilter, ref n, ref c, ref h, ref w);

            _kernelAddBorder = new AddBorderKernel(moduleBorder, ctx);
            _kernelAddBorder.BlockDimensions = new ManagedCuda.VectorTypes.dim3(widthIn + filterWidth - 1, (heightIn + filterHeight - 1) / 2 + 1, 1);
            _kernelCropBorder = new CropBorderKernel(moduleBorder, ctx);
            _kernelCropBorder.BlockDimensions = new ManagedCuda.VectorTypes.dim3(widthIn, heightIn / 2 + 1, 1);

            if (_activation == Activation.PRelu || _activation == Activation.LeakyRelu)
            {
                _temp                 = new CudaDeviceVariable <float>(channelsOut * batch);
                _aRelu                = new CudaDeviceVariable <float>(channelsOut);
                _dARelu               = new CudaDeviceVariable <float>(channelsOut);
                _KernelPReluForward   = new PReluForwardKernel(modulePrelu, ctx);
                _KernelPReluBackward  = new PReluBackwardKernel(modulePrelu, ctx);
                _KernelPReluBackward1 = new PReluBackward1Kernel(modulePrelu, ctx);
                _KernelPReluBackward2 = new PReluBackward2Kernel(modulePrelu, ctx);
                _KernelPReluForward.SetComputeSize((uint)widthOut * (uint)heightOut, (uint)channelsOut, (uint)batch);
                _KernelPReluBackward.SetComputeSize((uint)channelsOut, 1, 1);
            }

            // NOTE(review): these two benchmark results are never read in this constructor; the algorithms
            // actually used are chosen by the Get* calls below. Kept as-is in case the calls are intentional.
            cudnnConvolutionFwdAlgoPerf[] algos =
                _cudnn.FindConvolutionForwardAlgorithm(_descDataIn, _descFilter, _descConv, _descDataOut, 5);

            cudnnConvolutionBwdDataAlgoPerf[] algos2 = _cudnn.FindConvolutionBackwardDataAlgorithm(_descFilter, _descDataOut, _descConv, _descDataIn, 5);

            _algoFwd = _cudnn.GetConvolutionForwardAlgorithm(_descDataIn, _descFilter, _descConv,
                                                             _descDataOut, cudnnConvolutionFwdPreference.PreferFastest, 0);


            // One workspace is shared by forward, backward-filter and backward-data: track the largest requirement.
            SizeT sizeInBytes = 0, tmpsize = 0;

            sizeInBytes = _cudnn.GetConvolutionForwardWorkspaceSize(_descDataIn, _descFilter,
                                                                    _descConv, _descDataOut, _algoFwd);

            _algoBwdFilter = _cudnn.GetConvolutionBackwardFilterAlgorithm(_descDataIn, _descDataOut, _descConv, _descFilter,
                                                                          cudnnConvolutionBwdFilterPreference.PreferFastest, 0);

            tmpsize     = _cudnn.GetConvolutionBackwardFilterWorkspaceSize(_descDataIn, _descDataOut, _descConv, _descFilter, _algoBwdFilter);
            sizeInBytes = Math.Max(sizeInBytes, tmpsize);

            _algoBwdData = _cudnn.GetConvolutionBackwardDataAlgorithm(_descFilter, _descDataOut, _descConv, _descDataIn, cudnnConvolutionBwdDataPreference.PreferFastest, 0);

            tmpsize     = _cudnn.GetConvolutionBackwardDataWorkspaceSize(_descFilter, _descDataOut, _descConv, _descDataIn, _algoBwdData);
            sizeInBytes = Math.Max(sizeInBytes, tmpsize);

            if (sizeInBytes > 0)
            {
                _workspace = new CudaDeviceVariable <byte>(sizeInBytes);
            }
            else
            {
                // No algorithm needs scratch memory: use the null device variable instead of allocating.
                _workspace = CudaDeviceVariable <byte> .Null;
            }
        }
Пример #6
0
        /// <summary>
        /// Computes both gradients of a 2D convolution through cuDNN (double precision), using this volume as the
        /// convolution input: the filter gradient is written to <paramref name="filterGradient"/> and the input
        /// gradient to <paramref name="inputGradient"/>.
        /// </summary>
        /// <param name="filters">Filters used by the convolution.</param>
        /// <param name="outputGradients">Gradient with respect to the convolution output.</param>
        /// <param name="filterGradient">Destination for the gradient with respect to the filters (overwritten, beta is 0).</param>
        /// <param name="xpad">Horizontal padding.</param>
        /// <param name="ypad">Vertical padding.</param>
        /// <param name="stride">Stride applied to both spatial axes.</param>
        /// <param name="inputGradient">
        /// Destination for the gradient with respect to this volume (overwritten, beta is 0); its storage also
        /// holds the cached cuDNN workspaces.
        /// </param>
        /// <exception cref="ArgumentException">Thrown when any volume argument is not backed by a <c>VolumeStorage</c>.</exception>
        public override void ConvolutionGradient(Volume <double> filters, Volume <double> outputGradients,
                                                 Volume <double> filterGradient, int xpad, int ypad, int stride, Volume <double> inputGradient)
        {
            var inputStorage          = this._volumeStorage;
            var outputGradientStorage = outputGradients.Storage as VolumeStorage;
            var filterStorage         = filters.Storage as VolumeStorage;
            var inputGradientStorage  = inputGradient.Storage as VolumeStorage;
            var filterGradientStorage = filterGradient.Storage as VolumeStorage;

            // Fail with a descriptive error instead of a NullReferenceException when a volume uses another storage type.
            if (outputGradientStorage == null)
            {
                throw new ArgumentException($"{nameof(outputGradients)} storage should be VolumeStorage", nameof(outputGradients));
            }

            if (filterStorage == null)
            {
                throw new ArgumentException($"{nameof(filters)} storage should be VolumeStorage", nameof(filters));
            }

            if (inputGradientStorage == null)
            {
                throw new ArgumentException($"{nameof(inputGradient)} storage should be VolumeStorage", nameof(inputGradient));
            }

            if (filterGradientStorage == null)
            {
                throw new ArgumentException($"{nameof(filterGradient)} storage should be VolumeStorage", nameof(filterGradient));
            }

            // Copy to device if not already done
            inputStorage.CopyToDevice();
            outputGradientStorage.CopyToDevice();
            filterStorage.CopyToDevice();
            inputGradientStorage.CopyToDevice();
            filterGradientStorage.CopyToDevice();

            using var dataDesc        = new TensorDescriptor();
            using var filterDesc      = new FilterDescriptor();
            using var dDataDesc       = new TensorDescriptor();
            using var dOutputDesc     = new TensorDescriptor();
            using var dfilterDesc     = new FilterDescriptor();
            using var convolutionDesc = new ConvolutionDescriptor();

            convolutionDesc.SetConvolution2dDescriptor(ypad, xpad, stride, stride, 1, 1,
                                                       cudnnConvolutionMode.CrossCorrelation, cudnnDataType.Double);

            // Shape dimensions are passed in reverse order (index 3 first) to match the descriptor argument order.
            dataDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double,
                                           this.Shape.Dimensions[3],
                                           this.Shape.Dimensions[2],
                                           this.Shape.Dimensions[1],
                                           this.Shape.Dimensions[0]);

            // The input gradient is described with the same shape as the input itself.
            dDataDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double,
                                            this.Shape.Dimensions[3],
                                            this.Shape.Dimensions[2],
                                            this.Shape.Dimensions[1],
                                            this.Shape.Dimensions[0]);

            dOutputDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double,
                                              outputGradients.Shape.Dimensions[3],
                                              outputGradients.Shape.Dimensions[2],
                                              outputGradients.Shape.Dimensions[1],
                                              outputGradients.Shape.Dimensions[0]);

            filterDesc.SetFilter4dDescriptor(cudnnDataType.Double, cudnnTensorFormat.NCHW,
                                             filters.Shape.Dimensions[3],
                                             filters.Shape.Dimensions[2],
                                             filters.Shape.Dimensions[1],
                                             filters.Shape.Dimensions[0]);

            // The filter gradient is described with the same shape as the filters.
            dfilterDesc.SetFilter4dDescriptor(cudnnDataType.Double, cudnnTensorFormat.NCHW,
                                              filters.Shape.Dimensions[3],
                                              filters.Shape.Dimensions[2],
                                              filters.Shape.Dimensions[1],
                                              filters.Shape.Dimensions[0]);

            var cudnn = this._context.CudnnContext;

            var filterAlgo = cudnn.GetConvolutionBackwardFilterAlgorithm(dataDesc, dOutputDesc,
                                                                         convolutionDesc, dfilterDesc, cudnnConvolutionBwdFilterPreference.PreferFastest, IntPtr.Zero);
            var filterWorkspaceSize = cudnn.GetConvolutionBackwardFilterWorkspaceSize(dataDesc,
                                                                                      dOutputDesc, convolutionDesc, dfilterDesc, filterAlgo);

            // A zero-byte device allocation is avoided by always requesting at least one byte.
            filterWorkspaceSize = filterWorkspaceSize == 0 ? new SizeT(1) : filterWorkspaceSize;

            var dataAlgo = cudnn.GetConvolutionBackwardDataAlgorithm(filterDesc, dOutputDesc,
                                                                     convolutionDesc, dDataDesc, cudnnConvolutionBwdDataPreference.PreferFastest, IntPtr.Zero);

            // Query the workspace with the same filter descriptor that was used to choose the algorithm.
            var dataWorkspaceSize = cudnn.GetConvolutionBackwardDataWorkspaceSize(filterDesc,
                                                                                  dOutputDesc, convolutionDesc, dDataDesc, dataAlgo);

            dataWorkspaceSize = dataWorkspaceSize == 0 ? new SizeT(1) : dataWorkspaceSize;

            // filter
            var filterWorkspace = inputGradientStorage.ConvolutionBackwardFilterStorage;
            if (filterWorkspace == null || filterWorkspace.Size != filterWorkspaceSize)
            {
                if (filterWorkspace != null)
                {
                    // Release the stale device buffer deterministically instead of waiting for its finalizer,
                    // and clear the reference so a failed allocation cannot leave a disposed buffer cached.
                    filterWorkspace.Dispose();
                    inputGradientStorage.ConvolutionBackwardFilterStorage = null;
                }

                filterWorkspace = new CudaDeviceVariable<byte>(filterWorkspaceSize);
                inputGradientStorage.ConvolutionBackwardFilterStorage = filterWorkspace;
            }

            cudnn.ConvolutionBackwardFilter(1.0, dataDesc, inputStorage.DeviceBuffer, dOutputDesc,
                                            outputGradientStorage.DeviceBuffer, convolutionDesc, filterAlgo,
                                            filterWorkspace, 0.0, dfilterDesc,
                                            filterGradientStorage.DeviceBuffer);

            // data
            var dataWorkspace = inputGradientStorage.ConvolutionBackwardStorage;
            if (dataWorkspace == null || dataWorkspace.Size != dataWorkspaceSize)
            {
                if (dataWorkspace != null)
                {
                    dataWorkspace.Dispose();
                    inputGradientStorage.ConvolutionBackwardStorage = null;
                }

                dataWorkspace = new CudaDeviceVariable<byte>(dataWorkspaceSize);
                inputGradientStorage.ConvolutionBackwardStorage = dataWorkspace;
            }

            cudnn.ConvolutionBackwardData(1.0,
                                          filterDesc, filterStorage.DeviceBuffer,
                                          dOutputDesc, outputGradientStorage.DeviceBuffer,
                                          convolutionDesc, dataAlgo,
                                          dataWorkspace, 0.0,
                                          dDataDesc, inputGradientStorage.DeviceBuffer);
        }