Esempio n. 1
0
        /// <summary>
        /// Builds a 2D convolution operator over <paramref name="data"/>: creates the
        /// convolution descriptor, infers the output channel/height/width from a temporary
        /// cuDNN descriptor pair, and registers the weight, bias, output and workspace variables.
        /// </summary>
        /// <param name="data">Rank-4 input variable; dimensions 1..3 must be known (positive).</param>
        /// <param name="kernelH">Kernel height; must be positive.</param>
        /// <param name="kernelW">Kernel width; must be positive.</param>
        /// <param name="numFilter">Number of output filters; must be positive.</param>
        public Convolution2D(Variable <T> data, int kernelH, int kernelW, int numFilter)
        {
            Util.EnsureTrue(data.Shape.Rank == 4);
            Util.EnsureTrue(data.Shape[1] > 0);
            Util.EnsureTrue(data.Shape[2] > 0);
            Util.EnsureTrue(data.Shape[3] > 0);

            // Reject degenerate kernel/filter sizes up front: they feed both the filter
            // descriptor and the divisor of the weight initialisation scale below.
            Util.EnsureTrue(kernelH > 0);
            Util.EnsureTrue(kernelW > 0);
            Util.EnsureTrue(numFilter > 0);

            var numInputFilter  = data.Shape[1];
            var numOutputFilter = numFilter;
            var height          = data.Shape[2];
            var width           = data.Shape[3];

            // fixed padding and stride now
            ConvolutionDesc = new ConvolutionDescriptor();
            ConvolutionDesc.Set2D(0, 0, 1, 1, 1, 1, ConvolutionMode.CROSS_CORRELATION);

            using (var dataDesc = new TensorDescriptor())
            using (var weightDesc = new FilterDescriptor())
            {
                var dataType = Dnn.DataTypeOf <T>();
                // The batch size is not known here; a placeholder is used only so that the
                // descriptors can be built and the output dimensions queried.
                var tempN    = 100; // for temp mini batch size
                dataDesc.Set4D(dataType, TensorFormat.CUDNN_TENSOR_NCHW, tempN, (int)numInputFilter, (int)height, (int)width);
                weightDesc.Set4D(dataType, TensorFormat.CUDNN_TENSOR_NCHW, numOutputFilter, (int)numInputFilter, kernelH, kernelW);

                // get output dimension (n reflects the placeholder batch size and is not used)
                int n, c, h, w;
                ConvolutionDesc.Get2DForwardOutputDim(dataDesc, weightDesc, out n, out c, out h, out w);

                // Create variables
                // Weights are drawn as scale * (2 * U - 1) with scale = sqrt(3 / (inputFilters * kernelH * kernelW)).
                var scale = Sqrt(3.0.AsScalar <T>() / ((double)(numInputFilter * kernelH * kernelW)).AsScalar <T>());

                Data       = data;
                Weight     = Parameter(scale * (2.0.AsScalar <T>() * RandomUniform <T>(Shape.Create(numOutputFilter, numInputFilter, kernelH, kernelW), 0UL, 0UL) - 1.0.AsScalar <T>()));
                Bias       = Parameter(Fill(Shape.Create(c), ScalarOps.Conv <T>(0.1)));
                // Leading -1 leaves the batch dimension of the output unspecified.
                Output     = Variable <T>(PartialShape.Create(-1, c, h, w));
                Workspace1 = AuxVariable <byte>();
                Workspace2 = AuxVariable <byte>();

                AddInput(Data);
                AddInput(Weight);
                AddInput(Bias);
                AddOutput(Output);
                AddAuxVar(Workspace1);
                AddAuxVar(Workspace2);
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Computes the convolution gradients on the GPU through cuDNN: the gradient with
        /// respect to the filters is written to <paramref name="filterGradient"/> and the
        /// gradient with respect to this volume (the convolution input) is written to
        /// <paramref name="inputGradient"/>.
        /// </summary>
        /// <param name="filters">Filters used by the forward convolution.</param>
        /// <param name="outputGradients">Gradient flowing back from the convolution output.</param>
        /// <param name="inputGradient">Receives the gradient with respect to the input.</param>
        /// <param name="filterGradient">Receives the gradient with respect to the filters.</param>
        /// <param name="pad">Padding, applied to both spatial dimensions.</param>
        /// <param name="stride">Stride, applied to both spatial dimensions.</param>
        /// <exception cref="ArgumentException">A volume argument is not backed by a <c>VolumeStorage</c>.</exception>
        public override void DoConvolutionGradient(Volume <float> filters, Volume <float> outputGradients,
                                                   Volume <float> inputGradient, Volume <float> filterGradient, int pad,
                                                   int stride)
        {
            var inputStorage          = this._volumeStorage;
            var outputGradientStorage = outputGradients.Storage as VolumeStorage;
            var filterStorage         = filters.Storage as VolumeStorage;
            var inputGradientStorage  = inputGradient.Storage as VolumeStorage;
            var filterGradientStorage = filterGradient.Storage as VolumeStorage;

            // Fail with a descriptive error instead of a NullReferenceException when a
            // volume is backed by a different storage type (same contract as DoConvolution).
            if (outputGradientStorage == null)
            {
                throw new ArgumentException($"{nameof(outputGradients)} storage should be VolumeStorage", nameof(outputGradients));
            }

            if (filterStorage == null)
            {
                throw new ArgumentException($"{nameof(filters)} storage should be VolumeStorage", nameof(filters));
            }

            if (inputGradientStorage == null)
            {
                throw new ArgumentException($"{nameof(inputGradient)} storage should be VolumeStorage", nameof(inputGradient));
            }

            if (filterGradientStorage == null)
            {
                throw new ArgumentException($"{nameof(filterGradient)} storage should be VolumeStorage", nameof(filterGradient));
            }

            // Copy to device if not already done
            inputStorage.CopyToDevice();
            outputGradientStorage.CopyToDevice();
            filterStorage.CopyToDevice();
            inputGradientStorage.CopyToDevice();
            filterGradientStorage.CopyToDevice();

            using (var dataDesc = new TensorDescriptor())
            using (var filterDesc = new FilterDescriptor())
            using (var dDataDesc = new TensorDescriptor())
            using (var dOutputDesc = new TensorDescriptor())
            using (var dfilterDesc = new FilterDescriptor())
            using (var convolutionDesc = new ConvolutionDescriptor())
            {
                convolutionDesc.SetConvolution2dDescriptor(pad, pad, stride, stride, 1, 1,
                                                           cudnnConvolutionMode.CrossCorrelation, cudnnDataType.Float);

                // Shape dimensions are passed in reverse order (3, 2, 1, 0) to the NCHW descriptors.
                dataDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float,
                                               this.Shape.GetDimension(3),
                                               this.Shape.GetDimension(2),
                                               this.Shape.GetDimension(1),
                                               this.Shape.GetDimension(0));

                dDataDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float,
                                                this.Shape.GetDimension(3),
                                                this.Shape.GetDimension(2),
                                                this.Shape.GetDimension(1),
                                                this.Shape.GetDimension(0));

                dOutputDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float,
                                                  outputGradients.Shape.GetDimension(3),
                                                  outputGradients.Shape.GetDimension(2),
                                                  outputGradients.Shape.GetDimension(1),
                                                  outputGradients.Shape.GetDimension(0));

                filterDesc.SetFilter4dDescriptor(cudnnDataType.Float, cudnnTensorFormat.NCHW,
                                                 filters.Shape.GetDimension(3),
                                                 filters.Shape.GetDimension(2),
                                                 filters.Shape.GetDimension(1),
                                                 filters.Shape.GetDimension(0));

                // The filter gradient descriptor is built from the filters' shape.
                dfilterDesc.SetFilter4dDescriptor(cudnnDataType.Float, cudnnTensorFormat.NCHW,
                                                  filters.Shape.GetDimension(3),
                                                  filters.Shape.GetDimension(2),
                                                  filters.Shape.GetDimension(1),
                                                  filters.Shape.GetDimension(0));

                var filterAlgo = this._context.CudnnContext.GetConvolutionBackwardFilterAlgorithm(dataDesc, dOutputDesc,
                                                                                                  convolutionDesc, dfilterDesc, cudnnConvolutionBwdFilterPreference.PreferFastest, IntPtr.Zero);
                var filterWorkspaceSize = this._context.CudnnContext.GetConvolutionBackwardFilterWorkspaceSize(dataDesc,
                                                                                                               dOutputDesc, convolutionDesc, dfilterDesc, filterAlgo);
                // A zero-sized workspace is bumped to one byte so that a buffer can always be allocated.
                filterWorkspaceSize = filterWorkspaceSize == 0 ? new SizeT(1) : filterWorkspaceSize;

                var dataAlgo = this._context.CudnnContext.GetConvolutionBackwardDataAlgorithm(filterDesc, dOutputDesc,
                                                                                              convolutionDesc, dDataDesc, cudnnConvolutionBwdDataPreference.PreferFastest, IntPtr.Zero);
                // Query the workspace with the same filter descriptor the algorithm was selected
                // (and will be executed) with.
                var dataWorkspaceSize = this._context.CudnnContext.GetConvolutionBackwardDataWorkspaceSize(filterDesc,
                                                                                                           dOutputDesc, convolutionDesc, dDataDesc, dataAlgo);
                dataWorkspaceSize = dataWorkspaceSize == 0 ? new SizeT(1) : dataWorkspaceSize;

                // filter
                var filterWorkspace = this._volumeStorage.ConvolutionBackwardFilterStorage;
                if (filterWorkspace == null || filterWorkspace.Size != filterWorkspaceSize)
                {
                    if (filterWorkspace != null)
                    {
                        // Free the stale device buffer instead of leaving it to the finalizer.
                        filterWorkspace.Dispose();
                        this._volumeStorage.ConvolutionBackwardFilterStorage = null;
                    }

                    this._volumeStorage.ConvolutionBackwardFilterStorage = new CudaDeviceVariable <byte>(filterWorkspaceSize);
                }

                this._context.CudnnContext.ConvolutionBackwardFilter(1.0f, dataDesc, inputStorage.DeviceBuffer, dOutputDesc,
                                                                     outputGradientStorage.DeviceBuffer, convolutionDesc, filterAlgo,
                                                                     this._volumeStorage.ConvolutionBackwardFilterStorage, 0.0f, dfilterDesc,
                                                                     filterGradientStorage.DeviceBuffer);

                // data
                var dataWorkspace = this._volumeStorage.ConvolutionBackwardStorage;
                if (dataWorkspace == null || dataWorkspace.Size != dataWorkspaceSize)
                {
                    if (dataWorkspace != null)
                    {
                        // Free the stale device buffer instead of leaving it to the finalizer.
                        dataWorkspace.Dispose();
                        this._volumeStorage.ConvolutionBackwardStorage = null;
                    }

                    this._volumeStorage.ConvolutionBackwardStorage = new CudaDeviceVariable <byte>(dataWorkspaceSize);
                }

                this._context.CudnnContext.ConvolutionBackwardData(1.0f,
                                                                   filterDesc, filterStorage.DeviceBuffer,
                                                                   dOutputDesc, outputGradientStorage.DeviceBuffer,
                                                                   convolutionDesc, dataAlgo,
                                                                   this._volumeStorage.ConvolutionBackwardStorage, 0.0f,
                                                                   dDataDesc, inputGradientStorage.DeviceBuffer);
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Runs the forward convolution of this volume with <paramref name="filters"/> on the
        /// GPU through cuDNN and writes the output into <paramref name="result"/>.
        /// </summary>
        /// <param name="filters">Convolution filters.</param>
        /// <param name="pad">Padding, applied to both spatial dimensions.</param>
        /// <param name="stride">Stride, applied to both spatial dimensions.</param>
        /// <param name="result">Volume receiving the convolution output.</param>
        /// <exception cref="ArgumentException">
        /// <paramref name="result"/> or <paramref name="filters"/> is not backed by a <c>VolumeStorage</c>.
        /// </exception>
        public override void DoConvolution(Volume <float> filters, int pad, int stride, Volume <float> result)
        {
            var resultStorage = result.Storage as VolumeStorage;

            if (resultStorage == null)
            {
                throw new ArgumentException($"{nameof(result)} storage should be VolumeStorage", nameof(result));
            }

            var inputStorage  = this._volumeStorage;
            var filterStorage = filters.Storage as VolumeStorage;

            // Same contract as for the result: report a wrong storage type explicitly
            // rather than failing later with a NullReferenceException.
            if (filterStorage == null)
            {
                throw new ArgumentException($"{nameof(filters)} storage should be VolumeStorage", nameof(filters));
            }

            // Copy to device if not already done
            inputStorage.CopyToDevice();
            filterStorage.CopyToDevice();
            resultStorage.CopyToDevice();

            // Synchro
            this._context.DefaultStream.Synchronize();

            using (var dataDesc = new TensorDescriptor())
            using (var filterDesc = new FilterDescriptor())
            using (var outputDesc = new TensorDescriptor())
            using (var convolutionDesc = new ConvolutionDescriptor())
            {
                convolutionDesc.SetConvolution2dDescriptor(pad, pad, stride, stride, 1, 1,
                                                           cudnnConvolutionMode.CrossCorrelation, cudnnDataType.Float);

                // Shape dimensions are passed in reverse order (3, 2, 1, 0) to the NCHW descriptors.
                dataDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float,
                                               this.Shape.GetDimension(3),
                                               this.Shape.GetDimension(2),
                                               this.Shape.GetDimension(1),
                                               this.Shape.GetDimension(0));

                filterDesc.SetFilter4dDescriptor(cudnnDataType.Float, cudnnTensorFormat.NCHW,
                                                 filters.Shape.GetDimension(3),
                                                 filters.Shape.GetDimension(2),
                                                 filters.Shape.GetDimension(1),
                                                 filters.Shape.GetDimension(0));

                outputDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float,
                                                 result.Shape.GetDimension(3),
                                                 result.Shape.GetDimension(2),
                                                 result.Shape.GetDimension(1),
                                                 result.Shape.GetDimension(0));

                var algo = this._context.CudnnContext.GetConvolutionForwardAlgorithm(
                    dataDesc, filterDesc,
                    convolutionDesc, outputDesc,
                    cudnnConvolutionFwdPreference.PreferFastest, IntPtr.Zero);

                var workspaceSize = this._context.CudnnContext.GetConvolutionForwardWorkspaceSize(
                    dataDesc, filterDesc,
                    convolutionDesc, outputDesc, algo);
                // A zero-sized workspace is bumped to one byte so that a buffer can always be allocated.
                workspaceSize = workspaceSize == 0 ? new SizeT(1) : workspaceSize;

                var workspace = this._volumeStorage.ConvolutionStorage;
                if (workspace == null || workspace.Size != workspaceSize)
                {
                    if (workspace != null)
                    {
                        // Free the stale device buffer instead of leaving it to the finalizer.
                        workspace.Dispose();
                        this._volumeStorage.ConvolutionStorage = null;
                    }

                    this._volumeStorage.ConvolutionStorage = new CudaDeviceVariable <byte>(workspaceSize);
                }

                this._context.CudnnContext.ConvolutionForward(1.0f,
                                                              dataDesc, inputStorage.DeviceBuffer,
                                                              filterDesc, filterStorage.DeviceBuffer,
                                                              convolutionDesc, algo, this._volumeStorage.ConvolutionStorage, 0.0f,
                                                              outputDesc, resultStorage.DeviceBuffer);
            }
        }
        /// <summary>
        /// Creates a convolutional layer: allocates the device buffers for weights, biases,
        /// activations and their gradients, configures the cuDNN descriptors, prepares the
        /// border add/crop kernels (and the PReLU kernels when required), selects the
        /// forward/backward algorithms and allocates one workspace large enough for all of them.
        /// </summary>
        /// <param name="widthIn">Input width.</param>
        /// <param name="heightIn">Input height.</param>
        /// <param name="channelsIn">Number of input channels.</param>
        /// <param name="widthOut">Output width.</param>
        /// <param name="heightOut">Output height.</param>
        /// <param name="channelsOut">Number of output channels (filters).</param>
        /// <param name="batch">Batch size.</param>
        /// <param name="filterWidth">Filter width.</param>
        /// <param name="filterHeight">Filter height.</param>
        /// <param name="activation">Activation of the layer; PRelu and LeakyRelu allocate extra buffers and kernels.</param>
        /// <param name="blasCtx">cuBLAS handle stored for later use.</param>
        /// <param name="cudnnCtx">cuDNN context used for algorithm selection and stored for later use.</param>
        /// <param name="ctx">CUDA context passed to the custom kernels.</param>
        /// <param name="moduleBorder">Module containing the add/crop border kernels.</param>
        /// <param name="modulePrelu">Module containing the PReLU kernels.</param>
        public ConvolutionalLayer(int widthIn, int heightIn, int channelsIn, int widthOut, int heightOut, int channelsOut, int batch, int filterWidth, int filterHeight, Activation activation, CudaBlas blasCtx, CudaDNNContext cudnnCtx, CudaContext ctx, CUmodule moduleBorder, CUmodule modulePrelu)
            : base(widthIn, heightIn, channelsIn, widthOut, heightOut, channelsOut, batch)
        {
            _activation      = activation;
            _filterX         = filterWidth;
            _filterY         = filterHeight;
            _weights         = new CudaDeviceVariable <float>(filterWidth * filterHeight * channelsIn * channelsOut);
            _d_weights       = new CudaDeviceVariable <float>(filterWidth * filterHeight * channelsIn * channelsOut);
            _bias            = new CudaDeviceVariable <float>(channelsOut);
            _d_bias          = new CudaDeviceVariable <float>(channelsOut);
            _dx              = new CudaDeviceVariable <float>(widthIn * heightIn * channelsIn * batch);
            _y               = new CudaDeviceVariable <float>(widthOut * heightOut * channelsOut * batch);
            _dy              = new CudaDeviceVariable <float>(widthOut * heightOut * channelsOut * batch);
            _z               = new CudaDeviceVariable <float>(widthOut * heightOut * channelsOut * batch);
            _ones            = new CudaDeviceVariable <float>(batch);
            // The bordered buffers hold the input enlarged by (filter size - 1) in each spatial dimension.
            _withBorderInput = new CudaDeviceVariable <float>((widthIn + filterWidth - 1) * (heightIn + filterHeight - 1) * channelsIn * batch);
            _withBorderDx    = new CudaDeviceVariable <float>((widthIn + filterWidth - 1) * (heightIn + filterHeight - 1) * channelsIn * batch);
            _cudnn           = cudnnCtx;
            _blas            = blasCtx;
            _descActivation  = new ActivationDescriptor();
            // NOTE(review): the activation descriptor is always configured as Relu, independent of 'activation' - confirm intended.
            _descActivation.SetActivationDescriptor(cudnnActivationMode.Relu, cudnnNanPropagation.NotPropagateNan, 0);
            _descBias = new TensorDescriptor();
            _descBias.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, 1, channelsOut, 1, 1);
            // NOTE(review): _descDataInBorder and _descConvBorder are created but not configured here.
            _descDataInBorder = new TensorDescriptor();
            _descDataIn       = new TensorDescriptor();
            _descDataIn.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, batch, channelsIn, heightIn + filterHeight - 1, widthIn + filterWidth - 1);
            _descDataOut = new TensorDescriptor();
            _descDataOut.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, batch, channelsOut, heightOut, widthOut);
            _descFilter = new FilterDescriptor();
            // cuDNN filter descriptors take (k, c, h, w): height precedes width, exactly as in the
            // tensor descriptors above. Passing (width, height) only worked for square filters.
            _descFilter.SetFilter4dDescriptor(cudnnDataType.Float, cudnnTensorFormat.NCHW, channelsOut, channelsIn, filterHeight, filterWidth);
            _descConv       = new ConvolutionDescriptor();
            _descConvBorder = new ConvolutionDescriptor();
            _descConv.SetConvolution2dDescriptor(0, 0, 1, 1, 1, 1, cudnnConvolutionMode.Convolution, cudnnDataType.Float);

            int n = 0;
            int c = 0;
            int h = 0;
            int w = 0;

            // NOTE(review): the queried output dimensions are not used afterwards.
            _descConv.GetConvolution2dForwardOutputDim(_descDataIn, _descFilter, ref n, ref c, ref h, ref w);

            _kernelAddBorder = new AddBorderKernel(moduleBorder, ctx);
            _kernelAddBorder.BlockDimensions = new ManagedCuda.VectorTypes.dim3(widthIn + filterWidth - 1, (heightIn + filterHeight - 1) / 2 + 1, 1);
            _kernelCropBorder = new CropBorderKernel(moduleBorder, ctx);
            _kernelCropBorder.BlockDimensions = new ManagedCuda.VectorTypes.dim3(widthIn, heightIn / 2 + 1, 1);

            if (_activation == Activation.PRelu || _activation == Activation.LeakyRelu)
            {
                _temp                 = new CudaDeviceVariable <float>(channelsOut * batch);
                _aRelu                = new CudaDeviceVariable <float>(channelsOut);
                _dARelu               = new CudaDeviceVariable <float>(channelsOut);
                _KernelPReluForward   = new PReluForwardKernel(modulePrelu, ctx);
                _KernelPReluBackward  = new PReluBackwardKernel(modulePrelu, ctx);
                _KernelPReluBackward1 = new PReluBackward1Kernel(modulePrelu, ctx);
                _KernelPReluBackward2 = new PReluBackward2Kernel(modulePrelu, ctx);
                _KernelPReluForward.SetComputeSize((uint)widthOut * (uint)heightOut, (uint)channelsOut, (uint)batch);
                _KernelPReluBackward.SetComputeSize((uint)channelsOut, 1, 1);
            }

            // NOTE(review): the results of both Find* calls are not used below; the algorithms
            // are chosen by the Get* calls instead.
            cudnnConvolutionFwdAlgoPerf[] algos =
                _cudnn.FindConvolutionForwardAlgorithm(_descDataIn, _descFilter, _descConv, _descDataOut, 5);

            cudnnConvolutionBwdDataAlgoPerf[] algos2 = _cudnn.FindConvolutionBackwardDataAlgorithm(_descFilter, _descDataOut, _descConv, _descDataIn, 5);

            _algoFwd = _cudnn.GetConvolutionForwardAlgorithm(_descDataIn, _descFilter, _descConv,
                                                             _descDataOut, cudnnConvolutionFwdPreference.PreferFastest, 0);


            // One workspace is shared by forward, backward-filter and backward-data:
            // it is sized to the largest of the three requirements.
            SizeT sizeInBytes = 0, tmpsize = 0;

            sizeInBytes = _cudnn.GetConvolutionForwardWorkspaceSize(_descDataIn, _descFilter,
                                                                    _descConv, _descDataOut, _algoFwd);

            _algoBwdFilter = _cudnn.GetConvolutionBackwardFilterAlgorithm(_descDataIn, _descDataOut, _descConv, _descFilter,
                                                                          cudnnConvolutionBwdFilterPreference.PreferFastest, 0);

            tmpsize     = _cudnn.GetConvolutionBackwardFilterWorkspaceSize(_descDataIn, _descDataOut, _descConv, _descFilter, _algoBwdFilter);
            sizeInBytes = Math.Max(sizeInBytes, tmpsize);

            _algoBwdData = _cudnn.GetConvolutionBackwardDataAlgorithm(_descFilter, _descDataOut, _descConv, _descDataIn, cudnnConvolutionBwdDataPreference.PreferFastest, 0);

            tmpsize     = _cudnn.GetConvolutionBackwardDataWorkspaceSize(_descFilter, _descDataOut, _descConv, _descDataIn, _algoBwdData);
            sizeInBytes = Math.Max(sizeInBytes, tmpsize);

            if (sizeInBytes > 0)
            {
                _workspace = new CudaDeviceVariable <byte>(sizeInBytes);
            }
            else
            {
                // No algorithm needs scratch memory: use the null device variable.
                _workspace = CudaDeviceVariable <byte> .Null;
            }
        }
Esempio n. 5
0
            /// <summary>
            /// Stores the layer configuration and creates the bias vector and weight matrix,
            /// filling both from the weight initialisation strategy named by the descriptor.
            /// </summary>
            /// <param name="lap">Linear algebra provider used to create the bias and weight storage.</param>
            /// <param name="inputSize">Number of inputs of the layer.</param>
            /// <param name="outputSize">Number of outputs of the layer.</param>
            /// <param name="activation">Activation function of the layer.</param>
            /// <param name="descriptor">Convolution descriptor; supplies the weight initialisation type.</param>
            /// <param name="disableUpdate">Flag stored on the layer as-is.</param>
            public InternalLayer(ILinearAlgebraProvider lap, int inputSize, int outputSize, IActivationFunction activation, ConvolutionDescriptor descriptor, bool disableUpdate)
            {
                // Plain configuration captured on the instance.
                _descriptor    = descriptor;
                _activation    = activation;
                _disableUpdate = disableUpdate;
                _outputSize    = outputSize;
                _inputSize     = inputSize;

                // Resolve the initialisation strategy once; it feeds both the bias and the weights.
                var initialiser = lap.NN.GetWeightInitialisation(descriptor.WeightInitialisation);

                // Bias is created before the weights (same order as the values are requested from the initialiser).
                _bias = lap.Create(
                    outputSize,
                    index => initialiser.GetBias());
                _weight = lap.Create(
                    inputSize,
                    outputSize,
                    (i, j) => initialiser.GetWeight(inputSize, outputSize, i, j));
            }
Esempio n. 6
0
        /// <summary>
        /// Trains and evaluates a small convolutional network on a reduced MNIST problem:
        /// only the digits 0 and 1 are used (1000 training and 100 test samples). The network
        /// is one convolutional layer followed by a max pooling layer, trained on the GPU
        /// provider, and the fraction of correctly classified test images is written to the console.
        /// </summary>
        /// <param name="dataFilesPath">
        /// Directory containing the four MNIST idx files. A trailing directory separator is
        /// optional; a null value is treated as an empty path.
        /// </param>
        public static void ReducedMNIST(string dataFilesPath)
        {
            // Build the file names with Path.Combine so that the directory works with or without
            // a trailing separator (plain concatenation silently produced a wrong file name).
            var basePath = dataFilesPath ?? string.Empty;

            Console.Write("Loading training data...");
            var trainingData = Mnist.Load(System.IO.Path.Combine(basePath, "train-labels.idx1-ubyte"), System.IO.Path.Combine(basePath, "train-images.idx3-ubyte"));
            var testData     = Mnist.Load(System.IO.Path.Combine(basePath, "t10k-labels.idx1-ubyte"), System.IO.Path.Combine(basePath, "t10k-images.idx3-ubyte"));

            Console.WriteLine("done");

            // Keep only the zeroes and ones; the shuffle is seeded with 0.
            var onesAndZeroesTraining = trainingData.Where(s => s.Label == 0 || s.Label == 1).Shuffle(0).Take(1000).ToList();
            var onesAndZeroesTest     = testData.Where(s => s.Label == 0 || s.Label == 1).Shuffle(0).Take(100).ToList();

            using (var lap = GPUProvider.CreateLinearAlgebra(false)) {
                var convolutionDescriptor = new ConvolutionDescriptor(0.1f)
                {
                    Stride               = 1,
                    Padding              = 1,
                    FilterDepth          = 4,
                    FilterHeight         = 3,
                    FilterWidth          = 3,
                    WeightInitialisation = WeightInitialisationType.Xavier,
                    WeightUpdate         = WeightUpdateType.RMSprop,
                    Activation           = ActivationType.LeakyRelu
                };

                const int   BATCH_SIZE = 128, NUM_EPOCHS = 2, IMAGE_WIDTH = 28;
                const float TRAINING_RATE = 0.03f;
                var         errorMetric   = ErrorMetricType.OneHot.Create();
                var         layerTemplate = new LayerDescriptor(0.1f)
                {
                    WeightUpdate = WeightUpdateType.RMSprop,
                    Activation   = ActivationType.LeakyRelu
                };

                var trainingSamples = onesAndZeroesTraining.Select(d => d.AsVolume).Select(d => Tuple.Create(d.AsTensor(lap), d.ExpectedOutput)).ToList();
                var testSamples     = onesAndZeroesTest.Select(d => d.AsVolume).Select(d => Tuple.Create(d.AsTensor(lap), d.ExpectedOutput)).ToList();

                // create a network with a single convolutional layer followed by a max pooling layer
                var convolutionalLayer = new IConvolutionalLayer [] {
                    lap.NN.CreateConvolutionalLayer(convolutionDescriptor, 1, IMAGE_WIDTH, false),
                    lap.NN.CreateMaxPoolingLayer(2, 2, 2)
                };

                ConvolutionalNetwork network;
                try
                {
                    var trainingDataProvider = lap.NN.CreateConvolutionalTrainingProvider(convolutionDescriptor, trainingSamples, convolutionalLayer, true);
                    var testDataProvider     = lap.NN.CreateConvolutionalTrainingProvider(convolutionDescriptor, testSamples, convolutionalLayer, false);

                    using (var trainer = lap.NN.CreateBatchTrainer(layerTemplate, 784, trainingDataProvider.OutputSize)) {
                        var trainingContext = lap.NN.CreateTrainingContext(errorMetric, TRAINING_RATE, BATCH_SIZE);
                        // After every epoch, score the current network against the test samples.
                        trainingContext.EpochComplete += c => {
                            var output    = trainer.Execute(testDataProvider.TrainingDataProvider);
                            var testError = output.Select(d => errorMetric.Compute(d.Output, d.ExpectedOutput)).Average();
                            trainingContext.WriteScore(testError, errorMetric.DisplayAsPercentage);
                        };
                        trainer.Train(trainingDataProvider.TrainingDataProvider, NUM_EPOCHS, trainingContext);

                        network = trainingDataProvider.GetCurrentNetwork(trainer);
                    }
                }
                finally
                {
                    // Release the layers and sample tensors even when training fails, so that
                    // an exception does not leak them.
                    foreach (var layer in convolutionalLayer)
                    {
                        layer.Dispose();
                    }
                    foreach (var item in trainingSamples)
                    {
                        item.Item1.Dispose();
                    }
                    foreach (var item in testSamples)
                    {
                        item.Item1.Dispose();
                    }
                }

                // Run the trained network over the test images and count exact label matches.
                int correct = 0, total = 0;
                using (var execution = lap.NN.CreateConvolutional(network)) {
                    foreach (var item in onesAndZeroesTest)
                    {
                        using (var tensor = item.AsVolume.AsTensor(lap)) {
                            using (var output = execution.Execute(tensor)) {
                                var maxIndex = output.MaximumIndex();
                                if (maxIndex == item.Label)
                                {
                                    ++correct;
                                }
                                ++total;
                            }
                        }
                    }
                }
                Console.WriteLine($"Execution results: {(double)correct / total:P0} correct");
            }
        }
Esempio n. 7
0
        /// <summary>
        /// Computes the convolution gradients on the GPU through cuDNN (double precision):
        /// the gradient with respect to the filters is written to
        /// <paramref name="filterGradient"/> and the gradient with respect to this volume
        /// (the convolution input) is written to <paramref name="inputGradient"/>.
        /// </summary>
        /// <param name="filters">Filters used by the forward convolution.</param>
        /// <param name="outputGradients">Gradient flowing back from the convolution output.</param>
        /// <param name="filterGradient">Receives the gradient with respect to the filters.</param>
        /// <param name="xpad">Horizontal padding.</param>
        /// <param name="ypad">Vertical padding.</param>
        /// <param name="stride">Stride, applied to both spatial dimensions.</param>
        /// <param name="inputGradient">Receives the gradient with respect to the input; its storage also caches the cuDNN workspaces.</param>
        /// <exception cref="ArgumentException">A volume argument is not backed by a <c>VolumeStorage</c>.</exception>
        public override void ConvolutionGradient(Volume <double> filters, Volume <double> outputGradients,
                                                 Volume <double> filterGradient, int xpad, int ypad, int stride, Volume <double> inputGradient)
        {
            // Fail with a descriptive error instead of a NullReferenceException when a
            // volume is backed by a different storage type.
            var inputStorage          = this._volumeStorage;
            var outputGradientStorage = (outputGradients.Storage as VolumeStorage)
                                        ?? throw new ArgumentException($"{nameof(outputGradients)} storage should be VolumeStorage", nameof(outputGradients));
            var filterStorage         = (filters.Storage as VolumeStorage)
                                        ?? throw new ArgumentException($"{nameof(filters)} storage should be VolumeStorage", nameof(filters));
            var inputGradientStorage  = (inputGradient.Storage as VolumeStorage)
                                        ?? throw new ArgumentException($"{nameof(inputGradient)} storage should be VolumeStorage", nameof(inputGradient));
            var filterGradientStorage = (filterGradient.Storage as VolumeStorage)
                                        ?? throw new ArgumentException($"{nameof(filterGradient)} storage should be VolumeStorage", nameof(filterGradient));

            // Copy to device if not already done
            inputStorage.CopyToDevice();
            outputGradientStorage.CopyToDevice();
            filterStorage.CopyToDevice();
            inputGradientStorage.CopyToDevice();
            filterGradientStorage.CopyToDevice();

            using var dataDesc        = new TensorDescriptor();
            using var filterDesc      = new FilterDescriptor();
            using var dDataDesc       = new TensorDescriptor();
            using var dOutputDesc     = new TensorDescriptor();
            using var dfilterDesc     = new FilterDescriptor();
            using var convolutionDesc = new ConvolutionDescriptor();

            convolutionDesc.SetConvolution2dDescriptor(ypad, xpad, stride, stride, 1, 1,
                                                       cudnnConvolutionMode.CrossCorrelation, cudnnDataType.Double);

            // Shape dimensions are passed in reverse order (3, 2, 1, 0) to the NCHW descriptors.
            dataDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double,
                                           this.Shape.Dimensions[3],
                                           this.Shape.Dimensions[2],
                                           this.Shape.Dimensions[1],
                                           this.Shape.Dimensions[0]);

            dDataDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double,
                                            this.Shape.Dimensions[3],
                                            this.Shape.Dimensions[2],
                                            this.Shape.Dimensions[1],
                                            this.Shape.Dimensions[0]);

            dOutputDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double,
                                              outputGradients.Shape.Dimensions[3],
                                              outputGradients.Shape.Dimensions[2],
                                              outputGradients.Shape.Dimensions[1],
                                              outputGradients.Shape.Dimensions[0]);

            filterDesc.SetFilter4dDescriptor(cudnnDataType.Double, cudnnTensorFormat.NCHW,
                                             filters.Shape.Dimensions[3],
                                             filters.Shape.Dimensions[2],
                                             filters.Shape.Dimensions[1],
                                             filters.Shape.Dimensions[0]);

            // The filter gradient descriptor is built from the filters' shape.
            dfilterDesc.SetFilter4dDescriptor(cudnnDataType.Double, cudnnTensorFormat.NCHW,
                                              filters.Shape.Dimensions[3],
                                              filters.Shape.Dimensions[2],
                                              filters.Shape.Dimensions[1],
                                              filters.Shape.Dimensions[0]);

            var filterAlgo = this._context.CudnnContext.GetConvolutionBackwardFilterAlgorithm(dataDesc, dOutputDesc,
                                                                                              convolutionDesc, dfilterDesc, cudnnConvolutionBwdFilterPreference.PreferFastest, IntPtr.Zero);
            var filterWorkspaceSize = this._context.CudnnContext.GetConvolutionBackwardFilterWorkspaceSize(dataDesc,
                                                                                                           dOutputDesc, convolutionDesc, dfilterDesc, filterAlgo);

            // A zero-sized workspace is bumped to one byte so that a buffer can always be allocated.
            filterWorkspaceSize = filterWorkspaceSize == 0 ? new SizeT(1) : filterWorkspaceSize;

            var dataAlgo = this._context.CudnnContext.GetConvolutionBackwardDataAlgorithm(filterDesc, dOutputDesc,
                                                                                          convolutionDesc, dDataDesc, cudnnConvolutionBwdDataPreference.PreferFastest, IntPtr.Zero);
            // Query the workspace with the same filter descriptor the algorithm was selected
            // (and will be executed) with.
            var dataWorkspaceSize = this._context.CudnnContext.GetConvolutionBackwardDataWorkspaceSize(filterDesc,
                                                                                                       dOutputDesc, convolutionDesc, dDataDesc, dataAlgo);

            dataWorkspaceSize = dataWorkspaceSize == 0 ? new SizeT(1) : dataWorkspaceSize;

            // filter
            var filterWorkspace = inputGradientStorage.ConvolutionBackwardFilterStorage;
            if (filterWorkspace == null || filterWorkspace.Size != filterWorkspaceSize)
            {
                // Free the stale device buffer instead of leaving it to the finalizer.
                filterWorkspace?.Dispose();
                inputGradientStorage.ConvolutionBackwardFilterStorage = null;
                inputGradientStorage.ConvolutionBackwardFilterStorage = new CudaDeviceVariable <byte>(filterWorkspaceSize);
            }

            this._context.CudnnContext.ConvolutionBackwardFilter(1.0, dataDesc, inputStorage.DeviceBuffer, dOutputDesc,
                                                                 outputGradientStorage.DeviceBuffer, convolutionDesc, filterAlgo,
                                                                 inputGradientStorage.ConvolutionBackwardFilterStorage, 0.0, dfilterDesc,
                                                                 filterGradientStorage.DeviceBuffer);

            // data
            var dataWorkspace = inputGradientStorage.ConvolutionBackwardStorage;
            if (dataWorkspace == null || dataWorkspace.Size != dataWorkspaceSize)
            {
                // Free the stale device buffer instead of leaving it to the finalizer.
                dataWorkspace?.Dispose();
                inputGradientStorage.ConvolutionBackwardStorage = null;
                inputGradientStorage.ConvolutionBackwardStorage = new CudaDeviceVariable <byte>(dataWorkspaceSize);
            }

            this._context.CudnnContext.ConvolutionBackwardData(1.0,
                                                               filterDesc, filterStorage.DeviceBuffer,
                                                               dOutputDesc, outputGradientStorage.DeviceBuffer,
                                                               convolutionDesc, dataAlgo,
                                                               inputGradientStorage.ConvolutionBackwardStorage, 0.0,
                                                               dDataDesc, inputGradientStorage.DeviceBuffer);
        }