Exemple #1
0
        /// <summary>
        /// Applies a cuDNN forward activation to this volume's device buffer and writes
        /// the outcome into the device buffer of <paramref name="result"/>.
        /// </summary>
        /// <param name="result">Destination volume; its storage must be a <c>VolumeStorage</c>.</param>
        /// <param name="mode">cuDNN activation mode handed to the activation descriptor.</param>
        /// <exception cref="ArgumentException">
        /// Thrown when the storage of <paramref name="result"/> is not a <c>VolumeStorage</c>.
        /// </exception>
        private void DoActivation(Volume <float> result, cudnnActivationMode mode)
        {
            var destination = result.Storage as VolumeStorage;
            if (destination == null)
            {
                throw new ArgumentException($"{nameof(result)} storage should be VolumeStorage", nameof(result));
            }

            var source = this._volumeStorage;

            // Both buffers must be on the device before cuDNN touches them.
            source.CopyToDevice();
            destination.CopyToDevice();

            // Wait for the default stream before launching the activation.
            this._context.DefaultStream.Synchronize();

            using (var activation = new ActivationDescriptor())
            using (var inputTensor = new TensorDescriptor())
            using (var outputTensor = new TensorDescriptor())
            {
                // The descriptors are sized from the result's shape: dimension 3 is passed
                // as N, 2 as C, 1 as H and 0 as W.
                var shape    = result.Shape;
                var batch    = shape.GetDimension(3);
                var channels = shape.GetDimension(2);
                var height   = shape.GetDimension(1);
                var width    = shape.GetDimension(0);

                inputTensor.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, batch, channels, height, width);
                outputTensor.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, batch, channels, height, width);
                activation.SetActivationDescriptor(mode, cudnnNanPropagation.NotPropagateNan, 0.0);

                // alpha = 1, beta = 0 are the scaling factors passed to cuDNN.
                this._context.CudnnContext.ActivationForward(
                    activation,
                    1.0f, inputTensor, source.DeviceBuffer,
                    0.0f, outputTensor, destination.DeviceBuffer);
            }
        }
Exemple #2
0
        /// <summary>
        /// Runs the cuDNN activation backward pass for the given tensors using the DNN
        /// handle obtained for <paramref name="x"/>.
        /// </summary>
        /// <param name="x">Tensor used to look up the DNN context; forwarded as the first data tensor.</param>
        /// <param name="y">Tensor forwarded as the third data tensor.</param>
        /// <param name="dx">Tensor forwarded as the second data tensor.</param>
        /// <param name="dy">Tensor forwarded as the last data tensor.</param>
        /// <param name="activationType">Activation kind; cast directly to <c>cudnnActivationMode</c>.</param>
        /// <param name="clippedReluCeiling">Third value handed to <c>SetActivationDescriptor</c>.</param>
        public static void ActivationBackward(Tensor x, Tensor y, Tensor dx, Tensor dy, DNNActivation activationType, double clippedReluCeiling)
        {
            using (var dnn = CudaHelpers.TSContextForTensor(x).DNNForTensor(x))
            // The activation descriptor is disposable; scope it so it is released even if
            // the cuDNN call throws.
            using (var activationDesc = new ActivationDescriptor())
            {
                activationDesc.SetActivationDescriptor((cudnnActivationMode)activationType,
                                                       cudnnNanPropagation.PropagateNan,
                                                       clippedReluCeiling);

                using (var xPtr = GetDeviceVar(x))
                using (var yPtr = GetDeviceVar(y))
                using (var dxPtr = GetDeviceVar(dx))
                using (var dyPtr = GetDeviceVar(dy))
                using (var xDesc = GetDescriptor(x))
                using (var yDesc = GetDescriptor(y))
                using (var dxDesc = GetDescriptor(dx))
                using (var dyDesc = GetDescriptor(dy))
                {
                    // NOTE(review): tensors are forwarded in the order x, dx, y, dy with
                    // alpha = 1 and beta = 0 - confirm this matches the argument order the
                    // cuDNN wrapper expects.
                    dnn.Value.ActivationBackward(activationDesc, 1,
                                                 xDesc, xPtr,
                                                 dxDesc, dxPtr,
                                                 yDesc, yPtr,
                                                 0,
                                                 dyDesc, dyPtr);
                }
            }
        }
Exemple #3
0
        /// <summary>
        /// Runs a cuDNN activation backward pass. This volume's storage is passed as the
        /// source data, <paramref name="outputGradient"/> as the source gradient,
        /// <paramref name="input"/> as the destination data, and the outcome is written
        /// to the device buffer of <paramref name="inputGradient"/>.
        /// </summary>
        /// <param name="input">Volume whose device buffer is passed as destination data.</param>
        /// <param name="outputGradient">Volume whose device buffer is passed as source gradient.</param>
        /// <param name="inputGradient">Volume that receives the computed gradient.</param>
        /// <param name="mode">cuDNN activation mode handed to the activation descriptor.</param>
        /// <exception cref="ArgumentException">
        /// Thrown when any argument's storage is not a <c>VolumeStorage</c>.
        /// </exception>
        private void DoActivationGradient(Volume <double> input, Volume <double> outputGradient,
                                          Volume <double> inputGradient, cudnnActivationMode mode)
        {
            var inputStorage          = input.Storage as VolumeStorage;
            var inputGradientStorage  = inputGradient.Storage as VolumeStorage;
            var outputStorage         = this._volumeStorage;
            var outputGradientStorage = outputGradient.Storage as VolumeStorage;

            // Fail with a descriptive error instead of a NullReferenceException when a
            // storage is of the wrong kind.
            if (inputStorage == null)
            {
                throw new ArgumentException($"{nameof(input)} storage should be VolumeStorage", nameof(input));
            }

            if (outputGradientStorage == null)
            {
                throw new ArgumentException($"{nameof(outputGradient)} storage should be VolumeStorage", nameof(outputGradient));
            }

            if (inputGradientStorage == null)
            {
                throw new ArgumentException($"{nameof(inputGradient)} storage should be VolumeStorage", nameof(inputGradient));
            }

            // Copy to device if not already done. The input's device buffer is read by
            // ActivationBackward below, so it has to be on the device as well.
            inputStorage.CopyToDevice();
            outputStorage.CopyToDevice();
            outputGradientStorage.CopyToDevice();
            inputGradientStorage.CopyToDevice();

            // Wait for the default stream before launching the backward pass.
            this._context.DefaultStream.Synchronize();

            using (var activationDesc = new ActivationDescriptor())
            using (var srcDesc = new TensorDescriptor())
            using (var srcDiffDesc = new TensorDescriptor())
            using (var destDesc = new TensorDescriptor())
            using (var destDiffDesc = new TensorDescriptor())
            {
                // All four tensors are described with this volume's shape:
                // dimension 3 is passed as N, 2 as C, 1 as H and 0 as W.
                var n = this.Shape.GetDimension(3);
                var c = this.Shape.GetDimension(2);
                var h = this.Shape.GetDimension(1);
                var w = this.Shape.GetDimension(0);

                srcDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double, n, c, h, w);
                srcDiffDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double, n, c, h, w);
                destDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double, n, c, h, w);
                destDiffDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double, n, c, h, w);

                activationDesc.SetActivationDescriptor(mode, cudnnNanPropagation.NotPropagateNan,
                                                       0.0);

                // alpha = 1, beta = 0 are the scaling factors passed to cuDNN.
                this._context.CudnnContext.ActivationBackward(activationDesc, 1.0,
                                                              srcDesc, outputStorage.DeviceBuffer,
                                                              srcDiffDesc, outputGradientStorage.DeviceBuffer,
                                                              destDesc, inputStorage.DeviceBuffer,
                                                              0.0,
                                                              destDiffDesc, inputGradientStorage.DeviceBuffer);
            }

            // Flag the gradient storage as holding its data on the device.
            inputGradientStorage.CopiedToDevice = true;
        }
Exemple #4
0
        /// <summary>
        /// Runs the cuDNN softmax backward pass (accurate algorithm, channel mode). This
        /// volume's storage is passed as the source data and
        /// <paramref name="outputGradient"/> as the source gradient; the outcome is
        /// written to the device buffer of <paramref name="inputGradient"/>.
        /// </summary>
        /// <param name="outputGradient">Volume whose storage is cast to <c>VolumeStorage</c> and passed as source gradient.</param>
        /// <param name="inputGradient">Volume whose storage is cast to <c>VolumeStorage</c> and receives the gradient.</param>
        /// <exception cref="InvalidCastException">
        /// Thrown by the direct casts when a storage is not a <c>VolumeStorage</c>.
        /// </exception>
        public override void DoSoftMaxGradient(Volume <double> outputGradient, Volume <double> inputGradient)
        {
            var inputGradientStorage  = (VolumeStorage)inputGradient.Storage;
            var outputGradientStorage = (VolumeStorage)outputGradient.Storage;
            var outputStorage         = this._volumeStorage;

            // Copy to device if not already done
            outputStorage.CopyToDevice();
            outputGradientStorage.CopyToDevice();
            inputGradientStorage.CopyToDevice();

            // Wait for the default stream before launching the backward pass.
            this._context.DefaultStream.Synchronize();

            // SoftmaxBackward takes no activation descriptor, so only the three tensor
            // descriptors are created.
            using (var srcDesc = new TensorDescriptor())
            using (var srcDiffDesc = new TensorDescriptor())
            using (var destDiffDesc = new TensorDescriptor())
            {
                // All tensors are described with this volume's shape:
                // dimension 3 is passed as N, 2 as C, 1 as H and 0 as W.
                var n = this.Shape.GetDimension(3);
                var c = this.Shape.GetDimension(2);
                var h = this.Shape.GetDimension(1);
                var w = this.Shape.GetDimension(0);

                srcDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double, n, c, h, w);
                srcDiffDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double, n, c, h, w);
                destDiffDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double, n, c, h, w);

                // alpha = 1, beta = 0 are the scaling factors passed to cuDNN.
                this._context.CudnnContext.SoftmaxBackward(cudnnSoftmaxAlgorithm.Accurate, cudnnSoftmaxMode.Channel, 1.0,
                                                           srcDesc, outputStorage.DeviceBuffer,
                                                           srcDiffDesc, outputGradientStorage.DeviceBuffer,
                                                           0.0,
                                                           destDiffDesc, inputGradientStorage.DeviceBuffer);

                // Flag the gradient storage as holding its data on the device.
                inputGradientStorage.CopiedToDevice = true;
            }
        }
        /// <summary>
        /// Creates a convolutional layer: allocates the device buffers for weights, bias,
        /// activations and gradients, configures the cuDNN descriptors, sets up the
        /// border and (optionally) PReLU kernels, selects the convolution algorithms and
        /// allocates one workspace large enough for all of them.
        /// </summary>
        /// <param name="widthIn">Input width.</param>
        /// <param name="heightIn">Input height.</param>
        /// <param name="channelsIn">Number of input channels.</param>
        /// <param name="widthOut">Output width.</param>
        /// <param name="heightOut">Output height.</param>
        /// <param name="channelsOut">Number of output channels.</param>
        /// <param name="batch">Batch size.</param>
        /// <param name="filterWidth">Filter width.</param>
        /// <param name="filterHeight">Filter height.</param>
        /// <param name="activation">Activation kind; PRelu and LeakyRelu allocate extra buffers and kernels.</param>
        /// <param name="blasCtx">cuBLAS handle stored for later use.</param>
        /// <param name="cudnnCtx">cuDNN handle used for algorithm selection and stored for later use.</param>
        /// <param name="ctx">CUDA context used to create the kernels.</param>
        /// <param name="moduleBorder">Module containing the add/crop border kernels.</param>
        /// <param name="modulePrelu">Module containing the PReLU kernels.</param>
        public ConvolutionalLayer(int widthIn, int heightIn, int channelsIn, int widthOut, int heightOut, int channelsOut, int batch, int filterWidth, int filterHeight, Activation activation, CudaBlas blasCtx, CudaDNNContext cudnnCtx, CudaContext ctx, CUmodule moduleBorder, CUmodule modulePrelu)
            : base(widthIn, heightIn, channelsIn, widthOut, heightOut, channelsOut, batch)
        {
            // The input is enlarged by (filter size - 1) in each direction before the
            // convolution; these are the resulting dimensions.
            int borderedWidth  = widthIn + filterWidth - 1;
            int borderedHeight = heightIn + filterHeight - 1;

            // Element counts of the buffers allocated below.
            int weightCount   = filterWidth * filterHeight * channelsIn * channelsOut;
            int inputCount    = widthIn * heightIn * channelsIn * batch;
            int outputCount   = widthOut * heightOut * channelsOut * batch;
            int borderedCount = borderedWidth * borderedHeight * channelsIn * batch;

            _activation = activation;
            _filterX    = filterWidth;
            _filterY    = filterHeight;

            // Device buffers: parameters and their gradients, then activations.
            _weights         = new CudaDeviceVariable <float>(weightCount);
            _d_weights       = new CudaDeviceVariable <float>(weightCount);
            _bias            = new CudaDeviceVariable <float>(channelsOut);
            _d_bias          = new CudaDeviceVariable <float>(channelsOut);
            _dx              = new CudaDeviceVariable <float>(inputCount);
            _y               = new CudaDeviceVariable <float>(outputCount);
            _dy              = new CudaDeviceVariable <float>(outputCount);
            _z               = new CudaDeviceVariable <float>(outputCount);
            _ones            = new CudaDeviceVariable <float>(batch);
            _withBorderInput = new CudaDeviceVariable <float>(borderedCount);
            _withBorderDx    = new CudaDeviceVariable <float>(borderedCount);

            _cudnn = cudnnCtx;
            _blas  = blasCtx;

            // NOTE(review): the activation descriptor is always configured as Relu,
            // independent of the activation argument - confirm other modes are handled
            // elsewhere.
            _descActivation = new ActivationDescriptor();
            _descActivation.SetActivationDescriptor(cudnnActivationMode.Relu, cudnnNanPropagation.NotPropagateNan, 0);

            _descBias = new TensorDescriptor();
            _descBias.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, 1, channelsOut, 1, 1);

            // NOTE(review): _descDataInBorder is created but not configured in this
            // constructor - presumably set up elsewhere; verify.
            _descDataInBorder = new TensorDescriptor();

            _descDataIn = new TensorDescriptor();
            _descDataIn.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, batch, channelsIn, borderedHeight, borderedWidth);

            _descDataOut = new TensorDescriptor();
            _descDataOut.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, batch, channelsOut, heightOut, widthOut);

            // Filter dimensions are given height first, then width, matching the
            // (height, width) order used for the tensor descriptors above. The two
            // values only differ for non-square filters.
            _descFilter = new FilterDescriptor();
            _descFilter.SetFilter4dDescriptor(cudnnDataType.Float, cudnnTensorFormat.NCHW, channelsOut, channelsIn, filterHeight, filterWidth);

            // NOTE(review): _descConvBorder is created but not configured in this
            // constructor - presumably set up elsewhere; verify.
            _descConv       = new ConvolutionDescriptor();
            _descConvBorder = new ConvolutionDescriptor();
            _descConv.SetConvolution2dDescriptor(0, 0, 1, 1, 1, 1, cudnnConvolutionMode.Convolution, cudnnDataType.Float);

            // NOTE(review): the queried output dimensions are not used afterwards; the
            // call is kept in case it is relied on as a descriptor sanity check - confirm.
            int n = 0;
            int c = 0;
            int h = 0;
            int w = 0;

            _descConv.GetConvolution2dForwardOutputDim(_descDataIn, _descFilter, ref n, ref c, ref h, ref w);

            // Border kernels: one adds the border to the input, the other crops it again.
            _kernelAddBorder = new AddBorderKernel(moduleBorder, ctx);
            _kernelAddBorder.BlockDimensions = new ManagedCuda.VectorTypes.dim3(borderedWidth, borderedHeight / 2 + 1, 1);
            _kernelCropBorder = new CropBorderKernel(moduleBorder, ctx);
            _kernelCropBorder.BlockDimensions = new ManagedCuda.VectorTypes.dim3(widthIn, heightIn / 2 + 1, 1);

            // Parametric / leaky ReLU uses its own kernels and per-channel buffers.
            if (_activation == Activation.PRelu || _activation == Activation.LeakyRelu)
            {
                _temp                 = new CudaDeviceVariable <float>(channelsOut * batch);
                _aRelu                = new CudaDeviceVariable <float>(channelsOut);
                _dARelu               = new CudaDeviceVariable <float>(channelsOut);
                _KernelPReluForward   = new PReluForwardKernel(modulePrelu, ctx);
                _KernelPReluBackward  = new PReluBackwardKernel(modulePrelu, ctx);
                _KernelPReluBackward1 = new PReluBackward1Kernel(modulePrelu, ctx);
                _KernelPReluBackward2 = new PReluBackward2Kernel(modulePrelu, ctx);
                _KernelPReluForward.SetComputeSize((uint)widthOut * (uint)heightOut, (uint)channelsOut, (uint)batch);
                _KernelPReluBackward.SetComputeSize((uint)channelsOut, 1, 1);
            }

            // Pick the forward, backward-filter and backward-data algorithms with the
            // PreferFastest preference, and track the largest workspace any of them needs.
            _algoFwd = _cudnn.GetConvolutionForwardAlgorithm(_descDataIn, _descFilter, _descConv,
                                                             _descDataOut, cudnnConvolutionFwdPreference.PreferFastest, 0);

            SizeT sizeInBytes = 0, tmpsize = 0;

            sizeInBytes = _cudnn.GetConvolutionForwardWorkspaceSize(_descDataIn, _descFilter,
                                                                    _descConv, _descDataOut, _algoFwd);

            _algoBwdFilter = _cudnn.GetConvolutionBackwardFilterAlgorithm(_descDataIn, _descDataOut, _descConv, _descFilter,
                                                                          cudnnConvolutionBwdFilterPreference.PreferFastest, 0);

            tmpsize     = _cudnn.GetConvolutionBackwardFilterWorkspaceSize(_descDataIn, _descDataOut, _descConv, _descFilter, _algoBwdFilter);
            sizeInBytes = Math.Max(sizeInBytes, tmpsize);

            _algoBwdData = _cudnn.GetConvolutionBackwardDataAlgorithm(_descFilter, _descDataOut, _descConv, _descDataIn, cudnnConvolutionBwdDataPreference.PreferFastest, 0);

            tmpsize     = _cudnn.GetConvolutionBackwardDataWorkspaceSize(_descFilter, _descDataOut, _descConv, _descDataIn, _algoBwdData);
            sizeInBytes = Math.Max(sizeInBytes, tmpsize);

            // A single shared workspace sized for the most demanding algorithm; the Null
            // device variable is used when no workspace is required.
            if (sizeInBytes > 0)
            {
                _workspace = new CudaDeviceVariable <byte>(sizeInBytes);
            }
            else
            {
                _workspace = CudaDeviceVariable <byte> .Null;
            }
        }