Exemplo n.º 1
0
		/// <summary>
		/// Native entry point (double-precision scaling factors) that computes the convolution
		/// gradient with respect to the filter coefficients using the given algorithm and writes
		/// the result to <paramref name="gradData"/>.
		/// </summary>
		/// <param name="handle">cuDNN library handle.</param>
		/// <param name="alpha">Scaling factor applied to the computed result (passed by reference).</param>
		/// <param name="srcDesc">Descriptor of the source tensor.</param>
		/// <param name="srcData">Device pointer to the data described by <paramref name="srcDesc"/>.</param>
		/// <param name="diffDesc">Descriptor of the input differential tensor.</param>
		/// <param name="diffData">Device pointer to the data described by <paramref name="diffDesc"/>.</param>
		/// <param name="convDesc">Convolution descriptor.</param>
		/// <param name="algo">Backward-filter algorithm to use.</param>
		/// <param name="workSpace">Device pointer to the workspace buffer handed to the algorithm.</param>
		/// <param name="workSpaceSizeInBytes">Size of <paramref name="workSpace"/> in bytes.</param>
		/// <param name="beta">Scaling factor applied to the prior content of the output (passed by reference).</param>
		/// <param name="gradDesc">Filter descriptor of the gradient.</param>
		/// <param name="gradData">Device pointer to the data described by <paramref name="gradDesc"/>; receives the result.</param>
		/// <returns>cuDNN status code of the call.</returns>
		public static extern cudnnStatus cudnnConvolutionBackwardFilter( cudnnHandle                 handle,
																	 ref double alpha,
																	 cudnnTensorDescriptor       srcDesc,
																	 CUdeviceptr srcData,
																	 cudnnTensorDescriptor       diffDesc,
																	 CUdeviceptr diffData,
																	 cudnnConvolutionDescriptor  convDesc,
																	 cudnnConvolutionBwdFilterAlgo     algo,
																	 CUdeviceptr workSpace,
																	 SizeT                              workSpaceSizeInBytes,
																	 ref double beta,
																	 cudnnFilterDescriptor       gradDesc,
																	 CUdeviceptr gradData
																   );
        /// <summary>
        /// Creates a convolutional layer: allocates the device buffers for weights, bias,
        /// activations and gradients, configures the cuDNN descriptors, prepares the border
        /// add/crop kernels (and the PReLU kernels when requested), selects the convolution
        /// algorithms and allocates one workspace large enough for all three of them.
        /// </summary>
        /// <param name="widthIn">Width of the input, without border.</param>
        /// <param name="heightIn">Height of the input, without border.</param>
        /// <param name="channelsIn">Number of input channels.</param>
        /// <param name="widthOut">Width of the output.</param>
        /// <param name="heightOut">Height of the output.</param>
        /// <param name="channelsOut">Number of output channels (one filter and one bias value per channel).</param>
        /// <param name="batch">Number of samples per batch.</param>
        /// <param name="filterWidth">Width of the convolution filter.</param>
        /// <param name="filterHeight">Height of the convolution filter.</param>
        /// <param name="activation">Activation of this layer; PRelu and LeakyRelu allocate additional buffers and kernels.</param>
        /// <param name="blasCtx">cuBLAS context stored for later use by the layer.</param>
        /// <param name="cudnnCtx">cuDNN context used for the algorithm and workspace queries.</param>
        /// <param name="ctx">CUDA context the kernels are created in.</param>
        /// <param name="moduleBorder">Module containing the add-border and crop-border kernels.</param>
        /// <param name="modulePrelu">Module containing the PReLU kernels.</param>
        public ConvolutionalLayer(int widthIn, int heightIn, int channelsIn, int widthOut, int heightOut, int channelsOut, int batch, int filterWidth, int filterHeight, Activation activation, CudaBlas blasCtx, CudaDNNContext cudnnCtx, CudaContext ctx, CUmodule moduleBorder, CUmodule modulePrelu)
            : base(widthIn, heightIn, channelsIn, widthOut, heightOut, channelsOut, batch)
        {
            // Element counts shared by several buffers below.
            int weightCount    = filterWidth * filterHeight * channelsIn * channelsOut;
            int inputCount     = widthIn * heightIn * channelsIn * batch;
            int outputCount    = widthOut * heightOut * channelsOut * batch;

            // The input is enlarged by (filter size - 1) in each direction before convolving.
            int borderedWidth  = widthIn + filterWidth - 1;
            int borderedHeight = heightIn + filterHeight - 1;
            int borderedCount  = borderedWidth * borderedHeight * channelsIn * batch;

            _activation = activation;
            _filterX    = filterWidth;
            _filterY    = filterHeight;

            // Device buffers.
            _weights         = new CudaDeviceVariable<float>(weightCount);
            _d_weights       = new CudaDeviceVariable<float>(weightCount);
            _bias            = new CudaDeviceVariable<float>(channelsOut);
            _d_bias          = new CudaDeviceVariable<float>(channelsOut);
            _dx              = new CudaDeviceVariable<float>(inputCount);
            _y               = new CudaDeviceVariable<float>(outputCount);
            _dy              = new CudaDeviceVariable<float>(outputCount);
            _z               = new CudaDeviceVariable<float>(outputCount);
            _ones            = new CudaDeviceVariable<float>(batch);
            _withBorderInput = new CudaDeviceVariable<float>(borderedCount);
            _withBorderDx    = new CudaDeviceVariable<float>(borderedCount);

            _cudnn = cudnnCtx;
            _blas  = blasCtx;

            // The cuDNN activation descriptor is always set up as ReLU, independent of 'activation'.
            _descActivation = new ActivationDescriptor();
            _descActivation.SetActivationDescriptor(cudnnActivationMode.Relu, cudnnNanPropagation.NotPropagateNan, 0);

            _descBias = new TensorDescriptor();
            _descBias.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, 1, channelsOut, 1, 1);

            // Created here but not configured in this constructor.
            _descDataInBorder = new TensorDescriptor();

            // The convolution input descriptor describes the bordered input (N, C, H, W).
            _descDataIn = new TensorDescriptor();
            _descDataIn.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, batch, channelsIn, borderedHeight, borderedWidth);

            _descDataOut = new TensorDescriptor();
            _descDataOut.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, batch, channelsOut, heightOut, widthOut);

            // Filter dimensions are (K, C, H, W): height first, then width, matching the
            // (H, W) order used by the tensor descriptors above. Passing width before height
            // describes a transposed filter whenever the filter is not square.
            _descFilter = new FilterDescriptor();
            _descFilter.SetFilter4dDescriptor(cudnnDataType.Float, cudnnTensorFormat.NCHW, channelsOut, channelsIn, filterHeight, filterWidth);

            _descConv = new ConvolutionDescriptor();
            // Created here but not configured in this constructor.
            _descConvBorder = new ConvolutionDescriptor();
            // No padding, unit strides and unit upscale/dilation factors.
            _descConv.SetConvolution2dDescriptor(0, 0, 1, 1, 1, 1, cudnnConvolutionMode.Convolution, cudnnDataType.Float);

            // Kernels that add the border to the input and crop it again.
            _kernelAddBorder = new AddBorderKernel(moduleBorder, ctx);
            _kernelAddBorder.BlockDimensions = new ManagedCuda.VectorTypes.dim3(borderedWidth, borderedHeight / 2 + 1, 1);
            _kernelCropBorder = new CropBorderKernel(moduleBorder, ctx);
            _kernelCropBorder.BlockDimensions = new ManagedCuda.VectorTypes.dim3(widthIn, heightIn / 2 + 1, 1);

            // Extra buffers and kernels needed only by the parametric / leaky ReLU variants.
            bool needsPRelu = _activation == Activation.PRelu || _activation == Activation.LeakyRelu;
            if (needsPRelu)
            {
                _temp                 = new CudaDeviceVariable<float>(channelsOut * batch);
                _aRelu                = new CudaDeviceVariable<float>(channelsOut);
                _dARelu               = new CudaDeviceVariable<float>(channelsOut);
                _KernelPReluForward   = new PReluForwardKernel(modulePrelu, ctx);
                _KernelPReluBackward  = new PReluBackwardKernel(modulePrelu, ctx);
                _KernelPReluBackward1 = new PReluBackward1Kernel(modulePrelu, ctx);
                _KernelPReluBackward2 = new PReluBackward2Kernel(modulePrelu, ctx);
                _KernelPReluForward.SetComputeSize((uint)widthOut * (uint)heightOut, (uint)channelsOut, (uint)batch);
                _KernelPReluBackward.SetComputeSize((uint)channelsOut, 1, 1);
            }

            // Select the algorithms and size one workspace as the maximum of the forward,
            // backward-filter and backward-data requirements.
            SizeT sizeInBytes = 0, tmpsize = 0;

            _algoFwd = _cudnn.GetConvolutionForwardAlgorithm(_descDataIn, _descFilter, _descConv,
                                                             _descDataOut, cudnnConvolutionFwdPreference.PreferFastest, 0);
            sizeInBytes = _cudnn.GetConvolutionForwardWorkspaceSize(_descDataIn, _descFilter,
                                                                    _descConv, _descDataOut, _algoFwd);

            _algoBwdFilter = _cudnn.GetConvolutionBackwardFilterAlgorithm(_descDataIn, _descDataOut, _descConv, _descFilter,
                                                                          cudnnConvolutionBwdFilterPreference.PreferFastest, 0);
            tmpsize     = _cudnn.GetConvolutionBackwardFilterWorkspaceSize(_descDataIn, _descDataOut, _descConv, _descFilter, _algoBwdFilter);
            sizeInBytes = Math.Max(sizeInBytes, tmpsize);

            _algoBwdData = _cudnn.GetConvolutionBackwardDataAlgorithm(_descFilter, _descDataOut, _descConv, _descDataIn, cudnnConvolutionBwdDataPreference.PreferFastest, 0);
            tmpsize     = _cudnn.GetConvolutionBackwardDataWorkspaceSize(_descFilter, _descDataOut, _descConv, _descDataIn, _algoBwdData);
            sizeInBytes = Math.Max(sizeInBytes, tmpsize);

            // Use the shared Null variable when no algorithm needs scratch memory.
            if (sizeInBytes > 0)
            {
                _workspace = new CudaDeviceVariable<byte>(sizeInBytes);
            }
            else
            {
                _workspace = CudaDeviceVariable<byte>.Null;
            }
        }
Exemplo n.º 3
0
		/// <summary>
		/// Native entry point that queries how much workspace memory the given backward-filter
		/// algorithm needs for the described tensors, convolution and filter.
		/// </summary>
		/// <param name="handle">cuDNN library handle.</param>
		/// <param name="srcDesc">Descriptor of the source tensor.</param>
		/// <param name="diffDesc">Descriptor of the input differential tensor.</param>
		/// <param name="convDesc">Convolution descriptor.</param>
		/// <param name="gradDesc">Filter descriptor of the gradient.</param>
		/// <param name="algo">Backward-filter algorithm the size is queried for.</param>
		/// <param name="sizeInBytes">Receives the required workspace size in bytes.</param>
		/// <returns>cuDNN status code of the call.</returns>
		public static extern cudnnStatus cudnnGetConvolutionBackwardFilterWorkspaceSize( cudnnHandle          handle, 
																				  cudnnTensorDescriptor       srcDesc,
																				  cudnnTensorDescriptor       diffDesc,
																				  cudnnConvolutionDescriptor  convDesc,  
																				  cudnnFilterDescriptor       gradDesc,
																				  cudnnConvolutionBwdFilterAlgo     algo,
																				  ref SizeT                         sizeInBytes
																				);
Exemplo n.º 4
0
		/// <summary>
		/// Native entry point that asks cuDNN which backward-filter algorithm to use for the
		/// described tensors, convolution and filter, according to the given preference.
		/// </summary>
		/// <param name="handle">cuDNN library handle.</param>
		/// <param name="srcDesc">Descriptor of the source tensor.</param>
		/// <param name="diffDesc">Descriptor of the input differential tensor.</param>
		/// <param name="convDesc">Convolution descriptor.</param>
		/// <param name="gradDesc">Filter descriptor of the gradient.</param>
		/// <param name="preference">Preference criteria used for the selection.</param>
		/// <param name="memoryLimitInbytes">Maximum amount of GPU memory the caller is willing to use as workspace.</param>
		/// <param name="algo">Receives the selected algorithm.</param>
		/// <returns>cuDNN status code of the call.</returns>
		public static extern cudnnStatus cudnnGetConvolutionBackwardFilterAlgorithm( cudnnHandle             handle,
                                                                      cudnnTensorDescriptor          srcDesc,
                                                                      cudnnTensorDescriptor          diffDesc,
                                                                      cudnnConvolutionDescriptor     convDesc, 
                                                                      cudnnFilterDescriptor          gradDesc,
                                                                      cudnnConvolutionBwdFilterPreference  preference,
                                                                      SizeT                                memoryLimitInbytes,
                                                                      ref cudnnConvolutionBwdFilterAlgo algo
                                                                     );
Exemplo n.º 5
0
		/// <summary>
		/// Computes the convolution gradient with respect to the filter coefficients using the
		/// specified algorithm and stores the result in <paramref name="gradData"/>. The scaling
		/// factors blend the new result with the prior content of the output:
		/// dstValue = alpha * result + beta * priorDstValue.
		/// </summary>
		/// <param name="alpha">Scaling factor (host value) applied to the computed result.</param>
		/// <param name="srcDesc">Previously initialized tensor descriptor of the source data.</param>
		/// <param name="srcData">GPU memory associated with <paramref name="srcDesc"/>.</param>
		/// <param name="diffDesc">Previously initialized descriptor of the input differential tensor.</param>
		/// <param name="diffData">GPU memory associated with <paramref name="diffDesc"/>.</param>
		/// <param name="convDesc">Previously initialized convolution descriptor.</param>
		/// <param name="algo">Convolution algorithm that should be used to compute the result.</param>
		/// <param name="workSpace">GPU workspace for the chosen algorithm; both its device pointer and
		/// its size in bytes are forwarded to cuDNN.</param>
		/// <param name="beta">Scaling factor (host value) applied to the prior value of the output.</param>
		/// <param name="gradDesc">Previously initialized filter descriptor of the gradient.</param>
		/// <param name="gradData">GPU memory associated with <paramref name="gradDesc"/>; carries the result.</param>
		/// <exception cref="CudaDNNException">Thrown when cuDNN reports a status other than Success.</exception>
		public void ConvolutionBackwardFilter(double alpha,
												TensorDescriptor srcDesc,
												CudaDeviceVariable<double> srcData,
												TensorDescriptor diffDesc,
												CudaDeviceVariable<double> diffData,
												ConvolutionDescriptor convDesc,
												cudnnConvolutionBwdFilterAlgo algo,
												CudaDeviceVariable<byte> workSpace,
												double beta,
												FilterDescriptor gradDesc,
												CudaDeviceVariable<double> gradData
											)
		{
			const string nativeCall = "cudnnConvolutionBackwardFilter";

			// Unwrap the managed descriptors/buffers and forward everything to the native call.
			res = CudaDNNNativeMethods.cudnnConvolutionBackwardFilter(
				_handle,
				ref alpha,
				srcDesc.Desc, srcData.DevicePointer,
				diffDesc.Desc, diffData.DevicePointer,
				convDesc.Desc,
				algo,
				workSpace.DevicePointer, workSpace.SizeInBytes,
				ref beta,
				gradDesc.Desc, gradData.DevicePointer);

			// Trace every call together with its status.
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, nativeCall, res));

			if (res == cudnnStatus.Success)
			{
				return;
			}

			throw new CudaDNNException(res);
		}
Exemplo n.º 6
0
		/// <summary>
		/// Queries the amount of GPU workspace memory that must be allocated in order to run
		/// the backward-filter convolution with the specified algorithm. The algorithm may be
		/// the result of GetConvolutionBackwardFilterAlgorithm or chosen arbitrarily by the
		/// caller. Note that not every algorithm is available for every configuration of the
		/// input tensor and/or of the convolution descriptor.
		/// </summary>
		/// <param name="srcDesc">Previously initialized input tensor descriptor.</param>
		/// <param name="diffDesc">Previously initialized input differential tensor descriptor.</param>
		/// <param name="convDesc">Previously initialized convolution descriptor.</param>
		/// <param name="gradDesc">Previously initialized filter descriptor.</param>
		/// <param name="algo">The chosen convolution algorithm.</param>
		/// <returns>Amount of GPU memory, in bytes, needed as workspace to execute the
		/// backward-filter convolution with the specified algorithm.</returns>
		/// <exception cref="CudaDNNException">Thrown when cuDNN reports a status other than Success.</exception>
		public SizeT GetConvolutionBackwardFilterWorkspaceSize(TensorDescriptor srcDesc,
																	TensorDescriptor diffDesc,
																	ConvolutionDescriptor convDesc,
																	FilterDescriptor gradDesc,
																	cudnnConvolutionBwdFilterAlgo algo
																)
		{
			const string nativeCall = "cudnnGetConvolutionBackwardFilterWorkspaceSize";

			// Filled in by the native call.
			SizeT requiredBytes = new SizeT();

			res = CudaDNNNativeMethods.cudnnGetConvolutionBackwardFilterWorkspaceSize(
				_handle,
				srcDesc.Desc,
				diffDesc.Desc,
				convDesc.Desc,
				gradDesc.Desc,
				algo,
				ref requiredBytes);

			// Trace every call together with its status.
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, nativeCall, res));

			if (res == cudnnStatus.Success)
			{
				return requiredBytes;
			}

			throw new CudaDNNException(res);
		}
Exemplo n.º 7
0
		/// <summary>
		/// Heuristically selects the best suited backward-filter convolution algorithm for the
		/// given layer specification. Depending on <paramref name="preference"/>, the result is
		/// either the fastest algorithm or the fastest algorithm within a given memory limit.
		/// For an exhaustive search for the fastest algorithm, use
		/// cudnnFindConvolutionBackwardFilterAlgorithm instead.
		/// </summary>
		/// <param name="srcDesc">Previously initialized input tensor descriptor.</param>
		/// <param name="diffDesc">Previously initialized input differential tensor descriptor.</param>
		/// <param name="convDesc">Previously initialized convolution descriptor.</param>
		/// <param name="gradDesc">Previously initialized filter descriptor.</param>
		/// <param name="preference">Preference criteria in terms of memory requirement and speed.</param>
		/// <param name="memoryLimitInbytes">Maximum amount of GPU memory the caller is willing to use
		/// as a workspace. This is currently a placeholder and is not used.</param>
		/// <returns>The convolution algorithm that should be used to compute the results according
		/// to the specified preference.</returns>
		/// <exception cref="CudaDNNException">Thrown when cuDNN reports a status other than Success.</exception>
		public cudnnConvolutionBwdFilterAlgo GetConvolutionBackwardFilterAlgorithm(TensorDescriptor srcDesc,
															TensorDescriptor diffDesc,
															ConvolutionDescriptor convDesc,
															FilterDescriptor gradDesc,
															cudnnConvolutionBwdFilterPreference preference,
															SizeT memoryLimitInbytes
															)
		{
			const string nativeCall = "cudnnGetConvolutionBackwardFilterAlgorithm";

			// Filled in by the native call.
			cudnnConvolutionBwdFilterAlgo selected = new cudnnConvolutionBwdFilterAlgo();

			res = CudaDNNNativeMethods.cudnnGetConvolutionBackwardFilterAlgorithm(
				_handle,
				srcDesc.Desc,
				diffDesc.Desc,
				convDesc.Desc,
				gradDesc.Desc,
				preference,
				memoryLimitInbytes,
				ref selected);

			// Trace every call together with its status.
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, nativeCall, res));

			if (res == cudnnStatus.Success)
			{
				return selected;
			}

			throw new CudaDNNException(res);
		}