Example #1
		// Raw P/Invoke binding to the native cuDNN entry point; the [DllImport] attribute
		// required for this extern declaration is not shown in the snippet.
		public static extern cudnnStatus cudnnConvolutionBackwardData(cudnnHandle handle,
		                                                              ref double alpha,
		                                                              cudnnFilterDescriptor filterDesc,
		                                                              CUdeviceptr filterData,
		                                                              cudnnTensorDescriptor diffDesc,
		                                                              CUdeviceptr diffData,
		                                                              cudnnConvolutionDescriptor convDesc,
		                                                              cudnnConvolutionBwdDataAlgo algo,
		                                                              CUdeviceptr workSpace,
		                                                              SizeT workSpaceSizeInBytes,
		                                                              ref double beta,
		                                                              cudnnTensorDescriptor gradDesc,
		                                                              CUdeviceptr gradData);
        public ConvolutionalLayer(int widthIn, int heightIn, int channelsIn, int widthOut, int heightOut, int channelsOut, int batch, int filterWidth, int filterHeight, Activation activation, CudaBlas blasCtx, CudaDNNContext cudnnCtx, CudaContext ctx, CUmodule moduleBorder, CUmodule modulePrelu)
            : base(widthIn, heightIn, channelsIn, widthOut, heightOut, channelsOut, batch)
        {
            _activation      = activation;
            _filterX         = filterWidth;
            _filterY         = filterHeight;
            _weights         = new CudaDeviceVariable <float>(filterWidth * filterHeight * channelsIn * channelsOut);
            _d_weights       = new CudaDeviceVariable <float>(filterWidth * filterHeight * channelsIn * channelsOut);
            _bias            = new CudaDeviceVariable <float>(channelsOut);
            _d_bias          = new CudaDeviceVariable <float>(channelsOut);
            _dx              = new CudaDeviceVariable <float>(widthIn * heightIn * channelsIn * batch);
            _y               = new CudaDeviceVariable <float>(widthOut * heightOut * channelsOut * batch);
            _dy              = new CudaDeviceVariable <float>(widthOut * heightOut * channelsOut * batch);
            _z               = new CudaDeviceVariable <float>(widthOut * heightOut * channelsOut * batch);
            _ones            = new CudaDeviceVariable <float>(batch);
            _withBorderInput = new CudaDeviceVariable <float>((widthIn + filterWidth - 1) * (heightIn + filterHeight - 1) * channelsIn * batch);
            _withBorderDx    = new CudaDeviceVariable <float>((widthIn + filterWidth - 1) * (heightIn + filterHeight - 1) * channelsIn * batch);
            _cudnn           = cudnnCtx;
            _blas            = blasCtx;
            _descActivation  = new ActivationDescriptor();
            _descActivation.SetActivationDescriptor(cudnnActivationMode.Relu, cudnnNanPropagation.NotPropagateNan, 0);
            _descBias = new TensorDescriptor();
            _descBias.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, 1, channelsOut, 1, 1);
            _descDataInBorder = new TensorDescriptor();
            _descDataIn       = new TensorDescriptor();
            _descDataIn.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, batch, channelsIn, heightIn + filterHeight - 1, widthIn + filterWidth - 1);
            _descDataOut = new TensorDescriptor();
            _descDataOut.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, batch, channelsOut, heightOut, widthOut);
            _descFilter = new FilterDescriptor();
            _descFilter.SetFilter4dDescriptor(cudnnDataType.Float, cudnnTensorFormat.NCHW, channelsOut, channelsIn, filterHeight, filterWidth); // filter dimensions are ordered (k, c, h, w)
            _descConv       = new ConvolutionDescriptor();
            _descConvBorder = new ConvolutionDescriptor();
            _descConv.SetConvolution2dDescriptor(0, 0, 1, 1, 1, 1, cudnnConvolutionMode.Convolution, cudnnDataType.Float);

            int n = 0;
            int c = 0;
            int h = 0;
            int w = 0;

            _descConv.GetConvolution2dForwardOutputDim(_descDataIn, _descFilter, ref n, ref c, ref h, ref w);

            _kernelAddBorder = new AddBorderKernel(moduleBorder, ctx);
            _kernelAddBorder.BlockDimensions = new ManagedCuda.VectorTypes.dim3(widthIn + filterWidth - 1, (heightIn + filterHeight - 1) / 2 + 1, 1);
            _kernelCropBorder = new CropBorderKernel(moduleBorder, ctx);
            _kernelCropBorder.BlockDimensions = new ManagedCuda.VectorTypes.dim3(widthIn, heightIn / 2 + 1, 1);

            if (_activation == Activation.PRelu || _activation == Activation.LeakyRelu)
            {
                _temp                 = new CudaDeviceVariable <float>(channelsOut * batch);
                _aRelu                = new CudaDeviceVariable <float>(channelsOut);
                _dARelu               = new CudaDeviceVariable <float>(channelsOut);
                _KernelPReluForward   = new PReluForwardKernel(modulePrelu, ctx);
                _KernelPReluBackward  = new PReluBackwardKernel(modulePrelu, ctx);
                _KernelPReluBackward1 = new PReluBackward1Kernel(modulePrelu, ctx);
                _KernelPReluBackward2 = new PReluBackward2Kernel(modulePrelu, ctx);
                _KernelPReluForward.SetComputeSize((uint)widthOut * (uint)heightOut, (uint)channelsOut, (uint)batch);
                _KernelPReluBackward.SetComputeSize((uint)channelsOut, 1, 1);
            }

            cudnnConvolutionFwdAlgoPerf[] algos =
                _cudnn.FindConvolutionForwardAlgorithm(_descDataIn, _descFilter, _descConv, _descDataOut, 5);

            cudnnConvolutionBwdDataAlgoPerf[] algos2 = _cudnn.FindConvolutionBackwardDataAlgorithm(_descFilter, _descDataOut, _descConv, _descDataIn, 5);

            _algoFwd = _cudnn.GetConvolutionForwardAlgorithm(_descDataIn, _descFilter, _descConv,
                                                             _descDataOut, cudnnConvolutionFwdPreference.PreferFastest, 0);


            SizeT sizeInBytes = 0, tmpsize = 0;

            sizeInBytes = _cudnn.GetConvolutionForwardWorkspaceSize(_descDataIn, _descFilter,
                                                                    _descConv, _descDataOut, _algoFwd);

            _algoBwdFilter = _cudnn.GetConvolutionBackwardFilterAlgorithm(_descDataIn, _descDataOut, _descConv, _descFilter,
                                                                          cudnnConvolutionBwdFilterPreference.PreferFastest, 0);

            tmpsize     = _cudnn.GetConvolutionBackwardFilterWorkspaceSize(_descDataIn, _descDataOut, _descConv, _descFilter, _algoBwdFilter);
            sizeInBytes = Math.Max(sizeInBytes, tmpsize);

            _algoBwdData = _cudnn.GetConvolutionBackwardDataAlgorithm(_descFilter, _descDataOut, _descConv, _descDataIn, cudnnConvolutionBwdDataPreference.PreferFastest, 0);

            tmpsize     = _cudnn.GetConvolutionBackwardDataWorkspaceSize(_descFilter, _descDataOut, _descConv, _descDataIn, _algoBwdData);
            sizeInBytes = Math.Max(sizeInBytes, tmpsize);

            if (sizeInBytes > 0)
            {
                _workspace = new CudaDeviceVariable <byte>(sizeInBytes);
            }
            else
            {
                _workspace = CudaDeviceVariable <byte> .Null;
            }
        }
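
A minimal construction sketch is shown below for context. The context objects, PTX module names, layer dimensions, and variable names are illustrative assumptions, not part of the original example; because the layer pads its input by filter-1 internally (the border kernels plus a zero-padding convolution descriptor), the output spatial size equals the input size.

        // Usage sketch (hypothetical module names and sizes; not from the original example).
        CudaContext ctx = new CudaContext(0);                       // first GPU
        CudaDNNContext cudnn = new CudaDNNContext();                // cuDNN handle wrapper
        CudaBlas blas = new CudaBlas();                             // cuBLAS handle wrapper
        CUmodule moduleBorder = ctx.LoadModulePTX("border.ptx");    // AddBorder/CropBorder kernels
        CUmodule modulePrelu = ctx.LoadModulePTX("prelu.ptx");      // PReLU kernels

        // 3x3 convolution, 3 -> 64 channels, batch of 16; widthOut/heightOut match widthIn/heightIn
        // because the layer adds its own border before the convolution.
        var conv = new ConvolutionalLayer(32, 32, 3, 32, 32, 64, 16, 3, 3,
                                          Activation.PRelu, blas, cudnn, ctx, moduleBorder, modulePrelu);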
Example #3
		public static extern cudnnStatus cudnnGetConvolutionBackwardDataWorkspaceSize(cudnnHandle handle,
		                                                                              cudnnFilterDescriptor filterDesc,
		                                                                              cudnnTensorDescriptor diffDesc,
		                                                                              cudnnConvolutionDescriptor convDesc,
		                                                                              cudnnTensorDescriptor gradDesc,
		                                                                              cudnnConvolutionBwdDataAlgo algo,
		                                                                              ref SizeT sizeInBytes);
Example #4
		public static extern cudnnStatus cudnnGetConvolutionBackwardDataAlgorithm(cudnnHandle handle,
		                                                                          cudnnFilterDescriptor filterDesc,
		                                                                          cudnnTensorDescriptor diffDesc,
		                                                                          cudnnConvolutionDescriptor convDesc,
		                                                                          cudnnTensorDescriptor gradDesc,
		                                                                          cudnnConvolutionBwdDataPreference preference,
		                                                                          SizeT memoryLimitInbytes,
		                                                                          ref cudnnConvolutionBwdDataAlgo algo);
Example #5
		/// <summary>
		/// This function computes the convolution gradient with respect to the data (the input of the
		/// forward convolution) using the specified algo, writing the result into the tensor described
		/// by gradDesc. Scaling factors alpha and beta can be used to scale the computed result and the
		/// prior value of the output tensor respectively.
		/// </summary>
		/// <param name="alpha">Pointer to scaling factors (in host memory) used to blend the computation
		/// result with prior value in the output layer as follows: dstValue =
		/// alpha[0]*result + beta[0]*priorDstValue. Please refer to this section for
		/// additional details.</param>
		/// <param name="filterDesc">Handle to a previously initialized filter descriptor.</param>
		/// <param name="filterData">Data pointer to GPU memory associated with the filter descriptor filterDesc.</param>
		/// <param name="diffDesc">Handle to the previously initialized input differential tensor descriptor.</param>
		/// <param name="diffData">Data pointer to GPU memory associated with the input differential tensor descriptor diffDesc.</param>
		/// <param name="convDesc">Previously initialized convolution descriptor.</param>
		/// <param name="algo">Enumerant that specifies which backward data convolution algorithm shoud be used to compute the results</param>
		/// <param name="workSpace">Data pointer to GPU memory to a workspace needed to able to execute
		/// the specified algorithm. If no workspace is needed for a particular
		/// algorithm, that pointer can be nil</param>
		/// <param name="beta">Pointer to scaling factors (in host memory) used to blend the computation
		/// result with prior value in the output layer as follows: dstValue =
		/// alpha[0]*result + beta[0]*priorDstValue. Please refer to this section for
		/// additional details.</param>
		/// <param name="gradDesc">Handle to the previously initialized output tensor descriptor.</param>
		/// <param name="gradData">Data pointer to GPU memory associated with the output tensor descriptor
		/// gradDesc that carries the result.</param>
		public void ConvolutionBackwardData(double alpha,
											FilterDescriptor filterDesc,
											CudaDeviceVariable<double> filterData,
											TensorDescriptor diffDesc,
											CudaDeviceVariable<double> diffData,
											ConvolutionDescriptor convDesc,
											cudnnConvolutionBwdDataAlgo algo,
											CudaDeviceVariable<byte> workSpace,
											double beta,
											TensorDescriptor gradDesc,
											CudaDeviceVariable<double> gradData
										)
		{
			res = CudaDNNNativeMethods.cudnnConvolutionBackwardData(_handle, ref alpha, filterDesc.Desc, filterData.DevicePointer, diffDesc.Desc, diffData.DevicePointer, convDesc.Desc, algo, workSpace.DevicePointer, workSpace.SizeInBytes, ref beta, gradDesc.Desc, gradData.DevicePointer);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnConvolutionBackwardData", res));
			if (res != cudnnStatus.Success) throw new CudaDNNException(res);
		}
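
A hedged call sketch follows. The descriptors, device buffers, algorithm, and workspace (hypothetical names) are assumed to have been created beforehand, e.g. as in Example #1 but with double-precision buffers to match this overload; alpha = 1 and beta = 0 simply overwrite gradData with the computed data gradient.

			// Sketch: dX = alpha * conv_backward_data(W, dY) + beta * dX  (here alpha = 1, beta = 0).
			cudnn.ConvolutionBackwardData(1.0,
			                              filterDesc, d_weights,   // W and its descriptor
			                              diffDesc, d_dy,          // dY and its descriptor
			                              convDesc, algoBwdData,   // e.g. from GetConvolutionBackwardDataAlgorithm
			                              workspace,               // CudaDeviceVariable<byte>; may be .Null if the algo needs none
			                              0.0,
			                              gradDesc, d_dx);         // dX receives the result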
Example #6
		/// <summary>
		/// This function returns the amount of GPU memory workspace the user needs
		/// to allocate to be able to call cudnnConvolutionBackwardData_v3 with the
		/// specified algorithm. The workspace allocated will then be passed to the routine
		/// cudnnConvolutionBackwardData_v3. The specified algorithm can be the result of the
		/// call to cudnnGetConvolutionBackwardDataAlgorithm or can be chosen arbitrarily
		/// by the user. Note that not every algorithm is available for every configuration of the
		/// input tensor and/or every configuration of the convolution descriptor.
		/// </summary>
		/// <param name="filterDesc">Handle to a previously initialized filter descriptor.</param>
		/// <param name="diffDesc">Handle to the previously initialized input differential tensor descriptor.</param>
		/// <param name="convDesc">Previously initialized convolution descriptor.</param>
		/// <param name="gradDesc">Handle to the previously initialized output tensor descriptor.</param>
		/// <param name="algo">Enumerant that specifies the chosen convolution algorithm</param>
		/// <returns>Amount of GPU memory needed as workspace to be able to execute a backward data convolution with the specified algo</returns>
		public SizeT GetConvolutionBackwardDataWorkspaceSize(FilterDescriptor filterDesc,
															TensorDescriptor diffDesc,
															ConvolutionDescriptor convDesc,
															TensorDescriptor gradDesc,
															cudnnConvolutionBwdDataAlgo algo
														)
		{
			SizeT sizeInBytes = new SizeT();
			res = CudaDNNNativeMethods.cudnnGetConvolutionBackwardDataWorkspaceSize(_handle, filterDesc.Desc, diffDesc.Desc, convDesc.Desc, gradDesc.Desc, algo, ref sizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnGetConvolutionBackwardDataWorkspaceSize", res));
			if (res != cudnnStatus.Success) throw new CudaDNNException(res);
			return sizeInBytes;
		}
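
Typical usage, mirroring the workspace handling in Example #1 (hypothetical variable names): query the size for the chosen algorithm and allocate a byte workspace only when one is actually required.

			// Sketch: size and allocate the workspace for the chosen backward-data algorithm.
			SizeT bwdDataSize = cudnn.GetConvolutionBackwardDataWorkspaceSize(
				filterDesc, diffDesc, convDesc, gradDesc, algoBwdData);

			CudaDeviceVariable<byte> workspace = bwdDataSize > 0
				? new CudaDeviceVariable<byte>(bwdDataSize)
				: CudaDeviceVariable<byte>.Null;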
Example #7
		/// <summary>
		/// This function serves as a heuristic for obtaining the best suited algorithm for
		/// cudnnConvolutionBackwardData_v3 for the given layer specifications. Based
		/// on the input preference, this function will either return the fastest algorithm or the
		/// fastest algorithm within a given memory limit. For an exhaustive search for the fastest
		/// algorithm, please use cudnnFindConvolutionBackwardDataAlgorithm.
		/// </summary>
		/// <param name="filterDesc">Handle to a previously initialized filter descriptor.</param>
		/// <param name="diffDesc">Handle to the previously initialized input differential tensor descriptor.</param>
		/// <param name="convDesc">Previously initialized convolution descriptor.</param>
		/// <param name="gradDesc">Handle to the previously initialized output tensor descriptor.</param>
		/// <param name="preference">Enumerant to express the preference criteria in terms of memory
		/// requirement and speed.</param>
		/// <param name="memoryLimitInbytes">It is to specify the maximum amount of GPU memory the user is willing to
		/// use as a workspace. This is currently a placeholder and is not used.</param>
		/// <returns>Enumerant that specifies which convolution algorithm should be used to
		/// compute the results according to the specified preference</returns>
		public cudnnConvolutionBwdDataAlgo GetConvolutionBackwardDataAlgorithm(FilterDescriptor filterDesc,
														TensorDescriptor diffDesc,
														ConvolutionDescriptor convDesc,
														TensorDescriptor gradDesc,
														cudnnConvolutionBwdDataPreference preference,
														SizeT memoryLimitInbytes
														)
		{
			cudnnConvolutionBwdDataAlgo algo = new cudnnConvolutionBwdDataAlgo();
			res = CudaDNNNativeMethods.cudnnGetConvolutionBackwardDataAlgorithm(_handle, filterDesc.Desc, diffDesc.Desc, convDesc.Desc, gradDesc.Desc, preference, memoryLimitInbytes, ref algo);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnGetConvolutionBackwardDataAlgorithm", res));
			if (res != cudnnStatus.Success) throw new CudaDNNException(res);
			return algo;
		}
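
A usage sketch (hypothetical names), following Example #1: ask for the fastest algorithm with no memory limit, then size its workspace with GetConvolutionBackwardDataWorkspaceSize.

			// Sketch: let cuDNN pick the fastest backward-data algorithm (no memory cap),
			// then query how much workspace that choice needs.
			cudnnConvolutionBwdDataAlgo algoBwdData = cudnn.GetConvolutionBackwardDataAlgorithm(
				filterDesc, diffDesc, convDesc, gradDesc,
				cudnnConvolutionBwdDataPreference.PreferFastest, 0);

			SizeT workspaceSize = cudnn.GetConvolutionBackwardDataWorkspaceSize(
				filterDesc, diffDesc, convDesc, gradDesc, algoBwdData);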