/// <summary>
/// Calls the native <c>cudnnGetConvolutionForwardAlgorithm</c> with this context's handle and the
/// native descriptors wrapped by the arguments. The returned status is stored in <c>res</c>,
/// written to the debug output, and converted into an exception unless it is <c>Success</c>.
/// </summary>
/// <param name="srcDesc">Source tensor descriptor; its <c>Desc</c> is handed to the native call.</param>
/// <param name="filterDesc">Filter descriptor; its <c>Desc</c> is handed to the native call.</param>
/// <param name="convDesc">Convolution descriptor; its <c>Desc</c> is handed to the native call.</param>
/// <param name="destDesc">Destination tensor descriptor; its <c>Desc</c> is handed to the native call.</param>
/// <param name="preference">Algorithm preference forwarded unchanged.</param>
/// <param name="memoryLimitInbytes">Memory limit forwarded unchanged.</param>
/// <param name="algo">Passed by reference to the native call, which writes its result here.</param>
/// <exception cref="CudaDNNException">The native call returned a status other than <c>cudnnStatus.Success</c>.</exception>
public void GetConvolutionForwardAlgorithm(TensorDescriptor srcDesc,
                                           FilterDescriptor filterDesc,
                                           ConvolutionDescriptor convDesc,
                                           TensorDescriptor destDesc,
                                           cudnnConvolutionFwdPreference preference,
                                           SizeT memoryLimitInbytes,
                                           ref cudnnConvolutionFwdAlgo algo)
{
    res = CudaDNNNativeMethods.cudnnGetConvolutionForwardAlgorithm(
        _handle,
        srcDesc.Desc,
        filterDesc.Desc,
        convDesc.Desc,
        destDesc.Desc,
        preference,
        memoryLimitInbytes,
        ref algo);

    // Trace timestamp, native function name and status.
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnGetConvolutionForwardAlgorithm", res));

    if (res == cudnnStatus.Success)
    {
        return;
    }

    throw new CudaDNNException(res);
}
/// <summary>
/// Calls the native <c>cudnnGetConvolutionForwardWorkspaceSize</c> for the given descriptors and
/// algorithm and returns the size it reports. The status is stored in <c>res</c> and written to
/// the debug output before it is checked.
/// </summary>
/// <param name="srcDesc">Source tensor descriptor; its <c>Desc</c> is handed to the native call.</param>
/// <param name="filterDesc">Filter descriptor; its <c>Desc</c> is handed to the native call.</param>
/// <param name="convDesc">Convolution descriptor; its <c>Desc</c> is handed to the native call.</param>
/// <param name="destDesc">Destination tensor descriptor; its <c>Desc</c> is handed to the native call.</param>
/// <param name="algo">Forward algorithm forwarded unchanged.</param>
/// <returns>The value the native call wrote into its by-reference size argument (initialised to 0 beforehand).</returns>
/// <exception cref="CudaDNNException">The native call returned a status other than <c>cudnnStatus.Success</c>.</exception>
public SizeT GetConvolutionForwardWorkspaceSize(TensorDescriptor srcDesc,
                                                FilterDescriptor filterDesc,
                                                ConvolutionDescriptor convDesc,
                                                TensorDescriptor destDesc,
                                                cudnnConvolutionFwdAlgo algo)
{
    SizeT requiredBytes = 0;

    res = CudaDNNNativeMethods.cudnnGetConvolutionForwardWorkspaceSize(
        _handle,
        srcDesc.Desc,
        filterDesc.Desc,
        convDesc.Desc,
        destDesc.Desc,
        algo,
        ref requiredBytes);

    // Trace timestamp, native function name and status.
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnGetConvolutionForwardWorkspaceSize", res));

    if (res == cudnnStatus.Success)
    {
        return requiredBytes;
    }

    throw new CudaDNNException(res);
}
/* Convolution functions: all of the form "output = alpha * Op(inputs) + beta * output". */

/// <summary>
/// Performs the forward multiconvolution (double-precision overload) by calling the native
/// <c>cudnnConvolutionForward</c> with this context's handle. The status is stored in
/// <c>res</c>, written to the debug output, and converted into an exception unless it is
/// <c>Success</c>.
/// </summary>
/// <param name="alpha">Scaling factor, passed by reference to the native call.</param>
/// <param name="srcDesc">Descriptor of the source tensor.</param>
/// <param name="srcData">Source data; its device pointer is passed on.</param>
/// <param name="filterDesc">Descriptor of the filter.</param>
/// <param name="filterData">Filter data; its device pointer is passed on.</param>
/// <param name="convDesc">Convolution descriptor.</param>
/// <param name="algo">Forward algorithm forwarded unchanged.</param>
/// <param name="workSpace">Workspace buffer; its device pointer is passed on.</param>
/// <param name="workSpaceSizeInBytes">Workspace size forwarded unchanged.</param>
/// <param name="beta">Scaling factor, passed by reference to the native call.</param>
/// <param name="destDesc">Descriptor of the destination tensor.</param>
/// <param name="destData">Destination data; its device pointer is passed on.</param>
/// <exception cref="CudaDNNException">The native call returned a status other than <c>cudnnStatus.Success</c>.</exception>
public void ConvolutionForward(double alpha,
                               TensorDescriptor srcDesc,
                               CudaDeviceVariable<double> srcData,
                               FilterDescriptor filterDesc,
                               CudaDeviceVariable<double> filterData,
                               ConvolutionDescriptor convDesc,
                               cudnnConvolutionFwdAlgo algo,
                               CudaDeviceVariable<byte> workSpace,
                               SizeT workSpaceSizeInBytes,
                               double beta,
                               TensorDescriptor destDesc,
                               CudaDeviceVariable<double> destData)
{
    res = CudaDNNNativeMethods.cudnnConvolutionForward(
        _handle,
        ref alpha,
        srcDesc.Desc, srcData.DevicePointer,
        filterDesc.Desc, filterData.DevicePointer,
        convDesc.Desc,
        algo,
        workSpace.DevicePointer, workSpaceSizeInBytes,
        ref beta,
        destDesc.Desc, destData.DevicePointer);

    // Trace timestamp, native function name and status.
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnConvolutionForward", res));

    if (res == cudnnStatus.Success)
    {
        return;
    }

    throw new CudaDNNException(res);
}
/// <summary>
/// Builds a single-precision convolutional layer: allocates the layer's device buffers,
/// configures the cuDNN descriptors, creates the border (and, if required, PReLU) kernels,
/// selects the forward / backward-filter / backward-data algorithms and allocates one
/// workspace large enough for the largest of their three reported workspace sizes.
/// </summary>
/// <param name="widthIn">Input width (without border).</param>
/// <param name="heightIn">Input height (without border).</param>
/// <param name="channelsIn">Number of input channels.</param>
/// <param name="widthOut">Output width.</param>
/// <param name="heightOut">Output height.</param>
/// <param name="channelsOut">Number of output channels.</param>
/// <param name="batch">Batch size.</param>
/// <param name="filterWidth">Filter width; stored in <c>_filterX</c>.</param>
/// <param name="filterHeight">Filter height; stored in <c>_filterY</c>.</param>
/// <param name="activation">Activation of the layer; PRelu and LeakyRelu trigger extra allocations and kernels.</param>
/// <param name="blasCtx">cuBLAS context kept in <c>_blas</c>.</param>
/// <param name="cudnnCtx">cuDNN context kept in <c>_cudnn</c> and used for the algorithm queries.</param>
/// <param name="ctx">CUDA context used to create the kernels.</param>
/// <param name="moduleBorder">Module from which the add-border and crop-border kernels are created.</param>
/// <param name="modulePrelu">Module from which the PReLU kernels are created.</param>
public ConvolutionalLayer(int widthIn, int heightIn, int channelsIn, int widthOut, int heightOut, int channelsOut, int batch, int filterWidth, int filterHeight, Activation activation, CudaBlas blasCtx, CudaDNNContext cudnnCtx, CudaContext ctx, CUmodule moduleBorder, CUmodule modulePrelu)
    : base(widthIn, heightIn, channelsIn, widthOut, heightOut, channelsOut, batch)
{
    _activation = activation;
    _filterX = filterWidth;
    _filterY = filterHeight;

    // Extent of the input once a border of (filter size - 1) has been added in each dimension.
    int borderedWidth = widthIn + filterWidth - 1;
    int borderedHeight = heightIn + filterHeight - 1;

    // Element counts of the buffers allocated below.
    int weightCount = filterWidth * filterHeight * channelsIn * channelsOut;
    int inputCount = widthIn * heightIn * channelsIn * batch;
    int outputCount = widthOut * heightOut * channelsOut * batch;
    int borderedInputCount = borderedWidth * borderedHeight * channelsIn * batch;

    _weights = new CudaDeviceVariable<float>(weightCount);
    _d_weights = new CudaDeviceVariable<float>(weightCount);
    _bias = new CudaDeviceVariable<float>(channelsOut);
    _d_bias = new CudaDeviceVariable<float>(channelsOut);
    _dx = new CudaDeviceVariable<float>(inputCount);
    _y = new CudaDeviceVariable<float>(outputCount);
    _dy = new CudaDeviceVariable<float>(outputCount);
    _z = new CudaDeviceVariable<float>(outputCount);
    _ones = new CudaDeviceVariable<float>(batch);
    _withBorderInput = new CudaDeviceVariable<float>(borderedInputCount);
    _withBorderDx = new CudaDeviceVariable<float>(borderedInputCount);

    _cudnn = cudnnCtx;
    _blas = blasCtx;

    // NOTE(review): the activation descriptor is always configured as Relu, independent of
    // the requested activation - confirm this is intended.
    _descActivation = new ActivationDescriptor();
    _descActivation.SetActivationDescriptor(cudnnActivationMode.Relu, cudnnNanPropagation.NotPropagateNan, 0);

    _descBias = new TensorDescriptor();
    _descBias.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, 1, channelsOut, 1, 1);

    // Created here but not configured in this constructor.
    _descDataInBorder = new TensorDescriptor();

    // The convolution input is the bordered image; dimensions are given as (n, c, h, w).
    _descDataIn = new TensorDescriptor();
    _descDataIn.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, batch, channelsIn, borderedHeight, borderedWidth);

    _descDataOut = new TensorDescriptor();
    _descDataOut.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, batch, channelsOut, heightOut, widthOut);

    // Filter dimensions are (k, c, h, w): height comes before width, matching the tensor
    // descriptors above. Passing width first would describe a transposed filter whenever
    // filterWidth != filterHeight.
    _descFilter = new FilterDescriptor();
    _descFilter.SetFilter4dDescriptor(cudnnDataType.Float, cudnnTensorFormat.NCHW, channelsOut, channelsIn, filterHeight, filterWidth);

    _descConv = new ConvolutionDescriptor();
    // Created here but not configured in this constructor.
    _descConvBorder = new ConvolutionDescriptor();
    _descConv.SetConvolution2dDescriptor(0, 0, 1, 1, 1, 1, cudnnConvolutionMode.Convolution, cudnnDataType.Float);

    // Query the output dimensions cuDNN derives for this configuration. The values are not
    // used afterwards in this constructor; the call is kept as in the original code.
    int n = 0;
    int c = 0;
    int h = 0;
    int w = 0;
    _descConv.GetConvolution2dForwardOutputDim(_descDataIn, _descFilter, ref n, ref c, ref h, ref w);

    _kernelAddBorder = new AddBorderKernel(moduleBorder, ctx);
    _kernelAddBorder.BlockDimensions = new ManagedCuda.VectorTypes.dim3(borderedWidth, borderedHeight / 2 + 1, 1);

    _kernelCropBorder = new CropBorderKernel(moduleBorder, ctx);
    _kernelCropBorder.BlockDimensions = new ManagedCuda.VectorTypes.dim3(widthIn, heightIn / 2 + 1, 1);

    // Parametric / leaky ReLU need additional buffers and dedicated kernels.
    if (_activation == Activation.PRelu || _activation == Activation.LeakyRelu)
    {
        _temp = new CudaDeviceVariable<float>(channelsOut * batch);
        _aRelu = new CudaDeviceVariable<float>(channelsOut);
        _dARelu = new CudaDeviceVariable<float>(channelsOut);

        _KernelPReluForward = new PReluForwardKernel(modulePrelu, ctx);
        _KernelPReluBackward = new PReluBackwardKernel(modulePrelu, ctx);
        _KernelPReluBackward1 = new PReluBackward1Kernel(modulePrelu, ctx);
        _KernelPReluBackward2 = new PReluBackward2Kernel(modulePrelu, ctx);

        _KernelPReluForward.SetComputeSize((uint)widthOut * (uint)heightOut, (uint)channelsOut, (uint)batch);
        _KernelPReluBackward.SetComputeSize((uint)channelsOut, 1, 1);
    }

    // NOTE(review): the results of these two Find* calls are not used below; the algorithms
    // actually stored come from the Get* calls. Kept to preserve the original call sequence.
    cudnnConvolutionFwdAlgoPerf[] algos = _cudnn.FindConvolutionForwardAlgorithm(_descDataIn, _descFilter, _descConv, _descDataOut, 5);
    cudnnConvolutionBwdDataAlgoPerf[] algos2 = _cudnn.FindConvolutionBackwardDataAlgorithm(_descFilter, _descDataOut, _descConv, _descDataIn, 5);

    // Pick an algorithm for each pass and track the largest workspace any of them reports.
    _algoFwd = _cudnn.GetConvolutionForwardAlgorithm(_descDataIn, _descFilter, _descConv, _descDataOut, cudnnConvolutionFwdPreference.PreferFastest, 0);

    SizeT sizeInBytes = 0, tmpsize = 0;
    sizeInBytes = _cudnn.GetConvolutionForwardWorkspaceSize(_descDataIn, _descFilter, _descConv, _descDataOut, _algoFwd);

    _algoBwdFilter = _cudnn.GetConvolutionBackwardFilterAlgorithm(_descDataIn, _descDataOut, _descConv, _descFilter, cudnnConvolutionBwdFilterPreference.PreferFastest, 0);
    tmpsize = _cudnn.GetConvolutionBackwardFilterWorkspaceSize(_descDataIn, _descDataOut, _descConv, _descFilter, _algoBwdFilter);
    sizeInBytes = Math.Max(sizeInBytes, tmpsize);

    _algoBwdData = _cudnn.GetConvolutionBackwardDataAlgorithm(_descFilter, _descDataOut, _descConv, _descDataIn, cudnnConvolutionBwdDataPreference.PreferFastest, 0);
    tmpsize = _cudnn.GetConvolutionBackwardDataWorkspaceSize(_descFilter, _descDataOut, _descConv, _descDataIn, _algoBwdData);
    sizeInBytes = Math.Max(sizeInBytes, tmpsize);

    // Allocate a workspace only when at least one algorithm asks for one.
    if (sizeInBytes > 0)
    {
        _workspace = new CudaDeviceVariable<byte>(sizeInBytes);
    }
    else
    {
        _workspace = CudaDeviceVariable<byte>.Null;
    }
}
/// <summary>
/// Asks the native <c>cudnnGetConvolutionForwardWorkspaceSize</c> for the workspace size that
/// belongs to the given descriptors and forward algorithm. The status is stored in <c>res</c>
/// and written to the debug output before it is checked.
/// </summary>
/// <param name="srcDesc">Source tensor descriptor.</param>
/// <param name="filterDesc">Filter descriptor.</param>
/// <param name="convDesc">Convolution descriptor.</param>
/// <param name="destDesc">Destination tensor descriptor.</param>
/// <param name="algo">Forward algorithm forwarded unchanged.</param>
/// <returns>The size written by the native call into its by-reference argument (0 before the call).</returns>
/// <exception cref="CudaDNNException">The native call returned a status other than <c>cudnnStatus.Success</c>.</exception>
public SizeT GetConvolutionForwardWorkspaceSize(
    TensorDescriptor srcDesc,
    FilterDescriptor filterDesc,
    ConvolutionDescriptor convDesc,
    TensorDescriptor destDesc,
    cudnnConvolutionFwdAlgo algo)
{
    SizeT workspaceBytes = 0;

    res = CudaDNNNativeMethods.cudnnGetConvolutionForwardWorkspaceSize(
        _handle, srcDesc.Desc, filterDesc.Desc, convDesc.Desc, destDesc.Desc, algo, ref workspaceBytes);

    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnGetConvolutionForwardWorkspaceSize", res));

    bool failed = res != cudnnStatus.Success;
    if (failed)
    {
        throw new CudaDNNException(res);
    }

    return workspaceBytes;
}
/// <summary>
/// Thin wrapper around the native <c>cudnnGetConvolutionForwardAlgorithm</c>. The native status
/// is stored in <c>res</c>, written to the debug output, and raised as an exception when it is
/// not <c>Success</c>.
/// </summary>
/// <param name="srcDesc">Source tensor descriptor.</param>
/// <param name="filterDesc">Filter descriptor.</param>
/// <param name="convDesc">Convolution descriptor.</param>
/// <param name="destDesc">Destination tensor descriptor.</param>
/// <param name="preference">Algorithm preference forwarded unchanged.</param>
/// <param name="memoryLimitInbytes">Memory limit forwarded unchanged.</param>
/// <param name="algo">Passed by reference; the native call writes its result here.</param>
/// <exception cref="CudaDNNException">The native call returned a status other than <c>cudnnStatus.Success</c>.</exception>
public void GetConvolutionForwardAlgorithm(
    TensorDescriptor srcDesc,
    FilterDescriptor filterDesc,
    ConvolutionDescriptor convDesc,
    TensorDescriptor destDesc,
    cudnnConvolutionFwdPreference preference,
    SizeT memoryLimitInbytes,
    ref cudnnConvolutionFwdAlgo algo)
{
    res = CudaDNNNativeMethods.cudnnGetConvolutionForwardAlgorithm(
        _handle, srcDesc.Desc, filterDesc.Desc, convDesc.Desc, destDesc.Desc,
        preference, memoryLimitInbytes, ref algo);

    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnGetConvolutionForwardAlgorithm", res));

    bool failed = res != cudnnStatus.Success;
    if (failed)
    {
        throw new CudaDNNException(res);
    }
}
/* Convolution functions: all of the form "output = alpha * Op(inputs) + beta * output". */

/// <summary>
/// Runs the forward multiconvolution on double-precision data through the native
/// <c>cudnnConvolutionForward</c>. The native status is stored in <c>res</c>, written to the
/// debug output, and raised as an exception when it is not <c>Success</c>.
/// </summary>
/// <param name="alpha">Scaling factor, passed by reference to the native call.</param>
/// <param name="srcDesc">Descriptor of the source tensor.</param>
/// <param name="srcData">Source data on the device.</param>
/// <param name="filterDesc">Descriptor of the filter.</param>
/// <param name="filterData">Filter data on the device.</param>
/// <param name="convDesc">Convolution descriptor.</param>
/// <param name="algo">Forward algorithm forwarded unchanged.</param>
/// <param name="workSpace">Workspace buffer on the device.</param>
/// <param name="workSpaceSizeInBytes">Workspace size forwarded unchanged.</param>
/// <param name="beta">Scaling factor, passed by reference to the native call.</param>
/// <param name="destDesc">Descriptor of the destination tensor.</param>
/// <param name="destData">Destination data on the device.</param>
/// <exception cref="CudaDNNException">The native call returned a status other than <c>cudnnStatus.Success</c>.</exception>
public void ConvolutionForward(
    double alpha,
    TensorDescriptor srcDesc,
    CudaDeviceVariable<double> srcData,
    FilterDescriptor filterDesc,
    CudaDeviceVariable<double> filterData,
    ConvolutionDescriptor convDesc,
    cudnnConvolutionFwdAlgo algo,
    CudaDeviceVariable<byte> workSpace,
    SizeT workSpaceSizeInBytes,
    double beta,
    TensorDescriptor destDesc,
    CudaDeviceVariable<double> destData)
{
    res = CudaDNNNativeMethods.cudnnConvolutionForward(
        _handle,
        ref alpha,
        srcDesc.Desc,
        srcData.DevicePointer,
        filterDesc.Desc,
        filterData.DevicePointer,
        convDesc.Desc,
        algo,
        workSpace.DevicePointer,
        workSpaceSizeInBytes,
        ref beta,
        destDesc.Desc,
        destData.DevicePointer);

    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnConvolutionForward", res));

    bool failed = res != cudnnStatus.Success;
    if (failed)
    {
        throw new CudaDNNException(res);
    }
}
// Static extern declaration of the native cudnnConvolutionForward entry point
// (double-precision scaling factors).
//   handle               - cuDNN handle.
//   alpha, beta          - scaling factors, passed by reference.
//   srcDesc / srcData    - source tensor descriptor and device pointer to its data.
//   filterDesc / filterData - filter descriptor and device pointer to its data.
//   convDesc             - convolution descriptor.
//   algo                 - forward algorithm to use.
//   workSpace / workSpaceSizeInBytes - device pointer to the workspace and its size.
//   destDesc / destData  - destination tensor descriptor and device pointer to its data.
// Returns the cudnnStatus reported by the native library.
public static extern cudnnStatus cudnnConvolutionForward( cudnnHandle handle, ref double alpha, cudnnTensorDescriptor srcDesc, CUdeviceptr srcData, cudnnFilterDescriptor filterDesc, CUdeviceptr filterData, cudnnConvolutionDescriptor convDesc, cudnnConvolutionFwdAlgo algo, CUdeviceptr workSpace, SizeT workSpaceSizeInBytes, ref double beta, cudnnTensorDescriptor destDesc, CUdeviceptr destData );
// Static extern declaration of the native cudnnGetConvolutionForwardWorkspaceSize entry point.
//   handle      - cuDNN handle.
//   srcDesc     - source tensor descriptor.
//   filterDesc  - filter descriptor.
//   convDesc    - convolution descriptor.
//   destDesc    - destination tensor descriptor.
//   algo        - forward algorithm the size is requested for.
//   sizeInBytes - passed by reference; receives the size reported by the native library.
// Returns the cudnnStatus reported by the native library.
public static extern cudnnStatus cudnnGetConvolutionForwardWorkspaceSize( cudnnHandle handle, cudnnTensorDescriptor srcDesc, cudnnFilterDescriptor filterDesc, cudnnConvolutionDescriptor convDesc, cudnnTensorDescriptor destDesc, cudnnConvolutionFwdAlgo algo, ref SizeT sizeInBytes );
// Static extern declaration of the native cudnnGetConvolutionForwardAlgorithm entry point.
//   handle             - cuDNN handle.
//   srcDesc            - source tensor descriptor.
//   filterDesc         - filter descriptor.
//   convDesc           - convolution descriptor.
//   destDesc           - destination tensor descriptor.
//   preference         - algorithm selection preference.
//   memoryLimitInbytes - memory limit passed along with the preference.
//   algo               - passed by reference; receives the algorithm reported by the native library.
// Returns the cudnnStatus reported by the native library.
public static extern cudnnStatus cudnnGetConvolutionForwardAlgorithm( cudnnHandle handle, cudnnTensorDescriptor srcDesc, cudnnFilterDescriptor filterDesc, cudnnConvolutionDescriptor convDesc, cudnnTensorDescriptor destDesc, cudnnConvolutionFwdPreference preference, SizeT memoryLimitInbytes, ref cudnnConvolutionFwdAlgo algo );