/// <summary>
/// Computes the output dimensions of a tensor after Nd pooling has been applied.
/// </summary>
/// <param name="inputTensorDesc">Handle to the previously initialized input tensor descriptor.</param>
/// <param name="nbDims">Number of dimensions in which pooling is to be applied.</param>
/// <param name="outputTensorDimA">Array of nbDims elements; filled with the output dimensions on return.</param>
public void GetPoolingNdForwardOutputDim(TensorDescriptor inputTensorDesc, int nbDims, int[] outputTensorDimA)
{
    res = CudaDNNNativeMethods.cudnnGetPoolingNdForwardOutputDim(_desc, inputTensorDesc.Desc, nbDims, outputTensorDimA);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnGetPoolingNdForwardOutputDim", res));
    if (res != cudnnStatus.Success)
        throw new CudaDNNException(res);
}
/// <summary>
/// Scales all data points of a tensor by a given factor: srcDest = alpha * srcDest.
/// </summary>
/// <param name="srcDestDesc">Handle to the previously initialized tensor descriptor.</param>
/// <param name="srcDestData">Device memory holding the tensor data; scaled in place.</param>
/// <param name="alpha">Scaling factor applied to every element.</param>
public void ScaleTensor(TensorDescriptor srcDestDesc,
                        CudaDeviceVariable<double> srcDestData,
                        double alpha)
{
    res = CudaDNNNativeMethods.cudnnScaleTensor(_handle, srcDestDesc.Desc, srcDestData.DevicePointer, ref alpha);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnScaleTensor", res));
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
}
/// <summary>
/// Initializes the destination descriptor for a tensor transformation and reports the
/// destination size. Wraps cudnnInitTransformDest using this object's descriptor
/// (presumably a tensor-transform descriptor — confirm against the enclosing class).
/// </summary>
/// <param name="srcDesc">Handle to the previously initialized source tensor descriptor.</param>
/// <param name="destDesc">Handle to the destination tensor descriptor to initialize.</param>
/// <returns>Size in bytes required for the destination tensor.</returns>
public SizeT InitTransformDest(TensorDescriptor srcDesc, TensorDescriptor destDesc)
{
    SizeT requiredBytes = new SizeT();
    res = CudaDNNNativeMethods.cudnnInitTransformDest(_desc, srcDesc.Desc, destDesc.Desc, ref requiredBytes);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnInitTransformDest", res));
    if (res != cudnnStatus.Success)
        throw new CudaDNNException(res);
    return requiredBytes;
}
/// <summary>
/// Returns the dimensions of the resulting n-D tensor of a nbDims-2-D convolution, given the
/// convolution descriptor, the input tensor descriptor and the filter descriptor. This can help
/// to set up the output tensor and allocate the proper amount of memory prior to launching the
/// actual convolution.<para/>
/// Each dimension of the (nbDims-2)-D images of the output tensor is computed as follows:<para/>
/// outputDim = 1 + (inputDim + 2*pad - filterDim)/convolutionStride;
/// </summary>
/// <param name="inputTensorDesc">Handle to a previously initialized tensor descriptor.</param>
/// <param name="filterDesc">Handle to a previously initialized filter descriptor.</param>
/// <param name="nbDims">Dimension of the output tensor.</param>
/// <param name="tensorOuputDimA">Array of nbDims elements that contains on exit of this routine the sizes of the output tensor.</param>
public void GetConvolutionNdForwardOutputDim(TensorDescriptor inputTensorDesc,
                                             FilterDescriptor filterDesc,
                                             int nbDims,
                                             int[] tensorOuputDimA)
{
    res = CudaDNNNativeMethods.cudnnGetConvolutionNdForwardOutputDim(_desc, inputTensorDesc.Desc, filterDesc.Desc, nbDims, tensorOuputDimA);
    // Consistency fix: the trace had been mangled into 'Debug.Write(""); //Line(...)';
    // restored so this wrapper logs like every other wrapper in this file.
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnGetConvolutionNdForwardOutputDim", res));
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
}
/// <summary>
/// Provides the output dimensions of a tensor after 2d pooling has been applied.
/// </summary>
/// <param name="inputTensorDesc">Handle to the previously initialized input tensor descriptor.</param>
/// <param name="n">Number of images in the output.</param>
/// <param name="c">Number of channels in the output.</param>
/// <param name="h">Height of images in the output.</param>
/// <param name="w">Width of images in the output.</param>
public void GetPooling2dForwardOutputDim(TensorDescriptor inputTensorDesc, ref int n, ref int c, ref int h, ref int w)
{
    res = CudaDNNNativeMethods.cudnnGetPooling2dForwardOutputDim(_desc, inputTensorDesc.Desc, ref n, ref c, ref h, ref w);
    // Consistency fix: the trace had been mangled into 'Debug.Write(""); //Line(...)';
    // restored so this wrapper logs like its Nd counterpart and the rest of the file.
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnGetPooling2dForwardOutputDim", res));
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
}
/// <summary>
/// Tensor layout conversion helper: dest = alpha * src + beta * dest (float overload).
/// </summary>
/// <param name="alpha">Scaling factor applied to the source tensor.</param>
/// <param name="srcDesc">Handle to the previously initialized source tensor descriptor.</param>
/// <param name="srcData">Device memory associated with srcDesc.</param>
/// <param name="beta">Scaling factor applied to the prior destination contents.</param>
/// <param name="destDesc">Handle to the previously initialized destination tensor descriptor.</param>
/// <param name="destData">Device memory associated with destDesc; receives the result.</param>
public void TransformTensor(float alpha,
                            TensorDescriptor srcDesc,
                            CudaDeviceVariable<float> srcData,
                            float beta,
                            TensorDescriptor destDesc,
                            CudaDeviceVariable<float> destData)
{
    res = CudaDNNNativeMethods.cudnnTransformTensor(_handle, ref alpha, srcDesc.Desc, srcData.DevicePointer, ref beta, destDesc.Desc, destData.DevicePointer);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnTransformTensor", res));
    if (res != cudnnStatus.Success)
        throw new CudaDNNException(res);
}
/// <summary>
/// Wraps cudnnIm2Col: expands the source image tensor into the column buffer used by
/// GEMM-based convolution.
/// </summary>
/// <param name="alpha">Scaling factor passed through to the native call.</param>
/// <param name="srcDesc">Handle to the previously initialized source tensor descriptor.</param>
/// <param name="srcData">Device memory associated with srcDesc.</param>
/// <param name="filterDesc">Handle to a previously initialized filter descriptor.</param>
/// <param name="convDesc">Previously initialized convolution descriptor.</param>
/// <param name="colBuffer">Device buffer receiving the column data.</param>
public void Im2Col(double alpha,
                   TensorDescriptor srcDesc,
                   CudaDeviceVariable<double> srcData,
                   FilterDescriptor filterDesc,
                   ConvolutionDescriptor convDesc,
                   CudaDeviceVariable<byte> colBuffer)
{
    res = CudaDNNNativeMethods.cudnnIm2Col(_handle, ref alpha, srcDesc.Desc, srcData.DevicePointer, filterDesc.Desc, convDesc.Desc, colBuffer.DevicePointer);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnIm2Col", res));
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
}
/// <summary>
/// Performs the backward bias computation of a convolution:
/// dest = alpha * Op(src) + beta * dest.
/// </summary>
/// <param name="alpha">Scaling factor applied to the computed result.</param>
/// <param name="srcDesc">Handle to the previously initialized input tensor descriptor.</param>
/// <param name="srcData">Device memory associated with srcDesc.</param>
/// <param name="beta">Scaling factor applied to the prior destination contents.</param>
/// <param name="destDesc">Handle to the previously initialized output tensor descriptor.</param>
/// <param name="destData">Device memory associated with destDesc; receives the result.</param>
public void ConvolutionBackwardBias(double alpha,
                                    TensorDescriptor srcDesc,
                                    CudaDeviceVariable<double> srcData,
                                    double beta,
                                    TensorDescriptor destDesc,
                                    CudaDeviceVariable<double> destData)
{
    res = CudaDNNNativeMethods.cudnnConvolutionBackwardBias(_handle, ref alpha, srcDesc.Desc, srcData.DevicePointer, ref beta, destDesc.Desc, destData.DevicePointer);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnConvolutionBackwardBias", res));
    if (res != cudnnStatus.Success)
        throw new CudaDNNException(res);
}
/// <summary>
/// Queries cuDNN for a forward convolution algorithm matching the given preference
/// and memory limit.
/// </summary>
/// <param name="srcDesc">Handle to the previously initialized input tensor descriptor.</param>
/// <param name="filterDesc">Handle to a previously initialized filter descriptor.</param>
/// <param name="convDesc">Previously initialized convolution descriptor.</param>
/// <param name="destDesc">Handle to the previously initialized output tensor descriptor.</param>
/// <param name="preference">Selection preference (e.g. fastest vs. memory-limited).</param>
/// <param name="memoryLimitInbytes">Upper bound on workspace memory, honored per the preference.</param>
/// <param name="algo">Receives the chosen algorithm on return.</param>
public void GetConvolutionForwardAlgorithm(TensorDescriptor srcDesc,
                                           FilterDescriptor filterDesc,
                                           ConvolutionDescriptor convDesc,
                                           TensorDescriptor destDesc,
                                           cudnnConvolutionFwdPreference preference,
                                           SizeT memoryLimitInbytes,
                                           ref cudnnConvolutionFwdAlgo algo)
{
    res = CudaDNNNativeMethods.cudnnGetConvolutionForwardAlgorithm(_handle, srcDesc.Desc, filterDesc.Desc, convDesc.Desc, destDesc.Desc, preference, memoryLimitInbytes, ref algo);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnGetConvolutionForwardAlgorithm", res));
    if (res != cudnnStatus.Success)
        throw new CudaDNNException(res);
}
/// <summary>
/// Returns the workspace size in bytes needed to run the given forward convolution
/// algorithm on the given descriptors.
/// </summary>
/// <param name="srcDesc">Handle to the previously initialized input tensor descriptor.</param>
/// <param name="filterDesc">Handle to a previously initialized filter descriptor.</param>
/// <param name="convDesc">Previously initialized convolution descriptor.</param>
/// <param name="destDesc">Handle to the previously initialized output tensor descriptor.</param>
/// <param name="algo">The forward convolution algorithm to size the workspace for.</param>
/// <returns>Required workspace size in bytes.</returns>
public SizeT GetConvolutionForwardWorkspaceSize(TensorDescriptor srcDesc,
                                                FilterDescriptor filterDesc,
                                                ConvolutionDescriptor convDesc,
                                                TensorDescriptor destDesc,
                                                cudnnConvolutionFwdAlgo algo)
{
    SizeT workspaceBytes = 0;
    res = CudaDNNNativeMethods.cudnnGetConvolutionForwardWorkspaceSize(_handle, srcDesc.Desc, filterDesc.Desc, convDesc.Desc, destDesc.Desc, algo, ref workspaceBytes);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnGetConvolutionForwardWorkspaceSize", res));
    if (res != cudnnStatus.Success)
        throw new CudaDNNException(res);
    return workspaceBytes;
}
/// <summary>
/// Performs a spatial transformer sampler operation, generating the output tensor from the
/// input using the grid produced by the grid generator:
/// dstValue = alpha*srcValue + beta*priorDstValue.
/// </summary>
/// <param name="alpha">Scaling factor blending the sampled value into the destination.</param>
/// <param name="xDesc">Handle to the previously initialized input tensor descriptor.</param>
/// <param name="x">Device memory associated with xDesc.</param>
/// <param name="grid">Grid of coordinates generated by cudnnSpatialTfGridGeneratorForward.</param>
/// <param name="beta">Scaling factor blending the prior destination value.</param>
/// <param name="yDesc">Handle to the previously initialized output tensor descriptor.</param>
/// <param name="y">Device memory associated with yDesc; receives the result.</param>
public void SpatialTfSamplerForward(double alpha,
                                    TensorDescriptor xDesc,
                                    CudaDeviceVariable<double> x,
                                    CudaDeviceVariable<double> grid,
                                    double beta,
                                    TensorDescriptor yDesc,
                                    CudaDeviceVariable<double> y)
{
    res = CudaDNNNativeMethods.cudnnSpatialTfSamplerForward(_handle, _desc, ref alpha, xDesc.Desc, x.DevicePointer, grid.DevicePointer, ref beta, yDesc.Desc, y.DevicePointer);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnSpatialTfSamplerForward", res));
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
}
/// <summary>
/// Performs the backward activation pass, computing the gradient with respect to the input:
/// destDiff = alpha * result + beta * destDiff.
/// </summary>
/// <param name="mode">Activation function to differentiate.</param>
/// <param name="alpha">Scaling factor applied to the computed gradient.</param>
/// <param name="srcDesc">Handle to the previously initialized input tensor descriptor.</param>
/// <param name="srcData">Device memory associated with srcDesc.</param>
/// <param name="srcDiffDesc">Handle to the previously initialized input differential tensor descriptor.</param>
/// <param name="srcDiffData">Device memory associated with srcDiffDesc.</param>
/// <param name="destDesc">Handle to the previously initialized output tensor descriptor.</param>
/// <param name="destData">Device memory associated with destDesc.</param>
/// <param name="beta">Scaling factor applied to the prior destination-differential contents.</param>
/// <param name="destDiffDesc">Handle to the previously initialized output differential tensor descriptor.</param>
/// <param name="destDiffData">Device memory associated with destDiffDesc; receives the result.</param>
public void ActivationBackward(cudnnActivationMode mode,
                               float alpha,
                               TensorDescriptor srcDesc, CudaDeviceVariable<float> srcData,
                               TensorDescriptor srcDiffDesc, CudaDeviceVariable<float> srcDiffData,
                               TensorDescriptor destDesc, CudaDeviceVariable<float> destData,
                               float beta,
                               TensorDescriptor destDiffDesc, CudaDeviceVariable<float> destDiffData)
{
    res = CudaDNNNativeMethods.cudnnActivationBackward(_handle, mode, ref alpha, srcDesc.Desc, srcData.DevicePointer, srcDiffDesc.Desc, srcDiffData.DevicePointer, destDesc.Desc, destData.DevicePointer, ref beta, destDiffDesc.Desc, destDiffData.DevicePointer);
    // Bug fix: the trace previously reported "cudnnActivationForward" even though this
    // wrapper calls cudnnActivationBackward.
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnActivationBackward", res));
    if (res != cudnnStatus.Success)
        throw new CudaDNNException(res);
}
/// <summary>
/// Performs forward softmax: dest = alpha * Softmax(src) + beta * dest.
/// </summary>
/// <param name="algorithm">Softmax algorithm to apply.</param>
/// <param name="mode">Whether softmax is computed per image or per spatial location.</param>
/// <param name="alpha">Scaling factor applied to the computed result.</param>
/// <param name="srcDesc">Handle to the previously initialized input tensor descriptor.</param>
/// <param name="srcData">Device memory associated with srcDesc.</param>
/// <param name="beta">Scaling factor applied to the prior destination contents.</param>
/// <param name="destDesc">Handle to the previously initialized output tensor descriptor.</param>
/// <param name="destData">Device memory associated with destDesc; receives the result.</param>
public void SoftmaxForward(cudnnSoftmaxAlgorithm algorithm,
                           cudnnSoftmaxMode mode,
                           double alpha,
                           TensorDescriptor srcDesc,
                           CudaDeviceVariable<double> srcData,
                           double beta,
                           TensorDescriptor destDesc,
                           CudaDeviceVariable<double> destData)
{
    res = CudaDNNNativeMethods.cudnnSoftmaxForward(_handle, algorithm, mode, ref alpha, srcDesc.Desc, srcData.DevicePointer, ref beta, destDesc.Desc, destData.DevicePointer);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnSoftmaxForward", res));
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
}
/// <summary>
/// Computes the convolution gradient with respect to the data (input) tensor:
/// grad = alpha * result + beta * grad. This overload lets cuDNN pick the algorithm
/// (no explicit algo/workspace arguments).
/// </summary>
/// <param name="alpha">Scaling factor applied to the computed gradient.</param>
/// <param name="filterDesc">Handle to a previously initialized filter descriptor.</param>
/// <param name="filterData">Device memory associated with filterDesc.</param>
/// <param name="diffDesc">Handle to the previously initialized input differential tensor descriptor.</param>
/// <param name="diffData">Device memory associated with diffDesc.</param>
/// <param name="convDesc">Previously initialized convolution descriptor.</param>
/// <param name="beta">Scaling factor applied to the prior gradient contents.</param>
/// <param name="gradDesc">Handle to the previously initialized output tensor descriptor.</param>
/// <param name="gradData">Device memory associated with gradDesc; receives the result.</param>
public void ConvolutionBackwardData(double alpha,
                                    FilterDescriptor filterDesc,
                                    CudaDeviceVariable<double> filterData,
                                    TensorDescriptor diffDesc,
                                    CudaDeviceVariable<double> diffData,
                                    ConvolutionDescriptor convDesc,
                                    double beta,
                                    TensorDescriptor gradDesc,
                                    CudaDeviceVariable<double> gradData)
{
    res = CudaDNNNativeMethods.cudnnConvolutionBackwardData(_handle, ref alpha, filterDesc.Desc, filterData.DevicePointer, diffDesc.Desc, diffData.DevicePointer, convDesc.Desc, ref beta, gradDesc.Desc, gradData.DevicePointer);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnConvolutionBackwardData", res));
    if (res != cudnnStatus.Success)
        throw new CudaDNNException(res);
}
/// <summary>
/// Returns the workspace size needed for CTC loss computation.
/// NOTE(review): despite its name, this overload only queries
/// cudnnGetCTCLossWorkspaceSize; it does not execute the loss. The name is kept for
/// backward compatibility with existing callers.
/// </summary>
/// <param name="handle">Handle to a previously created cuDNN context.</param>
/// <param name="probsDesc">Probabilities tensor descriptor; dimensions are T,N,A
/// (T = timing steps, N = mini-batch size, A = alphabet size).</param>
/// <param name="labels">Labels list, in CPU memory.</param>
/// <param name="labelLengths">Length of each label, in CPU memory.</param>
/// <param name="inputLengths">Lengths of the timing steps in each batch, in CPU memory.</param>
/// <param name="gradientsDesc">Gradients tensor descriptor; dimensions are T,N,A.</param>
/// <param name="algo">Chosen CTC loss algorithm (0 and 1 supported).</param>
/// <returns>Amount of GPU memory needed as workspace to execute the CTC loss with the given algo.</returns>
public SizeT CTCLoss(CudaDNNContext handle,
                     TensorDescriptor probsDesc,
                     int[] labels,
                     int[] labelLengths,
                     int[] inputLengths,
                     TensorDescriptor gradientsDesc,
                     cudnnCTCLossAlgo algo)
{
    SizeT size = new SizeT();
    res = CudaDNNNativeMethods.cudnnGetCTCLossWorkspaceSize(handle.Handle, probsDesc.Desc, gradientsDesc.Desc, labels, labelLengths, inputLengths, algo, _desc, ref size);
    // Consistency fix: the trace had been mangled into 'Debug.Write(""); //Line(...)';
    // restored so this wrapper logs like every other wrapper in this file.
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnGetCTCLossWorkspaceSize", res));
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
    return size;
}
/// <summary>
/// Computes the CTC costs and gradients, given the probabilities and labels.
/// </summary>
/// <param name="handle">Handle to a previously created cuDNN context.</param>
/// <param name="probsDesc">Probabilities tensor descriptor; dimensions are T,N,A
/// (T = timing steps, N = mini-batch size, A = alphabet size).</param>
/// <param name="probs">Probabilities after softmax, in GPU memory.</param>
/// <param name="labels">Labels list, in CPU memory.</param>
/// <param name="labelLengths">Length of each label, in CPU memory.</param>
/// <param name="inputLengths">Lengths of the timing steps in each batch, in CPU memory.</param>
/// <param name="costs">Receives the computed costs of CTC, in GPU memory.</param>
/// <param name="gradientsDesc">Gradients tensor descriptor; dimensions are T,N,A.</param>
/// <param name="gradients">Receives the computed CTC gradients, in GPU memory.
/// NOTE(review): the native API allows a NULL gradients pointer to compute costs only,
/// but this wrapper unconditionally dereferences gradients — passing null here would
/// throw; verify intended usage with callers.</param>
/// <param name="algo">Chosen CTC loss algorithm (0 and 1 supported).</param>
/// <param name="workspace">GPU workspace sized per the workspace-size query.</param>
public void CTCLoss(CudaDNNContext handle,
                    TensorDescriptor probsDesc,
                    CudaDeviceVariable<double> probs,
                    int[] labels,
                    int[] labelLengths,
                    int[] inputLengths,
                    CudaDeviceVariable<double> costs,
                    TensorDescriptor gradientsDesc,
                    CudaDeviceVariable<double> gradients,
                    cudnnCTCLossAlgo algo,
                    CudaDeviceVariable<byte> workspace)
{
    res = CudaDNNNativeMethods.cudnnCTCLoss(handle.Handle, probsDesc.Desc, probs.DevicePointer, labels, labelLengths, inputLengths, costs.DevicePointer, gradientsDesc.Desc, gradients.DevicePointer, algo, _desc, workspace.DevicePointer, workspace.SizeInBytes);
    // Consistency fix: the trace had been mangled into 'Debug.Write(""); //Line(...)';
    // restored so this wrapper logs like every other wrapper in this file.
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnCTCLoss", res));
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
}
/// <summary>
/// Performs the forward convolution: dest = alpha * Conv(src, filter) + beta * dest,
/// using the caller-selected algorithm and workspace.
/// </summary>
/// <param name="alpha">Scaling factor applied to the computed result.</param>
/// <param name="srcDesc">Handle to the previously initialized input tensor descriptor.</param>
/// <param name="srcData">Device memory associated with srcDesc.</param>
/// <param name="filterDesc">Handle to a previously initialized filter descriptor.</param>
/// <param name="filterData">Device memory associated with filterDesc.</param>
/// <param name="convDesc">Previously initialized convolution descriptor.</param>
/// <param name="algo">Forward convolution algorithm to use.</param>
/// <param name="workSpace">Device workspace for the algorithm (may be unused by some algorithms).</param>
/// <param name="workSpaceSizeInBytes">Size in bytes of the provided workspace.</param>
/// <param name="beta">Scaling factor applied to the prior destination contents.</param>
/// <param name="destDesc">Handle to the previously initialized output tensor descriptor.</param>
/// <param name="destData">Device memory associated with destDesc; receives the result.</param>
public void ConvolutionForward(double alpha,
                               TensorDescriptor srcDesc,
                               CudaDeviceVariable<double> srcData,
                               FilterDescriptor filterDesc,
                               CudaDeviceVariable<double> filterData,
                               ConvolutionDescriptor convDesc,
                               cudnnConvolutionFwdAlgo algo,
                               CudaDeviceVariable<byte> workSpace,
                               SizeT workSpaceSizeInBytes,
                               double beta,
                               TensorDescriptor destDesc,
                               CudaDeviceVariable<double> destData)
{
    res = CudaDNNNativeMethods.cudnnConvolutionForward(_handle, ref alpha, srcDesc.Desc, srcData.DevicePointer, filterDesc.Desc, filterData.DevicePointer, convDesc.Desc, algo, workSpace.DevicePointer, workSpaceSizeInBytes, ref beta, destDesc.Desc, destData.DevicePointer);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnConvolutionForward", res));
    if (res != cudnnStatus.Success)
        throw new CudaDNNException(res);
}
/// <summary>
/// Computes the gradient of a spatial transformer sampling operation.
/// NOTE(review): the method name keeps the native lowercase 'cudnn' prefix, which
/// breaks C# PascalCase convention, but renaming it would break existing callers.
/// </summary>
/// <param name="alpha">Scaling factor blending the computed gradient into dx.</param>
/// <param name="xDesc">Handle to the previously initialized input tensor descriptor.</param>
/// <param name="x">Device memory associated with xDesc.</param>
/// <param name="beta">Scaling factor applied to the prior dx contents.</param>
/// <param name="dxDesc">Handle to the previously initialized output differential tensor descriptor.</param>
/// <param name="dx">Device memory associated with dxDesc; receives the data gradient.</param>
/// <param name="alphaDgrid">Scaling factor for the grid gradient. NOTE(review): passed as a
/// device pointer here while betaDgrid is passed by ref from host — confirm this matches the
/// native signature's expectations.</param>
/// <param name="dyDesc">Handle to the previously initialized input differential tensor descriptor.</param>
/// <param name="dy">Device memory associated with dyDesc.</param>
/// <param name="grid">Grid of coordinates generated by cudnnSpatialTfGridGeneratorForward.</param>
/// <param name="betaDgrid">Scaling factor applied to the prior dgrid contents.</param>
/// <param name="dgrid">Device memory receiving the grid gradient.</param>
public void cudnnSpatialTfSamplerBackward(float alpha,
                                          TensorDescriptor xDesc,
                                          CudaDeviceVariable<float> x,
                                          float beta,
                                          TensorDescriptor dxDesc,
                                          CudaDeviceVariable<float> dx,
                                          CudaDeviceVariable<float> alphaDgrid,
                                          TensorDescriptor dyDesc,
                                          CudaDeviceVariable<float> dy,
                                          CudaDeviceVariable<float> grid,
                                          float betaDgrid,
                                          CudaDeviceVariable<float> dgrid)
{
    res = CudaDNNNativeMethods.cudnnSpatialTfSamplerBackward(_handle, _desc, ref alpha, xDesc.Desc, x.DevicePointer, ref beta, dxDesc.Desc, dx.DevicePointer, alphaDgrid.DevicePointer, dyDesc.Desc, dy.DevicePointer, grid.DevicePointer, ref betaDgrid, dgrid.DevicePointer);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnSpatialTfSamplerBackward", res));
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
}
/// <summary>
/// Tensor layout conversion helper: dest = alpha * src + beta * dest (double overload).
/// </summary>
/// <param name="alpha">Scaling factor applied to the source tensor.</param>
/// <param name="srcDesc">Handle to the previously initialized source tensor descriptor.</param>
/// <param name="srcData">Device memory associated with srcDesc.</param>
/// <param name="beta">Scaling factor applied to the prior destination contents.</param>
/// <param name="destDesc">Handle to the previously initialized destination tensor descriptor.</param>
/// <param name="destData">Device memory associated with destDesc; receives the result.</param>
public void TransformTensor(double alpha,
                            TensorDescriptor srcDesc,
                            CudaDeviceVariable<double> srcData,
                            double beta,
                            TensorDescriptor destDesc,
                            CudaDeviceVariable<double> destData)
{
    res = CudaDNNNativeMethods.cudnnTransformTensor(_handle, ref alpha, srcDesc.Desc, srcData.DevicePointer, ref beta, destDesc.Desc, destData.DevicePointer);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnTransformTensor", res));
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
}
/// <summary>
/// Sets all data points of a tensor to a given value: srcDest = value.
/// </summary>
/// <param name="srcDestDesc">Handle to the previously initialized tensor descriptor.</param>
/// <param name="srcDestData">Device memory holding the tensor; overwritten in place.</param>
/// <param name="value">The fill value. NOTE(review): passed as a device pointer here;
/// confirm the native binding expects device rather than host memory for the value.</param>
public void SetTensor(TensorDescriptor srcDestDesc,
                      CudaDeviceVariable<double> srcDestData,
                      CudaDeviceVariable<double> value)
{
    res = CudaDNNNativeMethods.cudnnSetTensor(_handle, srcDestDesc.Desc, srcDestData.DevicePointer, value.DevicePointer);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnSetTensor", res));
    if (res != cudnnStatus.Success)
        throw new CudaDNNException(res);
}
/// <summary>
/// Performs forward softmax: dest = alpha * Softmax(src) + beta * dest.
/// </summary>
/// <param name="algorithm">Softmax algorithm to apply.</param>
/// <param name="mode">Whether softmax is computed per image or per spatial location.</param>
/// <param name="alpha">Scaling factor applied to the computed result.</param>
/// <param name="srcDesc">Handle to the previously initialized input tensor descriptor.</param>
/// <param name="srcData">Device memory associated with srcDesc.</param>
/// <param name="beta">Scaling factor applied to the prior destination contents.</param>
/// <param name="destDesc">Handle to the previously initialized output tensor descriptor.</param>
/// <param name="destData">Device memory associated with destDesc; receives the result.</param>
public void SoftmaxForward(cudnnSoftmaxAlgorithm algorithm,
                           cudnnSoftmaxMode mode,
                           double alpha,
                           TensorDescriptor srcDesc,
                           CudaDeviceVariable<double> srcData,
                           double beta,
                           TensorDescriptor destDesc,
                           CudaDeviceVariable<double> destData)
{
    res = CudaDNNNativeMethods.cudnnSoftmaxForward(_handle, algorithm, mode, ref alpha, srcDesc.Desc, srcData.DevicePointer, ref beta, destDesc.Desc, destData.DevicePointer);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnSoftmaxForward", res));
    if (res != cudnnStatus.Success)
        throw new CudaDNNException(res);
}
/// <summary>
/// Attempts all cuDNN algorithms for backward-data convolution and returns performance
/// metrics sorted ascending by compute time (fastest first).
/// </summary>
/// <param name="filterDesc">Handle to a previously initialized filter descriptor.</param>
/// <param name="diffDesc">Handle to the previously initialized input differential tensor descriptor.</param>
/// <param name="convDesc">Previously initialized convolution descriptor.</param>
/// <param name="gradDesc">Handle to the previously initialized output tensor descriptor.</param>
/// <param name="requestedAlgoCount">Maximum number of results to collect.</param>
/// <returns>Performance metrics sorted by compute time, or null when cuDNN returned no
/// results (existing callers rely on the null return for the empty case).</returns>
public cudnnConvolutionBwdDataAlgoPerf[] FindConvolutionBackwardDataAlgorithm(FilterDescriptor filterDesc,
                                                                              TensorDescriptor diffDesc,
                                                                              ConvolutionDescriptor convDesc,
                                                                              TensorDescriptor gradDesc,
                                                                              int requestedAlgoCount)
{
    cudnnConvolutionBwdDataAlgoPerf[] scratch = new cudnnConvolutionBwdDataAlgoPerf[requestedAlgoCount];
    int actualCount = 0;
    res = CudaDNNNativeMethods.cudnnFindConvolutionBackwardDataAlgorithm(_handle, filterDesc.Desc, diffDesc.Desc, convDesc.Desc, gradDesc.Desc, requestedAlgoCount, ref actualCount, scratch);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnFindConvolutionBackwardDataAlgorithm", res));
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
    if (actualCount <= 0)
    {
        return null;
    }
    // Trim the scratch buffer down to the number of entries cuDNN actually filled.
    cudnnConvolutionBwdDataAlgoPerf[] results = new cudnnConvolutionBwdDataAlgoPerf[actualCount];
    Array.Copy(scratch, results, actualCount);
    return results;
}
/// <summary>
/// Computes the convolution gradient with respect to the data (input) tensor using the
/// specified algorithm: grad = alpha * result + beta * grad.
/// </summary>
/// <param name="alpha">Scaling factor applied to the computed gradient.</param>
/// <param name="filterDesc">Handle to a previously initialized filter descriptor.</param>
/// <param name="filterData">Device memory associated with filterDesc.</param>
/// <param name="diffDesc">Handle to the previously initialized input differential tensor descriptor.</param>
/// <param name="diffData">Device memory associated with diffDesc.</param>
/// <param name="convDesc">Previously initialized convolution descriptor.</param>
/// <param name="algo">Backward-data convolution algorithm to use.</param>
/// <param name="workSpace">Device workspace for the algorithm; its SizeInBytes is forwarded
/// to the native call.</param>
/// <param name="beta">Scaling factor applied to the prior gradient contents.</param>
/// <param name="gradDesc">Handle to the previously initialized output tensor descriptor.</param>
/// <param name="gradData">Device memory associated with gradDesc; receives the result.</param>
public void ConvolutionBackwardData(double alpha,
                                    FilterDescriptor filterDesc,
                                    CudaDeviceVariable<double> filterData,
                                    TensorDescriptor diffDesc,
                                    CudaDeviceVariable<double> diffData,
                                    ConvolutionDescriptor convDesc,
                                    cudnnConvolutionBwdDataAlgo algo,
                                    CudaDeviceVariable<byte> workSpace,
                                    double beta,
                                    TensorDescriptor gradDesc,
                                    CudaDeviceVariable<double> gradData)
{
    res = CudaDNNNativeMethods.cudnnConvolutionBackwardData(_handle, ref alpha, filterDesc.Desc, filterData.DevicePointer, diffDesc.Desc, diffData.DevicePointer, convDesc.Desc, algo, workSpace.DevicePointer, workSpace.SizeInBytes, ref beta, gradDesc.Desc, gradData.DevicePointer);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnConvolutionBackwardData", res));
    if (res != cudnnStatus.Success)
        throw new CudaDNNException(res);
}
/// <summary>
/// Executes the recurrent neural network described by this descriptor in training mode,
/// with inputs x, hx, cx and weights w, producing outputs y, hy, cy. workspace is required
/// for intermediate storage; reserveSpace stores data required for training and must be
/// reused by subsequent cudnnRNNBackwardData / cudnnRNNBackwardWeights calls on the same
/// input data.
/// </summary>
/// <param name="xDesc">Array of tensor descriptors, one per recurrent iteration, describing the
/// input. All must share the same first dimension; the second dimension may only stay equal or
/// decrease from one element to the next. Tensors must be fully packed.</param>
/// <param name="x">Device memory associated with the descriptors in xDesc.</param>
/// <param name="hxDesc">Descriptor for the initial hidden state (hiddenSize x batch x numLayers
/// per the RNN descriptor). Tensor must be fully packed.</param>
/// <param name="hx">Device memory for the initial hidden state; NULL-pointer semantics
/// (zero-initialized state) apply at the native level.</param>
/// <param name="cxDesc">Descriptor for the initial LSTM cell state; same layout rules as hxDesc.</param>
/// <param name="cx">Device memory for the initial cell state.</param>
/// <param name="wDesc">Filter descriptor describing the RNN weights.</param>
/// <param name="w">Device memory associated with wDesc.</param>
/// <param name="yDesc">Array of tensor descriptors, one per recurrent iteration, describing the
/// output; first dimension is hiddenSize (or 2*hiddenSize for bidirectional RNNs).</param>
/// <param name="y">Device memory associated with the descriptors in yDesc.</param>
/// <param name="hyDesc">Descriptor for the final hidden state; same layout rules as hxDesc.</param>
/// <param name="hy">Device memory receiving the final hidden state.</param>
/// <param name="cyDesc">Descriptor for the final LSTM cell state; same layout rules as hxDesc.</param>
/// <param name="cy">Device memory receiving the final cell state.</param>
/// <param name="workspace">Device workspace for this call.</param>
/// <param name="workSpaceSizeInBytes">Size in bytes of the provided workspace.</param>
/// <param name="reserveSpace">Device reserve space for this call.</param>
/// <param name="reserveSpaceSizeInBytes">Size in bytes of the provided reserveSpace.</param>
public void RNNForwardTraining(TensorDescriptor[] xDesc,
                               CudaDeviceVariable<double> x,
                               TensorDescriptor hxDesc,
                               CudaDeviceVariable<double> hx,
                               TensorDescriptor cxDesc,
                               CudaDeviceVariable<double> cx,
                               FilterDescriptor wDesc,
                               CudaDeviceVariable<double> w,
                               TensorDescriptor[] yDesc,
                               CudaDeviceVariable<double> y,
                               TensorDescriptor hyDesc,
                               CudaDeviceVariable<double> hy,
                               TensorDescriptor cyDesc,
                               CudaDeviceVariable<double> cy,
                               CudaDeviceVariable<byte> workspace,
                               SizeT workSpaceSizeInBytes,
                               CudaDeviceVariable<byte> reserveSpace,
                               SizeT reserveSpaceSizeInBytes)
{
    // Unwrap the managed descriptor arrays into the raw handle arrays the native API expects.
    var xDescHandles = xDesc.Select(q => q.Desc).ToArray();
    var yDescHandles = yDesc.Select(q => q.Desc).ToArray();
    res = CudaDNNNativeMethods.cudnnRNNForwardTraining(_handle, _desc,
        xDescHandles, x.DevicePointer,
        hxDesc.Desc, hx.DevicePointer,
        cxDesc.Desc, cx.DevicePointer,
        wDesc.Desc, w.DevicePointer,
        yDescHandles, y.DevicePointer,
        hyDesc.Desc, hy.DevicePointer,
        cyDesc.Desc, cy.DevicePointer,
        workspace.DevicePointer, workSpaceSizeInBytes,
        reserveSpace.DevicePointer, reserveSpaceSizeInBytes);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnRNNForwardTraining", res));
    if (res != cudnnStatus.Success)
        throw new CudaDNNException(res);
}
/// <summary>
/// Accumulates the weight gradients dw for the recurrent neural network described by this
/// RNN descriptor, given inputs x, hx and outputs y. The mode of operation is additive:
/// the gradients computed here are added to those already present in dw. A workspace is
/// required for intermediate storage, and the data in reserveSpace must have previously
/// been generated by cudnnRNNBackwardData.
/// </summary>
/// <param name="xDesc">Array of tensor descriptors describing the input to each recurrent iteration.
/// Each descriptor must have the same first dimension; the second dimension may decrease from element
/// n to element n+1 but may not increase. The tensors must be fully packed.</param>
/// <param name="x">Data pointer to GPU memory associated with the descriptors in xDesc.</param>
/// <param name="hxDesc">Previously initialized tensor descriptor for the initial hidden state of the RNN.
/// First dimension must match hiddenSize, second dimension must match the second dimension of the first
/// tensor in xDesc, third dimension must match numLayers (all as passed to cudnnSetRNNDescriptor).
/// The tensor must be fully packed.</param>
/// <param name="hx">Data pointer to GPU memory associated with hxDesc. If a NULL pointer is passed,
/// the initial hidden state of the network will be initialized to zero.</param>
/// <param name="yDesc">Array of tensor descriptors describing the output of each recurrent iteration.
/// The first dimension matches hiddenSize for CUDNN_UNIDIRECTIONAL and double the hiddenSize for
/// CUDNN_BIDIRECTIONAL; the second dimension of tensor n must match that of tensor n in dyDesc.
/// The tensors must be fully packed.</param>
/// <param name="y">Data pointer to GPU memory associated with the descriptors in yDesc.</param>
/// <param name="workspace">Data pointer to GPU memory to be used as a workspace for this call.</param>
/// <param name="workSpaceSizeInBytes">Size in bytes of the provided workspace.</param>
/// <param name="dwDesc">Previously initialized filter descriptor describing the weight gradients of the RNN.</param>
/// <param name="dw">Data pointer to GPU memory associated with dwDesc.</param>
/// <param name="reserveSpace">Data pointer to GPU memory to be used as a reserve space for this call.</param>
/// <param name="reserveSpaceSizeInBytes">Size in bytes of the provided reserveSpace.</param>
public void RNNBackwardWeights(TensorDescriptor[] xDesc,
                               CudaDeviceVariable<float> x,
                               TensorDescriptor hxDesc,
                               CudaDeviceVariable<float> hx,
                               TensorDescriptor[] yDesc,
                               CudaDeviceVariable<float> y,
                               CudaDeviceVariable<byte> workspace,
                               SizeT workSpaceSizeInBytes,
                               FilterDescriptor dwDesc,
                               CudaDeviceVariable<float> dw,
                               CudaDeviceVariable<byte> reserveSpace,
                               SizeT reserveSpaceSizeInBytes)
{
    // Unwrap the managed descriptor arrays into raw handles for the native call.
    var xDescHandles = xDesc.Select(d => d.Desc).ToArray();
    var yDescHandles = yDesc.Select(d => d.Desc).ToArray();

    res = CudaDNNNativeMethods.cudnnRNNBackwardWeights(
        _handle, _desc,
        xDescHandles, x.DevicePointer,
        hxDesc.Desc, hx.DevicePointer,
        yDescHandles, y.DevicePointer,
        workspace.DevicePointer, workSpaceSizeInBytes,
        dwDesc.Desc, dw.DevicePointer,
        reserveSpace.DevicePointer, reserveSpaceSizeInBytes);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnRNNBackwardWeights", res));
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
}
/// <summary>
/// Obtains a pointer and a filter descriptor for a matrix parameter in the given layer of
/// the RNN described by this descriptor, with input dimensions defined by xDesc.
/// </summary>
/// <param name="layer">The layer to query.</param>
/// <param name="xDesc">Array of tensor descriptors describing the input to each recurrent iteration.</param>
/// <param name="wDesc">Previously initialized filter descriptor describing the weights for the RNN.</param>
/// <param name="w">Data pointer to GPU memory associated with wDesc.</param>
/// <param name="linLayerID">
/// The linear layer to obtain information about:
/// * CUDNN_RNN_RELU / CUDNN_RNN_TANH: 0 references the matrix multiplication applied to the input
///   from the previous layer, 1 the one applied to the recurrent input.
/// * CUDNN_LSTM: 0-3 reference matrix multiplications applied to the input from the previous layer,
///   4-7 those applied to the recurrent input.
///   ‣ 0 and 4: input gate. ‣ 1 and 5: forget gate. ‣ 2 and 6: new memory gate. ‣ 3 and 7: output gate.
/// * CUDNN_GRU: 0-2 reference matrix multiplications applied to the input from the previous layer,
///   3-5 those applied to the recurrent input.
///   ‣ 0 and 3: reset gate. ‣ 1 and 4: update gate. ‣ 2 and 5: new memory gate.
/// </param>
/// <param name="linLayerMatDesc">Handle to a previously created filter descriptor.</param>
/// <param name="linLayerMat">Data pointer to GPU memory associated with linLayerMatDesc
/// (passed as void** on the native side).</param>
public void GetRNNLinLayerMatrixParams(int layer,
                                       TensorDescriptor[] xDesc,
                                       FilterDescriptor wDesc,
                                       CudaDeviceVariable<double> w,
                                       int linLayerID,
                                       FilterDescriptor linLayerMatDesc,
                                       CudaDeviceVariable<SizeT> linLayerMat)
{
    var xDescHandles = xDesc.Select(d => d.Desc).ToArray();

    res = CudaDNNNativeMethods.cudnnGetRNNLinLayerMatrixParams(
        _handle, _desc, layer,
        xDescHandles,
        wDesc.Desc, w.DevicePointer,
        linLayerID,
        linLayerMatDesc.Desc, linLayerMat.DevicePointer);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnGetRNNLinLayerMatrixParams", res));
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
}
/// <summary> /// This function returns the amount of GPU memory workspace the user needs /// to allocate to be able to call cudnnConvolutionBackwardData_v3 with the /// specified algorithm. The workspace allocated will then be passed to the routine /// cudnnConvolutionBackwardData_v3. The specified algorithm can be the result of the /// call to cudnnGetConvolutionBackwardDataAlgorithm or can be chosen arbitrarily /// by the user. Note that not every algorithm is available for every configuration of the /// input tensor and/or every configuration of the convolution descriptor. /// </summary> /// <param name="filterDesc">Handle to a previously initialized filter descriptor.</param> /// <param name="diffDesc">Handle to the previously initialized input differential tensor descriptor.</param> /// <param name="convDesc">Previously initialized convolution descriptor.</param> /// <param name="gradDesc">Handle to the previously initialized output tensor descriptor.</param> /// <param name="algo">Enumerant that specifies the chosen convolution algorithm</param> /// <returns>Amount of GPU memory needed as workspace to be able to execute a forward convolution with the specified algo</returns> public SizeT GetConvolutionBackwardDataWorkspaceSize(FilterDescriptor filterDesc, TensorDescriptor diffDesc, ConvolutionDescriptor convDesc, TensorDescriptor gradDesc, cudnnConvolutionBwdDataAlgo algo ) { SizeT sizeInBytes = new SizeT(); res = CudaDNNNativeMethods.cudnnGetConvolutionBackwardDataWorkspaceSize(_handle, filterDesc.Desc, diffDesc.Desc, convDesc.Desc, gradDesc.Desc, algo, ref sizeInBytes); Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnGetConvolutionBackwardDataWorkspaceSize", res)); if (res != cudnnStatus.Success) throw new CudaDNNException(res); return sizeInBytes; }
/// <summary>
/// Heuristic for obtaining the best suited algorithm for cudnnConvolutionBackwardData_v3
/// for the given layer specification. Depending on the preference, returns either the
/// fastest algorithm or the fastest algorithm within a given memory limit. For an
/// exhaustive search, use cudnnFindConvolutionBackwardDataAlgorithm instead.
/// </summary>
/// <param name="filterDesc">Handle to a previously initialized filter descriptor.</param>
/// <param name="diffDesc">Handle to the previously initialized input differential tensor descriptor.</param>
/// <param name="convDesc">Previously initialized convolution descriptor.</param>
/// <param name="gradDesc">Handle to the previously initialized output tensor descriptor.</param>
/// <param name="preference">Enumerant expressing the preference criteria in terms of memory requirement and speed.</param>
/// <param name="memoryLimitInbytes">Maximum amount of GPU memory the user is willing to use as a workspace.
/// This is currently a placeholder and is not used.</param>
/// <returns>Enumerant specifying which convolution algorithm should be used according to the specified preference.</returns>
public cudnnConvolutionBwdDataAlgo GetConvolutionBackwardDataAlgorithm(FilterDescriptor filterDesc,
                                                                       TensorDescriptor diffDesc,
                                                                       ConvolutionDescriptor convDesc,
                                                                       TensorDescriptor gradDesc,
                                                                       cudnnConvolutionBwdDataPreference preference,
                                                                       SizeT memoryLimitInbytes)
{
    cudnnConvolutionBwdDataAlgo selectedAlgo = new cudnnConvolutionBwdDataAlgo();
    res = CudaDNNNativeMethods.cudnnGetConvolutionBackwardDataAlgorithm(
        _handle, filterDesc.Desc, diffDesc.Desc, convDesc.Desc, gradDesc.Desc,
        preference, memoryLimitInbytes, ref selectedAlgo);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnGetConvolutionBackwardDataAlgorithm", res));
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
    return selectedAlgo;
}
/* Helper function to return the dimensions of the output tensor given a convolution descriptor */ public void GetConvolution2dForwardOutputDim(TensorDescriptor inputTensorDesc, FilterDescriptor filterDesc, ref int n, ref int c, ref int h, ref int w ) { res = CudaDNNNativeMethods.cudnnGetConvolution2dForwardOutputDim(_desc, inputTensorDesc.Desc, filterDesc.Desc, ref n, ref c, ref h, ref w); Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnGetConvolution2dForwardOutputDim", res)); if (res != cudnnStatus.Success) throw new CudaDNNException(res); }
public void ConvolutionBackwardFilter(double alpha, TensorDescriptor srcDesc, CudaDeviceVariable<double> srcData, TensorDescriptor diffDesc, CudaDeviceVariable<double> diffData, ConvolutionDescriptor convDesc, double beta, FilterDescriptor gradDesc, CudaDeviceVariable<double> gradData ) { res = CudaDNNNativeMethods.cudnnConvolutionBackwardFilter(_handle, ref alpha, srcDesc.Desc, srcData.DevicePointer, diffDesc.Desc, diffData.DevicePointer, convDesc.Desc, ref beta, gradDesc.Desc, gradData.DevicePointer); Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnConvolutionBackwardFilter", res)); if (res != cudnnStatus.Success) throw new CudaDNNException(res); }
/// <summary>
/// Executes the recurrent neural network described by this descriptor backwards in data:
/// from output gradients dy, dhy, dcy and weights w it computes the input gradients dx, dhx, dcx.
/// A workspace is required for intermediate storage. The data in reserveSpace must have
/// previously been generated by cudnnRNNForwardTraining, and the same reserveSpace data must
/// be used for subsequent calls to cudnnRNNBackwardWeights on the same input data.
/// </summary>
/// <param name="yDesc">Array of tensor descriptors describing the output of each recurrent iteration.
/// The first dimension matches hiddenSize for CUDNN_UNIDIRECTIONAL and double the hiddenSize for
/// CUDNN_BIDIRECTIONAL; the second dimension of tensor n must match that of tensor n in dyDesc.
/// The tensors must be fully packed.</param>
/// <param name="y">Data pointer to GPU memory associated with the descriptors in yDesc.</param>
/// <param name="dyDesc">Array of tensor descriptors describing the gradient at the output of each
/// recurrent iteration. Same dimension rules as yDesc, with the second dimension of tensor n matching
/// that of tensor n in dxDesc. The tensors must be fully packed.</param>
/// <param name="dy">Data pointer to GPU memory associated with the descriptors in dyDesc.</param>
/// <param name="dhyDesc">Tensor descriptor for the gradients at the final hidden state. Dimensions are
/// (hiddenSize, second dimension of the first tensor in dyDesc, numLayers); fully packed.</param>
/// <param name="dhy">Data pointer associated with dhyDesc. If NULL, these gradients are initialized to zero.</param>
/// <param name="dcyDesc">Tensor descriptor for the gradients at the final cell state (LSTM). Same
/// dimension rules as dhyDesc; fully packed.</param>
/// <param name="dcy">Data pointer associated with dcyDesc. If NULL, these gradients are initialized to zero.</param>
/// <param name="wDesc">Previously initialized filter descriptor describing the weights for the RNN.</param>
/// <param name="w">Data pointer to GPU memory associated with wDesc.</param>
/// <param name="hxDesc">Tensor descriptor for the initial hidden state. Dimensions are
/// (hiddenSize, second dimension of the first tensor in xDesc, numLayers); fully packed.</param>
/// <param name="hx">Data pointer associated with hxDesc. If NULL, the initial hidden state is initialized to zero.</param>
/// <param name="cxDesc">Tensor descriptor for the initial cell state (LSTM). Same dimension rules as hxDesc; fully packed.</param>
/// <param name="cx">Data pointer associated with cxDesc. If NULL, the initial cell state is initialized to zero.</param>
/// <param name="dxDesc">Array of tensor descriptors describing the gradient at the input of each recurrent
/// iteration. Each descriptor must have the same first dimension; the second dimension may decrease from
/// element n to element n+1 but may not increase. The tensors must be fully packed.</param>
/// <param name="dx">Data pointer to GPU memory associated with the descriptors in dxDesc.</param>
/// <param name="dhxDesc">Tensor descriptor for the gradient at the initial hidden state. Same dimension
/// rules as hxDesc; fully packed.</param>
/// <param name="dhx">Data pointer associated with dhxDesc. If NULL, the gradient at the hidden input is not set.</param>
/// <param name="dcxDesc">Tensor descriptor for the gradient at the initial cell state. Same dimension
/// rules as cxDesc; fully packed.</param>
/// <param name="dcx">Data pointer associated with dcxDesc. If NULL, the gradient at the cell input is not set.</param>
/// <param name="workspace">Data pointer to GPU memory to be used as a workspace for this call.</param>
/// <param name="workSpaceSizeInBytes">Size in bytes of the provided workspace.</param>
/// <param name="reserveSpace">Data pointer to GPU memory to be used as a reserve space for this call.</param>
/// <param name="reserveSpaceSizeInBytes">Size in bytes of the provided reserveSpace.</param>
public void RNNBackwardData(TensorDescriptor[] yDesc,
                            CudaDeviceVariable<float> y,
                            TensorDescriptor[] dyDesc,
                            CudaDeviceVariable<float> dy,
                            TensorDescriptor dhyDesc,
                            CudaDeviceVariable<float> dhy,
                            TensorDescriptor dcyDesc,
                            CudaDeviceVariable<float> dcy,
                            FilterDescriptor wDesc,
                            CudaDeviceVariable<float> w,
                            TensorDescriptor hxDesc,
                            CudaDeviceVariable<float> hx,
                            TensorDescriptor cxDesc,
                            CudaDeviceVariable<float> cx,
                            TensorDescriptor[] dxDesc,
                            CudaDeviceVariable<float> dx,
                            TensorDescriptor dhxDesc,
                            CudaDeviceVariable<float> dhx,
                            TensorDescriptor dcxDesc,
                            CudaDeviceVariable<float> dcx,
                            CudaDeviceVariable<byte> workspace,
                            SizeT workSpaceSizeInBytes,
                            CudaDeviceVariable<byte> reserveSpace,
                            SizeT reserveSpaceSizeInBytes)
{
    // Unwrap the managed descriptor arrays into raw handles for the native call.
    var yDescHandles = yDesc.Select(d => d.Desc).ToArray();
    var dyDescHandles = dyDesc.Select(d => d.Desc).ToArray();
    var dxDescHandles = dxDesc.Select(d => d.Desc).ToArray();

    res = CudaDNNNativeMethods.cudnnRNNBackwardData(
        _handle, _desc,
        yDescHandles, y.DevicePointer,
        dyDescHandles, dy.DevicePointer,
        dhyDesc.Desc, dhy.DevicePointer,
        dcyDesc.Desc, dcy.DevicePointer,
        wDesc.Desc, w.DevicePointer,
        hxDesc.Desc, hx.DevicePointer,
        cxDesc.Desc, cx.DevicePointer,
        dxDescHandles, dx.DevicePointer,
        dhxDesc.Desc, dhx.DevicePointer,
        dcxDesc.Desc, dcx.DevicePointer,
        workspace.DevicePointer, workSpaceSizeInBytes,
        reserveSpace.DevicePointer, reserveSpaceSizeInBytes);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnRNNBackwardData", res));
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
}
/* Convolution functions: All of the form "output = alpha * Op(inputs) + beta * output" */ /* Function to perform the forward multiconvolution */ public void ConvolutionForward(double alpha, TensorDescriptor srcDesc, CudaDeviceVariable<double> srcData, FilterDescriptor filterDesc, CudaDeviceVariable<double> filterData, ConvolutionDescriptor convDesc, cudnnConvolutionFwdAlgo algo, CudaDeviceVariable<byte> workSpace, SizeT workSpaceSizeInBytes, double beta, TensorDescriptor destDesc, CudaDeviceVariable<double> destData ) { res = CudaDNNNativeMethods.cudnnConvolutionForward(_handle, ref alpha, srcDesc.Desc, srcData.DevicePointer, filterDesc.Desc, filterData.DevicePointer, convDesc.Desc, algo, workSpace.DevicePointer, workSpaceSizeInBytes, ref beta, destDesc.Desc, destData.DevicePointer); Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnConvolutionForward", res)); if (res != cudnnStatus.Success) throw new CudaDNNException(res); }
/// <summary> /// This function is used to query the amount of parameter space required to execute the RNN described by /// rnnDesc with inputs dimensions defined by xDesc. /// </summary> /// <param name="xDesc">An array of tensor descriptors describing the input to each recurrent iteration</param> /// <param name="sizeInBytes">Minimum amount of GPU memory needed as parameter space to be able to execute an RNN with the specified descriptor and input tensors.</param> public void cudnnGetRNNParamsSize( TensorDescriptor[] xDesc, ref SizeT sizeInBytes ) { var a1 = xDesc.Select(x => x.Desc).ToArray(); res = CudaDNNNativeMethods.cudnnGetRNNParamsSize(_handle, _desc, a1, ref sizeInBytes); Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnGetRNNParamsSize", res)); if (res != cudnnStatus.Success) throw new CudaDNNException(res); }
public void GetConvolutionForwardAlgorithm(TensorDescriptor srcDesc, FilterDescriptor filterDesc, ConvolutionDescriptor convDesc, TensorDescriptor destDesc, cudnnConvolutionFwdPreference preference, SizeT memoryLimitInbytes, ref cudnnConvolutionFwdAlgo algo ) { res = CudaDNNNativeMethods.cudnnGetConvolutionForwardAlgorithm(_handle, srcDesc.Desc, filterDesc.Desc, convDesc.Desc, destDesc.Desc, preference, memoryLimitInbytes, ref algo); Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnGetConvolutionForwardAlgorithm", res)); if (res != cudnnStatus.Success) throw new CudaDNNException(res); }
public void GetPoolingNdForwardOutputDim(TensorDescriptor inputTensorDesc, int nbDims, int[] outputTensorDimA) { res = CudaDNNNativeMethods.cudnnGetPoolingNdForwardOutputDim(_desc, inputTensorDesc.Desc, nbDims, outputTensorDimA); Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnGetPoolingNdForwardOutputDim", res)); if (res != cudnnStatus.Success) throw new CudaDNNException(res); }
public SizeT GetConvolutionForwardWorkspaceSize(TensorDescriptor srcDesc, FilterDescriptor filterDesc, ConvolutionDescriptor convDesc, TensorDescriptor destDesc, cudnnConvolutionFwdAlgo algo ) { SizeT sizeInBytes = 0; res = CudaDNNNativeMethods.cudnnGetConvolutionForwardWorkspaceSize(_handle, srcDesc.Desc, filterDesc.Desc, convDesc.Desc, destDesc.Desc, algo, ref sizeInBytes); Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnGetConvolutionForwardWorkspaceSize", res)); if (res != cudnnStatus.Success) throw new CudaDNNException(res); return sizeInBytes; }
/* Tensor Bias addition : srcDest = alpha * bias + beta * srcDestDesc */ public void AddTensor(cudnnAddMode mode, float alpha, TensorDescriptor biasDesc, CudaDeviceVariable<float> biasData, float beta, TensorDescriptor srcDestDesc, CudaDeviceVariable<float> srcDestData ) { res = CudaDNNNativeMethods.cudnnAddTensor(_handle, mode, ref alpha, biasDesc.Desc, biasData.DevicePointer, ref beta, srcDestDesc.Desc, srcDestData.DevicePointer); Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnAddTensor", res)); if (res != cudnnStatus.Success) throw new CudaDNNException(res); }
public void Im2Col(double alpha, TensorDescriptor srcDesc, CudaDeviceVariable<double> srcData, FilterDescriptor filterDesc, ConvolutionDescriptor convDesc, CudaDeviceVariable<byte> colBuffer ) { res = CudaDNNNativeMethods.cudnnIm2Col(_handle, ref alpha, srcDesc.Desc, srcData.DevicePointer, filterDesc.Desc, convDesc.Desc, colBuffer.DevicePointer); Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnIm2Col", res)); if (res != cudnnStatus.Success) throw new CudaDNNException(res); }