/// <summary>
/// Computes the dimensions of the n-D output tensor produced by an (nbDims-2)-D convolution,
/// given this convolution descriptor, the input tensor descriptor and the filter descriptor.
/// Useful to set up the output tensor and allocate the proper amount of memory before
/// launching the actual convolution.<para/>
/// Each dimension of the (nbDims-2)-D images of the output tensor is computed as:<para/>
/// outputDim = 1 + (inputDim + 2*pad - filterDim)/convolutionStride
/// </summary>
/// <param name="inputTensorDesc">Handle to a previously initialized tensor descriptor.</param>
/// <param name="filterDesc">Handle to a previously initialized filter descriptor.</param>
/// <param name="nbDims">Dimension of the output tensor.</param>
/// <param name="tensorOuputDimA">Array of length nbDims that receives the sizes of the output tensor on exit.</param>
public void GetConvolutionNdForwardOutputDim(TensorDescriptor inputTensorDesc,
                                             FilterDescriptor filterDesc,
                                             int nbDims,
                                             int[] tensorOuputDimA)
{
    res = CudaDNNNativeMethods.cudnnGetConvolutionNdForwardOutputDim(
        _desc, inputTensorDesc.Desc, filterDesc.Desc, nbDims, tensorOuputDimA);
    Debug.WriteLine($"{DateTime.Now:G}, cudnnGetConvolutionNdForwardOutputDim: {res}");
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
}
/// <summary>
/// This function returns the dimensions of the resulting 4D tensor of a 2D convolution,
/// given the convolution descriptor, the input tensor descriptor and the filter descriptor.
/// This function can help to setup the output tensor and allocate the proper amount of
/// memory prior to launching the actual convolution.<para/>
/// Each dimension h and w of the output images is computed as follows:<para/>
/// outputDim = 1 + (inputDim + 2*pad - filterDim)/convolutionStride
/// </summary>
/// <param name="inputTensorDesc">Handle to a previously initialized tensor descriptor.</param>
/// <param name="filterDesc">Handle to a previously initialized filter descriptor.</param>
/// <param name="n">Number of output images.</param>
/// <param name="c">Number of output feature maps per image.</param>
/// <param name="h">Height of each output feature map.</param>
/// <param name="w">Width of each output feature map.</param>
public void GetConvolution2dForwardOutputDim(TensorDescriptor inputTensorDesc,
                                             FilterDescriptor filterDesc,
                                             ref int n,
                                             ref int c,
                                             ref int h,
                                             ref int w)
{
    res = CudaDNNNativeMethods.cudnnGetConvolution2dForwardOutputDim(_desc, inputTensorDesc.Desc, filterDesc.Desc, ref n, ref c, ref h, ref w);
    // Fix: the status log line had been commented out and replaced with a no-op
    // Debug.Write(""), silently breaking the logging convention every sibling
    // wrapper in this file follows. Restore the standard status trace.
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnGetConvolution2dForwardOutputDim", res));
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
}
/// <summary>
/// Thin wrapper around cudnnIm2Col for double-precision data: expands the source tensor
/// into the supplied column buffer using the filter and convolution descriptors, with
/// scaling factor alpha. NOTE(review): the exact column layout is defined by cuDNN —
/// see the cudnnIm2Col documentation.
/// </summary>
/// <param name="alpha">Scaling factor applied by the native routine.</param>
/// <param name="srcDesc">Handle to a previously initialized source tensor descriptor.</param>
/// <param name="srcData">GPU data associated with srcDesc.</param>
/// <param name="filterDesc">Handle to a previously initialized filter descriptor.</param>
/// <param name="convDesc">Previously initialized convolution descriptor.</param>
/// <param name="colBuffer">GPU buffer that receives the column data.</param>
public void Im2Col(double alpha,
                   TensorDescriptor srcDesc,
                   CudaDeviceVariable<double> srcData,
                   FilterDescriptor filterDesc,
                   ConvolutionDescriptor convDesc,
                   CudaDeviceVariable<byte> colBuffer)
{
    res = CudaDNNNativeMethods.cudnnIm2Col(
        _handle, ref alpha,
        srcDesc.Desc, srcData.DevicePointer,
        filterDesc.Desc, convDesc.Desc,
        colBuffer.DevicePointer);
    Debug.WriteLine($"{DateTime.Now:G}, cudnnIm2Col: {res}");
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
}
/// <summary>
/// Queries cuDNN for a suitable forward-convolution algorithm for the given layer
/// descriptors, honouring the requested preference and memory limit. The chosen
/// algorithm is written to <paramref name="algo"/>.
/// </summary>
/// <param name="srcDesc">Handle to a previously initialized source tensor descriptor.</param>
/// <param name="filterDesc">Handle to a previously initialized filter descriptor.</param>
/// <param name="convDesc">Previously initialized convolution descriptor.</param>
/// <param name="destDesc">Handle to a previously initialized destination tensor descriptor.</param>
/// <param name="preference">Preference criteria in terms of memory requirement and speed.</param>
/// <param name="memoryLimitInbytes">Maximum amount of GPU workspace memory the caller is willing to use.</param>
/// <param name="algo">Receives the algorithm selected by cuDNN.</param>
public void GetConvolutionForwardAlgorithm(TensorDescriptor srcDesc,
                                           FilterDescriptor filterDesc,
                                           ConvolutionDescriptor convDesc,
                                           TensorDescriptor destDesc,
                                           cudnnConvolutionFwdPreference preference,
                                           SizeT memoryLimitInbytes,
                                           ref cudnnConvolutionFwdAlgo algo)
{
    res = CudaDNNNativeMethods.cudnnGetConvolutionForwardAlgorithm(
        _handle, srcDesc.Desc, filterDesc.Desc, convDesc.Desc, destDesc.Desc,
        preference, memoryLimitInbytes, ref algo);
    Debug.WriteLine($"{DateTime.Now:G}, cudnnGetConvolutionForwardAlgorithm: {res}");
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
}
/// <summary>
/// Returns the workspace size in bytes that cuDNN needs to run the given
/// forward-convolution algorithm with the supplied descriptors.
/// </summary>
/// <param name="srcDesc">Handle to a previously initialized source tensor descriptor.</param>
/// <param name="filterDesc">Handle to a previously initialized filter descriptor.</param>
/// <param name="convDesc">Previously initialized convolution descriptor.</param>
/// <param name="destDesc">Handle to a previously initialized destination tensor descriptor.</param>
/// <param name="algo">Forward-convolution algorithm to size the workspace for.</param>
/// <returns>Required workspace size in bytes.</returns>
public SizeT GetConvolutionForwardWorkspaceSize(TensorDescriptor srcDesc,
                                                FilterDescriptor filterDesc,
                                                ConvolutionDescriptor convDesc,
                                                TensorDescriptor destDesc,
                                                cudnnConvolutionFwdAlgo algo)
{
    SizeT workspaceSize = 0;
    res = CudaDNNNativeMethods.cudnnGetConvolutionForwardWorkspaceSize(
        _handle, srcDesc.Desc, filterDesc.Desc, convDesc.Desc, destDesc.Desc,
        algo, ref workspaceSize);
    Debug.WriteLine($"{DateTime.Now:G}, cudnnGetConvolutionForwardWorkspaceSize: {res}");
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
    return workspaceSize;
}
/// <summary>
/// Computes the convolution data gradient (double precision): blends the result into
/// gradData as gradValue = alpha*result + beta*priorGradValue via cudnnConvolutionBackwardData.
/// This overload takes no algorithm/workspace arguments (legacy native signature).
/// </summary>
/// <param name="alpha">Scaling factor applied to the computed result.</param>
/// <param name="filterDesc">Handle to a previously initialized filter descriptor.</param>
/// <param name="filterData">GPU data associated with filterDesc.</param>
/// <param name="diffDesc">Handle to the previously initialized input differential tensor descriptor.</param>
/// <param name="diffData">GPU data associated with diffDesc.</param>
/// <param name="convDesc">Previously initialized convolution descriptor.</param>
/// <param name="beta">Scaling factor applied to the prior value of the output tensor.</param>
/// <param name="gradDesc">Handle to the previously initialized output tensor descriptor.</param>
/// <param name="gradData">GPU data associated with gradDesc that receives the result.</param>
public void ConvolutionBackwardData(double alpha,
                                    FilterDescriptor filterDesc,
                                    CudaDeviceVariable<double> filterData,
                                    TensorDescriptor diffDesc,
                                    CudaDeviceVariable<double> diffData,
                                    ConvolutionDescriptor convDesc,
                                    double beta,
                                    TensorDescriptor gradDesc,
                                    CudaDeviceVariable<double> gradData)
{
    res = CudaDNNNativeMethods.cudnnConvolutionBackwardData(
        _handle, ref alpha,
        filterDesc.Desc, filterData.DevicePointer,
        diffDesc.Desc, diffData.DevicePointer,
        convDesc.Desc, ref beta,
        gradDesc.Desc, gradData.DevicePointer);
    Debug.WriteLine($"{DateTime.Now:G}, cudnnConvolutionBackwardData: {res}");
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
}
/// <summary>
/// Performs the forward convolution (double precision). All convolution functions are of
/// the form "output = alpha * Op(inputs) + beta * output". Uses the caller-selected
/// algorithm and a caller-provided workspace.
/// </summary>
/// <param name="alpha">Scaling factor applied to the computed result.</param>
/// <param name="srcDesc">Handle to a previously initialized source tensor descriptor.</param>
/// <param name="srcData">GPU data associated with srcDesc.</param>
/// <param name="filterDesc">Handle to a previously initialized filter descriptor.</param>
/// <param name="filterData">GPU data associated with filterDesc.</param>
/// <param name="convDesc">Previously initialized convolution descriptor.</param>
/// <param name="algo">Forward-convolution algorithm to use.</param>
/// <param name="workSpace">GPU workspace required by the chosen algorithm.</param>
/// <param name="workSpaceSizeInBytes">Size in bytes of the provided workspace.</param>
/// <param name="beta">Scaling factor applied to the prior value of the destination tensor.</param>
/// <param name="destDesc">Handle to a previously initialized destination tensor descriptor.</param>
/// <param name="destData">GPU data associated with destDesc that receives the result.</param>
public void ConvolutionForward(double alpha,
                               TensorDescriptor srcDesc,
                               CudaDeviceVariable<double> srcData,
                               FilterDescriptor filterDesc,
                               CudaDeviceVariable<double> filterData,
                               ConvolutionDescriptor convDesc,
                               cudnnConvolutionFwdAlgo algo,
                               CudaDeviceVariable<byte> workSpace,
                               SizeT workSpaceSizeInBytes,
                               double beta,
                               TensorDescriptor destDesc,
                               CudaDeviceVariable<double> destData)
{
    res = CudaDNNNativeMethods.cudnnConvolutionForward(
        _handle, ref alpha,
        srcDesc.Desc, srcData.DevicePointer,
        filterDesc.Desc, filterData.DevicePointer,
        convDesc.Desc, algo,
        workSpace.DevicePointer, workSpaceSizeInBytes,
        ref beta,
        destDesc.Desc, destData.DevicePointer);
    Debug.WriteLine($"{DateTime.Now:G}, cudnnConvolutionForward: {res}");
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
}
/// <summary>
/// Obtains a pointer and descriptor for the matrix parameters of a given linear layer
/// within the RNN described by this descriptor, with input dimensions defined by xDesc.
/// </summary>
/// <param name="layer">The layer to query.</param>
/// <param name="xDesc">An array of tensor descriptors describing the input to each recurrent iteration.</param>
/// <param name="wDesc">Handle to a previously initialized filter descriptor describing the weights for the RNN.</param>
/// <param name="w">Data pointer to GPU memory associated with the filter descriptor wDesc.</param>
/// <param name="linLayerID">
/// The linear layer to obtain information about:
/// * CUDNN_RNN_RELU / CUDNN_RNN_TANH: 0 = matrix multiplication applied to the input from
///   the previous layer, 1 = matrix multiplication applied to the recurrent input.
/// * CUDNN_LSTM: 0-3 reference matrix multiplications applied to the input from the previous
///   layer, 4-7 those applied to the recurrent input.
///   - 0 and 4: input gate. - 1 and 5: forget gate. - 2 and 6: new memory gate. - 3 and 7: output gate.
/// * CUDNN_GRU: 0-2 reference matrix multiplications applied to the input from the previous
///   layer, 3-5 those applied to the recurrent input.
///   - 0 and 3: reset gate. - 1 and 4: update gate. - 2 and 5: new memory gate.
/// </param>
/// <param name="linLayerMatDesc">Handle to a previously created filter descriptor.</param>
/// <param name="linLayerMat">Data pointer to GPU memory associated with linLayerMatDesc (native void**).</param>
public void GetRNNLinLayerMatrixParams(int layer,
                                       TensorDescriptor[] xDesc,
                                       FilterDescriptor wDesc,
                                       CudaDeviceVariable<double> w,
                                       int linLayerID,
                                       FilterDescriptor linLayerMatDesc,
                                       CudaDeviceVariable<SizeT> linLayerMat)
{
    var xDescs = xDesc.Select(x => x.Desc).ToArray();
    res = CudaDNNNativeMethods.cudnnGetRNNLinLayerMatrixParams(
        _handle, _desc, layer, xDescs,
        wDesc.Desc, w.DevicePointer, linLayerID,
        linLayerMatDesc.Desc, linLayerMat.DevicePointer);
    Debug.WriteLine($"{DateTime.Now:G}, cudnnGetRNNLinLayerMatrixParams: {res}");
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
}
/// <summary>
/// Helper that returns the dimensions of the output tensor of an n-D convolution,
/// given this convolution descriptor, the input tensor descriptor and the filter descriptor.
/// </summary>
/// <param name="inputTensorDesc">Handle to a previously initialized tensor descriptor.</param>
/// <param name="filterDesc">Handle to a previously initialized filter descriptor.</param>
/// <param name="nbDims">Dimension of the output tensor.</param>
/// <param name="tensorOuputDimA">Array of length nbDims that receives the output tensor sizes on exit.</param>
public void GetConvolutionNdForwardOutputDim(TensorDescriptor inputTensorDesc,
                                             FilterDescriptor filterDesc,
                                             int nbDims,
                                             int[] tensorOuputDimA)
{
    res = CudaDNNNativeMethods.cudnnGetConvolutionNdForwardOutputDim(
        _desc, inputTensorDesc.Desc, filterDesc.Desc, nbDims, tensorOuputDimA);
    Debug.WriteLine($"{DateTime.Now:G}, cudnnGetConvolutionNdForwardOutputDim: {res}");
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
}
/// <summary>
/// Wrapper around cudnnIm2Col (double-precision variant): expands the source tensor into
/// the supplied column buffer using the filter and convolution descriptors, scaled by alpha.
/// </summary>
/// <param name="alpha">Scaling factor applied by the native routine.</param>
/// <param name="srcDesc">Handle to a previously initialized source tensor descriptor.</param>
/// <param name="srcData">GPU data associated with srcDesc.</param>
/// <param name="filterDesc">Handle to a previously initialized filter descriptor.</param>
/// <param name="convDesc">Previously initialized convolution descriptor.</param>
/// <param name="colBuffer">GPU buffer that receives the column data.</param>
public void Im2Col(double alpha,
                   TensorDescriptor srcDesc,
                   CudaDeviceVariable<double> srcData,
                   FilterDescriptor filterDesc,
                   ConvolutionDescriptor convDesc,
                   CudaDeviceVariable<byte> colBuffer)
{
    res = CudaDNNNativeMethods.cudnnIm2Col(
        _handle, ref alpha,
        srcDesc.Desc, srcData.DevicePointer,
        filterDesc.Desc, convDesc.Desc,
        colBuffer.DevicePointer);
    Debug.WriteLine($"{DateTime.Now:G}, cudnnIm2Col: {res}");
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
}
/// <summary>
/// Queries the workspace size in bytes required to execute the given forward-convolution
/// algorithm with the supplied descriptors.
/// </summary>
/// <param name="srcDesc">Handle to a previously initialized source tensor descriptor.</param>
/// <param name="filterDesc">Handle to a previously initialized filter descriptor.</param>
/// <param name="convDesc">Previously initialized convolution descriptor.</param>
/// <param name="destDesc">Handle to a previously initialized destination tensor descriptor.</param>
/// <param name="algo">Forward-convolution algorithm to size the workspace for.</param>
/// <returns>Required workspace size in bytes.</returns>
public SizeT GetConvolutionForwardWorkspaceSize(TensorDescriptor srcDesc,
                                                FilterDescriptor filterDesc,
                                                ConvolutionDescriptor convDesc,
                                                TensorDescriptor destDesc,
                                                cudnnConvolutionFwdAlgo algo)
{
    SizeT requiredBytes = 0;
    res = CudaDNNNativeMethods.cudnnGetConvolutionForwardWorkspaceSize(
        _handle, srcDesc.Desc, filterDesc.Desc, convDesc.Desc, destDesc.Desc,
        algo, ref requiredBytes);
    Debug.WriteLine($"{DateTime.Now:G}, cudnnGetConvolutionForwardWorkspaceSize: {res}");
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
    return requiredBytes;
}
/// <summary>
/// Selects a forward-convolution algorithm for the given layer descriptors according to
/// the requested preference and memory limit; the result is stored in <paramref name="algo"/>.
/// </summary>
/// <param name="srcDesc">Handle to a previously initialized source tensor descriptor.</param>
/// <param name="filterDesc">Handle to a previously initialized filter descriptor.</param>
/// <param name="convDesc">Previously initialized convolution descriptor.</param>
/// <param name="destDesc">Handle to a previously initialized destination tensor descriptor.</param>
/// <param name="preference">Preference criteria in terms of memory requirement and speed.</param>
/// <param name="memoryLimitInbytes">Maximum amount of GPU workspace memory the caller is willing to use.</param>
/// <param name="algo">Receives the algorithm selected by cuDNN.</param>
public void GetConvolutionForwardAlgorithm(TensorDescriptor srcDesc,
                                           FilterDescriptor filterDesc,
                                           ConvolutionDescriptor convDesc,
                                           TensorDescriptor destDesc,
                                           cudnnConvolutionFwdPreference preference,
                                           SizeT memoryLimitInbytes,
                                           ref cudnnConvolutionFwdAlgo algo)
{
    res = CudaDNNNativeMethods.cudnnGetConvolutionForwardAlgorithm(
        _handle, srcDesc.Desc, filterDesc.Desc, convDesc.Desc, destDesc.Desc,
        preference, memoryLimitInbytes, ref algo);
    Debug.WriteLine($"{DateTime.Now:G}, cudnnGetConvolutionForwardAlgorithm: {res}");
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
}
/// <summary>
/// Executes the forward convolution (double precision). Convolution functions are all of
/// the form "output = alpha * Op(inputs) + beta * output".
/// </summary>
/// <param name="alpha">Scaling factor applied to the computed result.</param>
/// <param name="srcDesc">Handle to a previously initialized source tensor descriptor.</param>
/// <param name="srcData">GPU data associated with srcDesc.</param>
/// <param name="filterDesc">Handle to a previously initialized filter descriptor.</param>
/// <param name="filterData">GPU data associated with filterDesc.</param>
/// <param name="convDesc">Previously initialized convolution descriptor.</param>
/// <param name="algo">Forward-convolution algorithm to use.</param>
/// <param name="workSpace">GPU workspace required by the chosen algorithm.</param>
/// <param name="workSpaceSizeInBytes">Size in bytes of the provided workspace.</param>
/// <param name="beta">Scaling factor applied to the prior value of the destination tensor.</param>
/// <param name="destDesc">Handle to a previously initialized destination tensor descriptor.</param>
/// <param name="destData">GPU data associated with destDesc that receives the result.</param>
public void ConvolutionForward(double alpha,
                               TensorDescriptor srcDesc,
                               CudaDeviceVariable<double> srcData,
                               FilterDescriptor filterDesc,
                               CudaDeviceVariable<double> filterData,
                               ConvolutionDescriptor convDesc,
                               cudnnConvolutionFwdAlgo algo,
                               CudaDeviceVariable<byte> workSpace,
                               SizeT workSpaceSizeInBytes,
                               double beta,
                               TensorDescriptor destDesc,
                               CudaDeviceVariable<double> destData)
{
    res = CudaDNNNativeMethods.cudnnConvolutionForward(
        _handle, ref alpha,
        srcDesc.Desc, srcData.DevicePointer,
        filterDesc.Desc, filterData.DevicePointer,
        convDesc.Desc, algo,
        workSpace.DevicePointer, workSpaceSizeInBytes,
        ref beta,
        destDesc.Desc, destData.DevicePointer);
    Debug.WriteLine($"{DateTime.Now:G}, cudnnConvolutionForward: {res}");
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
}
/// <summary>
/// Computes the convolution gradient with respect to the data using the specified algo,
/// returning results in gradDesc. Scaling factors alpha and beta blend the computed
/// result with the prior value of the output tensor:
/// dstValue = alpha[0]*result + beta[0]*priorDstValue.
/// </summary>
/// <param name="alpha">Scaling factor (host memory) applied to the computed result.</param>
/// <param name="filterDesc">Handle to a previously initialized filter descriptor.</param>
/// <param name="filterData">GPU data associated with the filter descriptor filterDesc.</param>
/// <param name="diffDesc">Handle to the previously initialized input differential tensor descriptor.</param>
/// <param name="diffData">GPU data associated with the input differential tensor descriptor diffDesc.</param>
/// <param name="convDesc">Previously initialized convolution descriptor.</param>
/// <param name="algo">Backward-data convolution algorithm to use.</param>
/// <param name="workSpace">GPU workspace needed to execute the specified algorithm; may be nil
/// if the algorithm needs none. The workspace size passed to cuDNN is taken from this variable.</param>
/// <param name="beta">Scaling factor (host memory) applied to the prior value of the output tensor.</param>
/// <param name="gradDesc">Handle to the previously initialized output tensor descriptor.</param>
/// <param name="gradData">GPU data associated with gradDesc that carries the result.</param>
public void ConvolutionBackwardData(double alpha,
                                    FilterDescriptor filterDesc,
                                    CudaDeviceVariable<double> filterData,
                                    TensorDescriptor diffDesc,
                                    CudaDeviceVariable<double> diffData,
                                    ConvolutionDescriptor convDesc,
                                    cudnnConvolutionBwdDataAlgo algo,
                                    CudaDeviceVariable<byte> workSpace,
                                    double beta,
                                    TensorDescriptor gradDesc,
                                    CudaDeviceVariable<double> gradData)
{
    res = CudaDNNNativeMethods.cudnnConvolutionBackwardData(
        _handle, ref alpha,
        filterDesc.Desc, filterData.DevicePointer,
        diffDesc.Desc, diffData.DevicePointer,
        convDesc.Desc, algo,
        workSpace.DevicePointer, workSpace.SizeInBytes,
        ref beta,
        gradDesc.Desc, gradData.DevicePointer);
    Debug.WriteLine($"{DateTime.Now:G}, cudnnConvolutionBackwardData: {res}");
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
}
/// <summary>
/// Executes the recurrent neural network described by this descriptor with inputs x, hx, cx,
/// weights w and outputs y, hy, cy. workspace is required for intermediate storage.
/// reserveSpace stores data required for training; the same reserveSpace data must be used
/// for future calls to cudnnRNNBackwardData and cudnnRNNBackwardWeights if these execute
/// on the same input data.
/// </summary>
/// <param name="xDesc">Array of tensor descriptors describing the input to each recurrent iteration.
/// Each descriptor must have the same first dimension; the second dimension may decrease from element
/// n to n+1 but may not increase. Tensors must be fully packed.</param>
/// <param name="x">GPU data associated with the descriptors in xDesc.</param>
/// <param name="hxDesc">Descriptor of the initial hidden state. First dimension must match hiddenSize,
/// second the second dimension of the first tensor in xDesc, third the numLayers argument of
/// cudnnSetRNNDescriptor. Must be fully packed.</param>
/// <param name="hx">GPU data for hxDesc; if NULL, the initial hidden state is initialized to zero.</param>
/// <param name="cxDesc">Descriptor of the initial cell state (LSTM). Same dimension constraints as hxDesc.</param>
/// <param name="cx">GPU data for cxDesc; if NULL, the initial cell state is initialized to zero.</param>
/// <param name="wDesc">Filter descriptor describing the RNN weights.</param>
/// <param name="w">GPU data associated with wDesc.</param>
/// <param name="yDesc">Array of tensor descriptors describing the output of each recurrent iteration.
/// First dimension is hiddenSize (CUDNN_UNIDIRECTIONAL) or double hiddenSize (CUDNN_BIDIRECTIONAL);
/// second dimension of tensor n must match that of tensor n in xDesc. Must be fully packed.</param>
/// <param name="y">GPU data associated with yDesc.</param>
/// <param name="hyDesc">Descriptor of the final hidden state. Same dimension constraints as hxDesc.</param>
/// <param name="hy">GPU data for hyDesc; if NULL, the final hidden state is not saved.</param>
/// <param name="cyDesc">Descriptor of the final cell state (LSTM). Same dimension constraints as hxDesc.</param>
/// <param name="cy">GPU data for cyDesc; if NULL, the final cell state is not saved.</param>
/// <param name="workspace">GPU workspace for this call.</param>
/// <param name="workSpaceSizeInBytes">Size in bytes of the provided workspace.</param>
/// <param name="reserveSpace">GPU reserve space for this call.</param>
/// <param name="reserveSpaceSizeInBytes">Size in bytes of the provided reserveSpace.</param>
public void RNNForwardTraining(TensorDescriptor[] xDesc, CudaDeviceVariable<double> x,
                               TensorDescriptor hxDesc, CudaDeviceVariable<double> hx,
                               TensorDescriptor cxDesc, CudaDeviceVariable<double> cx,
                               FilterDescriptor wDesc, CudaDeviceVariable<double> w,
                               TensorDescriptor[] yDesc, CudaDeviceVariable<double> y,
                               TensorDescriptor hyDesc, CudaDeviceVariable<double> hy,
                               TensorDescriptor cyDesc, CudaDeviceVariable<double> cy,
                               CudaDeviceVariable<byte> workspace, SizeT workSpaceSizeInBytes,
                               CudaDeviceVariable<byte> reserveSpace, SizeT reserveSpaceSizeInBytes)
{
    var xDescs = xDesc.Select(q => q.Desc).ToArray();
    var yDescs = yDesc.Select(q => q.Desc).ToArray();
    res = CudaDNNNativeMethods.cudnnRNNForwardTraining(
        _handle, _desc,
        xDescs, x.DevicePointer,
        hxDesc.Desc, hx.DevicePointer,
        cxDesc.Desc, cx.DevicePointer,
        wDesc.Desc, w.DevicePointer,
        yDescs, y.DevicePointer,
        hyDesc.Desc, hy.DevicePointer,
        cyDesc.Desc, cy.DevicePointer,
        workspace.DevicePointer, workSpaceSizeInBytes,
        reserveSpace.DevicePointer, reserveSpaceSizeInBytes);
    Debug.WriteLine($"{DateTime.Now:G}, cudnnRNNForwardTraining: {res}");
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
}
/// <summary>
/// Executes the backward-data pass of the recurrent neural network described by this
/// descriptor, with output gradients dy, dhy, dcy, weights w, and producing input gradients
/// dx, dhx, dcx. workspace is required for intermediate storage. The data in reserveSpace
/// must have previously been generated by cudnnRNNForwardTraining; the same reserveSpace
/// must be used for future calls to cudnnRNNBackwardWeights on the same input data.
/// </summary>
/// <param name="yDesc">Array of tensor descriptors describing the output of each recurrent iteration.
/// First dimension is hiddenSize (CUDNN_UNIDIRECTIONAL) or double hiddenSize (CUDNN_BIDIRECTIONAL);
/// second dimension of tensor n must match that of tensor n in dyDesc. Must be fully packed.</param>
/// <param name="y">GPU data associated with yDesc.</param>
/// <param name="dyDesc">Array of tensor descriptors describing the gradient at the output of each
/// recurrent iteration, with the same dimension rules as yDesc (second dimension matching dxDesc).</param>
/// <param name="dy">GPU data associated with the descriptors in dyDesc.</param>
/// <param name="dhyDesc">Descriptor of the gradients at the final hidden state. First dimension must
/// match hiddenSize, second the second dimension of the first tensor in dyDesc, third numLayers.
/// Must be fully packed.</param>
/// <param name="dhy">GPU data for dhyDesc; if NULL, these gradients are initialized to zero.</param>
/// <param name="dcyDesc">Descriptor of the gradients at the final cell state. Same constraints as dhyDesc.</param>
/// <param name="dcy">GPU data for dcyDesc; if NULL, these gradients are initialized to zero.</param>
/// <param name="wDesc">Filter descriptor describing the RNN weights.</param>
/// <param name="w">GPU data associated with wDesc.</param>
/// <param name="hxDesc">Descriptor of the initial hidden state. First dimension must match hiddenSize,
/// second the second dimension of the first tensor in xDesc, third numLayers. Must be fully packed.</param>
/// <param name="hx">GPU data for hxDesc; if NULL, the initial hidden state is initialized to zero.</param>
/// <param name="cxDesc">Descriptor of the initial cell state (LSTM). Same constraints as hxDesc.</param>
/// <param name="cx">GPU data for cxDesc; if NULL, the initial cell state is initialized to zero.</param>
/// <param name="dxDesc">Array of tensor descriptors describing the gradient at the input of each
/// recurrent iteration. Each descriptor must have the same first dimension; the second dimension may
/// decrease from element n to n+1 but may not increase. Must be fully packed.</param>
/// <param name="dx">GPU data associated with the descriptors in dxDesc.</param>
/// <param name="dhxDesc">Descriptor of the gradient at the initial hidden state. Same constraints as hxDesc.</param>
/// <param name="dhx">GPU data for dhxDesc; if NULL, the gradient at the hidden input is not set.</param>
/// <param name="dcxDesc">Descriptor of the gradient at the initial cell state. Same constraints as hxDesc.</param>
/// <param name="dcx">GPU data for dcxDesc; if NULL, the gradient at the cell input is not set.</param>
/// <param name="workspace">GPU workspace for this call.</param>
/// <param name="workSpaceSizeInBytes">Size in bytes of the provided workspace.</param>
/// <param name="reserveSpace">GPU reserve space for this call.</param>
/// <param name="reserveSpaceSizeInBytes">Size in bytes of the provided reserveSpace.</param>
public void RNNBackwardData(TensorDescriptor[] yDesc, CudaDeviceVariable<float> y,
                            TensorDescriptor[] dyDesc, CudaDeviceVariable<float> dy,
                            TensorDescriptor dhyDesc, CudaDeviceVariable<float> dhy,
                            TensorDescriptor dcyDesc, CudaDeviceVariable<float> dcy,
                            FilterDescriptor wDesc, CudaDeviceVariable<float> w,
                            TensorDescriptor hxDesc, CudaDeviceVariable<float> hx,
                            TensorDescriptor cxDesc, CudaDeviceVariable<float> cx,
                            TensorDescriptor[] dxDesc, CudaDeviceVariable<float> dx,
                            TensorDescriptor dhxDesc, CudaDeviceVariable<float> dhx,
                            TensorDescriptor dcxDesc, CudaDeviceVariable<float> dcx,
                            CudaDeviceVariable<byte> workspace, SizeT workSpaceSizeInBytes,
                            CudaDeviceVariable<byte> reserveSpace, SizeT reserveSpaceSizeInBytes)
{
    var yDescs = yDesc.Select(q => q.Desc).ToArray();
    var dyDescs = dyDesc.Select(q => q.Desc).ToArray();
    var dxDescs = dxDesc.Select(q => q.Desc).ToArray();
    res = CudaDNNNativeMethods.cudnnRNNBackwardData(
        _handle, _desc,
        yDescs, y.DevicePointer,
        dyDescs, dy.DevicePointer,
        dhyDesc.Desc, dhy.DevicePointer,
        dcyDesc.Desc, dcy.DevicePointer,
        wDesc.Desc, w.DevicePointer,
        hxDesc.Desc, hx.DevicePointer,
        cxDesc.Desc, cx.DevicePointer,
        dxDescs, dx.DevicePointer,
        dhxDesc.Desc, dhx.DevicePointer,
        dcxDesc.Desc, dcx.DevicePointer,
        workspace.DevicePointer, workSpaceSizeInBytes,
        reserveSpace.DevicePointer, reserveSpaceSizeInBytes);
    Debug.WriteLine($"{DateTime.Now:G}, cudnnRNNBackwardData: {res}");
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
}
/// <summary>
/// Heuristic for obtaining the best suited algorithm for cudnnConvolutionBackwardData_v3
/// for the given layer specifications. Based on the input preference, returns either the
/// fastest algorithm or the fastest algorithm within a given memory limit. For an
/// exhaustive search, use cudnnFindConvolutionBackwardDataAlgorithm instead.
/// </summary>
/// <param name="filterDesc">Handle to a previously initialized filter descriptor.</param>
/// <param name="diffDesc">Handle to the previously initialized input differential tensor descriptor.</param>
/// <param name="convDesc">Previously initialized convolution descriptor.</param>
/// <param name="gradDesc">Handle to the previously initialized output tensor descriptor.</param>
/// <param name="preference">Preference criteria in terms of memory requirement and speed.</param>
/// <param name="memoryLimitInbytes">Maximum amount of GPU workspace memory the caller is willing
/// to use. This is currently a placeholder and is not used.</param>
/// <returns>The backward-data convolution algorithm selected according to the specified preference.</returns>
public cudnnConvolutionBwdDataAlgo GetConvolutionBackwardDataAlgorithm(FilterDescriptor filterDesc,
                                                                       TensorDescriptor diffDesc,
                                                                       ConvolutionDescriptor convDesc,
                                                                       TensorDescriptor gradDesc,
                                                                       cudnnConvolutionBwdDataPreference preference,
                                                                       SizeT memoryLimitInbytes)
{
    cudnnConvolutionBwdDataAlgo selectedAlgo = default(cudnnConvolutionBwdDataAlgo);
    res = CudaDNNNativeMethods.cudnnGetConvolutionBackwardDataAlgorithm(
        _handle, filterDesc.Desc, diffDesc.Desc, convDesc.Desc, gradDesc.Desc,
        preference, memoryLimitInbytes, ref selectedAlgo);
    Debug.WriteLine($"{DateTime.Now:G}, cudnnGetConvolutionBackwardDataAlgorithm: {res}");
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
    return selectedAlgo;
}
/// <summary>
/// This routine accumulates weight gradients dw from the recurrent neural network described
/// by rnnDesc with inputs x, hx, and outputs y. The mode of operation in this case is additive,
/// the weight gradients calculated will be added to those already existing in dw. workspace
/// is required for intermediate storage. The data in reserveSpace must have previously been
/// generated by cudnnRNNBackwardData.
/// </summary>
/// <param name="xDesc">An array of tensor descriptors describing the input to each recurrent iteration.
/// Each tensor descriptor must have the same first dimension. The second dimension of the tensors may
/// decrease from element n to element n+1 but may not increase. The tensor must be fully packed.</param>
/// <param name="x">Data pointer to GPU memory associated with the tensor descriptors in the array xDesc.</param>
/// <param name="hxDesc">Handle to a previously initialized tensor descriptor describing the initial hidden
/// state of the RNN. The first dimension of the tensor must match the hiddenSize argument passed to the
/// cudnnSetRNNDescriptor call used to initialize rnnDesc. The second dimension must match the second dimension
/// of the first tensor described in xDesc. The third dimension must match the numLayers argument passed to
/// the cudnnSetRNNDescriptor call used to initialize rnnDesc. The tensor must be fully packed.</param>
/// <param name="hx">Data pointer to GPU memory associated with the tensor descriptor hxDesc. If
/// a NULL pointer is passed, the initial hidden state of the network will be initialized to zero.</param>
/// <param name="yDesc">An array of tensor descriptors describing the output from each
/// recurrent iteration. The first dimension of the tensor depends on the direction
/// argument passed to the cudnnSetRNNDescriptor call used to initialize rnnDesc:
/// * If direction is CUDNN_UNIDIRECTIONAL the first dimension should match the hiddenSize
/// argument passed to cudnnSetRNNDescriptor.
/// * If direction is CUDNN_BIDIRECTIONAL the first dimension should match double the hiddenSize
/// argument passed to cudnnSetRNNDescriptor.
/// The second dimension of the tensor n must match the second dimension of the tensor n in dyDesc.
/// The tensor must be fully packed.</param>
/// <param name="y">Data pointer to GPU memory associated with the output tensor descriptor yDesc.</param>
/// <param name="workspace">Data pointer to GPU memory to be used as a workspace for this call.</param>
/// <param name="workSpaceSizeInBytes">Specifies the size in bytes of the provided workspace.</param>
/// <param name="dwDesc">Handle to a previously initialized filter descriptor describing the gradients of the weights for the RNN.</param>
/// <param name="dw">Data pointer to GPU memory associated with the filter descriptor dwDesc.</param>
/// <param name="reserveSpace">Data pointer to GPU memory to be used as a reserve space for this call.</param>
/// <param name="reserveSpaceSizeInBytes">Specifies the size in bytes of the provided reserveSpace.</param>
public void RNNBackwardWeights(TensorDescriptor[] xDesc,
    CudaDeviceVariable<float> x,
    TensorDescriptor hxDesc,
    CudaDeviceVariable<float> hx,
    TensorDescriptor[] yDesc,
    CudaDeviceVariable<float> y,
    CudaDeviceVariable<byte> workspace,
    SizeT workSpaceSizeInBytes,
    FilterDescriptor dwDesc,
    CudaDeviceVariable<float> dw,
    CudaDeviceVariable<byte> reserveSpace,
    SizeT reserveSpaceSizeInBytes)
{
    // The native API takes raw descriptor handles, so unwrap the managed wrappers first.
    var xDescHandles = xDesc.Select(d => d.Desc).ToArray();
    var yDescHandles = yDesc.Select(d => d.Desc).ToArray();

    res = CudaDNNNativeMethods.cudnnRNNBackwardWeights(
        _handle, _desc,
        xDescHandles, x.DevicePointer,
        hxDesc.Desc, hx.DevicePointer,
        yDescHandles, y.DevicePointer,
        workspace.DevicePointer, workSpaceSizeInBytes,
        dwDesc.Desc, dw.DevicePointer,
        reserveSpace.DevicePointer, reserveSpaceSizeInBytes);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnRNNBackwardWeights", res));
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
}
/// <summary>
/// This function returns the amount of GPU memory workspace the user needs
/// to allocate to be able to call cudnnConvolutionBackwardData_v3 with the
/// specified algorithm. The workspace allocated will then be passed to the routine
/// cudnnConvolutionBackwardData_v3. The specified algorithm can be the result of the
/// call to cudnnGetConvolutionBackwardDataAlgorithm or can be chosen arbitrarily
/// by the user. Note that not every algorithm is available for every configuration of the
/// input tensor and/or every configuration of the convolution descriptor.
/// </summary>
/// <param name="filterDesc">Handle to a previously initialized filter descriptor.</param>
/// <param name="diffDesc">Handle to the previously initialized input differential tensor descriptor.</param>
/// <param name="convDesc">Previously initialized convolution descriptor.</param>
/// <param name="gradDesc">Handle to the previously initialized output tensor descriptor.</param>
/// <param name="algo">Enumerant that specifies the chosen convolution algorithm</param>
/// <returns>Amount of GPU memory needed as workspace to be able to execute a forward convolution with the specified algo</returns>
public SizeT GetConvolutionBackwardDataWorkspaceSize(FilterDescriptor filterDesc,
    TensorDescriptor diffDesc,
    ConvolutionDescriptor convDesc,
    TensorDescriptor gradDesc,
    cudnnConvolutionBwdDataAlgo algo)
{
    // Filled in by the native call below.
    SizeT workspaceBytes = new SizeT();
    res = CudaDNNNativeMethods.cudnnGetConvolutionBackwardDataWorkspaceSize(
        _handle, filterDesc.Desc, diffDesc.Desc, convDesc.Desc, gradDesc.Desc,
        algo, ref workspaceBytes);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnGetConvolutionBackwardDataWorkspaceSize", res));
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
    return workspaceBytes;
}
/// <summary>
/// This function computes the convolution gradient with respect to the filter coefficients,
/// scaling the result by alpha and blending it with the prior contents of gradData scaled by
/// beta (per the cuDNN cudnnConvolutionBackwardFilter contract; see the cuDNN library
/// documentation for the exact scaling semantics). This overload operates on double-precision
/// data. NOTE(review): summary is based on the cuDNN API this wraps — only the native call,
/// logging and error translation are visible here.
/// </summary>
/// <param name="alpha">Scaling factor applied to the computed filter gradient.</param>
/// <param name="srcDesc">Handle to a previously initialized tensor descriptor describing the input.</param>
/// <param name="srcData">Data pointer to GPU memory associated with the tensor descriptor srcDesc.</param>
/// <param name="diffDesc">Handle to the previously initialized input differential tensor descriptor.</param>
/// <param name="diffData">Data pointer to GPU memory associated with the tensor descriptor diffDesc.</param>
/// <param name="convDesc">Previously initialized convolution descriptor.</param>
/// <param name="beta">Scaling factor applied to the prior value of the output before accumulation.</param>
/// <param name="gradDesc">Handle to a previously initialized filter descriptor for the computed gradient.</param>
/// <param name="gradData">Data pointer to GPU memory associated with the filter descriptor gradDesc.</param>
public void ConvolutionBackwardFilter(double alpha,
    TensorDescriptor srcDesc,
    CudaDeviceVariable<double> srcData,
    TensorDescriptor diffDesc,
    CudaDeviceVariable<double> diffData,
    ConvolutionDescriptor convDesc,
    double beta,
    FilterDescriptor gradDesc,
    CudaDeviceVariable<double> gradData)
{
    res = CudaDNNNativeMethods.cudnnConvolutionBackwardFilter(_handle,
        ref alpha, srcDesc.Desc, srcData.DevicePointer,
        diffDesc.Desc, diffData.DevicePointer,
        convDesc.Desc,
        ref beta, gradDesc.Desc, gradData.DevicePointer);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnConvolutionBackwardFilter", res));
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }
}
/// <summary>
/// This function attempts all cuDNN algorithms for
/// cudnnConvolutionBackwardData_v3 and outputs performance metrics to a user-
/// allocated array of cudnnConvolutionBwdDataAlgoPerf_t. These metrics are written
/// in sorted fashion where the first element has the lowest compute time.
/// </summary>
/// <param name="filterDesc">Handle to a previously initialized filter descriptor.</param>
/// <param name="diffDesc">Handle to the previously initialized input differential tensor descriptor.</param>
/// <param name="convDesc">Previously initialized convolution descriptor.</param>
/// <param name="gradDesc">Handle to the previously initialized output tensor descriptor.</param>
/// <param name="requestedAlgoCount">The maximum number of elements to be stored in perfResults.</param>
/// <returns>An array of performance metrics sorted ascending by compute time, or null when the
/// native call reports no algorithms.</returns>
public cudnnConvolutionBwdDataAlgoPerf[] FindConvolutionBackwardDataAlgorithm(FilterDescriptor filterDesc,
    TensorDescriptor diffDesc,
    ConvolutionDescriptor convDesc,
    TensorDescriptor gradDesc,
    int requestedAlgoCount)
{
    // Over-allocate to the requested size; the native call tells us how many entries it filled.
    var scratch = new cudnnConvolutionBwdDataAlgoPerf[requestedAlgoCount];
    int filledCount = 0;
    res = CudaDNNNativeMethods.cudnnFindConvolutionBackwardDataAlgorithm(
        _handle, filterDesc.Desc, diffDesc.Desc, convDesc.Desc, gradDesc.Desc,
        requestedAlgoCount, ref filledCount, scratch);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnFindConvolutionBackwardDataAlgorithm", res));
    if (res != cudnnStatus.Success)
    {
        throw new CudaDNNException(res);
    }

    // Preserve the historical contract: null (not an empty array) when nothing was returned.
    if (filledCount <= 0)
    {
        return null;
    }

    // Trim the scratch buffer down to only the entries the native call actually wrote.
    return scratch.Take(filledCount).ToArray();
}