//[/CudnnMnistFCF]
//[CudnnMnistCF]
/// <summary>
/// Runs the forward pass of a convolution layer through cuDNN and then adds the layer's bias
/// to the result.
/// </summary>
/// <param name="conv">
/// Layer supplying <c>Outputs</c>, <c>Inputs</c> and <c>KernelDim</c> for the filter descriptor,
/// the filter data (<c>DataD</c>) and, via <see cref="AddBias"/>, the bias (<c>BiasD</c>).
/// </param>
/// <param name="nchw">
/// Dimensions of <paramref name="srcData"/> on entry. Its N, C, H and W members are overwritten
/// with the output dimensions reported by cuDNN.
/// NOTE(review): the caller only observes the overwrite if <c>nchw_t</c> is a reference type — confirm.
/// </param>
/// <param name="srcData">Device buffer holding the input of the convolution.</param>
/// <param name="dstData">
/// Device buffer receiving the output; passed through <c>Resize</c> with the output element count
/// before the convolution runs.
/// </param>
public void ConvoluteForward(Layer conv, nchw_t nchw, DeviceMemory<float> srcData, ref DeviceMemory<float> dstData)
{
    // Scaling factors handed to cuDNN's ConvolutionForward.
    const float srcScale = 1.0f;
    const float dstScale = 0.0f;

    _srcTensorDesc.Set4D(TensorFormat, DataType, nchw.N, nchw.C, nchw.H, nchw.W);
    _filterDesc.Set4D(DataType, conv.Outputs, conv.Inputs, conv.KernelDim, conv.KernelDim);
    // NOTE(review): the numeric arguments are presumably pad (0, 0), stride (1, 1) and upscale (1, 1) — confirm.
    _convDesc.Set2D(0, 0, 1, 1, 1, 1, CUDNNInterop.cudnnConvolutionMode_t.CUDNN_CROSS_CORRELATION);

    // Find the dimensions of the convolution output:
    // outputDim = 1 + (inputDim + 2*pad - filterDim) / convolutionStride
    int outN, outC, outH, outW;
    _convDesc.Get2DForwardOutputDim(_srcTensorDesc, _filterDesc, out outN, out outC, out outH, out outW);

    // From here on nchw describes the output rather than the input.
    nchw.N = outN;
    nchw.C = outC;
    nchw.H = outH;
    nchw.W = outW;
    _dstTensorDesc.Set4D(TensorFormat, DataType, nchw.N, nchw.C, nchw.H, nchw.W);

    var algo = _cudnn.GetConvolutionForwardAlgorithm(
        _srcTensorDesc,
        _filterDesc,
        _convDesc,
        _dstTensorDesc,
        CUDNNInterop.cudnnConvolutionFwdPreference_t.CUDNN_CONVOLUTION_FWD_PREFER_FASTEST,
        IntPtr.Zero);

    var dstElementCount = nchw.N * nchw.C * nchw.H * nchw.W;
    Resize(ref dstData, dstElementCount);

    // The workspace size depends on the chosen algorithm, so it is queried after the algorithm.
    var workSpaceBytes = _cudnn.GetConvolutionForwardWorkspaceSize(
        _srcTensorDesc,
        _filterDesc,
        _convDesc,
        _dstTensorDesc,
        algo);

    using (var scratch = _worker.Malloc<byte>(workSpaceBytes.ToInt32()))
    {
        _cudnn.ConvolutionForward(
            srcScale,
            _srcTensorDesc,
            srcData.Ptr,
            _filterDesc,
            conv.DataD.Ptr,
            _convDesc,
            algo,
            scratch.Ptr,
            workSpaceBytes,
            dstScale,
            _dstTensorDesc,
            dstData.Ptr);

        // The bias has one entry per output channel.
        AddBias(_dstTensorDesc, conv, outC, dstData);
    }
}
/// <summary>
/// Adds a layer's bias to <paramref name="data"/> in place using cuDNN's <c>AddTensor</c>
/// in <c>CUDNN_ADD_SAME_C</c> mode.
/// </summary>
/// <param name="dstTensorDesc">Descriptor of the tensor stored in <paramref name="data"/>.</param>
/// <param name="layer">Layer whose <c>BiasD</c> buffer supplies the bias values.</param>
/// <param name="c">Channel count; the bias descriptor is configured as 1 x c x 1 x 1.</param>
/// <param name="data">Device buffer that the bias is added to.</param>
public void AddBias(CUDNNTensorDescriptor dstTensorDesc, Layer layer, int c, DeviceMemory<float> data)
{
    // Both scaling factors are 1.0f.
    // NOTE(review): presumably this yields data = bias + data — confirm against the cuDNN docs.
    const float biasScale = 1.0f;
    const float dataScale = 1.0f;

    _biasTensorDesc.Set4D(TensorFormat, DataType, 1, c, 1, 1);

    _cudnn.AddTensor(
        CUDNNInterop.cudnnAddMode_t.CUDNN_ADD_SAME_C,
        biasScale,
        _biasTensorDesc,
        layer.BiasD.Ptr,
        dataScale,
        dstTensorDesc,
        data.Ptr);
}