// Managed declaration of the native cudnnFindConvolutionForwardAlgorithmEx entry point.
// In addition to the descriptors and algorithm counts taken by cudnnFindConvolutionForwardAlgorithm,
// this variant receives CUdeviceptr arguments next to the x/w/y descriptors (presumably the device
// buffers holding the input, filter and output data - confirm against the cuDNN reference) and a
// caller-supplied workspace pointer together with its size in bytes.
// The native call reports its outcome through the returned cudnnStatus; the number of results written
// is passed back through returnedAlgoCount.
public static extern cudnnStatus cudnnFindConvolutionForwardAlgorithmEx(
                                 cudnnHandle handle,
                                 cudnnTensorDescriptor xDesc,
                                 CUdeviceptr x,
                                 cudnnFilterDescriptor wDesc,
                                 CUdeviceptr w,
                                 cudnnConvolutionDescriptor convDesc,
                                 cudnnTensorDescriptor yDesc,
                                 CUdeviceptr y,
                                 int requestedAlgoCount,
                                 ref int returnedAlgoCount,
                                 // NOTE(review): perfResults is a single struct passed by reference here, whereas
                                 // cudnnFindConvolutionForwardAlgorithm takes an array. If the native side writes up to
                                 // requestedAlgoCount entries, only one slot is available through this signature -
                                 // confirm callers always pass requestedAlgoCount == 1 or revisit the marshalling.
                                 ref cudnnConvolutionFwdAlgoPerf      perfResults,
                                 CUdeviceptr workSpace,
                                 SizeT                              workSpaceSizeInBytes );
		// Managed declaration of the native cudnnFindConvolutionForwardAlgorithm entry point.
		// Takes the cuDNN handle, the source tensor / filter / convolution / destination tensor descriptors,
		// the capacity of perfResults (requestedAlgoCount) and passes back the number of entries filled in
		// through returnedAlgoCount. The native call reports its outcome through the returned cudnnStatus.
		public static extern cudnnStatus cudnnFindConvolutionForwardAlgorithm(cudnnHandle   handle,
                                                                 cudnnTensorDescriptor      srcDesc,
                                                                 cudnnFilterDescriptor      filterDesc,
                                                                 cudnnConvolutionDescriptor convDesc, 
                                                                 cudnnTensorDescriptor      destDesc,
                                                                 int                        requestedAlgoCount,
                                                                 ref int                    returnedAlgoCount,
                                                                 // NOTE(review): the array is filled by native code but carries no explicit [In, Out]
                                                                 // marshalling attribute here; the results are only visible to the caller if the runtime
                                                                 // pins the array instead of copying it (i.e. the struct is blittable) - confirm.
                                                                 cudnnConvolutionFwdAlgoPerf[] perfResults                                                 
                                                                );
		/// <summary>
		/// This function attempts all cuDNN algorithms and outputs performance metrics to a
		/// user-allocated array of cudnnConvolutionFwdAlgoPerf_t. These metrics are written
		/// in sorted fashion where the first element has the lowest compute time.
		/// </summary>
		/// <param name="srcDesc">Handle to the previously initialized input tensor descriptor.</param>
		/// <param name="filterDesc">Handle to a previously initialized filter descriptor.</param>
		/// <param name="convDesc">Previously initialized convolution descriptor.</param>
		/// <param name="destDesc">Handle to the previously initialized output tensor descriptor.</param>
		/// <param name="requestedAlgoCount">The maximum number of elements to be stored in perfResults.</param>
		/// <returns>
		/// An array holding the performance metrics that were actually returned (at most
		/// <paramref name="requestedAlgoCount"/> entries), sorted ascending by compute time,
		/// or <c>null</c> when the native call reports that no results were written.
		/// </returns>
		/// <exception cref="ArgumentNullException">One of the descriptor arguments is null.</exception>
		/// <exception cref="ArgumentOutOfRangeException"><paramref name="requestedAlgoCount"/> is negative.</exception>
		/// <exception cref="CudaDNNException">The native call returned a status other than Success.</exception>
		public cudnnConvolutionFwdAlgoPerf[] FindConvolutionForwardAlgorithm(TensorDescriptor srcDesc,
													FilterDescriptor filterDesc,
													ConvolutionDescriptor convDesc,
													TensorDescriptor destDesc,
													int requestedAlgoCount
												)
		{
			// Fail with a descriptive argument exception instead of a bare NullReferenceException
			// raised while dereferencing ".Desc" further down.
			if (srcDesc == null) throw new ArgumentNullException("srcDesc");
			if (filterDesc == null) throw new ArgumentNullException("filterDesc");
			if (convDesc == null) throw new ArgumentNullException("convDesc");
			if (destDesc == null) throw new ArgumentNullException("destDesc");

			// A negative count would otherwise surface as an OverflowException from the array allocation.
			if (requestedAlgoCount < 0)
				throw new ArgumentOutOfRangeException("requestedAlgoCount", requestedAlgoCount, "requestedAlgoCount must not be negative.");

			// Buffer with room for the maximum number of results the caller asked for.
			cudnnConvolutionFwdAlgoPerf[] perfResults = new cudnnConvolutionFwdAlgoPerf[requestedAlgoCount];
			int returnedAlgoCount = 0;
			res = CudaDNNNativeMethods.cudnnFindConvolutionForwardAlgorithm(_handle, srcDesc.Desc, filterDesc.Desc, convDesc.Desc, destDesc.Desc, requestedAlgoCount, ref returnedAlgoCount, perfResults);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnFindConvolutionForwardAlgorithm", res));
			if (res != cudnnStatus.Success) throw new CudaDNNException(res);

			// Kept for backward compatibility: callers receive null (not an empty array) when nothing was returned.
			if (returnedAlgoCount <= 0) return null;

			// Trim the buffer to the entries that were actually filled in. Array.Resize leaves the array
			// untouched when the size already matches, so no copy is made when every slot was used.
			// The clamp guards against a reported count larger than the buffer we handed out.
			int validCount = Math.Min(returnedAlgoCount, perfResults.Length);
			Array.Resize(ref perfResults, validCount);
			return perfResults;
		}