/// <summary>
/// Externally implemented (<c>static extern</c>) declaration of <c>cudnnFindConvolutionBackwardFilterAlgorithmEx</c>.
/// NOTE(review): judging by the name and the parameter list, this is presumably the variant of
/// <c>cudnnFindConvolutionBackwardFilterAlgorithm</c> that operates on caller-supplied device buffers
/// and a caller-supplied workspace - confirm against the cuDNN documentation.
/// </summary>
/// <param name="handle">cuDNN handle.</param>
/// <param name="xDesc">Tensor descriptor; presumably describes <paramref name="x"/> - TODO confirm.</param>
/// <param name="x">Device pointer; presumably the data described by <paramref name="xDesc"/> - TODO confirm.</param>
/// <param name="dyDesc">Tensor descriptor; presumably describes <paramref name="dy"/> - TODO confirm.</param>
/// <param name="dy">Device pointer; presumably the data described by <paramref name="dyDesc"/> - TODO confirm.</param>
/// <param name="convDesc">Convolution descriptor.</param>
/// <param name="dwDesc">Filter descriptor; presumably describes <paramref name="dw"/> - TODO confirm.</param>
/// <param name="dw">Device pointer; presumably the data described by <paramref name="dwDesc"/> - TODO confirm.</param>
/// <param name="requestedAlgoCount">Number of results requested by the caller.</param>
/// <param name="returnedAlgoCount">Passed by reference; presumably receives the number of results actually written - TODO confirm.</param>
/// <param name="perfResults">
/// Passed by reference as a single struct.
/// NOTE(review): the non-Ex declaration takes an array here; verify that requesting more than one
/// result cannot make the native side write past this single element.
/// </param>
/// <param name="workSpace">Device pointer to a workspace buffer.</param>
/// <param name="workSpaceSizeInBytes">Size value for <paramref name="workSpace"/>; in bytes, going by the parameter name.</param>
/// <returns>The <c>cudnnStatus</c> value produced by the native call.</returns>
public static extern cudnnStatus cudnnFindConvolutionBackwardFilterAlgorithmEx(
                                 cudnnHandle handle,
                                 cudnnTensorDescriptor xDesc,
                                 CUdeviceptr x,
                                 cudnnTensorDescriptor dyDesc,
                                 CUdeviceptr dy,
                                 cudnnConvolutionDescriptor convDesc,
                                 cudnnFilterDescriptor dwDesc,
                                 CUdeviceptr dw,
                                 int requestedAlgoCount,
                                 ref int returnedAlgoCount,
                                 ref cudnnConvolutionBwdFilterAlgoPerf perfResults,
                                 CUdeviceptr workSpace,
                                 SizeT                               workSpaceSizeInBytes );
		/// <summary>
		/// Externally implemented (<c>static extern</c>) declaration of <c>cudnnFindConvolutionBackwardFilterAlgorithm</c>.
		/// This function attempts all cuDNN algorithms for the convolution backward filter operation and outputs
		/// performance metrics to a user-allocated array. These metrics are written in sorted fashion where
		/// the first element has the lowest compute time.
		/// </summary>
		/// <param name="handle">cuDNN handle.</param>
		/// <param name="srcDesc">Handle to the previously initialized input tensor descriptor.</param>
		/// <param name="diffDesc">Handle to the previously initialized input differential tensor descriptor.</param>
		/// <param name="convDesc">Previously initialized convolution descriptor.</param>
		/// <param name="gradDesc">Handle to a previously initialized filter descriptor.</param>
		/// <param name="requestedAlgoCount">The maximum number of elements to be stored in perfResults.</param>
		/// <param name="returnedAlgoCount">Receives the number of elements stored in perfResults.</param>
		/// <param name="perfResults">User-allocated array that receives the performance metrics, sorted ascending by compute time.</param>
		/// <returns>The <c>cudnnStatus</c> value produced by the native call.</returns>
		public static extern cudnnStatus cudnnFindConvolutionBackwardFilterAlgorithm( cudnnHandle     handle,
                                                                       cudnnTensorDescriptor          srcDesc,
                                                                       cudnnTensorDescriptor          diffDesc,
                                                                       cudnnConvolutionDescriptor     convDesc, 
                                                                       cudnnFilterDescriptor          gradDesc,
                                                                       int                              requestedAlgoCount,
                                                                       ref int                          returnedAlgoCount,
                                                                       cudnnConvolutionBwdFilterAlgoPerf[] perfResults   
                                                                     );
		/// <summary>
		/// Runs cudnnFindConvolutionBackwardFilterAlgorithm, which tries the cuDNN algorithms available for the
		/// convolution backward filter operation, and hands back the performance metrics it reports.
		/// The metrics are ordered so that the entry with the lowest compute time comes first.
		/// </summary>
		/// <param name="srcDesc">Handle to the previously initialized input tensor descriptor.</param>
		/// <param name="diffDesc">Handle to the previously initialized input differential tensor descriptor.</param>
		/// <param name="convDesc">Previously initialized convolution descriptor.</param>
		/// <param name="gradDesc">Handle to a previously initialized filter descriptor.</param>
		/// <param name="requestedAlgoCount">The maximum number of performance entries to retrieve.</param>
		/// <returns>
		/// An array containing exactly the reported entries, sorted ascending by compute time,
		/// or <c>null</c> when the native call reports no entries.
		/// </returns>
		/// <exception cref="CudaDNNException">The native call returned a status other than <c>cudnnStatus.Success</c>.</exception>
		public cudnnConvolutionBwdFilterAlgoPerf[] FindConvolutionBackwardFilterAlgorithm(
			TensorDescriptor srcDesc,
			TensorDescriptor diffDesc,
			ConvolutionDescriptor convDesc,
			FilterDescriptor gradDesc,
			int requestedAlgoCount)
		{
			// Scratch buffer sized for the maximum number of entries the caller is willing to accept.
			cudnnConvolutionBwdFilterAlgoPerf[] scratch = new cudnnConvolutionBwdFilterAlgoPerf[requestedAlgoCount];
			int algoCount = 0;

			res = CudaDNNNativeMethods.cudnnFindConvolutionBackwardFilterAlgorithm(
				_handle,
				srcDesc.Desc,
				diffDesc.Desc,
				convDesc.Desc,
				gradDesc.Desc,
				requestedAlgoCount,
				ref algoCount,
				scratch);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnFindConvolutionBackwardFilterAlgorithm", res));

			if (res != cudnnStatus.Success)
			{
				throw new CudaDNNException(res);
			}

			if (algoCount > 0)
			{
				// Return only the leading entries that were actually reported, not the whole scratch buffer.
				cudnnConvolutionBwdFilterAlgoPerf[] trimmed = new cudnnConvolutionBwdFilterAlgoPerf[algoCount];
				Array.Copy(scratch, trimmed, algoCount);
				return trimmed;
			}

			return null;
		}