/// <summary>
/// Queries the CUFFT library for its version number (CUDA 5.0 only).
/// </summary>
/// <returns>
/// The version written back by the driver call, or -1 when that call does
/// not report <c>CUFFTResult.Success</c>.
/// </returns>
public override int GetVersion()
{
    // Sentinel handed back whenever the driver query does not succeed.
    const int Unavailable = -1;

    int reported = Unavailable;
    CUFFTResult status = _driver.cufftGetVersion(ref reported);

    // Ignore whatever the driver wrote into 'reported' on failure.
    return status == CUFFTResult.Success ? reported : Unavailable;
}
/// <summary>
/// Selects the FFTW compatibility mode used for the output layout of a plan.
/// When FFTW compatibility is desired, it can be configured for padding
/// only, for asymmetric complex inputs only, or to be fully compatible.
/// </summary>
/// <param name="plan">The plan whose compatibility mode is changed.</param>
/// <param name="mode">The requested mode; it is cast directly to the CUFFT enumeration.</param>
/// <exception cref="CudafyHostException">
/// Thrown with the result code's name when the driver call does not succeed.
/// </exception>
public override void SetCompatibilityMode(FFTPlan plan, eCompatibilityMode mode)
{
    FFTPlanEx entry = Plans[plan];
    CUFFTResult status = _driver.cufftSetCompatibilityMode(entry.CudaFFTHandle, (CUFFTCompatibility)mode);

    if (status == CUFFTResult.Success)
    {
        return;
    }

    throw new CudafyHostException(status.ToString());
}
/// <summary>
/// Associates a stream with the given plan.
/// </summary>
/// <param name="plan">The plan to set the stream for.</param>
/// <param name="streamId">The stream id; must not be negative.</param>
/// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="streamId"/> is negative.</exception>
/// <exception cref="CudafyMathException">Thrown when the driver call does not succeed.</exception>
public override void SetStream(FFTPlan plan, int streamId)
{
    if (streamId < 0)
    {
        throw new ArgumentOutOfRangeException("streamId");
    }

    // The stream is looked up on the GPU object, but its result is currently unused (see note below).
    CUstream driverStream = (CUstream)_gpu.GetStream(streamId);
    FFTPlanEx entry = Plans[plan];

    // NOTE(review): the handle from 'driverStream' is never copied into 'fftStream'
    // (the assignment below is disabled), so the driver always receives a
    // default-constructed cudaStream regardless of streamId. Confirm whether
    // this is intentional before relying on per-stream execution.
    cudaStream fftStream = new cudaStream();
    //fftStream.Value = driverStream.Pointer.ToInt32();

    CUFFTResult status = _driver.cufftSetStream(entry.CudaFFTHandle, fftStream);
    if (status == CUFFTResult.Success)
    {
        return;
    }

    throw new CudafyMathException(CudafyMathException.csCUDA_EXCEPTION_X, status);
}
/// <summary>
/// Destroys the CUFFT plan behind <paramref name="plan"/> and drops it from the plan table.
/// </summary>
/// <param name="plan">The plan to free.</param>
/// <remarks>
/// A failing destroy call is only written to the debug output; the plan is
/// removed from the table in either case.
/// </remarks>
public override void Remove(FFTPlan plan)
{
    FFTPlanEx entry = Plans[plan];
    CUFFTResult status = _driver.cufftDestroy(entry.CudaFFTHandle);

    // Failure is logged rather than thrown (the throw was disabled in the original).
    string prefix = status != CUFFTResult.Success
        ? "remove plan failed: "
        : "remove plan succeeded: ";
    Debug.WriteLine(prefix + status.ToString());

    Plans.Remove(plan);
}
/// <summary>
/// Executes the transform described by <paramref name="plan"/>, dispatching to the
/// driver entry point that matches the plan's CUFFT type.
/// </summary>
/// <param name="plan">The plan to execute; must be present in the plan table.</param>
/// <param name="input">Object whose device memory is used as the transform input.</param>
/// <param name="output">Object whose device memory receives the transform output.</param>
/// <param name="inverse">
/// Selects the inverse direction for C2C and Z2Z plans. The other plan types
/// do not take a direction argument, so the flag is not passed to them.
/// </param>
/// <exception cref="ArgumentException">
/// Thrown when the device memory resolved for <paramref name="input"/> or
/// <paramref name="output"/> is not a <c>CUDevicePtrEx</c>.
/// </exception>
/// <exception cref="CudafyMathException">
/// Thrown with the result code's name when the driver call does not succeed, or
/// with <c>ExecFailed</c> when the plan type matches none of the handled types.
/// </exception>
private void DoExecute(FFTPlan plan, object input, object output, bool inverse = false)
{
    FFTPlanEx planEx = Plans[plan];

    // 'as' yields null when the resolved pointer is not a CUDevicePtrEx; check
    // explicitly so the caller gets a descriptive error instead of a
    // NullReferenceException at the first DevPtr access.
    CUDevicePtrEx inPtrEx = _gpu.GetDeviceMemory(input) as CUDevicePtrEx;
    if (inPtrEx == null)
    {
        throw new ArgumentException("Device memory for the FFT input is not a CUDA device pointer.", "input");
    }

    CUDevicePtrEx outPtrEx = _gpu.GetDeviceMemory(output) as CUDevicePtrEx;
    if (outPtrEx == null)
    {
        throw new ArgumentException("Device memory for the FFT output is not a CUDA device pointer.", "output");
    }

    CUFFTDirection dir = inverse ? CUFFTDirection.Inverse : CUFFTDirection.Forward;

    // Stays ExecFailed when the plan type matches no case below, so an
    // unsupported type is reported through the same failure path.
    CUFFTResult res = CUFFTResult.ExecFailed;
    switch (planEx.CudaFFTType)
    {
        case CUFFTType.C2C:
            res = _driver.cufftExecC2C(planEx.CudaFFTHandle, inPtrEx.DevPtr, outPtrEx.DevPtr, dir);
            break;
        case CUFFTType.C2R:
            res = _driver.cufftExecC2R(planEx.CudaFFTHandle, inPtrEx.DevPtr, outPtrEx.DevPtr);
            break;
        case CUFFTType.D2Z:
            res = _driver.cufftExecD2Z(planEx.CudaFFTHandle, inPtrEx.DevPtr, outPtrEx.DevPtr);
            break;
        case CUFFTType.R2C:
            res = _driver.cufftExecR2C(planEx.CudaFFTHandle, inPtrEx.DevPtr, outPtrEx.DevPtr);
            break;
        case CUFFTType.Z2D:
            res = _driver.cufftExecZ2D(planEx.CudaFFTHandle, inPtrEx.DevPtr, outPtrEx.DevPtr);
            break;
        case CUFFTType.Z2Z:
            res = _driver.cufftExecZ2Z(planEx.CudaFFTHandle, inPtrEx.DevPtr, outPtrEx.DevPtr, dir);
            break;
    }

    if (res != CUFFTResult.Success)
    {
        throw new CudafyMathException(res.ToString());
    }
}
/// <summary>
/// Creates an exception that carries the given CUFFT result code.
/// </summary>
/// <param name="error">The CUFFT result code stored on the exception.</param>
public CUFFTException(CUFFTResult error)
    : base()
{
    this.error = error;
}
/// <summary>
/// Creates an exception that carries the given CUFFT result code together
/// with a message and an inner exception, both forwarded to the base class.
/// </summary>
/// <param name="error">The CUFFT result code stored on the exception.</param>
/// <param name="message">The message passed to the base constructor.</param>
/// <param name="e">The inner exception passed to the base constructor.</param>
public CUFFTException(CUFFTResult error, string message, Exception e)
    : base(message, e)
{
    this.error = error;
}
// Sample entry point: builds a random signal and filter, pads them, copies
// them to the device and creates a 1D complex-to-complex CUFFT plan.
//
// NOTE(review): the method returns right after the plan is created, so the
// forward transforms, the pointwise-multiply launch, the inverse transform,
// the host-side comparison and all cleanup below the 'return;' are
// unreachable as written. This looks like a leftover debugging state —
// confirm before re-enabling.
static void Main(string[] args)
{
    // Init and select 1st device.
    CUDA cuda = new CUDA(0, true);

    // load module
    // NOTE(review): module loading and the function lookup are disabled, so
    // 'func' is a default-constructed CUfunction.
    //cuda.LoadModule(Path.Combine(Environment.CurrentDirectory, "simpleCUFFT.ptx"));
    CUfunction func = new CUfunction();// cuda.GetModuleFunction("ComplexPointwiseMulAndScale");

    // The filter size is assumed to be a number smaller than the signal size
    const int SIGNAL_SIZE = 50;
    const int FILTER_KERNEL_SIZE = 11;

    // Allocate host memory for the signal
    Float2[] h_signal = new Float2[SIGNAL_SIZE];

    // Initialize the memory for the signal: random real part, zero imaginary part
    Random r = new Random();
    for (int i = 0; i < SIGNAL_SIZE; ++i)
    {
        h_signal[i].x = r.Next() / (float)int.MaxValue;
        h_signal[i].y = 0;
    }

    // Allocate host memory for the filter
    Float2[] h_filter_kernel = new Float2[FILTER_KERNEL_SIZE];

    // Initialize the memory for the filter: random real part, zero imaginary part
    for (int i = 0; i < FILTER_KERNEL_SIZE; ++i)
    {
        h_filter_kernel[i].x = r.Next() / (float)int.MaxValue;
        h_filter_kernel[i].y = 0;
    }

    // Pad signal and filter kernel; the returned size is used for the plan below
    Float2[] h_padded_signal;
    Float2[] h_padded_filter_kernel;
    int new_size = PadData(h_signal, out h_padded_signal, SIGNAL_SIZE, h_filter_kernel, out h_padded_filter_kernel, FILTER_KERNEL_SIZE);

    // Allocate device memory for signal
    // Copy host memory to device
    CUdeviceptr d_signal = cuda.CopyHostToDevice <Float2>(h_padded_signal);

    // Allocate device memory for filter kernel
    // Copy host memory to device
    CUdeviceptr d_filter_kernel = cuda.CopyHostToDevice <Float2>(h_padded_filter_kernel);

    // CUFFT plan
    // NOTE(review): the plan is created through CUFFTDriver directly; 'handle'
    // is not passed to 'fft' anywhere in this method and 'fftres' is never read.
    CUFFT fft = new CUFFT(cuda);
    cufftHandle handle = new cufftHandle();
    CUFFTResult fftres = CUFFTDriver.cufftPlan1d(ref handle, new_size, CUFFTType.C2C, 1);
    //fft.Plan1D(new_size, CUFFTType.C2C, 1);

    // NOTE(review): early exit. d_signal and d_filter_kernel are not freed and
    // 'handle' is not destroyed on this path; the only Free/Destroy calls sit
    // in the unreachable code below.
    return;

    // Transform signal and kernel (in place: same pointer for input and output)
    fft.ExecuteComplexToComplex(d_signal, d_signal, CUFFTDirection.Forward);
    fft.ExecuteComplexToComplex(d_filter_kernel, d_filter_kernel, CUFFTDirection.Forward);

    // Multiply the coefficients together and normalize the result
    // ComplexPointwiseMulAndScale<<<32, 256>>>(d_signal,
    //     d_filter_kernel, new_size, 1.0f / new_size);
    // NOTE(review): the device pointers are cast to uint while the parameter
    // offsets are based on IntPtr.Size — presumably only consistent when
    // pointers are 32 bits wide; verify for 64-bit builds.
    cuda.SetFunctionBlockShape(func, 256, 1, 1);
    cuda.SetParameter(func, 0, (uint)d_signal.Pointer);
    cuda.SetParameter(func, IntPtr.Size, (uint)d_filter_kernel.Pointer);
    cuda.SetParameter(func, IntPtr.Size * 2, (uint)new_size);
    cuda.SetParameter(func, IntPtr.Size * 2 + 4, 1.0f / new_size);
    cuda.SetParameterSize(func, (uint)(IntPtr.Size * 2 + 8));
    cuda.Launch(func, 32, 1);

    // Transform signal back
    fft.ExecuteComplexToComplex(d_signal, d_signal, CUFFTDirection.Inverse);

    // Copy device memory to host (reuses the padded signal array as the destination)
    Float2[] h_convolved_signal = h_padded_signal;
    cuda.CopyDeviceToHost <Float2>(d_signal, h_convolved_signal);

    // Allocate host memory for the convolution result
    Float2[] h_convolved_signal_ref = new Float2[SIGNAL_SIZE];

    // Convolve on the host
    Convolve(h_signal, SIGNAL_SIZE, h_filter_kernel, FILTER_KERNEL_SIZE, h_convolved_signal_ref);

    // check result
    bool res = cutCompareL2fe(h_convolved_signal_ref, h_convolved_signal, 2 * SIGNAL_SIZE, 1e-5f);
    Console.WriteLine("Test {0}", (true == res) ? "PASSED" : "FAILED");

    //Destroy CUFFT context
    fft.Destroy();

    // cleanup memory
    cuda.Free(d_signal);
    cuda.Free(d_filter_kernel);
}