/// <summary>
/// Builds a three-dimensional FFT plan and registers it in <c>Plans</c>.
/// </summary>
/// <param name="fftType">Type of FFT.</param>
/// <param name="dataType">Data type.</param>
/// <param name="nx">The number of samples in x dimension.</param>
/// <param name="ny">The number of samples in y dimension.</param>
/// <param name="nz">The number of samples in z dimension.</param>
/// <param name="batchSize">Size of batch. Values of 1 or less go through the plain 3D plan call; larger values go through the "plan many" call.</param>
/// <returns>The newly created plan.</returns>
/// <exception cref="CudafyHostException">Thrown when the driver reports any result other than success.</exception>
public override FFTPlan3D Plan3D(eFFTType fftType, eDataType dataType, int nx, int ny, int nz, int batchSize)
{
    // The element sizes are produced by VerifyTypes but are not needed for plan creation.
    int inputSize, outputSize;
    CUFFTType nativeType = VerifyTypes(fftType, dataType, out inputSize, out outputSize);

    cufftHandle nativeHandle = new cufftHandle();
    CUFFTResult status;
    if (batchSize > 1)
    {
        // Batched transform: rank 3, no embedding arrays, stride 1 and distance 0 on both sides.
        status = _driver.cufftPlanMany(ref nativeHandle, 3, new int[] { nx, ny, nz }, null, 1, 0, null, 1, 0, nativeType, batchSize);
    }
    else
    {
        status = _driver.cufftPlan3d(ref nativeHandle, nx, ny, nz, nativeType);
    }

    if (status != CUFFTResult.Success)
    {
        throw new CudafyHostException(status.ToString());
    }

    // Pair the public plan object with the bookkeeping record that carries the native handle.
    FFTPlan3D plan = new FFTPlan3D(nx, ny, nz, batchSize, this);
    FFTPlan3DEx extended = new FFTPlan3DEx(plan);
    extended.CudaFFTHandle = nativeHandle;
    extended.CudaFFTType = nativeType;
    extended.DataType = dataType;

    Plans.Add(plan, extended);
    return plan;
}
/// <summary>
/// Creates the forward (R2C) and backward (C2R) batched 2D FFT plans for slot <paramref name="i"/>
/// and reports the larger of the two sizes returned by plan creation.
/// </summary>
/// <param name="i">Index into the <c>forward</c> and <c>backward</c> plan arrays.</param>
/// <param name="width">Image width used to derive the number of tiles in x.</param>
/// <param name="height">Image height used to derive the number of tiles in y.</param>
/// <param name="tileSize">Tile edge length.</param>
/// <param name="maxShift">Margin added on each side of a tile and removed from each image border.</param>
/// <returns>The larger of the forward and backward sizes.</returns>
private SizeT InitFFT(int i, int width, int height, int tileSize, int maxShift)
{
    // Each transformed block is a tile padded by maxShift on both sides.
    int paddedTile = tileSize + 2 * maxShift;
    int tilesAcross = (width - maxShift * 2) / tileSize;
    int tilesDown = (height - maxShift * 2) / tileSize;
    int spectrumWidth = paddedTile / 2 + 1;

    int rank = 2;
    int[] extents = new int[] { paddedTile, paddedTile };
    int batchCount = tilesAcross * tilesDown;

    // Layout of the real-valued side and of the complex-valued side.
    int[] realEmbed = new int[] { 1, paddedTile };
    int[] complexEmbed = new int[] { 1, spectrumWidth };
    int realDist = paddedTile * paddedTile;
    int complexDist = paddedTile * spectrumWidth;
    int realStride = 1;
    int complexStride = 1;

    cufftHandle forwardHandle = cufftHandle.Create();
    cufftHandle backwardHandle = cufftHandle.Create();

    SizeT forwardSize = new SizeT();
    SizeT backwardSize = new SizeT();

    forward[i] = new CudaFFTPlanMany(forwardHandle, rank, extents, batchCount, cufftType.R2C, realEmbed, realStride, realDist, complexEmbed, complexStride, complexDist, ref forwardSize, false);
    // The backward plan swaps the input/output layout arguments of the forward plan.
    backward[i] = new CudaFFTPlanMany(backwardHandle, rank, extents, batchCount, cufftType.C2R, complexEmbed, complexStride, complexDist, realEmbed, realStride, realDist, ref backwardSize, false);

    string report = "Size FFT forward: " + forwardSize.ToString() + " backward: " + backwardSize.ToString();
    Console.WriteLine(report);

    if (forwardSize > backwardSize)
    {
        return forwardSize;
    }
    return backwardSize;
}
/// <summary>
/// Sets up the working image, loads the kernels from "kernel.ptx" and creates the forward (R2C)
/// and backward (C2R) 2D FFT plans sized to the ROI of <paramref name="img"/>.
/// </summary>
/// <param name="img">Image whose ROI width and height define the working dimensions.</param>
/// <param name="ctx">Context used to load the module and construct the kernels.</param>
public PreAlignment(NPPImage_32fC1 img, CudaContext ctx)
{
    width = img.WidthRoi;
    height = img.HeightRoi;
    imgToTrackRotated = new NPPImage_32fC1(width, height);

    CUmodule module = ctx.LoadModule("kernel.ptx");
    int spectrumWidth = width / 2 + 1;

    conjKernel = new conjugateComplexMulKernel(ctx, module);
    fourierFilterKernel = new fourierFilterKernel(ctx, module);
    fftshiftKernel = new fftshiftKernel(ctx, module);
    squaredSumKernel = new squaredSumKernel(ctx, module);
    boxFilterXKernel = new boxFilterWithBorderXKernel(ctx, module);
    boxFilterYKernel = new boxFilterWithBorderYKernel(ctx, module);
    normalizedCCKernel = new normalizedCCKernel(ctx, module);
    findMinimumKernel = new findMinimumKernel(ctx, module);

    int rank = 2;
    int[] extents = new int[] { height, width };
    int batchCount = 1;

    // The real-side layout is derived from the image pitch divided by 4
    // (presumably bytes per 32-bit float element; confirm against NPPImage_32fC1).
    int[] realEmbed = new int[] { 1, imgToTrackRotated.Pitch / 4 };
    int[] complexEmbed = new int[] { 1, spectrumWidth };
    // Evaluated left to right as (height * Pitch) / 4; keep this order.
    int realDist = height * imgToTrackRotated.Pitch / 4;
    int complexDist = height * spectrumWidth;
    int realStride = 1;
    int complexStride = 1;

    cufftHandle forwardHandle = cufftHandle.Create();
    cufftHandle backwardHandle = cufftHandle.Create();

    SizeT forwardSize = new SizeT();
    SizeT backwardSize = new SizeT();

    forward = new CudaFFTPlanMany(forwardHandle, rank, extents, batchCount, cufftType.R2C, realEmbed, realStride, realDist, complexEmbed, complexStride, complexDist, ref forwardSize, false);
    // The backward plan swaps the input/output layout arguments of the forward plan.
    backward = new CudaFFTPlanMany(backwardHandle, rank, extents, batchCount, cufftType.C2R, complexEmbed, complexStride, complexDist, realEmbed, realStride, realDist, ref backwardSize, false);

    // Keep the larger of the two reported sizes.
    if (forwardSize > backwardSize)
    {
        FFTBufferSize = forwardSize;
    }
    else
    {
        FFTBufferSize = backwardSize;
    }
}
/// <summary>
/// Builds a one-dimensional FFT plan and registers it in <c>Plans</c>.
/// </summary>
/// <param name="fftType">Type of FFT.</param>
/// <param name="dataType">Data type.</param>
/// <param name="nx">The number of samples in x dimension.</param>
/// <param name="batchSize">Size of batch. Values of 1 or less use the plain 1D plan call; larger values use the "plan many" call.</param>
/// <param name="istride">Input stride; only passed on when <paramref name="batchSize"/> is greater than 1.</param>
/// <param name="idist">Input distance, also used as the single inembed entry; only passed on when <paramref name="batchSize"/> is greater than 1.</param>
/// <param name="ostride">Output stride; only passed on when <paramref name="batchSize"/> is greater than 1.</param>
/// <param name="odist">Output distance, also used as the single onembed entry; only passed on when <paramref name="batchSize"/> is greater than 1.</param>
/// <returns>The newly created plan.</returns>
/// <exception cref="CudafyHostException">Thrown when the driver reports any result other than success.</exception>
public override FFTPlan1D Plan1D(eFFTType fftType, eDataType dataType, int nx, int batchSize, int istride, int idist, int ostride, int odist)
{
    // The element sizes are produced by VerifyTypes but are not needed for plan creation.
    int inputSize, outputSize;
    CUFFTType nativeType = VerifyTypes(fftType, dataType, out inputSize, out outputSize);

    cufftHandle nativeHandle = new cufftHandle();
    CUFFTResult status;
    if (batchSize > 1)
    {
        int[] extents = new int[] { nx };
        int[] inputEmbed = new int[] { idist };
        int[] outputEmbed = new int[] { odist };
        status = _driver.cufftPlanMany(ref nativeHandle, 1, extents,
                                       inputEmbed, istride, idist,
                                       outputEmbed, ostride, odist,
                                       nativeType, batchSize);
    }
    else
    {
        // The stride and distance arguments are not used on this path.
        status = _driver.cufftPlan1d(ref nativeHandle, nx, nativeType, batchSize);
    }

    if (status != CUFFTResult.Success)
    {
        throw new CudafyHostException(status.ToString());
    }

    // Pair the public plan object with the bookkeeping record that carries the native handle.
    FFTPlan1D plan = new FFTPlan1D(nx, batchSize, this);
    FFTPlan1DEx extended = new FFTPlan1DEx(plan);
    extended.CudaFFTHandle = nativeHandle;
    extended.CudaFFTType = nativeType;
    extended.DataType = dataType;

    Plans.Add(plan, extended);
    return plan;
}
// Externally implemented (extern) entry point: takes a plan handle plus input and output device
// pointers and returns a CUFFTResult status. It is called by the public cufftExecZ2D wrapper.
// NOTE(review): the interop attribute is not visible in this excerpt; presumably this binds to the
// native cuFFT Z2D (double-precision complex-to-real) execution function. Confirm.
private static extern CUFFTResult cufftExecZ2D_ext(cufftHandle plan, CUdeviceptr idata, CUdeviceptr odata);
/// <summary>
/// Forwards to <c>cufftSetCompatibilityMode_ext</c> and returns its result unchanged.
/// </summary>
/// <param name="plan">Plan handle to configure.</param>
/// <param name="mode">Compatibility mode to apply.</param>
/// <returns>The result reported by the underlying call.</returns>
public CUFFTResult cufftSetCompatibilityMode(cufftHandle plan, CUFFTCompatibility mode)
{
    CUFFTResult status = cufftSetCompatibilityMode_ext(plan, mode);
    return status;
}
// Externally implemented (extern) public static entry point: takes a plan handle and a stream
// and returns a CUFFTResult status.
// NOTE(review): the interop attribute is not visible in this excerpt; presumably this binds
// directly to the native cuFFT function that associates a plan with a stream. Confirm.
public static extern CUFFTResult cufftSetStream(cufftHandle p, cudaStream stream);
/// <summary>
/// Forwards to <c>cufftPlan1d_ext</c> and returns its result unchanged.
/// </summary>
/// <param name="plan">Handle passed by reference to the underlying call.</param>
/// <param name="nx">Transform size.</param>
/// <param name="type">Transform type.</param>
/// <param name="batch">Batch count.</param>
/// <returns>The result reported by the underlying call.</returns>
public CUFFTResult cufftPlan1d(ref cufftHandle plan, int nx, CUFFTType type, int batch)
{
    CUFFTResult status = cufftPlan1d_ext(ref plan, nx, type, batch);
    return status;
}
/// <summary>
/// Forwards to <c>cufftExecZ2D_ext</c> and returns its result unchanged.
/// </summary>
/// <param name="plan">Plan handle to execute.</param>
/// <param name="idata">Device pointer to the input data.</param>
/// <param name="odata">Device pointer to the output data.</param>
/// <returns>The result reported by the underlying call.</returns>
public CUFFTResult cufftExecZ2D(cufftHandle plan, CUdeviceptr idata, CUdeviceptr odata)
{
    CUFFTResult status = cufftExecZ2D_ext(plan, idata, odata);
    return status;
}
/// <summary>
/// Runs the driver's R2C execution for the given plan and stores the outcome in <c>LastError</c>.
/// </summary>
/// <param name="plan">Plan handle to execute.</param>
/// <param name="input">Device pointer to the input data.</param>
/// <param name="output">Device pointer to the output data.</param>
public void ExecuteRealToComplex(cufftHandle plan, CUdeviceptr input, CUdeviceptr output)
{
    // No exception is raised here; callers inspect LastError.
    CUFFTResult status = _driver.cufftExecR2C(plan, input, output);
    LastError = status;
}
// Externally implemented (extern) entry point: takes a plan handle by reference, three integer
// sizes and a transform type, and returns a CUFFTResult status. It is called by the public
// cufftPlan3d wrapper.
// NOTE(review): the interop attribute is not visible in this excerpt; presumably this binds to
// the native cuFFT 3D plan creation function. Confirm.
private static extern CUFFTResult cufftPlan3d_ext(ref cufftHandle plan, int nx, int ny, int nz, CUFFTType type);
/// <summary>
/// Runs the driver's C2C execution for the given plan and stores the outcome in <c>LastError</c>.
/// </summary>
/// <param name="plan">Plan handle to execute.</param>
/// <param name="input">Device pointer to the input data.</param>
/// <param name="output">Device pointer to the output data.</param>
/// <param name="direction">Transform direction passed through to the driver.</param>
public void ExecuteComplexToComplex(cufftHandle plan, CUdeviceptr input, CUdeviceptr output, CUFFTDirection direction)
{
    // No exception is raised here; callers inspect LastError.
    CUFFTResult status = _driver.cufftExecC2C(plan, input, output, direction);
    LastError = status;
}
// Sample entry point: builds a random signal and filter on the host, pads them, copies them to
// the device and creates a 1D C2C plan.
// NOTE(review): the method then returns unconditionally, so the transform, kernel launch,
// comparison and cleanup code further down is currently unreachable.
static void Main(string[] args)
{
    // Init and select 1st device.
    CUDA cuda = new CUDA(0, true);

    // Load module (currently disabled, so func below is only a default-constructed CUfunction).
    //cuda.LoadModule(Path.Combine(Environment.CurrentDirectory, "simpleCUFFT.ptx"));
    CUfunction func = new CUfunction();// cuda.GetModuleFunction("ComplexPointwiseMulAndScale");

    // The filter size is assumed to be a number smaller than the signal size
    const int SIGNAL_SIZE = 50;
    const int FILTER_KERNEL_SIZE = 11;

    // Allocate host memory for the signal
    Float2[] h_signal = new Float2[SIGNAL_SIZE];

    // Initialize the memory for the signal: random x component, zero y component
    Random r = new Random();
    for (int i = 0; i < SIGNAL_SIZE; ++i)
    {
        h_signal[i].x = r.Next() / (float)int.MaxValue;
        h_signal[i].y = 0;
    }

    // Allocate host memory for the filter
    Float2[] h_filter_kernel = new Float2[FILTER_KERNEL_SIZE];

    // Initialize the memory for the filter: random x component, zero y component
    for (int i = 0; i < FILTER_KERNEL_SIZE; ++i)
    {
        h_filter_kernel[i].x = r.Next() / (float)int.MaxValue;
        h_filter_kernel[i].y = 0;
    }

    // Pad signal and filter kernel; PadData returns the padded length
    Float2[] h_padded_signal;
    Float2[] h_padded_filter_kernel;
    int new_size = PadData(h_signal, out h_padded_signal, SIGNAL_SIZE, h_filter_kernel, out h_padded_filter_kernel, FILTER_KERNEL_SIZE);

    // Allocate device memory for signal
    // Copy host memory to device
    CUdeviceptr d_signal = cuda.CopyHostToDevice <Float2>(h_padded_signal);

    // Allocate device memory for filter kernel
    // Copy host memory to device
    CUdeviceptr d_filter_kernel = cuda.CopyHostToDevice <Float2>(h_padded_filter_kernel);

    // CUFFT plan
    // The plan is created through the static driver call; neither handle nor fftres is read
    // afterwards, and the fft.Plan1D alternative is disabled.
    CUFFT fft = new CUFFT(cuda);
    cufftHandle handle = new cufftHandle();
    CUFFTResult fftres = CUFFTDriver.cufftPlan1d(ref handle, new_size, CUFFTType.C2C, 1);
    //fft.Plan1D(new_size, CUFFTType.C2C, 1);

    // NOTE(review): unconditional early exit; this looks like a leftover debugging stop.
    // Everything below is unreachable, and the device buffers allocated above are not freed
    // on this path.
    return;

    // Transform signal and kernel
    fft.ExecuteComplexToComplex(d_signal, d_signal, CUFFTDirection.Forward);
    fft.ExecuteComplexToComplex(d_filter_kernel, d_filter_kernel, CUFFTDirection.Forward);

    // Multiply the coefficients together and normalize the result
    // ComplexPointwiseMulAndScale<<<32, 256>>>(d_signal, d_filter_kernel, new_size, 1.0f / new_size);
    cuda.SetFunctionBlockShape(func, 256, 1, 1);
    // NOTE(review): the device pointers are cast to uint while the parameter offsets advance by
    // IntPtr.Size; presumably this truncates pointers in a 64-bit process. Confirm.
    cuda.SetParameter(func, 0, (uint)d_signal.Pointer);
    cuda.SetParameter(func, IntPtr.Size, (uint)d_filter_kernel.Pointer);
    cuda.SetParameter(func, IntPtr.Size * 2, (uint)new_size);
    cuda.SetParameter(func, IntPtr.Size * 2 + 4, 1.0f / new_size);
    cuda.SetParameterSize(func, (uint)(IntPtr.Size * 2 + 8));
    cuda.Launch(func, 32, 1);

    // Transform signal back
    fft.ExecuteComplexToComplex(d_signal, d_signal, CUFFTDirection.Inverse);

    // Copy device memory to host
    // h_convolved_signal refers to the same array as h_padded_signal, so the copy overwrites it.
    Float2[] h_convolved_signal = h_padded_signal;
    cuda.CopyDeviceToHost <Float2>(d_signal, h_convolved_signal);

    // Allocate host memory for the convolution result
    Float2[] h_convolved_signal_ref = new Float2[SIGNAL_SIZE];

    // Convolve on the host
    Convolve(h_signal, SIGNAL_SIZE, h_filter_kernel, FILTER_KERNEL_SIZE, h_convolved_signal_ref);

    // Check result
    // NOTE(review): the count is 2 * SIGNAL_SIZE, presumably the number of float components
    // (x and y per element). Verify against cutCompareL2fe.
    bool res = cutCompareL2fe(h_convolved_signal_ref, h_convolved_signal, 2 * SIGNAL_SIZE, 1e-5f);
    Console.WriteLine("Test {0}", (true == res) ? "PASSED" : "FAILED");

    // Destroy CUFFT context
    fft.Destroy();

    // Cleanup memory
    cuda.Free(d_signal);
    cuda.Free(d_filter_kernel);
}
/// <summary>
/// Asks the driver to destroy the given plan and stores the outcome in <c>LastError</c>.
/// </summary>
/// <param name="plan">Plan handle to destroy.</param>
public void Destroy(cufftHandle plan)
{
    // No exception is raised here; callers inspect LastError.
    CUFFTResult status = _driver.cufftDestroy(plan);
    LastError = status;
}
/// <summary>
/// Replaces the stored plan handle with a fresh one, asks the driver to create a 3D plan into it
/// and stores the outcome in <c>LastError</c>.
/// </summary>
/// <param name="nx">Size in the x dimension.</param>
/// <param name="ny">Size in the y dimension.</param>
/// <param name="nz">Size in the z dimension.</param>
/// <param name="type">Transform type.</param>
/// <returns>The handle stored on this instance after the driver call.</returns>
public cufftHandle Plan3D(int nx, int ny, int nz, CUFFTType type)
{
    // The previously stored handle is overwritten here; it is not destroyed by this method.
    plan = new cufftHandle();
    LastError = _driver.cufftPlan3d(ref plan, nx, ny, nz, type);

    return plan;
}
/// <summary>
/// Replaces the stored plan handle with a fresh one, asks the driver to create a 1D plan into it
/// and stores the outcome in <c>LastError</c>.
/// </summary>
/// <param name="nx">Transform size.</param>
/// <param name="type">Transform type.</param>
/// <param name="batch">Batch count.</param>
/// <returns>The handle stored on this instance after the driver call.</returns>
public cufftHandle Plan1D(int nx, CUFFTType type, int batch)
{
    // The previously stored handle is overwritten here; it is not destroyed by this method.
    plan = new cufftHandle();
    LastError = _driver.cufftPlan1d(ref plan, nx, type, batch);

    return plan;
}
// Externally implemented (extern) entry point: takes a plan handle, input and output device
// pointers and a transform direction, and returns a CUFFTResult status. It is called by the
// public cufftExecZ2Z wrapper.
// NOTE(review): the interop attribute is not visible in this excerpt; presumably this binds to
// the native cuFFT Z2Z (double-precision complex-to-complex) execution function. Confirm.
private static extern CUFFTResult cufftExecZ2Z_ext(cufftHandle plan, CUdeviceptr idata, CUdeviceptr odata, CUFFTDirection direction);
// Externally implemented (extern) entry point, managed-array overload: the size and embedding
// arguments are int[] parameters marked [In, Out], so they are marshalled in both directions.
// Returns a CUFFTResult status. The public cufftPlanMany wrapper passes int[] arguments, which
// match this overload.
// NOTE(review): the interop attribute is not visible in this excerpt; presumably this binds to
// the native cuFFT "plan many" function. Confirm.
private static extern CUFFTResult cufftPlanMany_ext(ref cufftHandle plan, int rank, [In, Out] int[] n, [In, Out] int[] inembed, int istride, int idist, [In, Out] int[] onembed, int ostride, int odist, CUFFTType type, int batch);
// Externally implemented (extern) entry point: takes a plan handle by reference, a transform
// size, a transform type and a batch count, and returns a CUFFTResult status. It is called by
// the public cufftPlan1d wrapper.
// NOTE(review): the interop attribute is not visible in this excerpt; presumably this binds to
// the native cuFFT 1D plan creation function. Confirm.
private static extern CUFFTResult cufftPlan1d_ext(ref cufftHandle plan, int nx, CUFFTType type, int batch);
// Externally implemented (extern) entry point: takes a plan handle and a compatibility mode and
// returns a CUFFTResult status. It is called by the public cufftSetCompatibilityMode wrapper.
// NOTE(review): the interop attribute is not visible in this excerpt; presumably this binds to
// the native cuFFT function of the same base name. Confirm.
private static extern CUFFTResult cufftSetCompatibilityMode_ext(cufftHandle plan, CUFFTCompatibility mode);
// Externally implemented (extern) entry point, raw-pointer overload: the size and embedding
// arguments are passed as IntPtr values instead of managed arrays. Returns a CUFFTResult status.
// NOTE(review): the interop attribute is not visible in this excerpt; presumably this binds to
// the native cuFFT "plan many" function, with the IntPtr form allowing a null (IntPtr.Zero)
// embedding argument. Confirm.
private static extern CUFFTResult cufftPlanMany_ext(ref cufftHandle plan, int rank, IntPtr n, IntPtr inembed, int istride, int idist, IntPtr onembed, int ostride, int odist, CUFFTType type, int batch);
/// <summary>
/// Runs the driver's C2R execution for the given plan and stores the outcome in <c>LastError</c>.
/// </summary>
/// <param name="plan">Plan handle to execute.</param>
/// <param name="input">Device pointer to the input data.</param>
/// <param name="output">Device pointer to the output data.</param>
public void ExecuteComplexToReal(cufftHandle plan, CUdeviceptr input, CUdeviceptr output)
{
    // No exception is raised here; callers inspect LastError.
    CUFFTResult status = _driver.cufftExecC2R(plan, input, output);
    LastError = status;
}
// Externally implemented (extern) entry point: takes a plan handle and a stream and returns a
// CUFFTResult status. It is called by the public cufftSetStream wrapper.
// NOTE(review): the interop attribute is not visible in this excerpt; presumably this binds to
// the native cuFFT function that associates a plan with a stream. Confirm.
private static extern CUFFTResult cufftSetStream_ext(cufftHandle p, cudaStream stream);
// Externally implemented (extern) entry point: takes a plan handle and returns a CUFFTResult
// status. It is called by the public cufftDestroy wrapper.
// NOTE(review): the interop attribute is not visible in this excerpt; presumably this binds to
// the native cuFFT plan destruction function. Confirm.
private static extern CUFFTResult cufftDestroy_ext(cufftHandle plan);
/// <summary>
/// Forwards to <c>cufftDestroy_ext</c> and returns its result unchanged.
/// </summary>
/// <param name="plan">Plan handle to destroy.</param>
/// <returns>The result reported by the underlying call.</returns>
public CUFFTResult cufftDestroy(cufftHandle plan)
{
    CUFFTResult status = cufftDestroy_ext(plan);
    return status;
}
/// <summary>
/// Forwards to <c>cufftPlan3d_ext</c> and returns its result unchanged.
/// </summary>
/// <param name="plan">Handle passed by reference to the underlying call.</param>
/// <param name="nx">Size in the x dimension.</param>
/// <param name="ny">Size in the y dimension.</param>
/// <param name="nz">Size in the z dimension.</param>
/// <param name="type">Transform type.</param>
/// <returns>The result reported by the underlying call.</returns>
public CUFFTResult cufftPlan3d(ref cufftHandle plan, int nx, int ny, int nz, CUFFTType type)
{
    CUFFTResult status = cufftPlan3d_ext(ref plan, nx, ny, nz, type);
    return status;
}
/// <summary>
/// Forwards to <c>cufftExecZ2Z_ext</c> and returns its result unchanged.
/// </summary>
/// <param name="plan">Plan handle to execute.</param>
/// <param name="idata">Device pointer to the input data.</param>
/// <param name="odata">Device pointer to the output data.</param>
/// <param name="direction">Transform direction.</param>
/// <returns>The result reported by the underlying call.</returns>
public CUFFTResult cufftExecZ2Z(cufftHandle plan, CUdeviceptr idata, CUdeviceptr odata, CUFFTDirection direction)
{
    CUFFTResult status = cufftExecZ2Z_ext(plan, idata, odata, direction);
    return status;
}
/// <summary>
/// Forwards to <c>cufftPlanMany_ext</c> with managed <c>int[]</c> arguments and returns its result unchanged.
/// </summary>
/// <param name="plan">Handle passed by reference to the underlying call.</param>
/// <param name="rank">Number of transform dimensions.</param>
/// <param name="n">Size of each dimension.</param>
/// <param name="inembed">Input embedding array.</param>
/// <param name="istride">Input stride.</param>
/// <param name="idist">Input distance.</param>
/// <param name="onembed">Output embedding array.</param>
/// <param name="ostride">Output stride.</param>
/// <param name="odist">Output distance.</param>
/// <param name="type">Transform type.</param>
/// <param name="batch">Batch count.</param>
/// <returns>The result reported by the underlying call.</returns>
public CUFFTResult cufftPlanMany(ref cufftHandle plan, int rank, int[] n, int[] inembed, int istride, int idist, int[] onembed, int ostride, int odist, CUFFTType type, int batch)
{
    CUFFTResult status = cufftPlanMany_ext(
        ref plan, rank, n,
        inembed, istride, idist,
        onembed, ostride, odist,
        type, batch);
    return status;
}
/// <summary>
/// Forwards to <c>cufftSetStream_ext</c> and returns its result unchanged.
/// </summary>
/// <param name="plan">Plan handle to configure.</param>
/// <param name="stream">Stream to pass to the underlying call.</param>
/// <returns>The result reported by the underlying call.</returns>
public CUFFTResult cufftSetStream(cufftHandle plan, cudaStream stream)
{
    CUFFTResult status = cufftSetStream_ext(plan, stream);
    return status;
}
// Externally implemented (extern) public static entry point: takes a plan handle by reference,
// three integer sizes and a transform type, and returns a CUFFTResult status.
// NOTE(review): the interop attribute is not visible in this excerpt; presumably this binds
// directly to the native cuFFT 3D plan creation function. Confirm.
public static extern CUFFTResult cufftPlan3d(ref cufftHandle plan, int nx, int ny, int nz, CUFFTType type);