示例#1
0
        /// <summary>
        /// Builds a three-dimensional FFT plan and registers it in <c>Plans</c>.
        /// </summary>
        /// <param name="fftType">Kind of transform to plan.</param>
        /// <param name="dataType">Element data type of the transform.</param>
        /// <param name="nx">Sample count along x.</param>
        /// <param name="ny">Sample count along y.</param>
        /// <param name="nz">Sample count along z.</param>
        /// <param name="batchSize">Batch size; values above 1 go through <c>cufftPlanMany</c>, anything else through <c>cufftPlan3d</c>.</param>
        /// <returns>The newly created and registered plan.</returns>
        /// <exception cref="CudafyHostException">Thrown when the driver call returns anything other than <c>CUFFTResult.Success</c>.</exception>
        public override FFTPlan3D Plan3D(eFFTType fftType, eDataType dataType, int nx, int ny, int nz, int batchSize)
        {
            int inputSize, outputSize;
            CUFFTType nativeType = VerifyTypes(fftType, dataType, out inputSize, out outputSize);
            cufftHandle nativeHandle = new cufftHandle();

            // Pick the planner: batched requests need the "many" entry point.
            CUFFTResult status = (batchSize > 1)
                ? _driver.cufftPlanMany(ref nativeHandle, 3, new int[] { nx, ny, nz }, null, 1, 0, null, 1, 0, nativeType, batchSize)
                : _driver.cufftPlan3d(ref nativeHandle, nx, ny, nz, nativeType);

            if (status != CUFFTResult.Success)
            {
                throw new CudafyHostException(status.ToString());
            }

            // Pair the public plan object with its driver-side details and remember both.
            FFTPlan3D result = new FFTPlan3D(nx, ny, nz, batchSize, this);
            FFTPlan3DEx details = new FFTPlan3DEx(result)
            {
                CudaFFTHandle = nativeHandle,
                CudaFFTType = nativeType,
                DataType = dataType
            };

            Plans.Add(result, details);
            return result;
        }
示例#2
0
        /// <summary>
        /// Creates the forward (R2C) and backward (C2R) batched 2D FFT plans for slot <paramref name="i"/>
        /// and returns the larger of the two <c>SizeT</c> values the plan constructors received by reference.
        /// </summary>
        /// <param name="i">Index into the <c>forward</c> and <c>backward</c> plan arrays.</param>
        /// <param name="width">Image width used to derive the horizontal block count.</param>
        /// <param name="height">Image height used to derive the vertical block count.</param>
        /// <param name="tileSize">Tile edge length before padding.</param>
        /// <param name="maxShift">Padding added on each side of a tile and removed from each image border.</param>
        /// <returns>The greater of the forward and backward size values.</returns>
        private SizeT InitFFT(int i, int width, int height, int tileSize, int maxShift)
        {
            // A block is one tile padded by maxShift on both sides.
            int paddedTile = tileSize + 2 * maxShift;
            int tilesAcross = (width - maxShift * 2) / tileSize;
            int tilesDown = (height - maxShift * 2) / tileSize;
            int halfSpectrumWidth = paddedTile / 2 + 1;

            const int rank = 2;
            const int unitStride = 1;

            int[] extent = new int[] { paddedTile, paddedTile };
            int batchCount = tilesAcross * tilesDown;

            // Layout descriptors for the real side and the complex side of the transform.
            int[] realEmbed = new int[] { 1, paddedTile };
            int[] complexEmbed = new int[] { 1, halfSpectrumWidth };
            int realDist = paddedTile * paddedTile;
            int complexDist = paddedTile * halfSpectrumWidth;

            cufftHandle forwardHandle = cufftHandle.Create();
            cufftHandle backwardHandle = cufftHandle.Create();

            SizeT forwardSize = new SizeT();
            SizeT backwardSize = new SizeT();

            // The backward plan swaps the real/complex descriptors of the forward plan.
            forward[i] = new CudaFFTPlanMany(forwardHandle, rank, extent, batchCount, cufftType.R2C,
                                             realEmbed, unitStride, realDist,
                                             complexEmbed, unitStride, complexDist,
                                             ref forwardSize, false);
            backward[i] = new CudaFFTPlanMany(backwardHandle, rank, extent, batchCount, cufftType.C2R,
                                              complexEmbed, unitStride, complexDist,
                                              realEmbed, unitStride, realDist,
                                              ref backwardSize, false);

            string report = string.Concat("Size FFT forward: ", forwardSize.ToString(), " backward: ", backwardSize.ToString());
            Console.WriteLine(report);

            if (forwardSize > backwardSize)
            {
                return forwardSize;
            }

            return backwardSize;
        }
示例#3
0
        /// <summary>
        /// Sets up pre-alignment state for an image: records its ROI size, allocates the rotated tracking
        /// image, loads the kernels from "kernel.ptx" and creates the forward/backward 2D FFT plans.
        /// </summary>
        /// <param name="img">Image whose ROI width and height size the buffers and plans.</param>
        /// <param name="ctx">CUDA context used to load the module and construct the kernels.</param>
        public PreAlignment(NPPImage_32fC1 img, CudaContext ctx)
        {
            width = img.WidthRoi;
            height = img.HeightRoi;
            imgToTrackRotated = new NPPImage_32fC1(width, height);

            CUmodule kernelModule = ctx.LoadModule("kernel.ptx");

            // Fourier-domain kernels.
            conjKernel = new conjugateComplexMulKernel(ctx, kernelModule);
            fourierFilterKernel = new fourierFilterKernel(ctx, kernelModule);
            fftshiftKernel = new fftshiftKernel(ctx, kernelModule);

            // Kernels used for the normalized cross-correlation and minimum search.
            squaredSumKernel = new squaredSumKernel(ctx, kernelModule);
            boxFilterXKernel = new boxFilterWithBorderXKernel(ctx, kernelModule);
            boxFilterYKernel = new boxFilterWithBorderYKernel(ctx, kernelModule);
            normalizedCCKernel = new normalizedCCKernel(ctx, kernelModule);
            findMinimumKernel = new findMinimumKernel(ctx, kernelModule);

            // Description of a single (non-batched) 2D transform over the whole image.
            const int rank = 2;
            const int batchCount = 1;
            const int unitStride = 1;

            int halfSpectrumWidth = width / 2 + 1;
            int[] extent = new int[] { height, width };

            // Real-side row length comes from the image pitch divided by 4
            // (presumably bytes per 32-bit float element; confirm against NPPImage_32fC1).
            int[] realEmbed = new int[] { 1, imgToTrackRotated.Pitch / 4 };
            int[] complexEmbed = new int[] { 1, halfSpectrumWidth };
            int realDist = height * imgToTrackRotated.Pitch / 4;
            int complexDist = height * halfSpectrumWidth;

            cufftHandle forwardHandle = cufftHandle.Create();
            cufftHandle backwardHandle = cufftHandle.Create();

            SizeT forwardSize = new SizeT();
            SizeT backwardSize = new SizeT();

            // The backward plan swaps the real/complex descriptors of the forward plan.
            forward = new CudaFFTPlanMany(forwardHandle, rank, extent, batchCount, cufftType.R2C,
                                          realEmbed, unitStride, realDist,
                                          complexEmbed, unitStride, complexDist,
                                          ref forwardSize, false);
            backward = new CudaFFTPlanMany(backwardHandle, rank, extent, batchCount, cufftType.C2R,
                                           complexEmbed, unitStride, complexDist,
                                           realEmbed, unitStride, realDist,
                                           ref backwardSize, false);

            // Keep the larger of the two reported sizes.
            if (forwardSize > backwardSize)
            {
                FFTBufferSize = forwardSize;
            }
            else
            {
                FFTBufferSize = backwardSize;
            }
        }
示例#4
0
        /// <summary>
        /// Builds a one-dimensional FFT plan and registers it in <c>Plans</c>.
        /// </summary>
        /// <param name="fftType">Kind of transform to plan.</param>
        /// <param name="dataType">Element data type of the transform.</param>
        /// <param name="nx">Number of samples per transform.</param>
        /// <param name="batchSize">Batch size; values above 1 go through <c>cufftPlanMany</c>, anything else through <c>cufftPlan1d</c>.</param>
        /// <param name="istride">Input stride; only forwarded on the batched path.</param>
        /// <param name="idist">Input distance; only forwarded on the batched path, where it is also used as the single inembed entry.</param>
        /// <param name="ostride">Output stride; only forwarded on the batched path.</param>
        /// <param name="odist">Output distance; only forwarded on the batched path, where it is also used as the single onembed entry.</param>
        /// <returns>The newly created and registered plan.</returns>
        /// <exception cref="CudafyHostException">Thrown when the driver call returns anything other than <c>CUFFTResult.Success</c>.</exception>
        public override FFTPlan1D Plan1D(eFFTType fftType, eDataType dataType, int nx, int batchSize, int istride, int idist, int ostride, int odist)
        {
            int inputSize, outputSize;
            CUFFTType nativeType = VerifyTypes(fftType, dataType, out inputSize, out outputSize);
            cufftHandle nativeHandle = new cufftHandle();
            CUFFTResult status;

            if (batchSize > 1)
            {
                // Batched path: describe the layout explicitly.
                int[] extent = new int[] { nx };
                int[] inputEmbed = new int[] { idist };
                int[] outputEmbed = new int[] { odist };

                status = _driver.cufftPlanMany(ref nativeHandle, 1, extent,
                                               inputEmbed, istride, idist,
                                               outputEmbed, ostride, odist,
                                               nativeType, batchSize);
            }
            else
            {
                // Single-transform path: the stride/distance arguments are not forwarded.
                status = _driver.cufftPlan1d(ref nativeHandle, nx, nativeType, batchSize);
            }

            if (status != CUFFTResult.Success)
            {
                throw new CudafyHostException(status.ToString());
            }

            // Pair the public plan object with its driver-side details and remember both.
            FFTPlan1D result = new FFTPlan1D(nx, batchSize, this);
            FFTPlan1DEx details = new FFTPlan1DEx(result)
            {
                CudaFFTHandle = nativeHandle,
                CudaFFTType = nativeType,
                DataType = dataType
            };

            Plans.Add(result, details);
            return result;
        }
示例#5
0
 // External declaration (no managed body) of cufftExecZ2D_ext: takes a plan handle plus input and
 // output device pointers and returns a CUFFTResult status.
 // NOTE(review): presumably P/Invoke-bound to the native cuFFT library; the import attribute is not visible here - confirm.
 private static extern CUFFTResult cufftExecZ2D_ext(cufftHandle plan, CUdeviceptr idata, CUdeviceptr odata);
示例#6
0
 /// <summary>
 /// Forwards a compatibility-mode change to the external <c>cufftSetCompatibilityMode_ext</c> routine.
 /// </summary>
 /// <param name="plan">Plan handle, passed through unchanged.</param>
 /// <param name="mode">Compatibility mode, passed through unchanged.</param>
 /// <returns>The status code returned by the external routine.</returns>
 public CUFFTResult cufftSetCompatibilityMode(cufftHandle plan, CUFFTCompatibility mode)
 {
     CUFFTResult status = cufftSetCompatibilityMode_ext(plan, mode);
     return status;
 }
 // Public external declaration (no managed body) of cufftSetStream: takes a plan handle and a
 // cudaStream and returns a CUFFTResult status.
 // NOTE(review): presumably P/Invoke-bound to the native cuFFT library; the import attribute is not visible here - confirm.
 public static extern CUFFTResult cufftSetStream(cufftHandle p, cudaStream stream);
示例#8
0
 /// <summary>
 /// Forwards a 1D plan request to the external <c>cufftPlan1d_ext</c> routine.
 /// </summary>
 /// <param name="plan">Plan handle, passed by reference to the external routine.</param>
 /// <param name="nx">Transform size, passed through unchanged.</param>
 /// <param name="type">Transform type, passed through unchanged.</param>
 /// <param name="batch">Batch count, passed through unchanged.</param>
 /// <returns>The status code returned by the external routine.</returns>
 public CUFFTResult cufftPlan1d(ref cufftHandle plan, int nx, CUFFTType type, int batch)
 {
     CUFFTResult status = cufftPlan1d_ext(ref plan, nx, type, batch);
     return status;
 }
示例#9
0
 /// <summary>
 /// Forwards a Z2D execution request to the external <c>cufftExecZ2D_ext</c> routine.
 /// </summary>
 /// <param name="plan">Plan handle, passed through unchanged.</param>
 /// <param name="idata">Input device pointer, passed through unchanged.</param>
 /// <param name="odata">Output device pointer, passed through unchanged.</param>
 /// <returns>The status code returned by the external routine.</returns>
 public CUFFTResult cufftExecZ2D(cufftHandle plan, CUdeviceptr idata, CUdeviceptr odata)
 {
     CUFFTResult status = cufftExecZ2D_ext(plan, idata, odata);
     return status;
 }
示例#10
0
 /// <summary>
 /// Runs the driver's <c>cufftExecR2C</c> on the given plan and stores the returned status in <c>LastError</c>.
 /// </summary>
 /// <param name="plan">Plan handle handed to the driver.</param>
 /// <param name="input">Input device pointer handed to the driver.</param>
 /// <param name="output">Output device pointer handed to the driver.</param>
 public void ExecuteRealToComplex(cufftHandle plan, CUdeviceptr input, CUdeviceptr output)
 {
     // The status is recorded rather than thrown; callers inspect LastError.
     LastError = _driver.cufftExecR2C(plan, input, output);
 }
示例#11
0
 // External declaration (no managed body) of cufftPlan3d_ext: receives the plan handle by reference,
 // three int dimensions and a CUFFTType, and returns a CUFFTResult status.
 // NOTE(review): presumably P/Invoke-bound to the native cuFFT library; the import attribute is not visible here - confirm.
 private static extern CUFFTResult cufftPlan3d_ext(ref cufftHandle plan, int nx, int ny, int nz, CUFFTType type);
示例#12
0
 /// <summary>
 /// Runs the driver's <c>cufftExecC2C</c> on the given plan and stores the returned status in <c>LastError</c>.
 /// </summary>
 /// <param name="plan">Plan handle handed to the driver.</param>
 /// <param name="input">Input device pointer handed to the driver.</param>
 /// <param name="output">Output device pointer handed to the driver.</param>
 /// <param name="direction">Transform direction handed to the driver.</param>
 public void ExecuteComplexToComplex(cufftHandle plan, CUdeviceptr input, CUdeviceptr output, CUFFTDirection direction)
 {
     // The status is recorded rather than thrown; callers inspect LastError.
     LastError = _driver.cufftExecC2C(plan, input, output, direction);
 }
示例#13
0
        /// <summary>
        /// Sample entry point: builds a random signal and filter kernel on the host, pads them,
        /// copies both to the device and creates a 1D complex-to-complex FFT plan.
        /// The method then returns unconditionally, so the transform, kernel launch, host
        /// comparison and cleanup code that follows the <c>return</c> is currently unreachable.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            // Init and select 1st device.
            CUDA cuda = new CUDA(0, true);

            // load module
            // (module loading is commented out, so func below is a default-constructed CUfunction,
            //  not the ComplexPointwiseMulAndScale kernel)
            //cuda.LoadModule(Path.Combine(Environment.CurrentDirectory, "simpleCUFFT.ptx"));
            CUfunction func = new CUfunction();// cuda.GetModuleFunction("ComplexPointwiseMulAndScale");

            // The filter size is assumed to be a number smaller than the signal size
            const int SIGNAL_SIZE        = 50;
            const int FILTER_KERNEL_SIZE = 11;

            // Allocate host memory for the signal
            Float2[] h_signal = new Float2[SIGNAL_SIZE];
            // Initialize the memory for the signal: random x component, zero y component
            Random r = new Random();

            for (int i = 0; i < SIGNAL_SIZE; ++i)
            {
                h_signal[i].x = r.Next() / (float)int.MaxValue;
                h_signal[i].y = 0;
            }

            // Allocate host memory for the filter
            Float2[] h_filter_kernel = new Float2[FILTER_KERNEL_SIZE];
            // Initialize the memory for the filter: random x component, zero y component
            for (int i = 0; i < FILTER_KERNEL_SIZE; ++i)
            {
                h_filter_kernel[i].x = r.Next() / (float)int.MaxValue;
                h_filter_kernel[i].y = 0;
            }

            // Pad signal and filter kernel; PadData returns the padded length used for the plan below
            Float2[] h_padded_signal;
            Float2[] h_padded_filter_kernel;
            int      new_size = PadData(h_signal, out h_padded_signal, SIGNAL_SIZE,
                                        h_filter_kernel, out h_padded_filter_kernel, FILTER_KERNEL_SIZE);

            // Allocate device memory for signal
            // Copy host memory to device
            CUdeviceptr d_signal = cuda.CopyHostToDevice <Float2>(h_padded_signal);

            // Allocate device memory for filter kernel
            // Copy host memory to device
            CUdeviceptr d_filter_kernel = cuda.CopyHostToDevice <Float2>(h_padded_filter_kernel);

            // CUFFT plan: created directly through the static driver call; the returned status
            // (fftres) is not inspected anywhere in this method.
            CUFFT       fft    = new CUFFT(cuda);
            cufftHandle handle = new cufftHandle();
            CUFFTResult fftres = CUFFTDriver.cufftPlan1d(ref handle, new_size, CUFFTType.C2C, 1);

            //fft.Plan1D(new_size, CUFFTType.C2C, 1);


            // NOTE(review): unconditional early exit - everything below is unreachable, and the
            // device buffers and plan handle created above are not released before returning.
            return;

            // Transform signal and kernel
            fft.ExecuteComplexToComplex(d_signal, d_signal, CUFFTDirection.Forward);
            fft.ExecuteComplexToComplex(d_filter_kernel, d_filter_kernel, CUFFTDirection.Forward);

            // Multiply the coefficients together and normalize the result
            // ComplexPointwiseMulAndScale<<<32, 256>>>(d_signal, d_filter_kernel, new_size, 1.0f / new_size);
            // Parameters are laid out at offsets 0, IntPtr.Size, IntPtr.Size * 2 and IntPtr.Size * 2 + 4.
            // NOTE(review): the device pointers are cast to uint before being passed - confirm this is
            // intended when pointers are wider than 32 bits.
            cuda.SetFunctionBlockShape(func, 256, 1, 1);
            cuda.SetParameter(func, 0, (uint)d_signal.Pointer);
            cuda.SetParameter(func, IntPtr.Size, (uint)d_filter_kernel.Pointer);
            cuda.SetParameter(func, IntPtr.Size * 2, (uint)new_size);
            cuda.SetParameter(func, IntPtr.Size * 2 + 4, 1.0f / new_size);
            cuda.SetParameterSize(func, (uint)(IntPtr.Size * 2 + 8));
            cuda.Launch(func, 32, 1);

            // Transform signal back
            fft.ExecuteComplexToComplex(d_signal, d_signal, CUFFTDirection.Inverse);

            // Copy device memory to host (reuses the padded signal array as the destination)
            Float2[] h_convolved_signal = h_padded_signal;
            cuda.CopyDeviceToHost <Float2>(d_signal, h_convolved_signal);

            // Allocate host memory for the convolution result
            Float2[] h_convolved_signal_ref = new Float2[SIGNAL_SIZE];

            // Convolve on the host
            Convolve(h_signal, SIGNAL_SIZE,
                     h_filter_kernel, FILTER_KERNEL_SIZE,
                     h_convolved_signal_ref);

            // check result: compares the host reference against the device result with tolerance 1e-5
            bool res = cutCompareL2fe(h_convolved_signal_ref, h_convolved_signal, 2 * SIGNAL_SIZE, 1e-5f);

            Console.WriteLine("Test {0}", (true == res) ? "PASSED" : "FAILED");

            //Destroy CUFFT context
            fft.Destroy();

            // cleanup memory
            cuda.Free(d_signal);
            cuda.Free(d_filter_kernel);
        }
示例#14
0
 /// <summary>
 /// Asks the driver to destroy the given plan and stores the returned status in <c>LastError</c>.
 /// </summary>
 /// <param name="plan">Handle of the plan to destroy.</param>
 public void Destroy(cufftHandle plan)
 {
     // The status is recorded rather than thrown; callers inspect LastError.
     LastError = _driver.cufftDestroy(plan);
 }
示例#15
0
 /// <summary>
 /// Resets this instance's plan handle, asks the driver to create a 3D plan into it and
 /// stores the driver status in <c>LastError</c>.
 /// </summary>
 /// <param name="nx">Size of the x dimension, passed through to the driver.</param>
 /// <param name="ny">Size of the y dimension, passed through to the driver.</param>
 /// <param name="nz">Size of the z dimension, passed through to the driver.</param>
 /// <param name="type">Transform type, passed through to the driver.</param>
 /// <returns>The instance's plan handle after the driver call.</returns>
 public cufftHandle Plan3D(int nx, int ny, int nz, CUFFTType type)
 {
     // Start from a fresh handle so the driver writes into a clean value.
     plan = new cufftHandle();
     LastError = _driver.cufftPlan3d(ref plan, nx, ny, nz, type);

     return plan;
 }
示例#16
0
 /// <summary>
 /// Resets this instance's plan handle, asks the driver to create a 1D plan into it and
 /// stores the driver status in <c>LastError</c>.
 /// </summary>
 /// <param name="nx">Transform size, passed through to the driver.</param>
 /// <param name="type">Transform type, passed through to the driver.</param>
 /// <param name="batch">Batch count, passed through to the driver.</param>
 /// <returns>The instance's plan handle after the driver call.</returns>
 public cufftHandle Plan1D(int nx, CUFFTType type, int batch)
 {
     // Start from a fresh handle so the driver writes into a clean value.
     plan = new cufftHandle();
     LastError = _driver.cufftPlan1d(ref plan, nx, type, batch);

     return plan;
 }
示例#17
0
 // External declaration (no managed body) of cufftExecZ2Z_ext: takes a plan handle, input and output
 // device pointers and a transform direction, and returns a CUFFTResult status.
 // NOTE(review): presumably P/Invoke-bound to the native cuFFT library; the import attribute is not visible here - confirm.
 private static extern CUFFTResult cufftExecZ2Z_ext(cufftHandle plan, CUdeviceptr idata, CUdeviceptr odata, CUFFTDirection direction);
示例#18
0
 // External declaration (no managed body) of cufftPlanMany_ext, managed-array form: the dimension and
 // embed arguments are int[] parameters marked [In, Out]; the plan handle is passed by reference and a
 // CUFFTResult status is returned.
 // NOTE(review): presumably P/Invoke-bound to the native cuFFT library; the import attribute is not visible here - confirm.
 private static extern CUFFTResult cufftPlanMany_ext(ref cufftHandle plan, int rank, [In, Out] int[] n, [In, Out] int[] inembed, int istride, int idist, [In, Out] int[] onembed, int ostride, int odist, CUFFTType type, int batch);
示例#19
0
 // External declaration (no managed body) of cufftPlan1d_ext: receives the plan handle by reference,
 // a transform size, a CUFFTType and a batch count, and returns a CUFFTResult status.
 // NOTE(review): presumably P/Invoke-bound to the native cuFFT library; the import attribute is not visible here - confirm.
 private static extern CUFFTResult cufftPlan1d_ext(ref cufftHandle plan, int nx, CUFFTType type, int batch);
示例#20
0
 // External declaration (no managed body) of cufftSetCompatibilityMode_ext: takes a plan handle and a
 // CUFFTCompatibility value and returns a CUFFTResult status.
 // NOTE(review): presumably P/Invoke-bound to the native cuFFT library; the import attribute is not visible here - confirm.
 private static extern CUFFTResult cufftSetCompatibilityMode_ext(cufftHandle plan, CUFFTCompatibility mode);
示例#21
0
 // External declaration (no managed body) of cufftPlanMany_ext, raw-pointer form: the dimension and
 // embed arguments are IntPtr values instead of managed arrays; the plan handle is passed by reference
 // and a CUFFTResult status is returned.
 // NOTE(review): presumably P/Invoke-bound to the native cuFFT library; the import attribute is not visible here - confirm.
 private static extern CUFFTResult cufftPlanMany_ext(ref cufftHandle plan, int rank, IntPtr n, IntPtr inembed, int istride, int idist, IntPtr onembed, int ostride, int odist, CUFFTType type, int batch);
示例#22
0
 /// <summary>
 /// Runs the driver's <c>cufftExecC2R</c> on the given plan and stores the returned status in <c>LastError</c>.
 /// </summary>
 /// <param name="plan">Plan handle handed to the driver.</param>
 /// <param name="input">Input device pointer handed to the driver.</param>
 /// <param name="output">Output device pointer handed to the driver.</param>
 public void ExecuteComplexToReal(cufftHandle plan, CUdeviceptr input, CUdeviceptr output)
 {
     // The status is recorded rather than thrown; callers inspect LastError.
     LastError = _driver.cufftExecC2R(plan, input, output);
 }
示例#23
0
 // External declaration (no managed body) of cufftSetStream_ext: takes a plan handle and a cudaStream
 // and returns a CUFFTResult status.
 // NOTE(review): presumably P/Invoke-bound to the native cuFFT library; the import attribute is not visible here - confirm.
 private static extern CUFFTResult cufftSetStream_ext(cufftHandle p, cudaStream stream);
示例#24
0
 // External declaration (no managed body) of cufftDestroy_ext: takes a plan handle and returns a
 // CUFFTResult status.
 // NOTE(review): presumably P/Invoke-bound to the native cuFFT library; the import attribute is not visible here - confirm.
 private static extern CUFFTResult cufftDestroy_ext(cufftHandle plan);
示例#25
0
 /// <summary>
 /// Forwards a plan-destruction request to the external <c>cufftDestroy_ext</c> routine.
 /// </summary>
 /// <param name="plan">Plan handle, passed through unchanged.</param>
 /// <returns>The status code returned by the external routine.</returns>
 public CUFFTResult cufftDestroy(cufftHandle plan)
 {
     CUFFTResult status = cufftDestroy_ext(plan);
     return status;
 }
示例#26
0
 /// <summary>
 /// Forwards a 3D plan request to the external <c>cufftPlan3d_ext</c> routine.
 /// </summary>
 /// <param name="plan">Plan handle, passed by reference to the external routine.</param>
 /// <param name="nx">Size of the x dimension, passed through unchanged.</param>
 /// <param name="ny">Size of the y dimension, passed through unchanged.</param>
 /// <param name="nz">Size of the z dimension, passed through unchanged.</param>
 /// <param name="type">Transform type, passed through unchanged.</param>
 /// <returns>The status code returned by the external routine.</returns>
 public CUFFTResult cufftPlan3d(ref cufftHandle plan, int nx, int ny, int nz, CUFFTType type)
 {
     CUFFTResult status = cufftPlan3d_ext(ref plan, nx, ny, nz, type);
     return status;
 }
示例#27
0
 /// <summary>
 /// Forwards a Z2Z execution request to the external <c>cufftExecZ2Z_ext</c> routine.
 /// </summary>
 /// <param name="plan">Plan handle, passed through unchanged.</param>
 /// <param name="idata">Input device pointer, passed through unchanged.</param>
 /// <param name="odata">Output device pointer, passed through unchanged.</param>
 /// <param name="direction">Transform direction, passed through unchanged.</param>
 /// <returns>The status code returned by the external routine.</returns>
 public CUFFTResult cufftExecZ2Z(cufftHandle plan, CUdeviceptr idata, CUdeviceptr odata, CUFFTDirection direction)
 {
     CUFFTResult status = cufftExecZ2Z_ext(plan, idata, odata, direction);
     return status;
 }
示例#28
0
 /// <summary>
 /// Forwards a batched, multi-dimensional plan request to the external <c>cufftPlanMany_ext</c> routine.
 /// </summary>
 /// <param name="plan">Plan handle, passed by reference to the external routine.</param>
 /// <param name="rank">Number of dimensions, passed through unchanged.</param>
 /// <param name="n">Array of dimension sizes, passed through unchanged.</param>
 /// <param name="inembed">Input embed array, passed through unchanged.</param>
 /// <param name="istride">Input stride, passed through unchanged.</param>
 /// <param name="idist">Input distance, passed through unchanged.</param>
 /// <param name="onembed">Output embed array, passed through unchanged.</param>
 /// <param name="ostride">Output stride, passed through unchanged.</param>
 /// <param name="odist">Output distance, passed through unchanged.</param>
 /// <param name="type">Transform type, passed through unchanged.</param>
 /// <param name="batch">Batch count, passed through unchanged.</param>
 /// <returns>The status code returned by the external routine.</returns>
 public CUFFTResult cufftPlanMany(ref cufftHandle plan, int rank, int[] n, int[] inembed, int istride, int idist, int[] onembed, int ostride, int odist, CUFFTType type, int batch)
 {
     CUFFTResult status = cufftPlanMany_ext(ref plan, rank, n,
                                            inembed, istride, idist,
                                            onembed, ostride, odist,
                                            type, batch);
     return status;
 }
示例#29
0
 /// <summary>
 /// Forwards a stream assignment to the external <c>cufftSetStream_ext</c> routine.
 /// </summary>
 /// <param name="plan">Plan handle, passed through unchanged.</param>
 /// <param name="stream">Stream, passed through unchanged.</param>
 /// <returns>The status code returned by the external routine.</returns>
 public CUFFTResult cufftSetStream(cufftHandle plan, cudaStream stream)
 {
     CUFFTResult status = cufftSetStream_ext(plan, stream);
     return status;
 }
 // Public external declaration (no managed body) of cufftPlan3d: receives the plan handle by
 // reference, three int dimensions and a CUFFTType, and returns a CUFFTResult status.
 // NOTE(review): presumably P/Invoke-bound to the native cuFFT library; the import attribute is not visible here - confirm.
 public static extern CUFFTResult cufftPlan3d(ref cufftHandle plan, int nx, int ny, int nz, CUFFTType type);