/// <summary>
/// Builds a patch tracker: initialises one FFT setup per entry of
/// <paramref name="aTileSizes"/> through <c>InitFFT</c>, records the largest
/// size value returned by those calls, determines the maximum pixel and block
/// counts over all entries, and creates the kernel wrappers from "kernel.ptx".
/// </summary>
/// <param name="aMaxWidth">Full width; divided (integer division) by each level factor.</param>
/// <param name="aMaxHeight">Full height; divided (integer division) by each level factor.</param>
/// <param name="aTileSizes">Tile size per entry; also defines how many entries are processed.</param>
/// <param name="aMaxShifts">Maximum shift per entry, indexed like <paramref name="aTileSizes"/>.</param>
/// <param name="aLevels">Divisor applied to the maximum width/height per entry; its count sizes the plan arrays.</param>
/// <param name="ctx">CUDA context used to load the kernel module and create the kernels.</param>
public PatchTracker(int aMaxWidth, int aMaxHeight, List<int> aTileSizes, List<int> aMaxShifts, List<int> aLevels, CudaContext ctx)
{
    int planCount = aLevels.Count;
    forward = new CudaFFTPlanMany[planCount];
    backward = new CudaFFTPlanMany[planCount];

    // Set up the FFT plans and remember the largest size InitFFT reports.
    SizeT largestFftSize = 0;
    for (int level = 0; level < aTileSizes.Count; level++)
    {
        int levelWidth = aMaxWidth / aLevels[level];
        int levelHeight = aMaxHeight / aLevels[level];

        SizeT planSize = InitFFT(level, levelWidth, levelHeight, aTileSizes[level], aMaxShifts[level]);
        largestFftSize = planSize > largestFftSize ? planSize : largestFftSize;
    }
    FTTBufferSize = largestFftSize;

    // Determine the maxima needed for allocations.
    for (int level = 0; level < aTileSizes.Count; level++)
    {
        // The Current* properties are read only after these fields are set,
        // as in the original; presumably they are derived from them.
        int levelWidth = aMaxWidth / aLevels[level];
        currentWidth = levelWidth;
        int levelHeight = aMaxHeight / aLevels[level];
        currentHeight = levelHeight;
        currentTileSize = aTileSizes[level];
        currentMaxShift = aMaxShifts[level];

        var shiftWindow = 2 * currentMaxShift + 1;
        var blocksX = CurrentBlockCountX;
        var blocksY = CurrentBlockCountY;
        int shiftPixels = shiftWindow * shiftWindow * blocksX * blocksY;
        maxPixelsShiftImage = Math.Max(shiftPixels, maxPixelsShiftImage);

        var blockSize = CurrentBlockSize;
        int tilePixels = blockSize * blockSize * blocksX * blocksY;
        maxPixelsImage = Math.Max(tilePixels, maxPixelsImage);

        int fftWidth = blockSize / 2 + 1;
        int fftPixels = fftWidth * blockSize * blocksX * blocksY;
        maxPixelsFFT = Math.Max(fftPixels, maxPixelsFFT);

        maxWidth = Math.Max(levelWidth, maxWidth);
        maxHeight = Math.Max(levelHeight, maxHeight);
        maxBlockCountX = Math.Max(maxBlockCountX, blocksX);
        maxBlockCountY = Math.Max(maxBlockCountY, blocksY);
    }

    // All kernels come from the same module.
    CUmodule kernelModule = ctx.LoadModule("kernel.ptx");

    conjKernel = new conjugateComplexMulKernel(ctx, kernelModule);
    convertToTiles = new convertToTilesOverlapKernel(ctx, kernelModule);
    convertToTilesBorder = new convertToTilesOverlapBorderKernel(ctx, kernelModule);
    squaredSumKernel = new squaredSumKernel(ctx, kernelModule);
    boxFilterXKernel = new boxFilterWithBorderXKernel(ctx, kernelModule);
    boxFilterYKernel = new boxFilterWithBorderYKernel(ctx, kernelModule);
    normalizedCCKernel = new normalizedCCKernel(ctx, kernelModule);
    findMinimumKernel = new findMinimumKernel(ctx, kernelModule);
}
/// <summary>
/// Builds a pre-alignment helper sized to the ROI of <paramref name="img"/>:
/// allocates an image buffer of the same ROI size, creates the kernel wrappers
/// from "kernel.ptx", and creates a forward (R2C) and a backward (C2R)
/// two-dimensional FFT plan for a single batch. The larger of the two size
/// values written back by the plan constructors is stored in
/// <c>FFTBufferSize</c>.
/// </summary>
/// <param name="img">Image whose <c>WidthRoi</c>/<c>HeightRoi</c> define the working size.</param>
/// <param name="ctx">CUDA context used to load the kernel module and create the kernels.</param>
public PreAlignment(NPPImage_32fC1 img, CudaContext ctx)
{
    width = img.WidthRoi;
    height = img.HeightRoi;
    imgToTrackRotated = new NPPImage_32fC1(width, height);

    CUmodule kernelModule = ctx.LoadModule("kernel.ptx");
    int fftWidth = width / 2 + 1;

    conjKernel = new conjugateComplexMulKernel(ctx, kernelModule);
    fourierFilterKernel = new fourierFilterKernel(ctx, kernelModule);
    fftshiftKernel = new fftshiftKernel(ctx, kernelModule);
    squaredSumKernel = new squaredSumKernel(ctx, kernelModule);
    boxFilterXKernel = new boxFilterWithBorderXKernel(ctx, kernelModule);
    boxFilterYKernel = new boxFilterWithBorderYKernel(ctx, kernelModule);
    normalizedCCKernel = new normalizedCCKernel(ctx, kernelModule);
    findMinimumKernel = new findMinimumKernel(ctx, kernelModule);

    // Layout description shared by both plans (input/output roles swap for the backward plan).
    int rank = 2;
    int[] dims = { height, width };
    int batchCount = 1;

    // Pitch / 4: presumably converts the byte pitch into 32-bit elements - TODO confirm.
    int realRowLength = imgToTrackRotated.Pitch / 4;
    int[] realEmbed = { 1, realRowLength };
    int[] complexEmbed = { 1, fftWidth };
    int realDist = height * realRowLength;
    int complexDist = height * fftWidth;
    int realStride = 1;
    int complexStride = 1;

    cufftHandle forwardHandle = cufftHandle.Create();
    cufftHandle backwardHandle = cufftHandle.Create();

    SizeT forwardSize = new SizeT();
    SizeT backwardSize = new SizeT();

    forward = new CudaFFTPlanMany(
        forwardHandle, rank, dims, batchCount, cufftType.R2C,
        realEmbed, realStride, realDist,
        complexEmbed, complexStride, complexDist,
        ref forwardSize, false);

    backward = new CudaFFTPlanMany(
        backwardHandle, rank, dims, batchCount, cufftType.C2R,
        complexEmbed, complexStride, complexDist,
        realEmbed, realStride, realDist,
        ref backwardSize, false);

    // Keep the larger of the two reported sizes.
    if (forwardSize > backwardSize)
    {
        FFTBufferSize = forwardSize;
    }
    else
    {
        FFTBufferSize = backwardSize;
    }
}