/// <summary>
/// Configures a 16x16 thread block, sizes the launch to the new tile grid
/// (<paramref name="newCountX"/> x <paramref name="newCountY"/>) and runs the kernel
/// that reads <paramref name="inShift"/> and writes <paramref name="outShift"/>.
/// </summary>
/// <param name="inShift">Source shift image; its ROI pointer and pitch are passed to the kernel.</param>
/// <param name="outShift">Destination shift image; its ROI pointer and pitch are passed to the kernel.</param>
/// <returns>The float value returned by <c>Run</c>.</returns>
public float RunSafe(NPPImage_32fC2 inShift, NPPImage_32fC2 outShift, int oldLevel, int newLevel, int oldCountX, int oldCountY, int newCountX, int newCountY, int oldTileSize, int newTileSize)
{
    uint computeWidth = (uint)(newCountX);
    uint computeHeight = (uint)(newCountY);

    this.BlockDimensions = new dim3(16, 16, 1);
    this.SetComputeSize(computeWidth, computeHeight, 1);

    float launchResult = this.Run(
        inShift.DevicePointerRoi,
        outShift.DevicePointerRoi,
        inShift.Pitch,
        outShift.Pitch,
        oldLevel,
        newLevel,
        oldCountX,
        oldCountY,
        newCountX,
        newCountY,
        oldTileSize,
        newTileSize);
    return launchResult;
}
/// <summary>
/// Launches the kernel over <c>tileCountX * tileCountY</c> elements using 128x1 thread blocks.
/// The kernel receives the shift image pointer, the ROI pointer and pitch of
/// <paramref name="coordinates"/>, <paramref name="maxShift"/>, the total tile count,
/// <paramref name="tileCountX"/> and <paramref name="threshold"/>.
/// </summary>
/// <returns>The float value returned by <c>Run</c>.</returns>
public float RunSafe(CudaDeviceVariable<float> shiftImage, NPPImage_32fC2 coordinates, int maxShift, int tileCountX, int tileCountY, float threshold)
{
    // Total number of tiles; used both for the launch size and as a kernel argument.
    int tileTotal = tileCountX * tileCountY;

    this.BlockDimensions = new dim3(128, 1, 1);
    this.SetComputeSize((uint)(tileTotal), 1, 1);

    float launchResult = this.Run(
        shiftImage.DevicePointer,
        coordinates.DevicePointerRoi,
        coordinates.Pitch,
        maxShift,
        tileTotal,
        tileCountX,
        threshold);
    return launchResult;
}
/// <summary>
/// Launches the kernel over the ROI of <paramref name="optimalShifts"/> with 32x8 thread blocks.
/// </summary>
/// <remarks>
/// The kernel is handed <c>optimalShifts.DevicePointer</c> (the non-ROI pointer) together with
/// the ROI width/height and the pitch, so the ROI is expected to start at the image origin —
/// NOTE(review): confirm callers reset the ROI before calling.
/// </remarks>
/// <returns>The float value returned by <c>Run</c>.</returns>
public float RunSafe(NPPImage_32fC2 optimalShifts, CudaDeviceVariable<float2> bestShifts, int imageCount, int referenceImage, int imageToTrack)
{
    uint computeWidth = (uint)(optimalShifts.WidthRoi);
    uint computeHeight = (uint)(optimalShifts.HeightRoi);

    this.BlockDimensions = new dim3(32, 8, 1);
    this.SetComputeSize(computeWidth, computeHeight, 1);

    float launchResult = this.Run(
        optimalShifts.DevicePointer,
        bestShifts.DevicePointer,
        imageCount,
        optimalShifts.WidthRoi,
        optimalShifts.HeightRoi,
        optimalShifts.Pitch,
        referenceImage,
        imageToTrack);
    return launchResult;
}
/// <summary>
/// Exchanges the two image references: after the call <paramref name="a"/> refers to the
/// object <paramref name="b"/> referred to before, and vice versa. No image data is copied.
/// </summary>
private void Swap(ref NPPImage_32fC2 a, ref NPPImage_32fC2 b)
{
    NPPImage_32fC2 held = b;
    b = a;
    a = held;
}
/// <summary>
/// Launches the kernel over the ROI of <paramref name="shifts"/> with 32x16 thread blocks.
/// The kernel receives the ROI pointers of the shift image and of the three input images,
/// the shift pitch, the pitch of <paramref name="imFx"/>, the ROI size, half the window size
/// and <paramref name="minDet"/>.
/// </summary>
/// <remarks>
/// Only <c>imFx.Pitch</c> is passed for the three input images —
/// NOTE(review): presumably imFy and imFt must share that pitch; confirm.
/// Dynamic shared memory is not configured here; a former sizing formula based on
/// <c>5 * windowSize * windowSize</c> floats per thread was disabled.
/// </remarks>
/// <returns>The float value returned by <c>Run</c>.</returns>
public float RunSafe(NPPImage_32fC2 shifts, NPPImage_32fC1 imFx, NPPImage_32fC1 imFy, NPPImage_32fC1 imFt, float minDet, int windowSize)
{
    // Integer division: the kernel is given the half window, not the full size.
    int halfWindow = windowSize / 2;

    uint computeWidth = (uint)(shifts.WidthRoi);
    uint computeHeight = (uint)(shifts.HeightRoi);

    this.BlockDimensions = new dim3(32, 16, 1);
    this.SetComputeSize(computeWidth, computeHeight, 1);

    float launchResult = this.Run(
        shifts.DevicePointerRoi,
        imFx.DevicePointerRoi,
        imFy.DevicePointerRoi,
        imFt.DevicePointerRoi,
        shifts.Pitch,
        imFx.Pitch,
        shifts.WidthRoi,
        shifts.HeightRoi,
        halfWindow,
        minDet);
    return launchResult;
}
/// <summary>
/// Runs the optimal-shift kernel for <paramref name="imageToTrack"/> and returns the image
/// it wrote into.
/// </summary>
/// <remarks>
/// The result is stored in <c>shifts[0]</c> (after resetting its ROI), i.e. the first
/// measured-shift image is overwritten. This relies on the measured shifts no longer
/// being needed at this point.
/// </remarks>
/// <param name="imageToTrack">Index forwarded to the kernel as the image to track.</param>
/// <returns>The same instance as <c>shifts[0]</c>.</returns>
public NPPImage_32fC2 getOptimalShift(int imageToTrack)
{
    NPPImage_32fC2 target = shifts[0];
    target.ResetRoi();

    getOptimalShifts.RunSafe(target, shiftsOneToOne_d, frameCount, referenceIndex, imageToTrack);

    return target;
}
/// <summary>
/// Binds <paramref name="inFlow"/> to a texture object (clamp addressing, linear filtering,
/// normalized coordinates) and launches the kernel over the ROI of <paramref name="outFlow"/>
/// with 32x6 thread blocks.
/// </summary>
/// <remarks>
/// Kernel launch arguments, in order: output ROI pointer, texture object, tileSize,
/// tileCountX, tileCountY, output ROI width, output ROI height, output pitch, baseShift,
/// baseRotation.
/// The texture object is released when the launch call returns, including when it throws.
/// </remarks>
/// <returns>The float value returned by <c>Run</c>.</returns>
public float RunSafe(NPPImage_32fC2 inFlow, NPPImage_32fC2 outFlow, float2 baseShift, float baseRotation, int tileSize, int tileCountX, int tileCountY)
{
    this.BlockDimensions = new dim3(32, 6, 1);
    this.SetComputeSize((uint)(outFlow.WidthRoi), (uint)(outFlow.HeightRoi), 1);

    CudaResourceDesc descImg = new CudaResourceDesc(inFlow);
    CudaTextureDescriptor texDescImg = new CudaTextureDescriptor(CUAddressMode.Clamp, CUFilterMode.Linear, CUTexRefSetFlags.NormalizedCoordinates);

    // 'using' guarantees the texture object is destroyed even if Run throws.
    using (CudaTexObject texImg = new CudaTexObject(descImg, texDescImg))
    {
        return this.Run(outFlow.DevicePointerRoi, texImg.TexObject, tileSize, tileCountX, tileCountY, outFlow.WidthRoi, outFlow.HeightRoi, outFlow.Pitch, baseShift, baseRotation);
    }
}
/// <summary>
/// Launches the warping kernel over the ROI of <paramref name="outImg"/> with 32x6 thread
/// blocks. <paramref name="inImg"/> is bound to a texture with mirror addressing and linear
/// filtering, <paramref name="flow"/> to a texture with clamp addressing and point filtering;
/// both use normalized coordinates.
/// </summary>
/// <remarks>
/// Kernel launch arguments, in order: output ROI width, output ROI height, output pitch,
/// flow texture object, output ROI pointer, texture object of the image to warp.
/// Both texture objects are created per call and are now released when the launch returns;
/// previously they were never disposed, so each call (e.g. every Lucas-Kanade iteration)
/// left two texture objects behind.
/// </remarks>
/// <returns>The float value returned by <c>Run</c>.</returns>
public float RunSafe(NPPImage_32fC1 inImg, NPPImage_32fC1 outImg, NPPImage_32fC2 flow)
{
    this.BlockDimensions = new dim3(32, 6, 1);
    this.SetComputeSize((uint)(outImg.WidthRoi), (uint)(outImg.HeightRoi), 1);

    CudaResourceDesc descImg = new CudaResourceDesc(inImg);
    CudaTextureDescriptor texDescImg = new CudaTextureDescriptor(CUAddressMode.Mirror, CUFilterMode.Linear, CUTexRefSetFlags.NormalizedCoordinates);

    CudaResourceDesc descFlow = new CudaResourceDesc(flow);
    CudaTextureDescriptor texDescFlow = new CudaTextureDescriptor(CUAddressMode.Clamp, CUFilterMode.Point, CUTexRefSetFlags.NormalizedCoordinates);

    // Creation order matches the original (image texture first, then flow texture);
    // nested 'using' releases them in reverse order, also on exceptions.
    using (CudaTexObject texImg = new CudaTexObject(descImg, texDescImg))
    using (CudaTexObject texFlow = new CudaTexObject(descFlow, texDescFlow))
    {
        return this.Run(outImg.WidthRoi, outImg.HeightRoi, outImg.Pitch, texFlow.TexObject, outImg.DevicePointerRoi, texImg.TexObject);
    }
}
/// <summary>
/// Refines a flow field iteratively and applies it to <paramref name="sourceImg"/>.
/// </summary>
/// <remarks>
/// Steps, as performed by this method:
/// 1. <c>createFlowFieldFromTiles</c> fills <c>d_flow</c> from <paramref name="tiledFlow"/>.
/// 2. For each iteration: warp <paramref name="sourceImg"/> into <c>d_tmp</c> using
///    <c>d_flow</c>, zero the three derivative buffers, compute derivatives between
///    <c>d_tmp</c> and <paramref name="targetImg"/>, then run the Lucas-Kanade kernel
///    which updates <c>d_flow</c>.
/// 3. Warp once more with the final flow and call <c>d_tmp.Copy(sourceImg)</c> —
///    presumably this writes the warped result back into <paramref name="sourceImg"/>;
///    confirm against the NPP image Copy semantics.
/// With <paramref name="iterations"/> &lt;= 0 the loop is skipped and only step 1 and 3 run.
/// </remarks>
public void LucasKanade(NPPImage_32fC1 sourceImg, NPPImage_32fC1 targetImg, NPPImage_32fC2 tiledFlow, int tileSize, int tileCountX, int tileCountY, int iterations, float2 baseShift, float baseRotation, float minDet, int windowSize)
{
    createFlowFieldFromTiles.RunSafe(tiledFlow, d_flow, baseShift, baseRotation, tileSize, tileCountX, tileCountY);

    for (int iter = 0; iter < iterations; iter++)
    {
        warpingKernel.RunSafe(sourceImg, d_tmp, d_flow);

        // Derivative buffers are cleared before every pass.
        d_Ix.Set(0);
        d_Iy.Set(0);
        d_Iz.Set(0);

        computeDerivativesKernel.RunSafe(d_tmp, targetImg, d_Ix, d_Iy, d_Iz);
        lukasKanade.RunSafe(d_flow, d_Ix, d_Iy, d_Iz, minDet, windowSize);
    }

    warpingKernel.RunSafe(sourceImg, d_tmp, d_flow);
    d_tmp.Copy(sourceImg);
}
/// <summary>
/// Measures per-tile shifts between <paramref name="imgTrack"/> and <paramref name="imgRef"/>
/// at the current level and adds them onto <paramref name="preShift"/>.
/// </summary>
/// <remarks>
/// Sequence, as performed by this method:
/// the reference image is cut into tiles and transformed with <c>forward[i]</c>; the image to
/// track is cut into tiles (using <paramref name="preShift"/>) and transformed likewise; the
/// two spectra are combined by <c>conjKernel</c>, transformed back with <c>backward[i]</c> and
/// divided by <c>CurrentBlockSize * CurrentBlockSize</c>; squared sums and X/Y box filters
/// feed <c>normalizedCCKernel</c>; <c>findMinimumKernel</c> writes the per-tile result into
/// <c>patchShift</c>, which is finally added to <paramref name="preShift"/>.
/// The tile buffers are reused as scratch between the steps, so the statement order matters.
/// </remarks>
/// <param name="i">Index into the <c>forward</c>/<c>backward</c> plan arrays.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// The ROI sizes of the two images differ from each other or from
/// <c>currentWidth</c> x <c>currentHeight</c>.
/// </exception>
public void Track(NPPImage_32fC1 imgTrack, NPPImage_32fC1 imgRef, NPPImage_32fC2 preShift, int i, float2 baseShiftRef, float baseRotationRef, float2 baseShifttoTrack, float baseRotationtoTrack, float threshold)
{
    if (imgTrack.WidthRoi != imgRef.WidthRoi || imgTrack.HeightRoi != imgRef.HeightRoi ||
        imgTrack.WidthRoi != currentWidth || imgTrack.HeightRoi != currentHeight)
    {
        throw new ArgumentOutOfRangeException("imgTrack", "ROI sizes of imgTrack and imgRef must be equal and match the current width and height.");
    }

    // Only needed by the disabled debug dumps below:
    //int level = imgTrack.Width / imgTrack.WidthRoi;

    convertToTilesBorder.RunSafe(imgRef, imgRefSortedTiles, currentTileSize, currentMaxShift, CurrentBlockCountX, CurrentBlockCountY, baseShiftRef, baseRotationRef); //template
    forward[i].Exec(imgRefSortedTiles.DevicePointer, imgRefCplx.DevicePointer);

    convertToTiles.RunSafe(imgTrack, imgToTrackSortedTiles, preShift, currentTileSize, currentMaxShift, CurrentBlockCountX, CurrentBlockCountY, baseShifttoTrack, baseRotationtoTrack); //image in paper
    //DumpFloat(imgToTrackSortedTiles, currentTileSize + 2* currentMaxShift, currentTileSize + 2 * currentMaxShift, CurrentBlockCountX * CurrentBlockCountY, tileIdx, "tilesTrack_" + level + "_" + debugCallCounter + ".bin");
    //DumpFloat(imgRefSortedTiles, currentTileSize + 2 * currentMaxShift, currentTileSize + 2 * currentMaxShift, CurrentBlockCountX * CurrentBlockCountY, tileIdx, "tilesRef_" + level + "_" + debugCallCounter + ".bin");
    forward[i].Exec(imgToTrackSortedTiles.DevicePointer, imgToTrackCplx.DevicePointer);

    conjKernel.RunSafe(imgRefCplx, imgToTrackCplx);

    backward[i].Exec(imgToTrackCplx.DevicePointer, imgCrossCorrelation.DevicePointer);
    imgCrossCorrelation.DivC(CurrentBlockSize * CurrentBlockSize);

    squaredSumKernel.RunSafe(imgRefSortedTiles, squaredSumsOfTiles, currentMaxShift, currentTileSize, CurrentBlockCountX * CurrentBlockCountY);
    //DumpFloat(squaredSumsOfTiles, 1, 1, CurrentBlockCountX * CurrentBlockCountY, tileIdx, "squaredSums_" + level + "_" + debugCallCounter + ".bin");

    // The two tile buffers swap roles between the X and Y pass
    // (presumably input first, output second — confirm against the kernel wrappers).
    boxFilterXKernel.RunSafe(imgToTrackSortedTiles, imgRefSortedTiles, currentMaxShift, currentTileSize, CurrentBlockCountX * CurrentBlockCountY);
    boxFilterYKernel.RunSafe(imgRefSortedTiles, imgToTrackSortedTiles, currentMaxShift, currentTileSize, CurrentBlockCountX * CurrentBlockCountY);
    //DumpFloat(imgToTrackSortedTiles, currentTileSize + 2 * currentMaxShift, currentTileSize + 2 * currentMaxShift, CurrentBlockCountX * CurrentBlockCountY, tileIdx, "boxFilter_" + level + "_" + debugCallCounter + ".bin");

    normalizedCCKernel.RunSafe(imgCrossCorrelation, squaredSumsOfTiles, imgToTrackSortedTiles, shiftImages, currentMaxShift, currentTileSize, CurrentBlockCountX * CurrentBlockCountY);
    //DumpFloat(shiftImages, (2 * currentMaxShift + 1), (2 * currentMaxShift + 1), CurrentBlockCountX * CurrentBlockCountY, tileIdx, "tilesShift_" + level + "_" + debugCallCounter + ".bin");

    patchShift.SetRoi(0, 0, CurrentBlockCountX, CurrentBlockCountY);
    findMinimumKernel.RunSafe(shiftImages, patchShift, currentMaxShift, CurrentBlockCountX, CurrentBlockCountY, threshold);

    // Both two-channel shift images are wrapped as single-channel images of twice the
    // width (same device pointer and pitch) so that one Add covers both components.
    NPPImage_32fC1 preShiftFloat = new NPPImage_32fC1(preShift.DevicePointer, 2 * CurrentBlockCountX, CurrentBlockCountY, preShift.Pitch);
    NPPImage_32fC1 patchShiftFloat = new NPPImage_32fC1(patchShift.DevicePointer, 2 * CurrentBlockCountX, CurrentBlockCountY, patchShift.Pitch);
    preShiftFloat.Add(patchShiftFloat);

    debugCallCounter++;
}
/// <summary>
/// Copies the ROI of <paramref name="flow"/> to host memory and writes it to
/// <paramref name="filename"/> in binary form.
/// </summary>
/// <remarks>
/// Layout written: <c>flow.WidthRoi</c>, <c>flow.HeightRoi</c>, then for every element in
/// host-array order its <c>x</c> followed by its <c>y</c> component.
/// The file is created or truncated, so an existing longer file no longer leaves stale
/// bytes after the new content, and both streams are closed even if writing fails.
/// </remarks>
/// <param name="flow">Flow image whose ROI is dumped.</param>
/// <param name="filename">Path of the output file; overwritten if it exists.</param>
public void DumpFlowField(NPPImage_32fC2 flow, string filename)
{
    float2[] f = new float2[flow.WidthRoi * flow.HeightRoi];
    flow.CopyToHostRoi(f, new NppiRect(0, 0, flow.WidthRoi, flow.HeightRoi));

    // FileMode.Create truncates an existing file; File.OpenWrite did not.
    using (FileStream fs = new FileStream(filename, FileMode.Create, FileAccess.Write, FileShare.None))
    using (BinaryWriter bw = new BinaryWriter(fs))
    {
        bw.Write(flow.WidthRoi);
        bw.Write(flow.HeightRoi);

        for (int i = 0; i < f.Length; i++)
        {
            bw.Write(f[i].x);
            bw.Write(f[i].y);
        }
    }
}
/// <summary>
/// Copies the whole <paramref name="flow"/> image to host memory and writes it to
/// <paramref name="filename"/> in binary form.
/// </summary>
/// <remarks>
/// Layout written: <c>flow.Width</c>, <c>flow.Height</c>, then for every element in
/// host-array order its <c>x</c> followed by its <c>y</c> component.
/// The file is created or truncated, so an existing longer file no longer leaves stale
/// bytes after the new content, and both streams are closed even if writing fails.
/// </remarks>
/// <param name="flow">Flow image to dump (full size, ROI is ignored).</param>
/// <param name="filename">Path of the output file; overwritten if it exists.</param>
private void DumpFlowField(NPPImage_32fC2 flow, string filename)
{
    float2[] f = new float2[flow.Width * flow.Height];
    flow.CopyToHost(f);

    // FileMode.Create truncates an existing file; File.OpenWrite did not.
    using (FileStream fs = new FileStream(filename, FileMode.Create, FileAccess.Write, FileShare.None))
    using (BinaryWriter bw = new BinaryWriter(fs))
    {
        bw.Write(flow.Width);
        bw.Write(flow.Height);

        for (int i = 0; i < f.Length; i++)
        {
            bw.Write(f[i].x);
            bw.Write(f[i].y);
        }
    }
}
/// <summary>
/// Loads the kernels from "opticalFlow.ptx" and allocates the working images used by the
/// optical flow computation.
/// </summary>
/// <param name="width">Width used for every working image allocated here.</param>
/// <param name="height">Height used for every working image allocated here.</param>
/// <param name="ctx">CUDA context used to load the PTX module and create the kernels.</param>
public OpticalFlow(int width, int height, CudaContext ctx)
{
    // Kernels: all four come from the same PTX module.
    CUmodule ptxModule = ctx.LoadModulePTX("opticalFlow.ptx");
    warpingKernel = new WarpingKernel(ctx, ptxModule);
    createFlowFieldFromTiles = new CreateFlowFieldFromTiles(ctx, ptxModule);
    computeDerivativesKernel = new ComputeDerivativesKernel(ctx, ptxModule);
    lukasKanade = new LukasKanadeKernel(ctx, ptxModule);

    // Working images: one scratch image, three derivative images, one two-channel flow field.
    d_tmp = new NPPImage_32fC1(width, height);
    d_Ix = new NPPImage_32fC1(width, height);
    d_Iy = new NPPImage_32fC1(width, height);
    d_Iz = new NPPImage_32fC1(width, height);
    d_flow = new NPPImage_32fC2(width, height);

    // Scratch buffer sized to three times what d_tmp reports for mean/std-dev, plus result slots.
    buffer = new CudaDeviceVariable<byte>(d_tmp.MeanStdDevGetBufferHostSize() * 3);
    mean = new CudaDeviceVariable<double>(1);
    std = new CudaDeviceVariable<double>(1);

    // Four-tap coefficient sets (all magnitudes 0.25) for the X, Y and T filters.
    d_filterX = new float[] { -0.25f, 0.25f, -0.25f, 0.25f };
    d_filterY = new float[] { -0.25f, -0.25f, 0.25f, 0.25f };
    d_filterT = new float[] { 0.25f, 0.25f, 0.25f, 0.25f };
}
/// <summary>
/// Allocates the FFT work buffer, attaches it to every forward and backward plan, and
/// allocates the tile, spectrum, squared-sum, shift-image and patch-shift buffers.
/// </summary>
/// <remarks>
/// The same work buffer is shared by all plans. Buffer sizes come from the fields
/// <c>FTTBufferSize</c>, <c>maxPixelsImage</c>, <c>maxPixelsFFT</c>,
/// <c>maxPixelsShiftImage</c> and <c>MaxBlockCountX</c>/<c>MaxBlockCountY</c>.
/// </remarks>
public void AllocateDeviceMemory()
{
    // One work area, shared by every plan.
    FFTBuffer = new CudaDeviceVariable<byte>(FTTBufferSize);
    for (int plan = 0; plan < forward.Length; plan++)
    {
        forward[plan].SetWorkArea(FFTBuffer.DevicePointer);
        backward[plan].SetWorkArea(FFTBuffer.DevicePointer);
    }

    // Real-valued tile buffers.
    imgToTrackSortedTiles = new CudaDeviceVariable<float>(maxPixelsImage);
    imgRefSortedTiles = new CudaDeviceVariable<float>(maxPixelsImage);
    imgCrossCorrelation = new CudaDeviceVariable<float>(maxPixelsImage);

    // Complex-valued buffers.
    imgToTrackCplx = new CudaDeviceVariable<float2>(maxPixelsFFT);
    imgRefCplx = new CudaDeviceVariable<float2>(maxPixelsFFT);

    // Per-tile results.
    squaredSumsOfTiles = new CudaDeviceVariable<float>(MaxBlockCountX * MaxBlockCountY);
    shiftImages = new CudaDeviceVariable<float>(maxPixelsShiftImage);
    patchShift = new NPPImage_32fC2(MaxBlockCountX, MaxBlockCountY);
}
/// <summary>
/// Sets up the shift bookkeeping for <paramref name="aFrameCount"/> frames: builds the list of
/// shift pairs, allocates one shift image per pair plus all device buffers, loads the kernels
/// from "ShiftMinimizerKernels.ptx", initialises the device pointer arrays and calls
/// <c>Reset()</c>.
/// </summary>
/// <param name="aFrameCount">Number of frames.</param>
/// <param name="aMaxTileCountX">Maximum tile count in X; sizes every per-tile buffer.</param>
/// <param name="aMaxTileCountY">Maximum tile count in Y; sizes every per-tile buffer.</param>
/// <param name="aReferenceIndex">Index of the reference frame.</param>
/// <param name="aStrategy">Tracking strategy stored for later use.</param>
/// <param name="aBlockSize">Requested block size; clamped to <c>aFrameCount - 1</c> when it is not smaller than the frame count.</param>
/// <param name="ctx">CUDA context used to load the PTX module and create the kernels.</param>
/// <exception cref="Exception">The number of generated shift pairs differs from <c>GetShiftCount()</c>.</exception>
public ShiftCollection(int aFrameCount, int aMaxTileCountX, int aMaxTileCountY, int aReferenceIndex, TrackingStrategy aStrategy, int aBlockSize, CudaContext ctx)
{
    strategy = aStrategy;
    referenceIndex = aReferenceIndex;
    frameCount = aFrameCount;
    blockSize = (aBlockSize >= aFrameCount) ? aFrameCount - 1 : aBlockSize;

    blas = new CudaBlas(PointerMode.Device, AtomicsMode.Allowed);
    one = 1.0f;
    zero = 0.0f;

    // Build the pair list and verify it against the independently computed count.
    shiftPairs = new List<ShiftPair>();
    int shiftCount = GetShiftCount();
    FillShiftPairs();
    FillIndexTable();

    if (shiftPairs.Count != shiftCount)
    {
        throw new Exception("Ooups, something went wrong with my math...");
    }

    // One shift image per pair; remember each image's pitch and device pointer.
    shifts = new List<NPPImage_32fC2>(shiftCount);
    int[] pitchesHost = new int[shiftCount];
    CUdeviceptr[] pointersHost = new CUdeviceptr[shiftCount];

    for (int pair = 0; pair < shiftCount; pair++)
    {
        NPPImage_32fC2 shiftImage = new NPPImage_32fC2(aMaxTileCountX, aMaxTileCountY);
        shifts.Add(shiftImage);
        pitchesHost[pair] = shiftImage.Pitch;
        ptrListAssign(pointersHost, pair, shiftImage);
    }
    shiftPitches = pitchesHost;

    // Element counts shared by the allocations below. The products keep the original
    // left-to-right evaluation order.
    int tileCount = aMaxTileCountX * aMaxTileCountY;
    int framesMinusOne = frameCount - 1;
    int perPairCount = tileCount * shiftCount;
    int perFrameCount = tileCount * framesMinusOne;
    int pairByFrameCount = perPairCount * framesMinusOne;
    int frameByFrameCount = perFrameCount * framesMinusOne;

    AllShifts_d = new CudaDeviceVariable<float2>(perPairCount);
    shiftsOneToOne_d = new CudaDeviceVariable<float2>(perFrameCount);
    shifts_d = pointersHost;

    status = new CudaDeviceVariable<int>(tileCount);
    infoInverse = new CudaDeviceVariable<int>(tileCount);

    // Per-tile pointer tables.
    shiftMatrixArray = new CudaDeviceVariable<CUdeviceptr>(tileCount);
    shiftMatrixSafeArray = new CudaDeviceVariable<CUdeviceptr>(tileCount);
    matrixSquareArray = new CudaDeviceVariable<CUdeviceptr>(tileCount);
    matrixInvertedArray = new CudaDeviceVariable<CUdeviceptr>(tileCount);
    solvedMatrixArray = new CudaDeviceVariable<CUdeviceptr>(tileCount);
    shiftOneToOneArray = new CudaDeviceVariable<CUdeviceptr>(tileCount);
    shiftMeasuredArray = new CudaDeviceVariable<CUdeviceptr>(tileCount);
    shiftOptimArray = new CudaDeviceVariable<CUdeviceptr>(tileCount);

    // Backing storage the pointer tables refer to.
    shiftMatrices = new CudaDeviceVariable<float>(pairByFrameCount);
    shiftSafeMatrices = new CudaDeviceVariable<float>(pairByFrameCount);
    matricesSquared = new CudaDeviceVariable<float>(frameByFrameCount);
    matricesInverted = new CudaDeviceVariable<float>(frameByFrameCount);
    solvedMatrices = new CudaDeviceVariable<float>(pairByFrameCount);
    shiftsOneToOne = new CudaDeviceVariable<float2>(perFrameCount);
    pivotArray = new CudaDeviceVariable<int>(perFrameCount);
    shiftsMeasured = new CudaDeviceVariable<float2>(perPairCount);
    shiftsOptim = new CudaDeviceVariable<float2>(perPairCount);

    buffer = new CudaDeviceVariable<byte>(status.SumGetBufferSize());
    statusSum = new CudaDeviceVariable<int>(1);

    // Kernels.
    CUmodule ptxModule = ctx.LoadModulePTX("ShiftMinimizerKernels.ptx");
    concatenateShifts = new concatenateShiftsKernel(ctx, ptxModule);
    separateShifts = new separateShiftsKernel(ctx, ptxModule);
    getOptimalShifts = new getOptimalShiftsKernel(ctx, ptxModule);
    copyShiftMatrixKernel = new copyShiftMatrixKernel(ctx, ptxModule);
    setPointers = new setPointersKernel(ctx, ptxModule);
    checkForOutliers = new checkForOutliersKernel(ctx, ptxModule);
    transposeShifts = new transposeShiftsKernel(ctx, ptxModule);

    // Fill the pointer tables on the device.
    setPointers.RunSafe(
        shiftMatrixArray, shiftMatrixSafeArray, matrixSquareArray, matrixInvertedArray,
        solvedMatrixArray, shiftOneToOneArray, shiftMeasuredArray, shiftOptimArray,
        shiftMatrices, shiftSafeMatrices, matricesSquared, matricesInverted,
        solvedMatrices, shiftsOneToOne, shiftsMeasured, shiftsOptim,
        tileCount, frameCount, shiftCount);

    Reset();
}

// Stores the device pointer of one shift image in the host-side pointer list.
private static void ptrListAssign(CUdeviceptr[] pointers, int index, NPPImage_32fC2 image)
{
    pointers[index] = image.DevicePointer;
}
/// <summary>
/// Sizes the launch to the ROI of <paramref name="imgOut"/> and runs the kernel with plain
/// device pointers for all images, including <paramref name="kernelParam"/> and
/// <paramref name="shifts"/>.
/// </summary>
/// <remarks>
/// Pitches passed to the kernel are those of <paramref name="imgOut"/>,
/// <paramref name="certaintyMask"/> and <paramref name="shifts"/> only —
/// NOTE(review): presumably totalWeights and kernelParam share the pitch of imgOut; confirm.
/// Block dimensions are not set here.
/// </remarks>
/// <returns>The float value returned by <c>base.Run</c>.</returns>
public float RunSafe(CudaDeviceVariable<ushort> dataIn, NPPImage_32fC3 imgOut, NPPImage_32fC3 totalWeights, NPPImage_32fC4 certaintyMask, NPPImage_32fC3 kernelParam, NPPImage_32fC2 shifts, float3 whiteLevel, float3 blackLevel)
{
    uint computeWidth = (uint)imgOut.WidthRoi;
    uint computeHeight = (uint)imgOut.HeightRoi;
    SetComputeSize(computeWidth, computeHeight);

    float launchResult = base.Run(
        dataIn.DevicePointer,
        imgOut.DevicePointerRoi,
        totalWeights.DevicePointerRoi,
        certaintyMask.DevicePointerRoi,
        kernelParam.DevicePointerRoi,
        shifts.DevicePointerRoi,
        whiteLevel,
        blackLevel,
        imgOut.WidthRoi,
        imgOut.HeightRoi,
        imgOut.Pitch,
        certaintyMask.Pitch,
        shifts.Pitch);
    return launchResult;
}
/// <summary>
/// Binds <paramref name="shift"/> to a texture object (mirror addressing, linear filtering,
/// normalized coordinates) and launches the kernel over the ROI of
/// <paramref name="rawImgRef"/> with 8x8 thread blocks.
/// </summary>
/// <remarks>
/// Kernel launch arguments, in order: rawImgRef ROI pointer, rawImgMoved ROI pointer,
/// robustnessMask ROI pointer, shift texture object, ROI width, ROI height,
/// rawImgRef pitch, robustnessMask pitch, alpha, beta, thresholdM.
/// Dynamic shared memory is set to <c>blockDim.x * blockDim.y * float3.SizeOf * 3 * 3</c> bytes.
/// Only the pitch of <paramref name="rawImgRef"/> is passed for the two raw images —
/// NOTE(review): presumably rawImgMoved must share it; confirm.
/// The texture object is released when the launch call returns, including when it throws.
/// </remarks>
/// <returns>The float value returned by <c>Run</c>.</returns>
public float RunSafe(NPPImage_32fC3 rawImgRef, NPPImage_32fC3 rawImgMoved, NPPImage_32fC4 robustnessMask, NPPImage_32fC2 shift, float alpha, float beta, float thresholdM)
{
    this.BlockDimensions = new dim3(8, 8, 1);
    this.SetComputeSize((uint)(rawImgRef.WidthRoi), (uint)(rawImgRef.HeightRoi), 1);
    this.DynamicSharedMemory = BlockDimensions.x * BlockDimensions.y * float3.SizeOf * 3 * 3;

    CudaResourceDesc descShift = new CudaResourceDesc(shift);
    CudaTextureDescriptor texDescShift = new CudaTextureDescriptor(CUAddressMode.Mirror, CUFilterMode.Linear, CUTexRefSetFlags.NormalizedCoordinates);

    // 'using' guarantees the texture object is destroyed even if Run throws.
    using (CudaTexObject texShift = new CudaTexObject(descShift, texDescShift))
    {
        return this.Run(rawImgRef.DevicePointerRoi, rawImgMoved.DevicePointerRoi, robustnessMask.DevicePointerRoi, texShift.TexObject, rawImgRef.WidthRoi, rawImgRef.HeightRoi, rawImgRef.Pitch, robustnessMask.Pitch, alpha, beta, thresholdM);
    }
}
/// <summary>
/// Launches the kernel with a compute size of
/// <c>(tileSize + 2 * maxShift) x (tileSize + 2 * maxShift) x (tileCountX * tileCountY)</c>,
/// i.e. one padded tile per Z slice.
/// </summary>
/// <remarks>
/// <paramref name="preShift"/> is passed via its non-ROI <c>DevicePointer</c> together with its
/// pitch, whereas <paramref name="inImg"/> is passed via its ROI pointer, ROI size and pitch.
/// Block dimensions are not set here.
/// </remarks>
/// <returns>The float value returned by <c>Run</c>.</returns>
public float RunSafe(NPPImage_32fC1 inImg, CudaDeviceVariable<float> outTiles, NPPImage_32fC2 preShift, int tileSize, int maxShift, int tileCountX, int tileCountY, float2 baseShift, float baseRotation)
{
    // Edge length of one tile including the search border on both sides.
    int paddedTileSize = tileSize + 2 * maxShift;
    int tileTotal = tileCountX * tileCountY;

    this.SetComputeSize((uint)(paddedTileSize), (uint)(paddedTileSize), (uint)(tileTotal));

    float launchResult = this.Run(
        inImg.DevicePointerRoi,
        outTiles.DevicePointer,
        preShift.DevicePointer,
        preShift.Pitch,
        inImg.WidthRoi,
        inImg.HeightRoi,
        inImg.Pitch,
        maxShift,
        tileSize,
        tileCountX,
        tileCountY,
        baseShift,
        baseRotation);
    return launchResult;
}
/// <summary>
/// Sizes the launch to the ROI of <paramref name="imgOut"/> and runs the kernel with
/// <paramref name="kernelParam"/> and <paramref name="shifts"/> bound as texture objects
/// (kernelParam: clamp addressing; shifts: mirror addressing; both linear filtering with
/// normalized coordinates).
/// </summary>
/// <remarks>
/// Kernel launch arguments, in order: dataIn pointer, imgOut ROI pointer, totalWeights ROI
/// pointer, certaintyMask ROI pointer, kernelParam texture object, shifts texture object,
/// whiteLevel, blackLevel, ROI width, ROI height, imgOut pitch, certaintyMask pitch,
/// kernelParam pitch, shifts pitch.
/// Both texture objects are released when the launch returns, including when the launch or
/// the creation of the second texture throws. Block dimensions are not set here.
/// </remarks>
/// <returns>The float value returned by <c>base.Run</c>.</returns>
public float RunSafe(CudaDeviceVariable<ushort> dataIn, NPPImage_32fC3 imgOut, NPPImage_32fC3 totalWeights, NPPImage_32fC4 certaintyMask, NPPImage_32fC4 kernelParam, NPPImage_32fC2 shifts, float3 whiteLevel, float3 blackLevel)
{
    SetComputeSize((uint)imgOut.WidthRoi, (uint)imgOut.HeightRoi);

    CudaResourceDesc descKernel = new CudaResourceDesc(kernelParam);
    CudaTextureDescriptor texDescKernel = new CudaTextureDescriptor(CUAddressMode.Clamp, CUFilterMode.Linear, CUTexRefSetFlags.NormalizedCoordinates);

    CudaResourceDesc descShift = new CudaResourceDesc(shifts);
    CudaTextureDescriptor texDescShift = new CudaTextureDescriptor(CUAddressMode.Mirror, CUFilterMode.Linear, CUTexRefSetFlags.NormalizedCoordinates);

    // Same creation order as before (kernel texture, then shift texture); nested 'using'
    // disposes the shift texture first and the kernel texture second, as the original did.
    using (CudaTexObject texKernel = new CudaTexObject(descKernel, texDescKernel))
    using (CudaTexObject texShift = new CudaTexObject(descShift, texDescShift))
    {
        return base.Run(dataIn.DevicePointer, imgOut.DevicePointerRoi, totalWeights.DevicePointerRoi, certaintyMask.DevicePointerRoi, texKernel.TexObject, texShift.TexObject, whiteLevel, blackLevel, imgOut.WidthRoi, imgOut.HeightRoi, imgOut.Pitch, certaintyMask.Pitch, kernelParam.Pitch, shifts.Pitch);
    }
}