public override float Inference(CudaDeviceVariable <float> input) { _input = input; NPPImage_32fC1 tempConv = new NPPImage_32fC1(_tempConvolution.DevicePointer, InWidth, InHeight, InWidth * sizeof(float)); for (int outLayer = 0; outLayer < OutChannels; outLayer++) { SizeT offsetOut = outLayer * OutWidth * OutHeight * sizeof(float); CUdeviceptr ptrWithOffsetOut = _z.DevicePointer + offsetOut; NPPImage_32fC1 imgOut = new NPPImage_32fC1(ptrWithOffsetOut, OutWidth, OutHeight, OutWidth * sizeof(float)); imgOut.Set(0); for (int inLayer = 0; inLayer < InChannels; inLayer++) { SizeT offsetIn = inLayer * InWidth * InHeight * sizeof(float); CUdeviceptr ptrWithOffsetIn = _input.DevicePointer + offsetIn; NPPImage_32fC1 imgIn = new NPPImage_32fC1(ptrWithOffsetIn, InWidth, InHeight, InWidth * sizeof(float)); imgIn.SetRoi(_filterX / 2, _filterY / 2, InWidth - _filterX + 1, InHeight - _filterY + 1); SizeT offsetFilter = (outLayer * InChannels * _filterX * _filterY + inLayer * _filterX * _filterY) * sizeof(float); CudaDeviceVariable <float> filter = new CudaDeviceVariable <float>(_weights.DevicePointer + offsetFilter, false, _filterX * _filterY * sizeof(float)); imgIn.Filter(tempConv, filter, new NppiSize(_filterX, _filterY), new NppiPoint(_filterX / 2, _filterY / 2)); imgOut.Add(tempConv); } imgOut.Add(bHost[outLayer]); } switch (_activation) { case Activation.None: _y.CopyToDevice(_z); break; case Activation.Relu: //_aRelu is set to 0! _KernelPReluForward.RunSafe(_z, _aRelu, _y, _outWidth * _outHeight, _outChannels, _batch); break; case Activation.PRelu: _KernelPReluForward.RunSafe(_z, _aRelu, _y, _outWidth * _outHeight, _outChannels, _batch); break; case Activation.LeakyRelu: _KernelPReluForward.RunSafe(_z, _aRelu, _y, _outWidth * _outHeight, _outChannels, _batch); break; default: break; } return(_nextLayer.Inference(_y)); }
public void Track(NPPImage_32fC1 imgTrack, NPPImage_32fC1 imgRef, NPPImage_32fC2 preShift, int i, float2 baseShiftRef, float baseRotationRef, float2 baseShifttoTrack, float baseRotationtoTrack, float threshold) { if (imgTrack.WidthRoi != imgRef.WidthRoi || imgTrack.HeightRoi != imgRef.HeightRoi || imgTrack.WidthRoi != currentWidth || imgTrack.HeightRoi != currentHeight) { throw new ArgumentOutOfRangeException(); } int level = imgTrack.Width / imgTrack.WidthRoi; convertToTilesBorder.RunSafe(imgRef, imgRefSortedTiles, currentTileSize, currentMaxShift, CurrentBlockCountX, CurrentBlockCountY, baseShiftRef, baseRotationRef); //template forward[i].Exec(imgRefSortedTiles.DevicePointer, imgRefCplx.DevicePointer); convertToTiles.RunSafe(imgTrack, imgToTrackSortedTiles, preShift, currentTileSize, currentMaxShift, CurrentBlockCountX, CurrentBlockCountY, baseShifttoTrack, baseRotationtoTrack); //image in paper //DumpFloat(imgToTrackSortedTiles, currentTileSize + 2* currentMaxShift, currentTileSize + 2 * currentMaxShift, CurrentBlockCountX * CurrentBlockCountY, tileIdx, "tilesTrack_" + level + "_" + debugCallCounter + ".bin"); //DumpFloat(imgRefSortedTiles, currentTileSize + 2 * currentMaxShift, currentTileSize + 2 * currentMaxShift, CurrentBlockCountX * CurrentBlockCountY, tileIdx, "tilesRef_" + level + "_" + debugCallCounter + ".bin"); forward[i].Exec(imgToTrackSortedTiles.DevicePointer, imgToTrackCplx.DevicePointer); conjKernel.RunSafe(imgRefCplx, imgToTrackCplx); backward[i].Exec(imgToTrackCplx.DevicePointer, imgCrossCorrelation.DevicePointer); imgCrossCorrelation.DivC(CurrentBlockSize * CurrentBlockSize); squaredSumKernel.RunSafe(imgRefSortedTiles, squaredSumsOfTiles, currentMaxShift, currentTileSize, CurrentBlockCountX * CurrentBlockCountY); //DumpFloat(squaredSumsOfTiles, 1, 1, CurrentBlockCountX * CurrentBlockCountY, tileIdx, "squaredSums_" + level + "_" + debugCallCounter + ".bin"); boxFilterXKernel.RunSafe(imgToTrackSortedTiles, imgRefSortedTiles, currentMaxShift, currentTileSize, CurrentBlockCountX * CurrentBlockCountY); boxFilterYKernel.RunSafe(imgRefSortedTiles, imgToTrackSortedTiles, currentMaxShift, currentTileSize, CurrentBlockCountX * CurrentBlockCountY); //DumpFloat(imgToTrackSortedTiles, currentTileSize + 2 * currentMaxShift, currentTileSize + 2 * currentMaxShift, CurrentBlockCountX * CurrentBlockCountY, tileIdx, "boxFilter_" + level + "_" + debugCallCounter + ".bin"); normalizedCCKernel.RunSafe(imgCrossCorrelation, squaredSumsOfTiles, imgToTrackSortedTiles, shiftImages, currentMaxShift, currentTileSize, CurrentBlockCountX * CurrentBlockCountY); //DumpFloat(shiftImages, (2 * currentMaxShift + 1), (2 * currentMaxShift + 1), CurrentBlockCountX * CurrentBlockCountY, tileIdx, "tilesShift_" + level + "_" + debugCallCounter + ".bin"); patchShift.SetRoi(0, 0, CurrentBlockCountX, CurrentBlockCountY); findMinimumKernel.RunSafe(shiftImages, patchShift, currentMaxShift, CurrentBlockCountX, CurrentBlockCountY, threshold); NPPImage_32fC1 preShiftFloat = new NPPImage_32fC1(preShift.DevicePointer, 2 * CurrentBlockCountX, CurrentBlockCountY, preShift.Pitch); NPPImage_32fC1 patchShiftFloat = new NPPImage_32fC1(patchShift.DevicePointer, 2 * CurrentBlockCountX, CurrentBlockCountY, patchShift.Pitch); preShiftFloat.Add(patchShiftFloat); debugCallCounter++; }