Example #1
0
        /// <summary>
        /// Runs the forward (inference) pass of this convolution layer and forwards the result to the next layer.
        /// </summary>
        /// <remarks>
        /// For every output channel the matching plane of <c>_z</c> is cleared, each input channel plane is
        /// filtered with its slice of <c>_weights</c> into the scratch image <c>_tempConvolution</c> and
        /// accumulated, and finally the per-channel bias <c>bHost[outLayer]</c> is added. The selected
        /// activation then produces <c>_y</c> from <c>_z</c>.
        /// Planes are addressed as <c>channel * width * height * sizeof(float)</c> bytes from the buffer start.
        /// NOTE(review): only <c>InChannels</c>/<c>OutChannels</c> planes are addressed here while the activation
        /// kernel also receives <c>_batch</c> - confirm how batches larger than one are meant to be handled.
        /// NOTE(review): an activation value not listed in the switch leaves <c>_y</c> unchanged - confirm intended.
        /// </remarks>
        /// <param name="input">Device buffer holding the input planes; it is also stored in <c>_input</c>.</param>
        /// <returns>The value returned by <c>_nextLayer.Inference</c> when called with this layer's <c>_y</c>.</returns>
        public override float Inference(CudaDeviceVariable <float> input)
        {
            _input = input;

            // Byte sizes are computed in 64-bit so that channel/filter offsets cannot overflow Int32 on large layers.
            long inPlaneBytes  = (long)InWidth * InHeight * sizeof(float);
            long outPlaneBytes = (long)OutWidth * OutHeight * sizeof(float);
            long filterBytes   = (long)_filterX * _filterY * sizeof(float);

            // Loop-invariant filter geometry: kernel size and its centre anchor.
            NppiSize  kernelSize = new NppiSize(_filterX, _filterY);
            NppiPoint anchor     = new NppiPoint(_filterX / 2, _filterY / 2);

            // All wrappers below are views on existing device memory (they are built from device pointers that
            // belong to other buffers); disposing them only releases the managed wrapper deterministically.
            using (NPPImage_32fC1 tempConv = new NPPImage_32fC1(_tempConvolution.DevicePointer, InWidth, InHeight, InWidth * sizeof(float)))
            {
                for (int outLayer = 0; outLayer < OutChannels; outLayer++)
                {
                    SizeT       offsetOut        = outLayer * outPlaneBytes;
                    CUdeviceptr ptrWithOffsetOut = _z.DevicePointer + offsetOut;

                    using (NPPImage_32fC1 imgOut = new NPPImage_32fC1(ptrWithOffsetOut, OutWidth, OutHeight, OutWidth * sizeof(float)))
                    {
                        // Start the accumulation for this output channel from zero.
                        imgOut.Set(0);

                        for (int inLayer = 0; inLayer < InChannels; inLayer++)
                        {
                            SizeT       offsetIn        = inLayer * inPlaneBytes;
                            CUdeviceptr ptrWithOffsetIn = _input.DevicePointer + offsetIn;

                            // Weight slice for the (outLayer, inLayer) pair.
                            SizeT offsetFilter = ((long)outLayer * InChannels + inLayer) * filterBytes;

                            using (NPPImage_32fC1 imgIn = new NPPImage_32fC1(ptrWithOffsetIn, InWidth, InHeight, InWidth * sizeof(float)))
                            using (CudaDeviceVariable <float> filter = new CudaDeviceVariable <float>(_weights.DevicePointer + offsetFilter, false, filterBytes))
                            {
                                // Restrict the ROI so the filter window never reads outside the input plane.
                                imgIn.SetRoi(_filterX / 2, _filterY / 2, InWidth - _filterX + 1, InHeight - _filterY + 1);

                                imgIn.Filter(tempConv, filter, kernelSize, anchor);
                                imgOut.Add(tempConv);
                            }
                        }

                        // Per-channel bias, taken from the host-side array.
                        imgOut.Add(bHost[outLayer]);
                    }
                }
            }

            switch (_activation)
            {
            case Activation.None:
                _y.CopyToDevice(_z);
                break;

            // All ReLU variants share one kernel and differ only in the contents of _aRelu
            // (for plain Relu, _aRelu is set to 0!).
            case Activation.Relu:
            case Activation.PRelu:
            case Activation.LeakyRelu:
                _KernelPReluForward.RunSafe(_z, _aRelu, _y, _outWidth * _outHeight, _outChannels, _batch);
                break;

            default:
                break;
            }

            return(_nextLayer.Inference(_y));
        }
Example #2
0
        /// <summary>
        /// Tracks <paramref name="imgTrack"/> against <paramref name="imgRef"/> tile by tile and adds the shift
        /// found for each tile onto <paramref name="preShift"/>.
        /// </summary>
        /// <remarks>
        /// Steps, in order: both images are cut into tiles, transformed with <c>forward[i]</c>, combined by
        /// <c>conjKernel</c>, transformed back with <c>backward[i]</c> and divided by
        /// <c>CurrentBlockSize * CurrentBlockSize</c>. The result is then passed through the squared-sum,
        /// box-filter and <c>normalizedCCKernel</c> stages, <c>findMinimumKernel</c> writes one shift per tile
        /// into <c>patchShift</c>, and that shift is added to <paramref name="preShift"/>.
        /// The box-filter stage reuses <c>imgRefSortedTiles</c> as scratch space, so its content is overwritten.
        /// </remarks>
        /// <param name="imgTrack">Image to be tracked; its ROI size must equal the current level size.</param>
        /// <param name="imgRef">Reference image; its ROI size must equal that of <paramref name="imgTrack"/>.</param>
        /// <param name="preShift">Two-channel per-tile shift image; read by the tiling kernel and updated in place.</param>
        /// <param name="i">Index selecting the <c>forward</c>/<c>backward</c> transform plans to use.</param>
        /// <param name="baseShiftRef">Base shift passed to the reference tiling kernel.</param>
        /// <param name="baseRotationRef">Base rotation passed to the reference tiling kernel.</param>
        /// <param name="baseShifttoTrack">Base shift passed to the tracked-image tiling kernel.</param>
        /// <param name="baseRotationtoTrack">Base rotation passed to the tracked-image tiling kernel.</param>
        /// <param name="threshold">Threshold forwarded to <c>findMinimumKernel</c>.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The ROI sizes of the two images differ from each other or from <c>currentWidth</c>/<c>currentHeight</c>.
        /// </exception>
        public void Track(NPPImage_32fC1 imgTrack, NPPImage_32fC1 imgRef, NPPImage_32fC2 preShift, int i, float2 baseShiftRef, float baseRotationRef, float2 baseShifttoTrack, float baseRotationtoTrack, float threshold)
        {
            if (imgTrack.WidthRoi != imgRef.WidthRoi || imgTrack.HeightRoi != imgRef.HeightRoi ||
                imgTrack.WidthRoi != currentWidth || imgTrack.HeightRoi != currentHeight)
            {
                throw new ArgumentOutOfRangeException("imgTrack", "imgTrack and imgRef must have identical ROI sizes that match the current level size.");
            }

            // Only referenced by the (commented-out) debug dumps below; kept so they can be re-enabled as-is.
            int level = imgTrack.Width / imgTrack.WidthRoi;

            // Number of tiles processed by the per-tile kernels.
            int tileCount = CurrentBlockCountX * CurrentBlockCountY;

            convertToTilesBorder.RunSafe(imgRef, imgRefSortedTiles, currentTileSize, currentMaxShift, CurrentBlockCountX, CurrentBlockCountY, baseShiftRef, baseRotationRef); //template
            forward[i].Exec(imgRefSortedTiles.DevicePointer, imgRefCplx.DevicePointer);

            convertToTiles.RunSafe(imgTrack, imgToTrackSortedTiles, preShift, currentTileSize, currentMaxShift, CurrentBlockCountX, CurrentBlockCountY, baseShifttoTrack, baseRotationtoTrack); //image in paper

            //DumpFloat(imgToTrackSortedTiles, currentTileSize + 2* currentMaxShift, currentTileSize + 2 * currentMaxShift, CurrentBlockCountX * CurrentBlockCountY, tileIdx, "tilesTrack_" + level + "_" + debugCallCounter + ".bin");
            //DumpFloat(imgRefSortedTiles, currentTileSize + 2 * currentMaxShift, currentTileSize + 2 * currentMaxShift, CurrentBlockCountX * CurrentBlockCountY, tileIdx, "tilesRef_" + level + "_" + debugCallCounter + ".bin");

            forward[i].Exec(imgToTrackSortedTiles.DevicePointer, imgToTrackCplx.DevicePointer);

            conjKernel.RunSafe(imgRefCplx, imgToTrackCplx);

            backward[i].Exec(imgToTrackCplx.DevicePointer, imgCrossCorrelation.DevicePointer);
            imgCrossCorrelation.DivC(CurrentBlockSize * CurrentBlockSize);

            squaredSumKernel.RunSafe(imgRefSortedTiles, squaredSumsOfTiles, currentMaxShift, currentTileSize, tileCount);
            //DumpFloat(squaredSumsOfTiles, 1, 1, CurrentBlockCountX * CurrentBlockCountY, tileIdx, "squaredSums_" + level + "_" + debugCallCounter + ".bin");

            // Two-pass box filter: X pass writes into imgRefSortedTiles (used as scratch), Y pass writes back.
            boxFilterXKernel.RunSafe(imgToTrackSortedTiles, imgRefSortedTiles, currentMaxShift, currentTileSize, tileCount);
            boxFilterYKernel.RunSafe(imgRefSortedTiles, imgToTrackSortedTiles, currentMaxShift, currentTileSize, tileCount);
            //DumpFloat(imgToTrackSortedTiles, currentTileSize + 2 * currentMaxShift, currentTileSize + 2 * currentMaxShift, CurrentBlockCountX * CurrentBlockCountY, tileIdx, "boxFilter_" + level + "_" + debugCallCounter + ".bin");
            normalizedCCKernel.RunSafe(imgCrossCorrelation, squaredSumsOfTiles, imgToTrackSortedTiles, shiftImages, currentMaxShift, currentTileSize, tileCount);

            //DumpFloat(shiftImages, (2 * currentMaxShift + 1), (2 * currentMaxShift + 1), CurrentBlockCountX * CurrentBlockCountY, tileIdx, "tilesShift_" + level + "_" + debugCallCounter + ".bin");

            patchShift.SetRoi(0, 0, CurrentBlockCountX, CurrentBlockCountY);
            findMinimumKernel.RunSafe(shiftImages, patchShift, currentMaxShift, CurrentBlockCountX, CurrentBlockCountY, threshold);

            // View both two-channel shift images as single-channel images of twice the width so they can be
            // added element-wise. The views are built from existing device pointers; disposing them only
            // releases the managed wrappers.
            using (NPPImage_32fC1 preShiftFloat = new NPPImage_32fC1(preShift.DevicePointer, 2 * CurrentBlockCountX, CurrentBlockCountY, preShift.Pitch))
            using (NPPImage_32fC1 patchShiftFloat = new NPPImage_32fC1(patchShift.DevicePointer, 2 * CurrentBlockCountX, CurrentBlockCountY, patchShift.Pitch))
            {
                preShiftFloat.Add(patchShiftFloat);
            }

            debugCallCounter++;
        }