示例#1
0
        /// <summary>
        /// Builds the final layer: forwards the geometry to the base layer and
        /// creates the CudaDeviceVariable work buffers used by this layer.
        /// </summary>
        /// <param name="width">Layer width; forwarded to the base constructor.</param>
        /// <param name="height">Layer height; forwarded to the base constructor.</param>
        /// <param name="channels">Channel count; forwarded to the base constructor.</param>
        /// <param name="batch">Batch size; forwarded to the base constructor.</param>
        /// <param name="norm">Norm selection stored in <c>_norm</c>; decides whether <c>_msssiml1</c> is allocated.</param>
        /// <param name="ctx">CUDA context handed to the MSSSIML1 kernel wrapper.</param>
        /// <param name="mod">CUDA module handed to the MSSSIML1 kernel wrapper.</param>
        public FinalLayer(int width, int height, int channels, int batch, Norm norm, CudaContext ctx, CUmodule mod)
            : base(width, height, channels, 0, 0, 0, batch)
        {
            // Every full-size buffer holds width * height * channels * batch floats.
            int elementCount = width * height * channels * batch;

            _groundTrouthData = new CudaDeviceVariable<float>(elementCount);
            _dx               = new CudaDeviceVariable<float>(elementCount);
            _temp             = new CudaDeviceVariable<float>(elementCount);
            _res              = new CudaDeviceVariable<float>(elementCount);

            // Scratch space: 100 times the size reported by SumGetBufferSize() for _temp.
            var scratchSize = _temp.SumGetBufferSize() * 100;
            _buffer = new CudaDeviceVariable<byte>(scratchSize);

            // Single-element result holders.
            _summedError = new CudaDeviceVariable<float>(1);
            _mean        = new CudaDeviceVariable<double>(1);

            _norm           = norm;
            _kernelMSSSIML1 = new MSSSIML1Kernel(mod, ctx);

            // The per-channel/per-batch buffer is only created for the Mix and MSSSIM norms.
            bool needsMsSsimBuffer = _norm == Norm.Mix || _norm == Norm.MSSSIM;
            if (needsMsSsimBuffer)
            {
                _msssiml1 = new CudaDeviceVariable<float>(channels * batch);
            }
        }
        /// <summary>
        /// Sets up a shift collection: stores the tracking configuration, builds the
        /// shift-pair list and index table, allocates all per-tile work buffers,
        /// loads the kernels from "ShiftMinimizerKernels.ptx" and finally calls
        /// <c>Reset()</c>.
        /// </summary>
        /// <param name="aFrameCount">Number of frames; stored in <c>frameCount</c>.</param>
        /// <param name="aMaxTileCountX">Tile count in X used to size the per-tile buffers.</param>
        /// <param name="aMaxTileCountY">Tile count in Y used to size the per-tile buffers.</param>
        /// <param name="aReferenceIndex">Stored in <c>referenceIndex</c>.</param>
        /// <param name="aStrategy">Stored in <c>strategy</c>.</param>
        /// <param name="aBlockSize">Requested block size; replaced by <c>aFrameCount - 1</c> when it is not smaller than <paramref name="aFrameCount"/>.</param>
        /// <param name="ctx">CUDA context used to load the PTX module and create the kernel wrappers.</param>
        /// <exception cref="Exception">Thrown when the number of generated shift pairs differs from <c>GetShiftCount()</c>.</exception>
        public ShiftCollection(int aFrameCount, int aMaxTileCountX, int aMaxTileCountY, int aReferenceIndex, TrackingStrategy aStrategy, int aBlockSize, CudaContext ctx)
        {
            // --- configuration -------------------------------------------------
            strategy       = aStrategy;
            referenceIndex = aReferenceIndex;
            frameCount     = aFrameCount;
            // A block size that is not smaller than the frame count is reduced to aFrameCount - 1.
            blockSize      = aBlockSize >= aFrameCount ? aFrameCount - 1 : aBlockSize;

            blas = new CudaBlas(PointerMode.Device, AtomicsMode.Allowed);
            one  = 1.0f;
            zero = 0.0f;

            // --- shift pairs ---------------------------------------------------
            shiftPairs = new List<ShiftPair>();
            int shiftCount = GetShiftCount();

            FillShiftPairs();
            FillIndexTable();

            // Sanity check: the generated pair list must match the computed count.
            if (shiftPairs.Count != shiftCount)
            {
                throw new Exception("Ooups, something went wrong with my math...");
            }

            // --- one shift image per pair ----------------------------------------
            shifts = new List<NPPImage_32fC2>(shiftCount);

            int[]         hostPitches    = new int[shiftCount];
            CUdeviceptr[] devicePointers = new CUdeviceptr[shiftCount];
            for (int pairIdx = 0; pairIdx < shiftCount; pairIdx++)
            {
                NPPImage_32fC2 image = new NPPImage_32fC2(aMaxTileCountX, aMaxTileCountY);
                shifts.Add(image);
                hostPitches[pairIdx]    = image.Pitch;
                devicePointers[pairIdx] = image.DevicePointer;
            }

            // Sizes shared by the buffers below.
            int tileCount      = aMaxTileCountX * aMaxTileCountY;
            int framesMinusOne = frameCount - 1;

            shiftPitches     = hostPitches;
            AllShifts_d      = new CudaDeviceVariable<float2>(tileCount * shiftCount);
            shiftsOneToOne_d = new CudaDeviceVariable<float2>(tileCount * framesMinusOne);
            shifts_d         = devicePointers;

            // --- per-tile status values and pointer arrays -----------------------
            status               = new CudaDeviceVariable<int>(tileCount);
            infoInverse          = new CudaDeviceVariable<int>(tileCount);
            shiftMatrixArray     = new CudaDeviceVariable<CUdeviceptr>(tileCount);
            shiftMatrixSafeArray = new CudaDeviceVariable<CUdeviceptr>(tileCount);
            matrixSquareArray    = new CudaDeviceVariable<CUdeviceptr>(tileCount);
            matrixInvertedArray  = new CudaDeviceVariable<CUdeviceptr>(tileCount);
            solvedMatrixArray    = new CudaDeviceVariable<CUdeviceptr>(tileCount);
            shiftOneToOneArray   = new CudaDeviceVariable<CUdeviceptr>(tileCount);
            shiftMeasuredArray   = new CudaDeviceVariable<CUdeviceptr>(tileCount);
            shiftOptimArray      = new CudaDeviceVariable<CUdeviceptr>(tileCount);

            // --- per-tile matrices and vectors -----------------------------------
            shiftMatrices     = new CudaDeviceVariable<float>(tileCount * shiftCount * framesMinusOne);
            shiftSafeMatrices = new CudaDeviceVariable<float>(tileCount * shiftCount * framesMinusOne);
            matricesSquared   = new CudaDeviceVariable<float>(tileCount * framesMinusOne * framesMinusOne);
            matricesInverted  = new CudaDeviceVariable<float>(tileCount * framesMinusOne * framesMinusOne);
            solvedMatrices    = new CudaDeviceVariable<float>(tileCount * shiftCount * framesMinusOne);
            shiftsOneToOne    = new CudaDeviceVariable<float2>(tileCount * framesMinusOne);
            pivotArray        = new CudaDeviceVariable<int>(tileCount * framesMinusOne);
            shiftsMeasured    = new CudaDeviceVariable<float2>(tileCount * shiftCount);
            shiftsOptim       = new CudaDeviceVariable<float2>(tileCount * shiftCount);
            // Scratch buffer sized by SumGetBufferSize() of the status buffer.
            buffer            = new CudaDeviceVariable<byte>(status.SumGetBufferSize());
            statusSum         = new CudaDeviceVariable<int>(1);

            // --- kernels ---------------------------------------------------------
            CUmodule kernelModule = ctx.LoadModulePTX("ShiftMinimizerKernels.ptx");

            concatenateShifts     = new concatenateShiftsKernel(ctx, kernelModule);
            separateShifts        = new separateShiftsKernel(ctx, kernelModule);
            getOptimalShifts      = new getOptimalShiftsKernel(ctx, kernelModule);
            copyShiftMatrixKernel = new copyShiftMatrixKernel(ctx, kernelModule);
            setPointers           = new setPointersKernel(ctx, kernelModule);
            checkForOutliers      = new checkForOutliersKernel(ctx, kernelModule);
            transposeShifts       = new transposeShiftsKernel(ctx, kernelModule);

            // Hand the pointer arrays and their backing buffers to the setPointers kernel
            // (presumably it fills each per-tile pointer array -- confirm in the kernel source).
            setPointers.RunSafe(
                shiftMatrixArray, shiftMatrixSafeArray, matrixSquareArray, matrixInvertedArray, solvedMatrixArray,
                shiftOneToOneArray, shiftMeasuredArray, shiftOptimArray,
                shiftMatrices, shiftSafeMatrices, matricesSquared, matricesInverted, solvedMatrices,
                shiftsOneToOne, shiftsMeasured, shiftsOptim,
                tileCount, frameCount, shiftCount);

            Reset();
        }