/// <summary>
/// Creates the final layer and allocates the device buffers it owns.
/// </summary>
/// <param name="width">Layer width; forwarded to the base constructor and used for buffer sizing.</param>
/// <param name="height">Layer height; forwarded to the base constructor and used for buffer sizing.</param>
/// <param name="channels">Channel count; forwarded to the base constructor and used for buffer sizing.</param>
/// <param name="batch">Batch size; forwarded to the base constructor and used for buffer sizing.</param>
/// <param name="norm">Norm selection stored in <c>_norm</c>; decides whether the extra MS-SSIM buffer is allocated.</param>
/// <param name="ctx">CUDA context handed to the MSSSIML1 kernel wrapper.</param>
/// <param name="mod">CUDA module handed to the MSSSIML1 kernel wrapper.</param>
public FinalLayer(int width, int height, int channels, int batch, Norm norm, CudaContext ctx, CUmodule mod)
    : base(width, height, channels, 0, 0, 0, batch)
{
    // All full-size buffers hold one float per (x, y, channel, batch item).
    int elementCount = width * height * channels * batch;

    _groundTrouthData = new CudaDeviceVariable<float>(elementCount);
    _dx = new CudaDeviceVariable<float>(elementCount);
    _temp = new CudaDeviceVariable<float>(elementCount);
    _res = new CudaDeviceVariable<float>(elementCount);

    // Scratch buffer sized at 100 times what _temp reports for its sum operation.
    // NOTE(review): the reason for the factor 100 is not visible here - confirm.
    _buffer = new CudaDeviceVariable<byte>(_temp.SumGetBufferSize() * 100);

    // Single-element device variables.
    _summedError = new CudaDeviceVariable<float>(1);
    _mean = new CudaDeviceVariable<double>(1);

    _norm = norm;

    // The kernel wrapper is created regardless of the selected norm.
    _kernelMSSSIML1 = new MSSSIML1Kernel(mod, ctx);

    // Only the Mix and MSSSIM norms get the per-(channel, batch item) buffer.
    bool needsMsssimBuffer = _norm == Norm.Mix || _norm == Norm.MSSSIM;
    if (needsMsssimBuffer)
    {
        _msssiml1 = new CudaDeviceVariable<float>(channels * batch);
    }
}
/// <summary>
/// Builds the shift collection: stores the tracking configuration, enumerates the
/// shift pairs, allocates all device buffers, loads the kernels from
/// "ShiftMinimizerKernels.ptx", wires up the per-tile pointer arrays and finally
/// calls <c>Reset()</c>.
/// </summary>
/// <param name="aFrameCount">Number of frames; stored in <c>frameCount</c>.</param>
/// <param name="aMaxTileCountX">Maximum tile count in X; used for image and buffer sizing.</param>
/// <param name="aMaxTileCountY">Maximum tile count in Y; used for image and buffer sizing.</param>
/// <param name="aReferenceIndex">Reference index; stored in <c>referenceIndex</c>.</param>
/// <param name="aStrategy">Tracking strategy; stored in <c>strategy</c>.</param>
/// <param name="aBlockSize">Requested block size; replaced by <c>aFrameCount - 1</c> when it is not smaller than the frame count.</param>
/// <param name="ctx">CUDA context used to load the PTX module and create the kernel wrappers.</param>
/// <exception cref="Exception">Thrown when the number of generated shift pairs differs from <c>GetShiftCount()</c>.</exception>
public ShiftCollection(int aFrameCount, int aMaxTileCountX, int aMaxTileCountY, int aReferenceIndex, TrackingStrategy aStrategy, int aBlockSize, CudaContext ctx)
{
    // --- configuration ---------------------------------------------------
    strategy = aStrategy;
    referenceIndex = aReferenceIndex;
    frameCount = aFrameCount;

    // A block size that is not smaller than the frame count is replaced by frame count - 1.
    blockSize = aBlockSize >= aFrameCount ? aFrameCount - 1 : aBlockSize;

    // cuBLAS handle configured for device pointer mode with atomics allowed.
    blas = new CudaBlas(PointerMode.Device, AtomicsMode.Allowed);
    one = 1.0f;
    zero = 0.0f;

    // --- shift pair bookkeeping ------------------------------------------
    shiftPairs = new List<ShiftPair>();
    int shiftCount = GetShiftCount();
    FillShiftPairs();
    FillIndexTable();

    // Sanity check: the generated pair list must match the predicted count.
    if (shiftPairs.Count != shiftCount)
    {
        throw new Exception("Ooups, something went wrong with my math...");
    }

    // --- one two-channel float image per shift ---------------------------
    shifts = new List<NPPImage_32fC2>(shiftCount);
    int[] hostPitches = new int[shiftCount];
    CUdeviceptr[] devicePointers = new CUdeviceptr[shiftCount];
    for (int shiftIdx = 0; shiftIdx < shiftCount; shiftIdx++)
    {
        NPPImage_32fC2 shiftImage = new NPPImage_32fC2(aMaxTileCountX, aMaxTileCountY);
        shifts.Add(shiftImage);
        hostPitches[shiftIdx] = shiftImage.Pitch;
        devicePointers[shiftIdx] = shiftImage.DevicePointer;
    }
    shiftPitches = hostPitches;

    // --- element counts shared by the allocations below ------------------
    // The products are grouped exactly like the original left-to-right expressions.
    var tileCount = aMaxTileCountX * aMaxTileCountY;
    var framesMinusOne = frameCount - 1;
    var perShiftElems = tileCount * shiftCount;              // tiles * shifts
    var perFrameElems = tileCount * framesMinusOne;          // tiles * (frames - 1)
    var shiftMatrixElems = perShiftElems * framesMinusOne;   // tiles * shifts * (frames - 1)
    var squareMatrixElems = perFrameElems * framesMinusOne;  // tiles * (frames - 1)^2

    AllShifts_d = new CudaDeviceVariable<float2>(perShiftElems);
    shiftsOneToOne_d = new CudaDeviceVariable<float2>(perFrameElems);
    shifts_d = devicePointers;

    // --- per-tile status values ------------------------------------------
    status = new CudaDeviceVariable<int>(tileCount);
    infoInverse = new CudaDeviceVariable<int>(tileCount);

    // --- per-tile pointer arrays (filled by the setPointers kernel below) -
    shiftMatrixArray = new CudaDeviceVariable<CUdeviceptr>(tileCount);
    shiftMatrixSafeArray = new CudaDeviceVariable<CUdeviceptr>(tileCount);
    matrixSquareArray = new CudaDeviceVariable<CUdeviceptr>(tileCount);
    matrixInvertedArray = new CudaDeviceVariable<CUdeviceptr>(tileCount);
    solvedMatrixArray = new CudaDeviceVariable<CUdeviceptr>(tileCount);
    shiftOneToOneArray = new CudaDeviceVariable<CUdeviceptr>(tileCount);
    shiftMeasuredArray = new CudaDeviceVariable<CUdeviceptr>(tileCount);
    shiftOptimArray = new CudaDeviceVariable<CUdeviceptr>(tileCount);

    // --- backing storage the pointer arrays refer to ---------------------
    shiftMatrices = new CudaDeviceVariable<float>(shiftMatrixElems);
    shiftSafeMatrices = new CudaDeviceVariable<float>(shiftMatrixElems);
    matricesSquared = new CudaDeviceVariable<float>(squareMatrixElems);
    matricesInverted = new CudaDeviceVariable<float>(squareMatrixElems);
    solvedMatrices = new CudaDeviceVariable<float>(shiftMatrixElems);
    shiftsOneToOne = new CudaDeviceVariable<float2>(perFrameElems);
    pivotArray = new CudaDeviceVariable<int>(perFrameElems);
    shiftsMeasured = new CudaDeviceVariable<float2>(perShiftElems);
    shiftsOptim = new CudaDeviceVariable<float2>(perShiftElems);

    // Scratch buffer sized by what the status variable reports for its sum operation.
    buffer = new CudaDeviceVariable<byte>(status.SumGetBufferSize());
    statusSum = new CudaDeviceVariable<int>(1);

    // --- kernels ---------------------------------------------------------
    CUmodule kernelModule = ctx.LoadModulePTX("ShiftMinimizerKernels.ptx");
    concatenateShifts = new concatenateShiftsKernel(ctx, kernelModule);
    separateShifts = new separateShiftsKernel(ctx, kernelModule);
    getOptimalShifts = new getOptimalShiftsKernel(ctx, kernelModule);
    copyShiftMatrixKernel = new copyShiftMatrixKernel(ctx, kernelModule);
    setPointers = new setPointersKernel(ctx, kernelModule);
    checkForOutliers = new checkForOutliersKernel(ctx, kernelModule);
    transposeShifts = new transposeShiftsKernel(ctx, kernelModule);

    // Hand the pointer arrays and their backing buffers to the setPointers kernel.
    setPointers.RunSafe(
        shiftMatrixArray, shiftMatrixSafeArray, matrixSquareArray, matrixInvertedArray,
        solvedMatrixArray, shiftOneToOneArray, shiftMeasuredArray, shiftOptimArray,
        shiftMatrices, shiftSafeMatrices, matricesSquared, matricesInverted,
        solvedMatrices, shiftsOneToOne, shiftsMeasured, shiftsOptim,
        tileCount, frameCount, shiftCount);

    Reset();
}