Define our own cudaOccFuncAttributes to stay consistent with the original header file.
Example no. 1
0
        ////////////////////////////////////////////////////////////////////////////////
        // Potential occupancy calculator
        //
        // The potential occupancy is calculated according to the kernel and
        // execution configuration the user desires. The occupancy calculator
        // reports it as active blocks per multiprocessor; this wrapper converts
        // that figure to active warps / maximum warps per multiprocessor.
        //
        // Parameters:
        //   blockSize   - number of threads per block in the launch configuration
        //                 being evaluated.
        //   dynamicSMem - dynamic shared memory size forwarded unchanged to the
        //                 occupancy calculator (presumably bytes per block -
        //                 TODO confirm against the CudaOccupancy API).
        //
        // Returns:
        //   activeWarps / maxWarps as a double.
        //
        // Notes:
        //   - The kernel being analysed is `kernel`, which is defined outside
        //     this method.
        //   - Device properties are constructed with the argument 0 (presumably
        //     the device ordinal - TODO confirm).
        //   - The cache configuration used for the calculation is PreferNone.
        ////////////////////////////////////////////////////////////////////////////////
        static double reportPotentialOccupancy(int blockSize, SizeT dynamicSMem)
        {
            // Inputs for the occupancy calculator: device limits, an output
            // holder, the kernel's attributes and the device state.
            CudaOccupancy.cudaOccDeviceProp prop = new CudaOccupancy.cudaOccDeviceProp(0);

            CudaOccupancy.cudaOccResult result = new CudaOccupancy.cudaOccResult();
            CudaOccupancy.cudaOccFuncAttributes attributes = new CudaOccupancy.cudaOccFuncAttributes(kernel);
            CudaOccupancy.cudaOccDeviceState state = new CudaOccupancy.cudaOccDeviceState();
            state.cacheConfig = CudaOccupancy.cudaOccCacheConfig.PreferNone;

            // The calculator writes its answer into `result`.
            CudaOccupancy.cudaOccMaxActiveBlocksPerMultiprocessor(result, prop, attributes, state, blockSize, dynamicSMem);

            int numBlocks = result.ActiveBlocksPerMultiProcessor;

            // Both warp counts are stored as int, so any fractional warp
            // (blockSize not a multiple of the warp size) is truncated.
            int activeWarps = numBlocks * blockSize / prop.warpSize;
            int maxWarps = prop.maxThreadsPerMultiProcessor / prop.warpSize;

            // Promote to double before dividing so the ratio is not truncated.
            return (double)activeWarps / maxWarps;
        }