////////////////////////////////////////////////////////////////////////////////
// Potential occupancy calculator
//
// The potential occupancy is calculated according to the kernel and
// execution configuration the user desires. Occupancy is defined in
// terms of active blocks per multiprocessor, and the user can convert
// it to other metrics.
//
// This wrapper routine computes the occupancy of the kernel and reports
// it as the ratio of active warps to maximum warps per SM.
//
// Parameters:
//   blockSize   - number of threads per block in the launch configuration
//   dynamicSMem - dynamic shared memory per block, forwarded unchanged to
//                 the occupancy calculator
//
// Returns:
//   activeWarps / maxWarps as a double. Both warp counts are computed with
//   integer division, so partial warps are truncated.
////////////////////////////////////////////////////////////////////////////////
static double reportPotentialOccupancy(int blockSize, SizeT dynamicSMem)
{
    // NOTE(review): the constructor argument 0 is presumably the device
    // ordinal - confirm against the CudaOccupancy API.
    CudaOccupancy.cudaOccDeviceProp prop = new CudaOccupancy.cudaOccDeviceProp(0);
    CudaOccupancy.cudaOccResult result = new CudaOccupancy.cudaOccResult();

    // `kernel` is not declared in this routine; it is expected to be
    // provided by the enclosing scope.
    CudaOccupancy.cudaOccFuncAttributes attributes = new CudaOccupancy.cudaOccFuncAttributes(kernel);

    // Run the calculation with no cache-configuration preference.
    CudaOccupancy.cudaOccDeviceState state = new CudaOccupancy.cudaOccDeviceState();
    state.cacheConfig = CudaOccupancy.cudaOccCacheConfig.PreferNone;

    // The calculator writes its answer into `result`.
    CudaOccupancy.cudaOccMaxActiveBlocksPerMultiprocessor(result, prop, attributes, state, blockSize, dynamicSMem);

    // Convert active blocks per multiprocessor into warps so the value can
    // be compared against the per-SM warp limit.
    int numBlocks = result.ActiveBlocksPerMultiProcessor;
    int activeWarps = numBlocks * blockSize / prop.warpSize;
    int maxWarps = prop.maxThreadsPerMultiProcessor / prop.warpSize;

    // Cast before dividing so the ratio is not truncated to an integer.
    return (double)activeWarps / maxWarps;
}