///*!
// * Map int to cudaOccCacheConfig
// */
//private static cudaOccCacheConfig cudaOccGetCacheConfig(cudaOccDeviceState state)
//{
//    switch(state.cacheConfig)
//    {
//        case 0: return cudaOccCacheConfig.PreferNone;
//        case 1: return cudaOccCacheConfig.PreferShared;
//        case 2: return cudaOccCacheConfig.PreferL1;
//        case 3: return cudaOccCacheConfig.PreferEqual;
//        default: return cudaOccCacheConfig.PreferNone;
//    }
//}

/*!
 * Shared memory per multiprocessor for the cache configuration requested by
 * the user.
 *
 * properties  - device properties; sharedMemPerMultiprocessor, major and
 *               minor are read.
 * cacheConfig - cache configuration requested by the user.
 *
 * Returns the selected shared memory size:
 *   - major 1 or 2: the low limit for PreferL1, otherwise the high limit;
 *   - major 3:      the low limit for PreferL1, the mid-point of high and low
 *                   for PreferEqual, otherwise the high limit;
 *   - any other major: the high limit.
 */
private static int cudaOccSMemPerMultiprocessor(cudaOccDeviceProp properties, cudaOccCacheConfig cacheConfig)
{
    // High limit: the value reported by the device properties.
    int upperLimit = (int)properties.sharedMemPerMultiprocessor;

    // Low limit: compute capability 3.7 uses its own minimum constant.
    int lowerLimit = (properties.major == 3 && properties.minor == 7)
        ? MIN_SHARED_MEM_PER_SM_GK210
        : MIN_SHARED_MEM_PER_SM;

    var major = properties.major;
    bool wantsL1 = cacheConfig == cudaOccCacheConfig.PreferL1;

    if (major == 1 || major == 2)
    {
        // Only PreferL1 selects the low limit on these devices.
        return wantsL1 ? lowerLimit : upperLimit;
    }

    if (major == 3)
    {
        if (wantsL1)
        {
            return lowerLimit;
        }

        if (cacheConfig == cudaOccCacheConfig.PreferEqual)
        {
            // Mid-point between the two limits.
            return (upperLimit + lowerLimit) / 2;
        }

        // PreferNone, PreferShared and any unrecognized value.
        return upperLimit;
    }

    // Major 5 and every other value fall back to the high limit.
    return upperLimit;
}
///*!
// * Map int to cudaOccCacheConfig
// */
//private static cudaOccCacheConfig cudaOccGetCacheConfig(cudaOccDeviceState state)
//{
//    switch(state.cacheConfig)
//    {
//        case 0: return cudaOccCacheConfig.PreferNone;
//        case 1: return cudaOccCacheConfig.PreferShared;
//        case 2: return cudaOccCacheConfig.PreferL1;
//        case 3: return cudaOccCacheConfig.PreferEqual;
//        default: return cudaOccCacheConfig.PreferNone;
//    }
//}

/*!
 * Shared memory per multiprocessor for the cache configuration requested by
 * the user.
 *
 * properties  - device properties; sharedMemPerMultiprocessor and
 *               computeMajor are read.
 * cacheConfig - cache configuration requested by the user.
 *
 * Returns the shared memory size selected for the device generation and the
 * requested configuration.
 *
 * Throws CudaOccupancyException(cudaOccError.ErrorUnknownDevice) when
 * properties.computeMajor is not 2, 3, 5 or 6.
 */
private static SizeT cudaOccSMemPerMultiprocessor(cudaOccDeviceProp properties, cudaOccCacheConfig cacheConfig)
{
    SizeT smemHigh = (int)properties.sharedMemPerMultiprocessor;

    // Fermi and Kepler share one pool between L1 cache and shared memory and
    // let the cache configuration trade one for the other. The smallest
    // shared memory size is the pool total minus the largest cache size.
    // These values are computed up front for every device generation.
    SizeT smallestCache = 16384;
    SizeT largestCache = 49152;
    SizeT pooledTotal = smemHigh + smallestCache;
    SizeT smemLow = pooledTotal - largestCache;

    switch (properties.computeMajor)
    {
        case 2:
            // Fermi supports 48KB / 16KB or 16KB / 48KB partitions for
            // shared / L1, so only PreferL1 selects the low size.
            return cacheConfig == cudaOccCacheConfig.PreferL1 ? smemLow : smemHigh;

        case 3:
            // Kepler supports 16KB, 32KB, or 48KB partitions for L1; the
            // rest is shared memory.
            if (cacheConfig == cudaOccCacheConfig.PreferL1)
            {
                return smemLow;
            }
            if (cacheConfig == cudaOccCacheConfig.PreferEqual)
            {
                // Mid-point between high and low; it should be equivalent
                // to low + 16KB.
                return (smemHigh + smemLow) / 2;
            }
            // PreferNone, PreferShared and any unrecognized value.
            return smemHigh;

        case 5:
        case 6:
            // Maxwell and Pascal have dedicated shared memory.
            return smemHigh;

        default:
            throw new CudaOccupancyException(cudaOccError.ErrorUnknownDevice);
    }
}
///*!
// * Map int to cudaOccCacheConfig
// */
//private static cudaOccCacheConfig cudaOccGetCacheConfig(cudaOccDeviceState state)
//{
//    switch(state.cacheConfig)
//    {
//        case 0: return cudaOccCacheConfig.PreferNone;
//        case 1: return cudaOccCacheConfig.PreferShared;
//        case 2: return cudaOccCacheConfig.PreferL1;
//        case 3: return cudaOccCacheConfig.PreferEqual;
//        default: return cudaOccCacheConfig.PreferNone;
//    }
//}

/*!
 * Shared memory per multiprocessor, chosen from the cache configuration
 * requested by the user.
 *
 * properties  - device properties; sharedMemPerMultiprocessor, major and
 *               minor are read.
 * cacheConfig - cache configuration requested by the user.
 *
 * Returns the high limit (the value from the device properties), the low
 * limit (a fixed minimum constant), or their mid-point, depending on the
 * major version and the requested configuration.
 */
private static int cudaOccSMemPerMultiprocessor(cudaOccDeviceProp properties, cudaOccCacheConfig cacheConfig)
{
    int smemHigh = (int)properties.sharedMemPerMultiprocessor;

    // Compute capability 3.7 has a dedicated minimum constant.
    int smemLow = (properties.major == 3 && properties.minor == 7)
        ? MIN_SHARED_MEM_PER_SM_GK210
        : MIN_SHARED_MEM_PER_SM;

    switch (properties.major)
    {
        case 1:
        case 2:
            // Only PreferL1 drops to the low limit here.
            return cacheConfig == cudaOccCacheConfig.PreferL1 ? smemLow : smemHigh;

        case 3:
            switch (cacheConfig)
            {
                case cudaOccCacheConfig.PreferL1:
                    return smemLow;

                case cudaOccCacheConfig.PreferEqual:
                    // Half-way between the two limits.
                    return (smemHigh + smemLow) / 2;

                default:
                    // PreferNone, PreferShared and any unrecognized value.
                    return smemHigh;
            }

        default:
            // Major 5 and every other value use the high limit.
            return smemHigh;
    }
}
///*!
// * Map int to cudaOccCacheConfig
// */
//private static cudaOccCacheConfig cudaOccGetCacheConfig(cudaOccDeviceState state)
//{
//    switch(state.cacheConfig)
//    {
//        case 0: return cudaOccCacheConfig.PreferNone;
//        case 1: return cudaOccCacheConfig.PreferShared;
//        case 2: return cudaOccCacheConfig.PreferL1;
//        case 3: return cudaOccCacheConfig.PreferEqual;
//        default: return cudaOccCacheConfig.PreferNone;
//    }
//}

/*!
 * Shared memory per multiprocessor, chosen from the cache configuration
 * requested by the user.
 *
 * properties  - device properties; sharedMemPerMultiprocessor and
 *               computeMajor are read.
 * cacheConfig - cache configuration requested by the user.
 *
 * Returns the shared memory size for the device generation identified by
 * properties.computeMajor under the requested configuration.
 *
 * Throws CudaOccupancyException(cudaOccError.ErrorUnknownDevice) for any
 * computeMajor other than 2, 3, 5 or 6.
 */
private static SizeT cudaOccSMemPerMultiprocessor(cudaOccDeviceProp properties, cudaOccCacheConfig cacheConfig)
{
    SizeT highBytes = (int)properties.sharedMemPerMultiprocessor;

    // Fermi and Kepler have a shared L1 cache / shared memory pool and
    // support cache configurations that trade one for the other. The low
    // shared memory size is the pool total minus the largest cache size.
    SizeT cacheFloor = 16384;
    SizeT cacheCeiling = 49152;
    SizeT poolBytes = highBytes + cacheFloor;
    SizeT lowBytes = poolBytes - cacheCeiling;

    SizeT selected;
    switch (properties.computeMajor)
    {
        case 2:
            // Fermi supports 48KB / 16KB or 16KB / 48KB partitions for
            // shared / L1; everything except PreferL1 takes the high size.
            if (cacheConfig == cudaOccCacheConfig.PreferL1)
            {
                selected = lowBytes;
            }
            else
            {
                selected = highBytes;
            }
            break;

        case 3:
            // Kepler supports 16KB, 32KB, or 48KB partitions for L1. The
            // rest is shared memory.
            if (cacheConfig == cudaOccCacheConfig.PreferL1)
            {
                selected = lowBytes;
            }
            else if (cacheConfig == cudaOccCacheConfig.PreferEqual)
            {
                // Equal is the mid-point between high and low. It should be
                // equivalent to low + 16KB.
                selected = (highBytes + lowBytes) / 2;
            }
            else
            {
                // PreferNone, PreferShared and any unrecognized value.
                selected = highBytes;
            }
            break;

        case 5:
        case 6:
            // Maxwell and Pascal have dedicated shared memory.
            selected = highBytes;
            break;

        default:
            throw new CudaOccupancyException(cudaOccError.ErrorUnknownDevice);
    }

    return selected;
}