Ejemplo n.º 1
0
        ///*!
        // * Map int to cudaOccCacheConfig
        // */
        //private static cudaOccCacheConfig cudaOccGetCacheConfig(cudaOccDeviceState state)
        //{
        //    switch(state.cacheConfig)
        //    {
        //        case 0:  return cudaOccCacheConfig.PreferNone;
        //        case 1:  return cudaOccCacheConfig.PreferShared;
        //        case 2:  return cudaOccCacheConfig.PreferL1;
        //        case 3:  return cudaOccCacheConfig.PreferEqual;
        //        default: return cudaOccCacheConfig.PreferNone;
        //    }
        //}

        /*!
         * Shared memory based on config requested by User
         */
        private static int cudaOccSMemPerMultiprocessor(cudaOccDeviceProp properties, cudaOccCacheConfig cacheConfig)
        {
            // Returns the per-multiprocessor shared memory size, in the same unit as
            // properties.sharedMemPerMultiprocessor, that applies for the requested
            // cache configuration on the given device.

            // Largest amount: whatever the device properties report.
            int smemMax = (int)properties.sharedMemPerMultiprocessor;

            // Smallest amount: version 3.7 devices use the GK210-specific constant,
            // every other device uses the generic minimum.
            int smemMin;
            if (properties.major == 3 && properties.minor == 7)
            {
                smemMin = MIN_SHARED_MEM_PER_SM_GK210;
            }
            else
            {
                smemMin = MIN_SHARED_MEM_PER_SM;
            }

            // Major versions 1 and 2: only PreferL1 lowers the amount.
            if (properties.major == 1 || properties.major == 2)
            {
                return (cacheConfig == cudaOccCacheConfig.PreferL1) ? smemMin : smemMax;
            }

            // Major version 3: PreferL1 -> minimum, PreferEqual -> mid-point,
            // anything else (PreferNone, PreferShared, unknown values) -> maximum.
            if (properties.major == 3)
            {
                if (cacheConfig == cudaOccCacheConfig.PreferL1)
                {
                    return smemMin;
                }

                if (cacheConfig == cudaOccCacheConfig.PreferEqual)
                {
                    return (smemMax + smemMin) / 2;
                }

                return smemMax;
            }

            // Every other major version ignores the cache configuration.
            return smemMax;
        }
Ejemplo n.º 2
0
        ///*!
        // * Map int to cudaOccCacheConfig
        // */
        //private static cudaOccCacheConfig cudaOccGetCacheConfig(cudaOccDeviceState state)
        //{
        //    switch(state.cacheConfig)
        //    {
        //        case 0:  return cudaOccCacheConfig.PreferNone;
        //        case 1:  return cudaOccCacheConfig.PreferShared;
        //        case 2:  return cudaOccCacheConfig.PreferL1;
        //        case 3:  return cudaOccCacheConfig.PreferEqual;
        //        default: return cudaOccCacheConfig.PreferNone;
        //    }
        //}

        /*!
         * Shared memory based on config requested by User
         */
        private static SizeT cudaOccSMemPerMultiprocessor(cudaOccDeviceProp properties, cudaOccCacheConfig cacheConfig)
        {
            // Returns the per-multiprocessor shared memory size that applies for the
            // requested cache configuration; throws CudaOccupancyException with
            // cudaOccError.ErrorUnknownDevice for an unhandled compute major version.

            // Largest shared memory amount, as reported by the device properties.
            SizeT largestShared = (int)properties.sharedMemPerMultiprocessor;

            // On Fermi and Kepler, L1 cache and shared memory come out of one pool
            // and the cache configuration trades one for the other. The smallest
            // shared memory amount is therefore the pool size minus the largest
            // L1 cache size.
            SizeT smallestL1 = 16384;
            SizeT largestL1  = 49152;
            SizeT pooledTotal = largestShared + smallestL1;
            SizeT smallestShared = pooledTotal - largestL1;

            switch (properties.computeMajor)
            {
                case 2:
                    // Fermi: shared / L1 is either 48KB / 16KB or 16KB / 48KB, so
                    // only PreferL1 selects the small shared memory amount.
                    return (cacheConfig == cudaOccCacheConfig.PreferL1)
                        ? smallestShared
                        : largestShared;

                case 3:
                    // Kepler: L1 can be 16KB, 32KB or 48KB; the remainder is shared
                    // memory.
                    if (cacheConfig == cudaOccCacheConfig.PreferL1)
                    {
                        return smallestShared;
                    }

                    if (cacheConfig == cudaOccCacheConfig.PreferEqual)
                    {
                        // Mid-point between the two extremes; should equal the
                        // small amount plus 16KB.
                        return (largestShared + smallestShared) / 2;
                    }

                    return largestShared;

                case 5:
                case 6:
                    // Maxwell and Pascal: shared memory is dedicated, so the cache
                    // configuration has no influence.
                    return largestShared;

                default:
                    throw new CudaOccupancyException(cudaOccError.ErrorUnknownDevice);
            }
        }
Ejemplo n.º 3
0
		///*!
		// * Map int to cudaOccCacheConfig
		// */
		//private static cudaOccCacheConfig cudaOccGetCacheConfig(cudaOccDeviceState state)
		//{
		//    switch(state.cacheConfig)
		//    {
		//        case 0:  return cudaOccCacheConfig.PreferNone;
		//        case 1:  return cudaOccCacheConfig.PreferShared;
		//        case 2:  return cudaOccCacheConfig.PreferL1;
		//        case 3:  return cudaOccCacheConfig.PreferEqual;
		//        default: return cudaOccCacheConfig.PreferNone;
		//    }
		//}

		/*!
		 * Shared memory based on config requested by User
		 */
		private static int cudaOccSMemPerMultiprocessor(cudaOccDeviceProp properties, cudaOccCacheConfig cacheConfig)
		{
			// Picks the per-multiprocessor shared memory size matching the requested
			// cache configuration, based on the device's major version.

			// Upper limit comes straight from the device properties.
			int upperLimit = (int) properties.sharedMemPerMultiprocessor;

			// Lower limit is a constant; version 3.7 has its own (GK210) value.
			bool usesGk210Minimum = (properties.major == 3) && (properties.minor == 7);
			int lowerLimit = usesGk210Minimum ? MIN_SHARED_MEM_PER_SM_GK210 : MIN_SHARED_MEM_PER_SM;

			bool wantsL1 = (cacheConfig == cudaOccCacheConfig.PreferL1);

			switch (properties.major)
			{
				case 1:
				case 2:
					// Two-way choice: PreferL1 gives the lower limit, everything
					// else the upper limit.
					return wantsL1 ? lowerLimit : upperLimit;

				case 3:
					// Three-way choice: lower limit, mid-point, or upper limit.
					if (wantsL1)
					{
						return lowerLimit;
					}
					if (cacheConfig == cudaOccCacheConfig.PreferEqual)
					{
						return (upperLimit + lowerLimit) / 2;
					}
					return upperLimit;

				default:
					// Major version 5 and any other value: the cache configuration
					// is not taken into account.
					return upperLimit;
			}
		}
Ejemplo n.º 4
0
        ///*!
        // * Map int to cudaOccCacheConfig
        // */
        //private static cudaOccCacheConfig cudaOccGetCacheConfig(cudaOccDeviceState state)
        //{
        //    switch(state.cacheConfig)
        //    {
        //        case 0:  return cudaOccCacheConfig.PreferNone;
        //        case 1:  return cudaOccCacheConfig.PreferShared;
        //        case 2:  return cudaOccCacheConfig.PreferL1;
        //        case 3:  return cudaOccCacheConfig.PreferEqual;
        //        default: return cudaOccCacheConfig.PreferNone;
        //    }
        //}
        /*!
         * Shared memory based on config requested by User
         */
        private static SizeT cudaOccSMemPerMultiprocessor(cudaOccDeviceProp properties, cudaOccCacheConfig cacheConfig)
        {
            // Resolves the shared memory available per multiprocessor for the cache
            // configuration the user asked for. Unhandled compute major versions
            // raise CudaOccupancyException(cudaOccError.ErrorUnknownDevice).

            SizeT highBytes = (int)properties.sharedMemPerMultiprocessor;

            // Fermi and Kepler split a single pool between L1 cache and shared
            // memory. The pool size is the reported shared memory plus the minimum
            // cache size; taking the maximum cache size out of it leaves the
            // minimum shared memory size.
            SizeT cacheFloor = 16384;
            SizeT cacheCeiling = 49152;
            SizeT poolBytes = highBytes + cacheFloor;
            SizeT lowBytes = poolBytes - cacheCeiling;

            bool preferL1 = (cacheConfig == cudaOccCacheConfig.PreferL1);
            bool preferEqual = (cacheConfig == cudaOccCacheConfig.PreferEqual);

            if (properties.computeMajor == 2)
            {
                // Fermi offers only the 48KB / 16KB and 16KB / 48KB shared / L1
                // splits, so PreferEqual is treated like PreferShared.
                return preferL1 ? lowBytes : highBytes;
            }
            else if (properties.computeMajor == 3)
            {
                // Kepler offers 16KB, 32KB or 48KB of L1; the rest is shared memory.
                if (preferL1)
                {
                    return lowBytes;
                }

                // PreferEqual is the mid-point between high and low, which should
                // be equivalent to low + 16KB.
                return preferEqual ? (highBytes + lowBytes) / 2 : highBytes;
            }
            else if (properties.computeMajor == 5 || properties.computeMajor == 6)
            {
                // Maxwell and Pascal have dedicated shared memory.
                return highBytes;
            }
            else
            {
                throw new CudaOccupancyException(cudaOccError.ErrorUnknownDevice);
            }
        }