Ejemplo n.º 1
0
        ///////////////////////////////////////////////
        //    Occupancy calculation Functions        //
        ///////////////////////////////////////////////

        /// <summary>
        /// Determine the maximum number of CTAs that can be run simultaneously per SM.<para/>
        /// This is equivalent to the calculation done in the CUDA Occupancy Calculator
        /// spreadsheet.<para/>
        /// Convenience overload: converts the managed device properties and kernel into
        /// their occupancy descriptions and forwards to the overload taking those directly.
        /// </summary>
        /// <param name="properties">Device properties used to build the <c>cudaOccDeviceProp</c> description.</param>
        /// <param name="kernel">Kernel supplying the function attributes, the block dimensions
        /// (x * y * z is used as block size) and the dynamic shared memory size.</param>
        /// <param name="state">Device state, forwarded unchanged to the underlying overload.</param>
        /// <returns>The occupancy result produced by the underlying overload.</returns>
        public static cudaOccResult cudaOccMaxActiveBlocksPerMultiprocessor(
            CudaDeviceProperties properties,
            CudaKernel kernel,
            cudaOccDeviceState state)
        {
            cudaOccDeviceProp deviceLimits = new cudaOccDeviceProp(properties);
            cudaOccFuncAttributes kernelAttributes = new cudaOccFuncAttributes(kernel);

            // Flatten the three block dimensions into one thread count per block.
            int threadsPerBlock = (int)kernel.BlockDimensions.x
                                * (int)kernel.BlockDimensions.y
                                * (int)kernel.BlockDimensions.z;

            return cudaOccMaxActiveBlocksPerMultiprocessor(
                deviceLimits,
                kernelAttributes,
                threadsPerBlock,
                kernel.DynamicSharedMemory,
                state);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Determine the potential block size that allows the maximum number of CTAs to run
        /// on a multiprocessor simultaneously.<para/>
        /// Convenience overload: converts the managed device properties and kernel into
        /// their occupancy descriptions and forwards to the overload taking those directly.
        /// </summary>
        /// <param name="properties">Device properties used to build the <c>cudaOccDeviceProp</c> description.</param>
        /// <param name="kernel">Kernel used to build the <c>cudaOccFuncAttributes</c> description.</param>
        /// <param name="state">Device state, forwarded unchanged to the underlying overload.</param>
        /// <param name="blockSizeToSMem">
        /// A function to convert from block size to dynamic shared memory size.<para/>
        /// e.g.:
        /// If no dynamic shared memory is used: x => 0<para/>
        /// If 4 bytes shared memory per thread is used: x => 4 * x</param>
        /// <returns>maxBlockSize</returns>
        public static int cudaOccMaxPotentialOccupancyBlockSize(
            CudaDeviceProperties properties,
            CudaKernel kernel,
            cudaOccDeviceState state,
            del_blockSizeToDynamicSMemSize blockSizeToSMem)
        {
            cudaOccDeviceProp deviceLimits = new cudaOccDeviceProp(properties);
            cudaOccFuncAttributes kernelAttributes = new cudaOccFuncAttributes(kernel);

            // The actual search is done by the overload working on the occupancy descriptions.
            int maxBlockSize = cudaOccMaxPotentialOccupancyBlockSize(
                deviceLimits,
                kernelAttributes,
                state,
                blockSizeToSMem);

            return maxBlockSize;
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Initializes the occupancy device description by copying the relevant values
 /// from a <c>CudaDeviceProperties</c> instance.
 /// </summary>
 /// <param name="props">Device properties to copy the values from.</param>
 public cudaOccDeviceProp(CudaDeviceProperties props)
 {
     // Compute capability and warp width
     this.major    = props.ComputeCapabilityMajor;
     this.minor    = props.ComputeCapabilityMinor;
     this.warpSize = props.WarpSize;

     // Thread limits
     this.maxThreadsPerBlock          = props.MaxThreadsPerBlock;
     this.maxThreadsPerMultiProcessor = props.MaxThreadsPerMultiProcessor;

     // Register limits
     this.regsPerBlock          = props.RegistersPerBlock;
     this.regsPerMultiprocessor = props.MaxRegistersPerMultiprocessor;

     // Shared memory limits
     this.sharedMemPerBlock          = props.SharedMemoryPerBlock;
     this.sharedMemPerMultiprocessor = props.MaxSharedMemoryPerMultiprocessor;
 }
Ejemplo n.º 4
0
			/// <summary>
			/// Initializes the occupancy device description by copying the relevant values
			/// from a <c>CudaDeviceProperties</c> instance.
			/// </summary>
			/// <param name="props">Device properties to copy the values from.</param>
			public cudaOccDeviceProp(CudaDeviceProperties props)
			{
				// Compute capability and warp width
				this.major = props.ComputeCapabilityMajor;
				this.minor = props.ComputeCapabilityMinor;
				this.warpSize = props.WarpSize;

				// Thread limits
				this.maxThreadsPerBlock = props.MaxThreadsPerBlock;
				this.maxThreadsPerMultiProcessor = props.MaxThreadsPerMultiProcessor;

				// Register limits
				this.regsPerBlock = props.RegistersPerBlock;
				this.regsPerMultiprocessor = props.MaxRegistersPerMultiprocessor;

				// Shared memory limits
				this.sharedMemPerBlock = props.SharedMemoryPerBlock;
				this.sharedMemPerMultiprocessor = props.MaxSharedMemoryPerMultiprocessor;
			}
Ejemplo n.º 5
0
		/// <summary>
		/// Determine the potential block size that allows the maximum number of CTAs to run
		/// on a multiprocessor simultaneously.<para/>
		/// Convenience overload: converts the managed device properties and kernel into
		/// their occupancy descriptions and forwards to the overload taking those directly.
		/// </summary>
		/// <param name="properties">Device properties used to build the <c>cudaOccDeviceProp</c> description.</param>
		/// <param name="kernel">Kernel used to build the <c>cudaOccFuncAttributes</c> description.</param>
		/// <param name="state">Device state, forwarded unchanged to the underlying overload.</param>
		/// <param name="blockSizeToSMem">
		/// A function to convert from block size to dynamic shared memory size.<para/>
		/// e.g.:
		/// If no dynamic shared memory is used: x => 0<para/>
		/// If 4 bytes shared memory per thread is used: x => 4 * x</param>
		/// <returns>maxBlockSize</returns>
		public static int cudaOccMaxPotentialOccupancyBlockSize(
			CudaDeviceProperties properties,
			CudaKernel kernel,
			cudaOccDeviceState state,
			del_blockSizeToDynamicSMemSize blockSizeToSMem)
		{
			cudaOccDeviceProp deviceLimits = new cudaOccDeviceProp(properties);
			cudaOccFuncAttributes kernelAttributes = new cudaOccFuncAttributes(kernel);

			// The actual search is done by the overload working on the occupancy descriptions.
			int maxBlockSize = cudaOccMaxPotentialOccupancyBlockSize(
				deviceLimits,
				kernelAttributes,
				state,
				blockSizeToSMem);

			return maxBlockSize;
		}
Ejemplo n.º 6
0
		///////////////////////////////////////////////
		//    Occupancy calculation Functions        //
		///////////////////////////////////////////////

		/// <summary>
		/// Determine the maximum number of CTAs that can be run simultaneously per SM.<para/>
		/// This is equivalent to the calculation done in the CUDA Occupancy Calculator
		/// spreadsheet.<para/>
		/// Convenience overload: converts the managed device properties and kernel into
		/// their occupancy descriptions and forwards to the overload taking those directly.
		/// </summary>
		/// <param name="properties">Device properties used to build the <c>cudaOccDeviceProp</c> description.</param>
		/// <param name="kernel">Kernel supplying the function attributes, the block dimensions
		/// (x * y * z is used as block size) and the dynamic shared memory size.</param>
		/// <param name="state">Device state, forwarded unchanged to the underlying overload.</param>
		/// <returns>The occupancy result produced by the underlying overload.</returns>
		public static cudaOccResult cudaOccMaxActiveBlocksPerMultiprocessor(
			CudaDeviceProperties properties,
			CudaKernel kernel,
			cudaOccDeviceState state)
		{
			cudaOccDeviceProp deviceLimits = new cudaOccDeviceProp(properties);
			cudaOccFuncAttributes kernelAttributes = new cudaOccFuncAttributes(kernel);

			// Flatten the three block dimensions into one thread count per block.
			int threadsPerBlock = (int)kernel.BlockDimensions.x
								* (int)kernel.BlockDimensions.y
								* (int)kernel.BlockDimensions.z;

			return cudaOccMaxActiveBlocksPerMultiprocessor(
				deviceLimits,
				kernelAttributes,
				threadsPerBlock,
				kernel.DynamicSharedMemory,
				state);
		}