/// <summary> /// Setups all required settings. /// </summary> private void SetupAccelerator() { Bind(); CudaException.ThrowIfFailed( CurrentAPI.GetDeviceName(out string name, DeviceId)); Name = name; DefaultStream = new CudaStream(this, IntPtr.Zero); CudaException.ThrowIfFailed( CurrentAPI.GetDeviceComputeCapability(out int major, out int minor, DeviceId)); Architecture = PTXBackend.GetArchitecture(major, minor); CudaException.ThrowIfFailed( CurrentAPI.GetTotalDeviceMemory(out long total, DeviceId)); MemorySize = total; // Resolve max grid size MaxGridSize = new Index3( CurrentAPI.GetDeviceAttribute( DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, DeviceId), CurrentAPI.GetDeviceAttribute( DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y, DeviceId), CurrentAPI.GetDeviceAttribute( DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, DeviceId)); // Resolve max group size MaxGroupSize = new Index3( CurrentAPI.GetDeviceAttribute( DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, DeviceId), CurrentAPI.GetDeviceAttribute( DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, DeviceId), CurrentAPI.GetDeviceAttribute( DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, DeviceId)); // Resolve max threads per group MaxNumThreadsPerGroup = CurrentAPI.GetDeviceAttribute( DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, DeviceId); // Resolve max shared memory per block MaxSharedMemoryPerGroup = CurrentAPI.GetDeviceAttribute( DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, DeviceId); // Resolve total constant memory MaxConstantMemory = CurrentAPI.GetDeviceAttribute( DeviceAttribute.CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, DeviceId); // Resolve clock rate ClockRate = CurrentAPI.GetDeviceAttribute( DeviceAttribute.CU_DEVICE_ATTRIBUTE_CLOCK_RATE, DeviceId); // Resolve warp size WarpSize = CurrentAPI.GetDeviceAttribute( DeviceAttribute.CU_DEVICE_ATTRIBUTE_WARP_SIZE, DeviceId); // Resolve number of multiprocessors NumMultiprocessors = CurrentAPI.GetDeviceAttribute( DeviceAttribute.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, DeviceId); // Result max number of threads per multiprocessor MaxNumThreadsPerMultiprocessor = CurrentAPI.GetDeviceAttribute( DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, DeviceId); // Resolve cache configuration CudaException.ThrowIfFailed( CurrentAPI.GetSharedMemoryConfig(out sharedMemoryConfiguration)); CudaException.ThrowIfFailed( CurrentAPI.GetCacheConfig(out cacheConfiguration)); }