/// <summary>
/// Binds this accelerator and populates its device properties, cache
/// configuration, architecture and instruction set before initializing
/// the PTX backend.
/// </summary>
private void SetupAccelerator()
{
    Bind();

    // Resolve the device name
    CudaException.ThrowIfFailed(
        CurrentAPI.GetDeviceName(out string deviceName, DeviceId));
    Name = deviceName;

    // The default stream wraps the zero stream handle
    DefaultStream = new CudaStream(this, IntPtr.Zero);

    // Resolve the total amount of device memory
    CudaException.ThrowIfFailed(
        CurrentAPI.GetTotalDeviceMemory(out long totalMemory, DeviceId));
    MemorySize = totalMemory;

    // Resolve max grid size (queried in X, Y, Z order)
    MaxGridSize = new Index3(
        QueryAttribute(DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X),
        QueryAttribute(DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y),
        QueryAttribute(DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z));

    // Resolve max group size (queried in X, Y, Z order)
    MaxGroupSize = new Index3(
        QueryAttribute(DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X),
        QueryAttribute(DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y),
        QueryAttribute(DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z));

    // Resolve max threads per group
    MaxNumThreadsPerGroup = QueryAttribute(
        DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK);

    // Resolve max shared memory per block
    MaxSharedMemoryPerGroup = QueryAttribute(
        DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK);

    // Resolve total constant memory
    MaxConstantMemory = QueryAttribute(
        DeviceAttribute.CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY);

    // Resolve clock rate (stored exactly as reported by the driver)
    ClockRate = QueryAttribute(
        DeviceAttribute.CU_DEVICE_ATTRIBUTE_CLOCK_RATE);

    // Resolve warp size
    WarpSize = QueryAttribute(
        DeviceAttribute.CU_DEVICE_ATTRIBUTE_WARP_SIZE);

    // Resolve number of multiprocessors
    NumMultiprocessors = QueryAttribute(
        DeviceAttribute.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT);

    // Resolve max number of threads per multiprocessor
    MaxNumThreadsPerMultiprocessor = QueryAttribute(
        DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR);

    // Resolve shared memory and cache configuration
    CudaException.ThrowIfFailed(
        CurrentAPI.GetSharedMemoryConfig(out sharedMemoryConfiguration));
    CudaException.ThrowIfFailed(
        CurrentAPI.GetCacheConfig(out cacheConfiguration));

    // Map the compute capability to a PTX architecture
    CudaException.ThrowIfFailed(
        CurrentAPI.GetDeviceComputeCapability(
            out int ccMajor,
            out int ccMinor,
            DeviceId));
    Architecture = PTXArchitectureUtils.GetArchitecture(ccMajor, ccMinor);

    // The instruction set is derived from architecture and driver version
    CudaException.ThrowIfFailed(
        CurrentAPI.GetDriverVersion(out var installedDriver));
    InstructionSet = GetInstructionSet(Architecture, installedDriver);

    // Setup the backend
    var backend = new PTXBackend(
        Context,
        Architecture,
        InstructionSet);
    Init(backend);

    // Reads a single attribute of the device identified by DeviceId.
    int QueryAttribute(DeviceAttribute attribute) =>
        CurrentAPI.GetDeviceAttribute(attribute, DeviceId);
}
/// <summary>
/// Binds this accelerator and populates its device properties, cache
/// configuration, architecture, driver version, instruction set and
/// capabilities before initializing the PTX backend.
/// </summary>
private void SetupAccelerator()
{
    Bind();

    // Resolve the device name
    CudaException.ThrowIfFailed(
        CurrentAPI.GetDeviceName(out string deviceName, DeviceId));
    Name = deviceName;

    // The default stream wraps the zero stream handle
    DefaultStream = new CudaStream(this, IntPtr.Zero, false);

    // Resolve the total amount of device memory
    CudaException.ThrowIfFailed(
        CurrentAPI.GetTotalDeviceMemory(out long totalMemory, DeviceId));
    MemorySize = totalMemory;

    // Resolve max grid size (queried in X, Y, Z order)
    MaxGridSize = new Index3(
        QueryAttribute(DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X),
        QueryAttribute(DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y),
        QueryAttribute(DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z));

    // Resolve max group size (queried in X, Y, Z order)
    MaxGroupSize = new Index3(
        QueryAttribute(DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X),
        QueryAttribute(DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y),
        QueryAttribute(DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z));

    // Resolve max threads per group
    MaxNumThreadsPerGroup = QueryAttribute(
        DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK);

    // Resolve max shared memory per block
    MaxSharedMemoryPerGroup = QueryAttribute(
        DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK);

    // Resolve total constant memory
    MaxConstantMemory = QueryAttribute(
        DeviceAttribute.CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY);

    // Resolve clock rate; the raw attribute value is divided by 1000
    // NOTE(review): CUDA documents this attribute in kHz, so the stored
    // value is presumably MHz - confirm against the property docs
    ClockRate = QueryAttribute(
        DeviceAttribute.CU_DEVICE_ATTRIBUTE_CLOCK_RATE) / 1000;

    // Resolve memory clock rate (same scaling as the clock rate)
    MemoryClockRate = QueryAttribute(
        DeviceAttribute.CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE) / 1000;

    // Resolve the memory bus width
    MemoryBusWidth = QueryAttribute(
        DeviceAttribute.CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH);

    // Resolve warp size
    WarpSize = QueryAttribute(
        DeviceAttribute.CU_DEVICE_ATTRIBUTE_WARP_SIZE);

    // Resolve number of multiprocessors
    NumMultiprocessors = QueryAttribute(
        DeviceAttribute.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT);

    // Resolve max number of threads per multiprocessor
    MaxNumThreadsPerMultiprocessor = QueryAttribute(
        DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR);

    // Resolve the L2 cache size
    L2CacheSize = QueryAttribute(
        DeviceAttribute.CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE);

    // Resolve the maximum amount of shared memory per multiprocessor
    MaxSharedMemoryPerMultiprocessor = QueryAttribute(
        DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR);

    // Resolve the total number of registers per multiprocessor
    TotalNumRegistersPerMultiprocessor = QueryAttribute(
        DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR);

    // Resolve the total number of registers per group
    TotalNumRegistersPerGroup = QueryAttribute(
        DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK);

    // Resolve the max memory pitch
    MaxMemoryPitch = QueryAttribute(
        DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_PITCH);

    // Resolve the number of concurrent copy engines
    NumConcurrentCopyEngines = QueryAttribute(
        DeviceAttribute.CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT);

    // Flag-like attributes: any non-zero value is treated as true

    // Resolve whether this device has ECC support
    HasECCSupport = QueryAttribute(
        DeviceAttribute.CU_DEVICE_ATTRIBUTE_ECC_ENABLED) != 0;

    // Resolve whether this device supports managed memory
    SupportsManagedMemory = QueryAttribute(
        DeviceAttribute.CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY) != 0;

    // Resolve whether this device supports compute preemption
    SupportsComputePreemption = QueryAttribute(
        DeviceAttribute.CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED) != 0;

    // Resolve the current driver mode (the raw TCC driver attribute value
    // is cast to DeviceDriverMode)
    DriverMode = (DeviceDriverMode)QueryAttribute(
        DeviceAttribute.CU_DEVICE_ATTRIBUTE_TCC_DRIVER);

    // Resolve the PCI domain id
    PCIDomainId = QueryAttribute(
        DeviceAttribute.CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID);

    // Resolve the PCI bus id
    PCIBusId = QueryAttribute(
        DeviceAttribute.CU_DEVICE_ATTRIBUTE_PCI_BUS_ID);

    // Resolve the PCI device id
    PCIDeviceId = QueryAttribute(
        DeviceAttribute.CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID);

    // Resolve shared memory and cache configuration
    CudaException.ThrowIfFailed(
        CurrentAPI.GetSharedMemoryConfig(out sharedMemoryConfiguration));
    CudaException.ThrowIfFailed(
        CurrentAPI.GetCacheConfig(out cacheConfiguration));

    // Map the compute capability to a PTX architecture
    CudaException.ThrowIfFailed(
        CurrentAPI.GetDeviceComputeCapability(
            out int ccMajor,
            out int ccMinor,
            DeviceId));
    Architecture = PTXArchitectureUtils.GetArchitecture(ccMajor, ccMinor);

    // The instruction set is derived from architecture and driver version
    CudaException.ThrowIfFailed(
        CurrentAPI.GetDriverVersion(out var installedDriver));
    DriverVersion = installedDriver;
    InstructionSet = GetInstructionSet(Architecture, installedDriver);

    // Capabilities must be assigned before the backend is created because
    // the backend constructor receives them
    base.Capabilities = new CudaCapabilityContext(Architecture);
    var backend = new PTXBackend(
        Context,
        Capabilities,
        Architecture,
        InstructionSet);
    Init(backend);

    // Reads a single attribute of the device identified by DeviceId.
    int QueryAttribute(DeviceAttribute attribute) =>
        CurrentAPI.GetDeviceAttribute(attribute, DeviceId);
}