/// <summary>
/// Initializes the grid, group and per-multiprocessor thread limits.
/// </summary>
private void InitGridInfo()
{
    // Always reserve room for at least three dimensions, since the first
    // three entries of the size array are read below.
    int reportedDimensions = CurrentAPI.GetDeviceInfo<int>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS);
    int dimensionCount = IntrinsicMath.Max(reportedDimensions, 3);

    // OpenCL does not report maximum grid sizes. The MaxGridSize value is
    // consistent with the CPU accelerator and the values returned by CUDA
    // accelerators. MaxGridSize is ultimately constrained by system and device
    // memory and by how each kernel manages memory.
    MaxGridSize = new Index3D(
        int.MaxValue,
        ushort.MaxValue,
        ushort.MaxValue);

    // Maximum number of threads per group
    IntPtr groupSizeLimit = CurrentAPI.GetDeviceInfo<IntPtr>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_WORK_GROUP_SIZE);
    MaxNumThreadsPerGroup = groupSizeLimit.ToInt32();

    // Maximum work-item sizes per dimension
    var perDimensionLimits = new IntPtr[dimensionCount];
    CurrentAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_WORK_ITEM_SIZES,
        perDimensionLimits);
    int limitX = perDimensionLimits[0].ToInt32();
    int limitY = perDimensionLimits[1].ToInt32();
    int limitZ = perDimensionLimits[2].ToInt32();
    MaxGroupSize = new Index3D(limitX, limitY, limitZ);

    // The per-multiprocessor limit starts out equal to the per-group limit
    MaxNumThreadsPerMultiprocessor = MaxNumThreadsPerGroup;
}
/// <summary>
/// Determines whether the generic address space capability is available and
/// stores the result in <c>Capabilities.GenericAddressSpace</c>.
/// </summary>
private void InitGenericAddressSpaceSupport()
{
    // Device versions below CL20 are marked as unsupported
    if (DeviceVersion < CLDeviceVersion.CL20)
    {
        Capabilities.GenericAddressSpace = false;
        return;
    }

    // Device versions from CL20 up to (excluding) CL30 are marked as supported
    if (DeviceVersion < CLDeviceVersion.CL30)
    {
        Capabilities.GenericAddressSpace = true;
        return;
    }

    // Otherwise the device is queried; a failing query counts as unsupported
    try
    {
        int supportFlag = CurrentAPI.GetDeviceInfo<int>(
            DeviceId,
            CLDeviceInfoType.CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT);
        Capabilities.GenericAddressSpace = supportFlag != 0;
    }
    catch (CLException)
    {
        Capabilities.GenericAddressSpace = false;
    }
}
/// <summary>
/// Detects OpenCL devices.
/// </summary>
/// <param name="predicate">
/// The predicate to include a given device.
/// </param>
/// <param name="registry">The registry to add all devices to.</param>
/// <remarks>
/// Returns without registering anything if the API is not supported or the
/// platforms cannot be resolved. Platforms whose devices cannot be queried,
/// null device handles and unavailable devices are skipped.
/// </remarks>
private static void GetDevicesInternal(
    Predicate<CLDevice> predicate,
    DeviceRegistry registry)
{
    var devices = new IntPtr[MaxNumDevicesPerPlatform];

    // Resolve all platforms
    if (!CurrentAPI.IsSupported ||
        CurrentAPI.GetNumPlatforms(out int numPlatforms) !=
        CLError.CL_SUCCESS ||
        numPlatforms < 1)
    {
        return;
    }

    var platforms = new IntPtr[numPlatforms];
    if (CurrentAPI.GetPlatforms(platforms, ref numPlatforms) !=
        CLError.CL_SUCCESS)
    {
        return;
    }

    foreach (var platform in platforms)
    {
        // Resolve all devices
        int numDevices = devices.Length;
        Array.Clear(devices, 0, numDevices);
        if (CurrentAPI.GetDevices(
            platform,
            CLDeviceType.CL_DEVICE_TYPE_ALL,
            devices,
            out numDevices) != CLError.CL_SUCCESS)
        {
            continue;
        }

        // The reported device count may exceed the capacity of the buffer;
        // never read past the entries that were actually provided.
        int numUsableDevices = Math.Min(numDevices, devices.Length);
        for (int i = 0; i < numUsableDevices; ++i)
        {
            // Resolve device and ignore invalid devices
            var device = devices[i];
            if (device == IntPtr.Zero)
            {
                continue;
            }

            // Check for available device
            if (CurrentAPI.GetDeviceInfo<int>(
                device,
                CLDeviceInfoType.CL_DEVICE_AVAILABLE) == 0)
            {
                continue;
            }

            var desc = new CLDevice(platform, device);
            registry.Register(desc, predicate);
        }
    }
}
/// <summary>
/// Initializes the general device properties: name, type, version, clock rate
/// and number of multiprocessors.
/// </summary>
private void InitDeviceInfo()
{
    // Device name and type
    Name = CurrentAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_NAME);
    long rawDeviceType = CurrentAPI.GetDeviceInfo<long>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_TYPE);
    DeviceType = (CLDeviceType)rawDeviceType;

    // Device version; falls back to CL10 if the string cannot be parsed
    var versionText = CurrentAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_VERSION);
    if (!CLDeviceVersion.TryParse(versionText, out var parsedVersion))
    {
        parsedVersion = CLDeviceVersion.CL10;
    }
    DeviceVersion = parsedVersion;

    // Clock rate
    ClockRate = CurrentAPI.GetDeviceInfo<int>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_CLOCK_FREQUENCY);

    // Number of multiprocessors (compute units)
    NumMultiprocessors = CurrentAPI.GetDeviceInfo<int>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_COMPUTE_UNITS);
}
/// <summary>
/// Initializes the grid, group and per-multiprocessor thread limits from the
/// values reported by the device.
/// </summary>
private void InitGridInfo()
{
    // Always reserve room for at least three dimensions, since the first
    // three entries of the size array are read below.
    int reportedDimensions = CurrentAPI.GetDeviceInfo<int>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS);
    int dimensionCount = IntrinsicMath.Max(reportedDimensions, 3);

    // The max grid size is taken from the reported max work-item sizes
    var perDimensionLimits = new IntPtr[dimensionCount];
    CurrentAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_WORK_ITEM_SIZES,
        perDimensionLimits);
    int limitX = perDimensionLimits[0].ToInt32();
    int limitY = perDimensionLimits[1].ToInt32();
    int limitZ = perDimensionLimits[2].ToInt32();
    MaxGridSize = new Index3D(limitX, limitY, limitZ);

    // Maximum number of threads per group
    IntPtr groupSizeLimit = CurrentAPI.GetDeviceInfo<IntPtr>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_WORK_GROUP_SIZE);
    MaxNumThreadsPerGroup = groupSizeLimit.ToInt32();

    // The same per-group limit is used for every group dimension
    MaxGroupSize = new Index3D(
        MaxNumThreadsPerGroup,
        MaxNumThreadsPerGroup,
        MaxNumThreadsPerGroup);

    // The per-multiprocessor limit starts out equal to the per-group limit
    MaxNumThreadsPerMultiprocessor = MaxNumThreadsPerGroup;
}
/// <summary>
/// Init general OpenCL extensions.
/// </summary>
/// <remarks>
/// The space-separated extension string reported by the device is lower-cased
/// and each name is added to the extension set, which is then published via
/// <c>Extensions</c>.
/// </remarks>
private void InitExtensions()
{
    // Resolve extensions
    var extensionString = CurrentAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_EXTENSIONS);

    // Extension names are identifiers, not linguistic text: lower-case them
    // culture-invariantly so the result does not depend on the current culture
    // (e.g. the Turkish dotless 'i'). Empty entries caused by trailing or
    // repeated spaces are dropped instead of being stored as "" extensions.
    var extensionNames = extensionString
        .ToLowerInvariant()
        .Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    foreach (var extension in extensionNames)
    {
        extensionSet.Add(extension);
    }

    Extensions = extensionSet.ToImmutableArray();
}
/// <summary>
/// Initializes the supported OpenCL C language version.
/// </summary>
private void InitCInfo()
{
    // Query the OpenCL C version string reported by the device
    var versionText = CurrentAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_OPENCL_C_VERSION);

    // Fall back to CL10 if the version string cannot be parsed
    CVersion = CLCVersion.TryParse(versionText, out CLCVersion parsedVersion)
        ? parsedVersion
        : CLCVersion.CL10;
}
/// <summary>
/// Initializes the vendor name, the detected vendor and the warp size.
/// </summary>
private void InitVendorAndWarpSizeInfo()
{
    VendorName = CurrentAPI.GetPlatformInfo(
        PlatformId,
        CLPlatformInfoType.CL_PLATFORM_VENDOR);

    // A successful NVIDIA-specific warp size query identifies an Nvidia device
    if (CurrentAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_WARP_SIZE_NV,
        out int nvidiaWarpSize) == CLError.CL_SUCCESS)
    {
        WarpSize = nvidiaWarpSize;
        Vendor = CLDeviceVendor.Nvidia;

        int capabilityMajor = CurrentAPI.GetDeviceInfo<int>(
            DeviceId,
            CLDeviceInfoType.CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV);
        int capabilityMinor = CurrentAPI.GetDeviceInfo<int>(
            DeviceId,
            CLDeviceInfoType.CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV);

        // Devices below compute capability 7.5 get twice the current
        // per-multiprocessor thread limit
        bool belowCapability75 =
            capabilityMajor < 7 ||
            (capabilityMajor == 7 && capabilityMinor < 5);
        if (belowCapability75)
        {
            MaxNumThreadsPerMultiprocessor *= 2;
        }
        return;
    }

    // A successful AMD-specific wavefront query identifies an AMD device
    if (CurrentAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_WAVEFRONT_WIDTH_AMD,
        out int amdWavefrontWidth) == CLError.CL_SUCCESS)
    {
        WarpSize = amdWavefrontWidth;
        Vendor = CLDeviceVendor.AMD;
        return;
    }

    // Neither vendor query succeeded: classify by the vendor name
    if (VendorName.Contains(CLDeviceVendor.Intel.ToString()))
    {
        Vendor = CLDeviceVendor.Intel;
    }
    else
    {
        Vendor = CLDeviceVendor.Other;
    }

    // The warp size cannot be resolved at this point
    WarpSize = 0;
}
/// <summary>
/// Init memory information.
/// </summary>
/// <remarks>
/// Values that are stored as <see cref="int"/> are clamped to
/// <see cref="int.MaxValue"/> before narrowing, so a large reported value
/// can never wrap around into a negative size.
/// </remarks>
private void InitMemoryInfo()
{
    // Resolve memory size
    MemorySize = CurrentAPI.GetDeviceInfo<long>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_GLOBAL_MEM_SIZE);

    // Resolve max shared memory per block
    MaxSharedMemoryPerGroup = (int)IntrinsicMath.Min(
        CurrentAPI.GetDeviceInfo<long>(
            DeviceId,
            CLDeviceInfoType.CL_DEVICE_LOCAL_MEM_SIZE),
        int.MaxValue);

    // Resolve total constant memory (clamped like the shared memory size
    // above instead of using an unchecked narrowing cast)
    MaxConstantMemory = (int)IntrinsicMath.Min(
        CurrentAPI.GetDeviceInfo<long>(
            DeviceId,
            CLDeviceInfoType.CL_DEVICE_MAX_PARAMETER_SIZE),
        int.MaxValue);
}
/// <summary>
/// Constructs a new OpenCL accelerator reference.
/// </summary>
/// <param name="platformId">The OpenCL platform id.</param>
/// <param name="deviceId">The OpenCL device id.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown if <paramref name="platformId"/> or <paramref name="deviceId"/> is
/// <see cref="IntPtr.Zero"/>.
/// </exception>
public CLAcceleratorId(IntPtr platformId, IntPtr deviceId)
    : base(AcceleratorType.OpenCL)
{
    if (platformId == IntPtr.Zero)
    {
        throw new ArgumentOutOfRangeException(nameof(platformId));
    }
    if (deviceId == IntPtr.Zero)
    {
        throw new ArgumentOutOfRangeException(nameof(deviceId));
    }

    PlatformId = platformId;
    DeviceId = deviceId;

    DeviceType = (CLDeviceType)CurrentAPI.GetDeviceInfo<long>(
        deviceId,
        CLDeviceInfoType.CL_DEVICE_TYPE);

    // Resolve extensions. Extension names are identifiers, not linguistic
    // text: lower-case them culture-invariantly so the result does not depend
    // on the current culture (e.g. the Turkish dotless 'i'). Empty entries
    // caused by trailing or repeated spaces are dropped instead of being
    // stored as "" extensions.
    var extensionString = CurrentAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_EXTENSIONS);
    extensionSet = new HashSet<string>(
        extensionString
            .ToLowerInvariant()
            .Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries));
    Extensions = extensionSet.ToImmutableArray();

    // Determine the supported OpenCL C version (CL10 if it cannot be parsed)
    var clVersionString = CurrentAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_OPENCL_C_VERSION);
    if (!CLCVersion.TryParse(clVersionString, out CLCVersion version))
    {
        version = CLCVersion.CL10;
    }
    CVersion = version;

    // Resolve extension method
    getKernelSubGroupInfo = CurrentAPI.GetExtension<clGetKernelSubGroupInfoKHR>(
        platformId);
}
/// <summary>
/// Detects the device vendor and resolves the warp size.
/// </summary>
/// <remarks>
/// Nvidia and AMD devices are recognized by vendor-specific device queries.
/// For all other devices a dummy kernel is compiled and its preferred
/// work-group size multiple is used as warp size.
/// </remarks>
private void InitVendorFeatures()
{
    // Check major vendor features
    if (CurrentAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_WARP_SIZE_NV,
        out int warpSize) == CLError.CL_SUCCESS)
    {
        // Nvidia platform
        WarpSize = warpSize;
        Vendor = CLAcceleratorVendor.Nvidia;

        int major = CurrentAPI.GetDeviceInfo<int>(
            DeviceId,
            CLDeviceInfoType.CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV);
        int minor = CurrentAPI.GetDeviceInfo<int>(
            DeviceId,
            CLDeviceInfoType.CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV);

        // Devices below compute capability 7.5 get twice the current
        // per-multiprocessor thread limit
        if (major < 7 || (major == 7 && minor < 5))
        {
            MaxNumThreadsPerMultiprocessor *= 2;
        }
    }
    else if (CurrentAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_WAVEFRONT_WIDTH_AMD,
        out int wavefrontSize) == CLError.CL_SUCCESS)
    {
        // AMD platform
        WarpSize = wavefrontSize;
        Vendor = CLAcceleratorVendor.AMD;
    }
    else
    {
        Vendor = VendorName.Contains(CLAcceleratorVendor.Intel.ToString())
            ? CLAcceleratorVendor.Intel
            : CLAcceleratorVendor.Other;

        // Compile dummy kernel to resolve additional information
        CLException.ThrowIfFailed(CLKernel.LoadKernel(
            this,
            DummyKernelSource,
            CVersion,
            out IntPtr programPtr,
            out IntPtr kernelPtr,
            out var _));
        try
        {
            // Resolve information
            WarpSize = CurrentAPI.GetKernelWorkGroupInfo<IntPtr>(
                kernelPtr,
                DeviceId,
                CLKernelWorkGroupInfoType
                    .CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE).ToInt32();
        }
        finally
        {
            // Release both handles before checking either status code, so a
            // failing kernel release cannot leak the program handle.
            var kernelStatus = CurrentAPI.ReleaseKernel(kernelPtr);
            var programStatus = CurrentAPI.ReleaseProgram(programPtr);
            CLException.ThrowIfFailed(kernelStatus);
            CLException.ThrowIfFailed(programStatus);
        }
    }
}
/// <summary>
/// Constructs a new OpenCL accelerator.
/// </summary>
/// <param name="context">The ILGPU context.</param>
/// <param name="acceleratorId">The accelerator id.</param>
/// <exception cref="ArgumentNullException">
/// Thrown if <paramref name="acceleratorId"/> is null.
/// </exception>
public CLAccelerator(Context context, CLAcceleratorId acceleratorId)
    : base(context, AcceleratorType.OpenCL)
{
    if (acceleratorId == null)
    {
        throw new ArgumentNullException(nameof(acceleratorId));
    }

    PlatformId = acceleratorId.PlatformId;
    DeviceId = acceleratorId.DeviceId;
    CVersion = acceleratorId.CVersion;

    PlatformName = CurrentAPI.GetPlatformInfo(
        PlatformId,
        CLPlatformInfoType.CL_PLATFORM_NAME);
    VendorName = CurrentAPI.GetPlatformInfo(
        PlatformId,
        CLPlatformInfoType.CL_PLATFORM_VENDOR);

    // Create new context
    CLException.ThrowIfFailed(
        CurrentAPI.CreateContext(DeviceId, out contextPtr));

    // Resolve device info
    Name = CurrentAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_NAME);

    MemorySize = CurrentAPI.GetDeviceInfo<long>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_GLOBAL_MEM_SIZE);

    DeviceType = (CLDeviceType)CurrentAPI.GetDeviceInfo<long>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_TYPE);

    // Max grid size. At least three entries are always allocated because the
    // first three work-item sizes are read below.
    int workItemDimensions = IntrinsicMath.Max(CurrentAPI.GetDeviceInfo<int>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS), 3);
    var workItemSizes = new IntPtr[workItemDimensions];
    CurrentAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_WORK_ITEM_SIZES,
        workItemSizes);
    MaxGridSize = new Index3(
        workItemSizes[0].ToInt32(),
        workItemSizes[1].ToInt32(),
        workItemSizes[2].ToInt32());

    // Resolve max threads per group
    MaxNumThreadsPerGroup = CurrentAPI.GetDeviceInfo<IntPtr>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_WORK_GROUP_SIZE).ToInt32();

    // Resolve max shared memory per block
    MaxSharedMemoryPerGroup = (int)IntrinsicMath.Min(
        CurrentAPI.GetDeviceInfo<long>(
            DeviceId,
            CLDeviceInfoType.CL_DEVICE_LOCAL_MEM_SIZE),
        int.MaxValue);

    // Resolve total constant memory (clamped like the shared memory size
    // above instead of using an unchecked narrowing cast, so a large reported
    // value cannot wrap around into a negative size)
    MaxConstantMemory = (int)IntrinsicMath.Min(
        CurrentAPI.GetDeviceInfo<long>(
            DeviceId,
            CLDeviceInfoType.CL_DEVICE_MAX_PARAMETER_SIZE),
        int.MaxValue);

    // Resolve clock rate
    ClockRate = CurrentAPI.GetDeviceInfo<int>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_CLOCK_FREQUENCY);

    // Resolve number of multiprocessors
    NumMultiprocessors = CurrentAPI.GetDeviceInfo<int>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_COMPUTE_UNITS);

    // Result max number of threads per multiprocessor
    // (InitVendorFeatures may adjust this value afterwards)
    MaxNumThreadsPerMultiprocessor = MaxNumThreadsPerGroup;

    InitVendorFeatures();
    InitSubGroupSupport(acceleratorId);

    // Bind this accelerator, create its default stream and initialize the
    // backend for the detected vendor
    Bind();
    DefaultStream = CreateStreamInternal();
    Init(new CLBackend(Context, Vendor));
}
/// <summary>
/// Detects all available OpenCL devices once and publishes them via
/// <c>AllCLAccelerators</c> (every available device) and
/// <c>CLAccelerators</c> (devices whose OpenCL C version is at least
/// <c>CLBackend.MinimumVersion</c>).
/// </summary>
/// <remarks>
/// Detection is best effort: any exception stops the enumeration, and the
/// accelerators found up to that point are still published.
/// </remarks>
static CLAccelerator()
{
    var accelerators = ImmutableArray.CreateBuilder<CLAcceleratorId>();
    var allAccelerators = ImmutableArray.CreateBuilder<CLAcceleratorId>();
    var devices = new IntPtr[MaxNumDevicesPerPlatform];

    try
    {
        // Resolve all platforms
        if (!CurrentAPI.IsSupported ||
            CurrentAPI.GetNumPlatforms(out int numPlatforms) !=
            CLError.CL_SUCCESS ||
            numPlatforms < 1)
        {
            return;
        }

        var platforms = new IntPtr[numPlatforms];
        if (CurrentAPI.GetPlatforms(platforms, out numPlatforms) !=
            CLError.CL_SUCCESS)
        {
            return;
        }

        foreach (var platform in platforms)
        {
            // Resolve all devices
            int numDevices = devices.Length;
            Array.Clear(devices, 0, numDevices);
            if (CurrentAPI.GetDevices(
                platform,
                CLDeviceType.CL_DEVICE_TYPE_ALL,
                devices,
                out numDevices) != CLError.CL_SUCCESS)
            {
                continue;
            }

            // The reported device count may exceed the capacity of the
            // buffer. Without this clamp the resulting out-of-range access
            // would be swallowed below and silently skip all remaining
            // platforms.
            int numUsableDevices = Math.Min(numDevices, devices.Length);
            for (int i = 0; i < numUsableDevices; ++i)
            {
                // Resolve device and ignore invalid devices
                var device = devices[i];
                if (device == IntPtr.Zero)
                {
                    continue;
                }

                // Check for available device
                if (CurrentAPI.GetDeviceInfo<int>(
                    device,
                    CLDeviceInfoType.CL_DEVICE_AVAILABLE) == 0)
                {
                    continue;
                }

                var acceleratorId = new CLAcceleratorId(platform, device);
                allAccelerators.Add(acceleratorId);
                if (acceleratorId.CVersion >= CLBackend.MinimumVersion)
                {
                    accelerators.Add(acceleratorId);
                }
            }
        }
    }
    catch (Exception)
    {
        // Ignore API-specific exceptions at this point. This is deliberate:
        // a static constructor must not throw, and a missing or broken OpenCL
        // installation should simply yield no (further) accelerators.
    }
    finally
    {
        CLAccelerators = accelerators.ToImmutable();
        AllCLAccelerators = allAccelerators.ToImmutable();
    }
}
/// <summary>
/// Queries a piece of device information for this device and returns it as a
/// value of type <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">The unmanaged type of the queried value.</typeparam>
/// <param name="type">The kind of device information to query.</param>
/// <returns>The queried value.</returns>
public T GetDeviceInfo<T>(CLDeviceInfoType type)
    where T : unmanaged
{
    // Forward to the current API using the id of this device
    return CurrentAPI.GetDeviceInfo<T>(DeviceId, type);
}
/// <summary>
/// Queries a piece of device information for this device and stores it in
/// <paramref name="value"/> as a value of type <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">The unmanaged type of the queried value.</typeparam>
/// <param name="type">The kind of device information to query.</param>
/// <param name="value">Receives the queried value.</param>
/// <returns>The error code reported by the API.</returns>
public CLError GetDeviceInfo<T>(CLDeviceInfoType type, out T value)
    where T : unmanaged
{
    // Forward to the current API using the id of this device
    return CurrentAPI.GetDeviceInfo(DeviceId, type, out value);
}