/// <summary>
/// Determines whether sub groups can be used on the current device and
/// updates the accelerator capabilities (and the warp size) accordingly.
/// </summary>
/// <param name="acceleratorId">The current accelerator id.</param>
private void InitSubGroupSupport(CLAcceleratorId acceleratorId)
{
    // First indicator: one of the known sub-group extensions must be present
    Capabilities.SubGroups = acceleratorId.HasAnyExtension(SubGroupExtensions);
    if (!Capabilities.SubGroups)
    {
        return;
    }

    // Second indicator: try to build a tiny probe kernel. If it cannot be
    // loaded there is nothing to query (and nothing to release).
    var loadStatus = CLKernel.LoadKernel(
        this,
        DummyKernelName,
        DummySubGroupKernelSource,
        CVersion,
        out IntPtr programHandle,
        out IntPtr kernelHandle,
        out var _);
    if (loadStatus != CLError.CL_SUCCESS)
    {
        return;
    }

    // Certain drivers hand out an internal handler delegate that crashes
    // when invoked rather than reporting that sub groups are unsupported.
    try
    {
        var groupSizes = new IntPtr[]
        {
            new IntPtr(MaxNumThreadsPerGroup)
        };
        bool querySucceeded = acceleratorId.TryGetKernelSubGroupInfo(
            kernelHandle,
            DeviceId,
            CLKernelSubGroupInfoType
                .CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,
            groupSizes,
            out IntPtr maxSubGroupSize);
        Capabilities.SubGroups = querySucceeded;
        WarpSize = maxSubGroupSize.ToInt32();
    }
    catch (AccessViolationException)
    {
        // Driver issues on several platforms surface as this exception;
        // sub-group support is simply turned off in that case.
        Capabilities.SubGroups = false;
    }
    finally
    {
        // Release the probe kernel first, then the program it belongs to
        CLException.ThrowIfFailed(
            CurrentAPI.ReleaseKernel(kernelHandle));
        CLException.ThrowIfFailed(
            CurrentAPI.ReleaseProgram(programHandle));
    }
}
/// <summary>
/// Creates the specified accelerator using the provided accelerator id.
/// The concrete accelerator implementation is chosen by the runtime type
/// of <paramref name="acceleratorId"/>.
/// </summary>
/// <param name="context">The ILGPU context.</param>
/// <param name="acceleratorId">The specified accelerator id.</param>
/// <returns>The created accelerator.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown if <paramref name="context"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// Thrown if <paramref name="acceleratorId"/> is not one of the CPU, Cuda or
/// OpenCL accelerator id types handled below. A null id matches none of the
/// type patterns and therefore also ends up here.
/// </exception>
public static Accelerator Create(Context context, AcceleratorId acceleratorId)
{
    if (context == null)
    {
        throw new ArgumentNullException(nameof(context));
    }
    return(acceleratorId switch
    {
        // The CPU id itself is not forwarded; only the context is needed
        CPU.CPUAcceleratorId _ => new CPU.CPUAccelerator(context),
        // Cuda accelerators are constructed from the device id stored in the id
        Cuda.CudaAcceleratorId cudaId =>
            new Cuda.CudaAccelerator(context, cudaId.DeviceId),
        // OpenCL accelerators receive the complete accelerator id
        OpenCL.CLAcceleratorId clId => new OpenCL.CLAccelerator(context, clId),
        // Any other id type (or null) is rejected
        _ => throw new ArgumentException(
            RuntimeErrorMessages.NotSupportedTargetAccelerator,
            nameof(acceleratorId)),
    });
    // NOTE(review): the closing brace of this method is not part of the
    // visible excerpt.
/// <summary>
/// Constructs a new OpenCL accelerator. Creates an OpenCL context for the
/// device, queries the device properties, initializes vendor- and sub-group
/// specific features, binds the accelerator and creates the default stream
/// and the backend.
/// </summary>
/// <param name="context">The ILGPU context.</param>
/// <param name="acceleratorId">The accelerator id.</param>
/// <exception cref="ArgumentNullException">
/// Thrown if <paramref name="acceleratorId"/> is null.
/// </exception>
public CLAccelerator(Context context, CLAcceleratorId acceleratorId)
    : base(context, AcceleratorType.OpenCL)
{
    if (acceleratorId == null)
    {
        throw new ArgumentNullException(nameof(acceleratorId));
    }

    PlatformId = acceleratorId.PlatformId;
    DeviceId = acceleratorId.DeviceId;

    PlatformName = CLAPI.GetPlatformInfo(
        PlatformId,
        CLPlatformInfoType.CL_PLATFORM_NAME);

    VendorName = CLAPI.GetPlatformInfo(
        PlatformId,
        CLPlatformInfoType.CL_PLATFORM_VENDOR);

    // Create new context
    CLException.ThrowIfFailed(
        CLAPI.CreateContext(DeviceId, out contextPtr));

    // Resolve device info
    Name = CLAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_NAME);

    MemorySize = CLAPI.GetDeviceInfo<long>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_GLOBAL_MEM_SIZE);

    DeviceType = (CLDeviceType)CLAPI.GetDeviceInfo<long>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_TYPE);

    // Max grid size: allocate at least three entries so that the X, Y and Z
    // components can always be read below, even if the device reports fewer
    // work-item dimensions
    int workItemDimensions = IntrinsicMath.Max(CLAPI.GetDeviceInfo<int>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS), 3);
    var workItemSizes = new IntPtr[workItemDimensions];
    CLAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_WORK_ITEM_SIZES,
        workItemSizes);
    MaxGridSize = new Index3(
        workItemSizes[0].ToInt32(),
        workItemSizes[1].ToInt32(),
        workItemSizes[2].ToInt32());

    // Resolve max threads per group
    MaxNumThreadsPerGroup = CLAPI.GetDeviceInfo<IntPtr>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_WORK_GROUP_SIZE).ToInt32();

    // Resolve max shared memory per block (clamped to the int range)
    MaxSharedMemoryPerGroup = (int)IntrinsicMath.Min(
        CLAPI.GetDeviceInfo<long>(
            DeviceId,
            CLDeviceInfoType.CL_DEVICE_LOCAL_MEM_SIZE),
        int.MaxValue);

    // Resolve total constant memory. The value is read as a 64-bit integer,
    // so it is clamped to the int range before the narrowing cast; a plain
    // cast would silently wrap around for values above int.MaxValue.
    MaxConstantMemory = (int)IntrinsicMath.Min(
        CLAPI.GetDeviceInfo<long>(
            DeviceId,
            CLDeviceInfoType.CL_DEVICE_MAX_PARAMETER_SIZE),
        int.MaxValue);

    // Resolve clock rate
    ClockRate = CLAPI.GetDeviceInfo<int>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_CLOCK_FREQUENCY);

    // Resolve number of multiprocessors
    NumMultiprocessors = CLAPI.GetDeviceInfo<int>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_COMPUTE_UNITS);

    // Result max number of threads per multiprocessor
    MaxNumThreadsPerMultiprocessor = MaxNumThreadsPerGroup;

    InitVendorFeatures();
    InitSubGroupSupport(acceleratorId);

    Bind();
    DefaultStream = CreateStreamInternal();
    base.Backend = new CLBackend(Context, Backends.Backend.OSPlatform, Vendor);
}
/// <summary>
/// Enumerates all OpenCL platforms and their devices once and publishes the
/// result via <c>CLAccelerators</c> (devices whose C version is at least
/// <c>CLBackend.MinimumVersion</c>) and <c>AllCLAccelerators</c> (every
/// available device). Failures during enumeration are ignored; both lists are
/// always assigned, possibly empty or partially filled.
/// </summary>
static CLAccelerator()
{
    var accelerators = ImmutableArray.CreateBuilder<CLAcceleratorId>();
    var allAccelerators = ImmutableArray.CreateBuilder<CLAcceleratorId>();
    // Scratch buffer that is reused for every platform
    var devices = new IntPtr[MaxNumDevicesPerPlatform];

    try
    {
        // Resolve all platforms
        if (!CurrentAPI.IsSupported ||
            CurrentAPI.GetNumPlatforms(out int numPlatforms) !=
            CLError.CL_SUCCESS ||
            numPlatforms < 1)
        {
            return;
        }

        var platforms = new IntPtr[numPlatforms];
        if (CurrentAPI.GetPlatforms(platforms, out numPlatforms) !=
            CLError.CL_SUCCESS)
        {
            return;
        }

        foreach (var platform in platforms)
        {
            // Resolve all devices
            int numDevices = devices.Length;
            Array.Clear(devices, 0, numDevices);
            if (CurrentAPI.GetDevices(
                platform,
                CLDeviceType.CL_DEVICE_TYPE_ALL,
                devices,
                out numDevices) != CLError.CL_SUCCESS)
            {
                continue;
            }

            // The reported count may exceed the capacity of the scratch
            // buffer. Without this clamp the loop below would index past the
            // end of the array, and the resulting exception would be swallowed
            // by the catch block and abort the enumeration of all remaining
            // platforms.
            if (numDevices > devices.Length)
            {
                numDevices = devices.Length;
            }

            for (int i = 0; i < numDevices; ++i)
            {
                // Resolve device and ignore invalid devices
                var device = devices[i];
                if (device == IntPtr.Zero)
                {
                    continue;
                }

                // Check for available device
                if (CurrentAPI.GetDeviceInfo<int>(
                    device,
                    CLDeviceInfoType.CL_DEVICE_AVAILABLE) == 0)
                {
                    continue;
                }

                // Every available device is listed; only those with a
                // sufficient C version count as supported accelerators
                var acceleratorId = new CLAcceleratorId(platform, device);
                allAccelerators.Add(acceleratorId);
                if (acceleratorId.CVersion >= CLBackend.MinimumVersion)
                {
                    accelerators.Add(acceleratorId);
                }
            }
        }
    }
    catch (Exception)
    {
        // Ignore API-specific exceptions at this point
    }
    finally
    {
        // Publish whatever has been collected so far, even on early return
        // or after an exception
        CLAccelerators = accelerators.ToImmutable();
        AllCLAccelerators = allAccelerators.ToImmutable();
    }
}
/// <summary>
/// Enumerates all OpenCL platforms and their devices once and publishes the
/// result via <c>CLAccelerators</c> (devices whose C version is at least
/// <c>CLBackend.MinimumVersion</c>) and <c>AllCLAccelerators</c> (every
/// available device). Failures during enumeration are ignored; both lists are
/// always assigned, possibly empty or partially filled.
/// </summary>
static CLAccelerator()
{
    var accelerators = ImmutableArray.CreateBuilder<CLAcceleratorId>();
    var allAccelerators = ImmutableArray.CreateBuilder<CLAcceleratorId>();

    try
    {
        // Resolve all platforms
        if (CLAPI.GetNumPlatforms(out int numPlatforms) !=
            CLError.CL_SUCCESS ||
            numPlatforms < 1)
        {
            return;
        }

        var platforms = new IntPtr[numPlatforms];
        if (CLAPI.GetPlatforms(platforms, out numPlatforms) !=
            CLError.CL_SUCCESS)
        {
            return;
        }

        foreach (var platform in platforms)
        {
            // Resolve all devices
            if (CLAPI.GetNumDevices(
                platform,
                CLDeviceType.CL_DEVICE_TYPE_ALL,
                out int numDevices) != CLError.CL_SUCCESS)
            {
                continue;
            }

            var devices = new IntPtr[numDevices];
            if (CLAPI.GetDevices(
                platform,
                CLDeviceType.CL_DEVICE_TYPE_ALL,
                devices,
                out numDevices) != CLError.CL_SUCCESS)
            {
                continue;
            }

            foreach (var device in devices)
            {
                // Ignore invalid (unfilled) device handles
                if (device == IntPtr.Zero)
                {
                    continue;
                }

                // Check for available device
                if (CLAPI.GetDeviceInfo<int>(
                    device,
                    CLDeviceInfoType.CL_DEVICE_AVAILABLE) == 0)
                {
                    continue;
                }

                // Every available device belongs into the "all" list;
                // previously supported devices were missing from it because
                // the two lists were filled mutually exclusively.
                var acceleratorId = new CLAcceleratorId(platform, device);
                allAccelerators.Add(acceleratorId);
                if (acceleratorId.CVersion >= CLBackend.MinimumVersion)
                {
                    accelerators.Add(acceleratorId);
                }
            }
        }
    }
    catch (Exception)
    {
        // Ignore API-specific exceptions at this point
    }
    finally
    {
        // Publish whatever has been collected so far, even on early return
        // or after an exception
        CLAccelerators = accelerators.ToImmutable();
        AllCLAccelerators = allAccelerators.ToImmutable();
    }
}