/// <summary>
/// Initializes major vendor features.
/// </summary>
/// <remarks>
/// Returns immediately for Nvidia and AMD devices. For all other vendors a
/// dummy kernel is compiled and its preferred work-group size multiple is
/// stored in <c>WarpSize</c>.
/// </remarks>
private void InitVendorFeatures()
{
    // Nothing to do for Nvidia and AMD devices
    if (Device.Vendor == CLDeviceVendor.Nvidia ||
        Device.Vendor == CLDeviceVendor.AMD)
    {
        return;
    }

    // Compile dummy kernel to resolve additional information
    CLException.ThrowIfFailed(CLKernel.LoadKernel(
        this,
        DummyKernelName,
        DummyKernelSource,
        CVersion,
        out IntPtr programPtr,
        out IntPtr kernelPtr,
        out var _));
    try
    {
        // Resolve information
        WarpSize = CurrentAPI.GetKernelWorkGroupInfo<IntPtr>(
            kernelPtr,
            DeviceId,
            CLKernelWorkGroupInfoType
                .CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE).ToInt32();
    }
    finally
    {
        // Release both handles before inspecting either status so that a
        // failing kernel release cannot leak the surrounding program.
        var kernelStatus = CurrentAPI.ReleaseKernel(kernelPtr);
        var programStatus = CurrentAPI.ReleaseProgram(programPtr);
        CLException.ThrowIfFailed(kernelStatus);
        CLException.ThrowIfFailed(programStatus);
    }
}
/// <summary>
/// Initializes support for sub groups.
/// </summary>
/// <param name="acceleratorId">The current accelerator id.</param>
/// <remarks>
/// Sub-group support is first derived from the device extensions and then
/// verified by compiling a small kernel and querying its sub-group size.
/// <c>WarpSize</c> is only updated when that query succeeds.
/// </remarks>
private void InitSubGroupSupport(CLDevice acceleratorId)
{
    // Check sub group support
    Capabilities.SubGroups = acceleratorId.HasAnyExtension(SubGroupExtensions);
    if (!Capabilities.SubGroups)
    {
        return;
    }

    // Verify support using a simple kernel. If the kernel cannot be loaded,
    // the extension-based result from above is kept unchanged.
    if (CLKernel.LoadKernel(
        this,
        DummyKernelName,
        DummySubGroupKernelSource,
        CVersion,
        out IntPtr programPtr,
        out IntPtr kernelPtr,
        out var _) != CLError.CL_SUCCESS)
    {
        return;
    }

    // Some drivers return an internal handler delegate
    // that crashes during invocation instead of telling that the
    // sub-group feature is not supported
    try
    {
        var localGroupSizes = new IntPtr[]
        {
            new IntPtr(MaxNumThreadsPerGroup)
        };
        Capabilities.SubGroups = acceleratorId.TryGetKernelSubGroupInfo(
            kernelPtr,
            DeviceId,
            CLKernelSubGroupInfoType
                .CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,
            localGroupSizes,
            out IntPtr subGroupSize);

        // Only trust the reported size if the query succeeded; otherwise
        // keep the previously determined warp size instead of overwriting
        // it with the out-value of a failed query.
        // NOTE(review): assumes a failed query leaves subGroupSize at a
        // meaningless (default) value - confirm against the helper.
        if (Capabilities.SubGroups)
        {
            WarpSize = subGroupSize.ToInt32();
        }
    }
    catch (AccessViolationException)
    {
        // This exception can be raised due to driver issues
        // on several platforms -> we will just disable sub-group
        // support for these platforms
        Capabilities.SubGroups = false;
    }
    finally
    {
        // Release both handles before inspecting either status so that a
        // failing kernel release cannot leak the surrounding program.
        var kernelStatus = CurrentAPI.ReleaseKernel(kernelPtr);
        var programStatus = CurrentAPI.ReleaseProgram(programPtr);
        CLException.ThrowIfFailed(kernelStatus);
        CLException.ThrowIfFailed(programStatus);
    }
}
/// <summary cref="DisposeBase.Dispose(bool)"/>
/// <remarks>
/// Releases the kernel handle, then the program handle, then forwards to the
/// base implementation. Each later step runs even if an earlier release
/// throws, so a single failure cannot leak the remaining resources.
/// </remarks>
protected override void Dispose(bool disposing)
{
    try
    {
        // Free the kernel
        if (kernelPtr != IntPtr.Zero)
        {
            CLException.ThrowIfFailed(
                CurrentAPI.ReleaseKernel(kernelPtr));
            kernelPtr = IntPtr.Zero;
        }
    }
    finally
    {
        try
        {
            // Free the surrounding program
            if (programPtr != IntPtr.Zero)
            {
                CLException.ThrowIfFailed(
                    CurrentAPI.ReleaseProgram(programPtr));
                programPtr = IntPtr.Zero;
            }
        }
        finally
        {
            // Always give the base class the chance to clean up
            base.Dispose(disposing);
        }
    }
}
/// <summary>
/// Disposes this OpenCL kernel by releasing the native kernel handle and
/// the program handle it belongs to.
/// </summary>
/// <param name="disposing">
/// Forwarded to <c>CLException.VerifyDisposed</c> together with the status
/// of each release call.
/// </param>
protected override void DisposeAcceleratorObject(bool disposing)
{
    // Kernel first ...
    if (kernelPtr != IntPtr.Zero)
    {
        var kernelReleaseStatus = CurrentAPI.ReleaseKernel(kernelPtr);
        CLException.VerifyDisposed(disposing, kernelReleaseStatus);
        kernelPtr = IntPtr.Zero;
    }

    // ... then the program that contains it
    if (programPtr != IntPtr.Zero)
    {
        var programReleaseStatus = CurrentAPI.ReleaseProgram(programPtr);
        CLException.VerifyDisposed(disposing, programReleaseStatus);
        programPtr = IntPtr.Zero;
    }
}
/// <summary>
/// Initializes major vendor features.
/// </summary>
/// <remarks>
/// The vendor is detected by probing vendor-specific device-info queries:
/// a successful Nvidia warp-size query selects Nvidia, a successful AMD
/// wavefront-width query selects AMD. Otherwise the vendor is Intel if the
/// vendor name contains "Intel", or Other, and the warp size is taken from
/// the preferred work-group size multiple of a compiled dummy kernel.
/// </remarks>
private void InitVendorFeatures()
{
    // Check major vendor features
    if (CurrentAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_WARP_SIZE_NV,
        out int warpSize) == CLError.CL_SUCCESS)
    {
        // Nvidia platform
        WarpSize = warpSize;
        Vendor = CLAcceleratorVendor.Nvidia;

        int major = CurrentAPI.GetDeviceInfo<int>(
            DeviceId,
            CLDeviceInfoType.CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV);
        int minor = CurrentAPI.GetDeviceInfo<int>(
            DeviceId,
            CLDeviceInfoType.CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV);

        // Devices below compute capability 7.5 get a doubled
        // MaxNumThreadsPerMultiprocessor value
        if (major < 7 || (major == 7 && minor < 5))
        {
            MaxNumThreadsPerMultiprocessor *= 2;
        }
    }
    else if (CurrentAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_WAVEFRONT_WIDTH_AMD,
        out int wavefrontSize) == CLError.CL_SUCCESS)
    {
        // AMD platform
        WarpSize = wavefrontSize;
        Vendor = CLAcceleratorVendor.AMD;
    }
    else
    {
        Vendor = VendorName.Contains(CLAcceleratorVendor.Intel.ToString())
            ? CLAcceleratorVendor.Intel
            : CLAcceleratorVendor.Other;

        // Compile dummy kernel to resolve additional information
        CLException.ThrowIfFailed(CLKernel.LoadKernel(
            this,
            DummyKernelSource,
            CVersion,
            out IntPtr programPtr,
            out IntPtr kernelPtr,
            out var _));
        try
        {
            // Resolve information
            WarpSize = CurrentAPI.GetKernelWorkGroupInfo<IntPtr>(
                kernelPtr,
                DeviceId,
                CLKernelWorkGroupInfoType
                    .CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE).ToInt32();
        }
        finally
        {
            // Release both handles before inspecting either status so that
            // a failing kernel release cannot leak the surrounding program.
            var kernelStatus = CurrentAPI.ReleaseKernel(kernelPtr);
            var programStatus = CurrentAPI.ReleaseProgram(programPtr);
            CLException.ThrowIfFailed(kernelStatus);
            CLException.ThrowIfFailed(programStatus);
        }
    }
}