/// <summary>
/// Constructs a new OpenCL accelerator reference.
/// </summary>
/// <param name="platformId">The OpenCL platform id.</param>
/// <param name="deviceId">The OpenCL device id.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown if <paramref name="platformId"/> or <paramref name="deviceId"/> is
/// <see cref="IntPtr.Zero"/>.
/// </exception>
public CLAcceleratorId(IntPtr platformId, IntPtr deviceId)
    : base(AcceleratorType.OpenCL)
{
    if (platformId == IntPtr.Zero)
    {
        throw new ArgumentOutOfRangeException(nameof(platformId));
    }
    if (deviceId == IntPtr.Zero)
    {
        throw new ArgumentOutOfRangeException(nameof(deviceId));
    }

    PlatformId = platformId;
    DeviceId = deviceId;

    DeviceType = (CLDeviceType)CLAPI.GetDeviceInfo<long>(
        deviceId,
        CLDeviceInfoType.CL_DEVICE_TYPE);

    // Resolve extensions. The names are normalized with the invariant culture
    // so that the result does not depend on the current thread culture, and
    // empty entries (caused by repeated or trailing blanks) are dropped.
    var extensionString = CLAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_EXTENSIONS);
    extensionSet = new HashSet<string>(
        extensionString.ToLowerInvariant().Split(
            new[] { ' ' },
            StringSplitOptions.RemoveEmptyEntries));
    Extensions = extensionSet.ToImmutableArray();

    // Resolve extension method
    getKernelSubGroupInfo =
        CLAPI.GetExtension<clGetKernelSubGroupInfoKHR>(platformId);
}
/// <summary>
/// Loads the binary representation of the given OpenCL kernel.
/// </summary>
/// <param name="program">The program pointer.</param>
/// <returns>
/// The binary representation of the underlying kernel. The array is empty if
/// the reported binary size is zero.
/// </returns>
public static unsafe byte[] LoadBinaryRepresentation(IntPtr program)
{
    // Query the size of the binary (a single pointer-sized entry is requested)
    IntPtr kernelSize;
    CLException.ThrowIfFailed(
        CLAPI.GetProgramInfo(
            program,
            CLProgramInfo.CL_PROGRAM_BINARY_SIZES,
            new IntPtr(IntPtr.Size),
            &kernelSize,
            out var _));

    var programBinary = new byte[kernelSize.ToInt32()];
    if (programBinary.Length < 1)
    {
        // Taking the address of element 0 of an empty array would throw an
        // IndexOutOfRangeException, and there is nothing to copy anyway.
        return programBinary;
    }

    // Pin the managed buffer and let the API copy the binary into it
    fixed (byte* binPtr = &programBinary[0])
    {
        CLException.ThrowIfFailed(
            CLAPI.GetProgramInfo(
                program,
                CLProgramInfo.CL_PROGRAM_BINARIES,
                new IntPtr(IntPtr.Size),
                &binPtr,
                out var _));
    }
    return programBinary;
}
/// <summary cref="MemoryBuffer{T, TIndex}.CopyFromView(AcceleratorStream, ArrayView{T}, Index)"/>
protected internal unsafe override void CopyFromView(
    AcceleratorStream stream,
    ArrayView<T> source,
    Index targetOffset)
{
    var clStream = (CLStream)stream;
    var sourceType = source.AcceleratorType;

    if (sourceType == AcceleratorType.CPU)
    {
        // The source view has the CPU accelerator type: write from its
        // effective address into this buffer.
        CLException.ThrowIfFailed(
            CLAPI.WriteBuffer(
                clStream.CommandQueue,
                NativePtr,
                false,
                new IntPtr(targetOffset * ElementSize),
                new IntPtr(source.LengthInBytes),
                new IntPtr(source.LoadEffectiveAddress())));
    }
    else if (sourceType == AcceleratorType.OpenCL)
    {
        // The source view has the OpenCL accelerator type: copy from its
        // native buffer handle into this buffer.
        CLException.ThrowIfFailed(
            CLAPI.CopyBuffer(
                clStream.CommandQueue,
                source.Source.NativePtr,
                NativePtr,
                new IntPtr(source.Index * ElementSize),
                new IntPtr(targetOffset * ElementSize),
                new IntPtr(source.LengthInBytes)));
    }
    else
    {
        throw new NotSupportedException(
            RuntimeErrorMessages.NotSupportedTargetAccelerator);
    }
}
/// <summary>
/// Detects the device vendor and initializes the vendor-dependent properties
/// (<c>Vendor</c>, <c>WarpSize</c> and, for Nvidia devices,
/// <c>MaxNumThreadsPerMultiprocessor</c>).
/// </summary>
private void InitVendorFeatures()
{
    // Check major vendor features
    if (CLAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_WARP_SIZE_NV,
        out int warpSize) == CLError.CL_SUCCESS)
    {
        // Nvidia platform
        WarpSize = warpSize;
        Vendor = CLAcceleratorVendor.Nvidia;

        int major = CLAPI.GetDeviceInfo<int>(
            DeviceId,
            CLDeviceInfoType.CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV);
        int minor = CLAPI.GetDeviceInfo<int>(
            DeviceId,
            CLDeviceInfoType.CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV);

        // Devices with a compute capability below 7.5 get twice the number
        // of threads per multiprocessor
        if (major < 7 || (major == 7 && minor < 5))
        {
            MaxNumThreadsPerMultiprocessor *= 2;
        }
    }
    else if (CLAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_WAVEFRONT_WIDTH_AMD,
        out int wavefrontSize) == CLError.CL_SUCCESS)
    {
        // AMD platform
        WarpSize = wavefrontSize;
        Vendor = CLAcceleratorVendor.AMD;
    }
    else
    {
        Vendor = VendorName.Contains(CLAcceleratorVendor.Intel.ToString())
            ? CLAcceleratorVendor.Intel
            : CLAcceleratorVendor.Other;

        // Compile dummy kernel to resolve additional information
        CLException.ThrowIfFailed(CLKernel.LoadKernel(
            this,
            DummyKernelSource,
            out IntPtr programPtr,
            out IntPtr kernelPtr));
        try
        {
            // Resolve information
            WarpSize = CLAPI.GetKernelWorkGroupInfo<IntPtr>(
                kernelPtr,
                DeviceId,
                CLKernelWorkGroupInfoType
                    .CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE).ToInt32();
        }
        finally
        {
            // Release both handles unconditionally and report the errors
            // separately; combining two error codes with a bitwise OR would
            // yield a value that does not correspond to either error.
            var kernelError = CLAPI.ReleaseKernel(kernelPtr);
            var programError = CLAPI.ReleaseProgram(programPtr);
            CLException.ThrowIfFailed(kernelError);
            CLException.ThrowIfFailed(programError);
        }
    }
}
/// <summary cref="Accelerator.EstimateGroupSizeInternal(
/// Kernel, int, int, out int)"/>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown if <paramref name="dynamicSharedMemorySizeInBytes"/> is positive.
/// </exception>
/// <exception cref="ArgumentNullException">
/// Thrown if <paramref name="kernel"/> is null.
/// </exception>
protected override int EstimateGroupSizeInternal(
    Kernel kernel,
    int dynamicSharedMemorySizeInBytes,
    int maxGroupSize,
    out int minGridSize)
{
    if (dynamicSharedMemorySizeInBytes > 0)
    {
        throw new ArgumentOutOfRangeException(
            nameof(dynamicSharedMemorySizeInBytes));
    }

    // A non-positive limit falls back to the device maximum
    if (maxGroupSize < 1)
    {
        maxGroupSize = MaxNumThreadsPerGroup;
    }

    // Fail with a descriptive exception instead of dereferencing the result
    // of an unchecked 'as' cast
    if (kernel == null)
    {
        throw new ArgumentNullException(nameof(kernel));
    }
    var clKernel = (CLKernel)kernel;

    var workGroupSizeNative = CLAPI.GetKernelWorkGroupInfo<IntPtr>(
        clKernel.KernelPtr,
        DeviceId,
        CLKernelWorkGroupInfoType.CL_KERNEL_WORK_GROUP_SIZE);
    int workGroupSize = IntrinsicMath.Min(
        workGroupSizeNative.ToInt32(),
        maxGroupSize);

    minGridSize = IntrinsicMath.DivRoundUp(MaxNumThreads, workGroupSize);
    return workGroupSize;
}
/// <summary cref="DisposeBase.Dispose(bool)"/>
protected override void Dispose(bool disposing)
{
    base.Dispose(disposing);

    // Nothing to release if the context handle has already been cleared;
    // this keeps a repeated call from releasing a null handle.
    if (contextPtr == IntPtr.Zero)
    {
        return;
    }

    CLException.ThrowIfFailed(
        CLAPI.ReleaseContext(contextPtr));
    contextPtr = IntPtr.Zero;
}
/// <summary cref="MemoryBuffer.MemSetToZero(AcceleratorStream)"/>
public override void MemSetToZero(AcceleratorStream stream)
{
    // Fill the buffer with zero bytes, starting at offset zero and covering
    // LengthInBytes bytes, using the command queue of the given stream.
    var clStream = (CLStream)stream;
    CLException.ThrowIfFailed(
        CLAPI.FillBuffer<byte>(
            clStream.CommandQueue,
            NativePtr,
            0,
            IntPtr.Zero,
            new IntPtr(LengthInBytes)));
}
/// <summary cref="DisposeBase.Dispose(bool)"/>
protected override void Dispose(bool disposing)
{
    // Release the device handle (if any) before the base class is disposed.
    // The result of the release call is not inspected.
    bool hasDeviceHandle = DeviceId != IntPtr.Zero;
    if (hasDeviceHandle)
    {
        CLAPI.ReleaseDevice(DeviceId);
        DeviceId = IntPtr.Zero;
    }

    base.Dispose(disposing);
}
/// <summary cref="DisposeBase.Dispose(bool)"/>
protected override void Dispose(bool disposing)
{
    // Release each handle that is still set and clear it right away, so a
    // repeated call does not release the same handle twice. Both releases
    // are attempted before any error is reported.
    var kernelError = CLError.CL_SUCCESS;
    if (kernelPtr != IntPtr.Zero)
    {
        kernelError = CLAPI.ReleaseKernel(kernelPtr);
        kernelPtr = IntPtr.Zero;
    }

    var programError = CLError.CL_SUCCESS;
    if (programPtr != IntPtr.Zero)
    {
        programError = CLAPI.ReleaseProgram(programPtr);
        programPtr = IntPtr.Zero;
    }

    // Report the errors separately; combining two error codes with a
    // bitwise OR would yield a value that matches neither error.
    CLException.ThrowIfFailed(kernelError);
    CLException.ThrowIfFailed(programError);
}
/// <summary>
/// Constructs a new OpenCL stream by creating a command queue for the device
/// and context of the given accelerator.
/// </summary>
/// <param name="accelerator">The associated accelerator.</param>
internal CLStream(CLAccelerator accelerator)
    : base(accelerator)
{
    var queueError = CLAPI.CreateCommandQueue(
        accelerator.DeviceId,
        accelerator.ContextPtr,
        out queuePtr);
    CLException.ThrowIfFailed(queueError);
}
/// <summary cref="DisposeBase.Dispose(bool)"/>
protected override void Dispose(bool disposing)
{
    // No command queue handle -> nothing to release
    if (queuePtr == IntPtr.Zero)
    {
        return;
    }

    // The handle is only cleared if the release call succeeded
    var releaseError = CLAPI.ReleaseCommandQueue(queuePtr);
    CLException.ThrowIfFailed(releaseError);
    queuePtr = IntPtr.Zero;
}
/// <summary cref="DisposeBase.Dispose(bool)"/>
protected override void Dispose(bool disposing)
{
    // Release the native buffer (if any) before the base class is disposed
    bool hasNativeBuffer = NativePtr != IntPtr.Zero;
    if (hasNativeBuffer)
    {
        var releaseError = CLAPI.ReleaseBuffer(NativePtr);
        CLException.ThrowIfFailed(releaseError);
        NativePtr = IntPtr.Zero;
    }

    base.Dispose(disposing);
}
/// <summary>
/// Constructs a new OpenCL buffer.
/// </summary>
/// <param name="accelerator">The accelerator.</param>
/// <param name="extent">The extent.</param>
internal CLMemoryBuffer(CLAccelerator accelerator, TIndex extent)
    : base(accelerator, extent)
{
    // The requested size is the number of elements times the element size
    var context = accelerator.ContextPtr;
    var sizeInBytes = new IntPtr(extent.Size * ElementSize);

    var createError = CLAPI.CreateBuffer(
        context,
        CLBufferFlags.CL_MEM_KERNEL_READ_AND_WRITE,
        sizeInBytes,
        IntPtr.Zero,
        out IntPtr bufferPtr);
    CLException.ThrowIfFailed(createError);

    NativePtr = bufferPtr;
}
/// <summary>
/// Determines whether sub groups are supported and, if the sub-group size
/// can be queried successfully, updates <c>WarpSize</c> accordingly.
/// </summary>
/// <param name="acceleratorId">The current accelerator id.</param>
private void InitSubGroupSupport(CLAcceleratorId acceleratorId)
{
    // Check sub group support
    SubGroupSupport = acceleratorId.HasAnyExtension(SubGroupExtensions);
    if (!SubGroupSupport)
    {
        return;
    }

    // Verify support using a simple kernel
    if (CLKernel.LoadKernel(
        this,
        DummySubGroupKernelSource,
        CVersion,
        out IntPtr programPtr,
        out IntPtr kernelPtr,
        out var _) != CLError.CL_SUCCESS)
    {
        return;
    }

    // Some drivers return an internal handler delegate
    // that crashes during invocation instead of telling that the
    // sub-group feature is not supported
    try
    {
        var localGroupSizes = new IntPtr[]
        {
            new IntPtr(MaxNumThreadsPerGroup)
        };
        SubGroupSupport = acceleratorId.TryGetKernelSubGroupInfo(
            kernelPtr,
            DeviceId,
            CLKernelSubGroupInfoType
                .CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,
            localGroupSizes,
            out IntPtr subGroupSize);

        // Only trust the reported size if the query succeeded; otherwise
        // keep the previously determined warp size
        if (SubGroupSupport)
        {
            WarpSize = subGroupSize.ToInt32();
        }
    }
    catch (AccessViolationException)
    {
        // This exception can be raised due to driver issues
        // on several platforms -> we will just disable sub-group
        // support for these platforms
        SubGroupSupport = false;
    }
    finally
    {
        // Attempt both releases before reporting an error so that a failed
        // kernel release does not skip the program release
        var kernelError = CLAPI.ReleaseKernel(kernelPtr);
        var programError = CLAPI.ReleaseProgram(programPtr);
        CLException.ThrowIfFailed(kernelError);
        CLException.ThrowIfFailed(programError);
    }
}
/// <summary>
/// Loads the given OpenCL kernel.
/// </summary>
/// <param name="accelerator">The associated accelerator.</param>
/// <param name="source">The OpenCL source code.</param>
/// <param name="version">The OpenCL C version.</param>
/// <param name="programPtr">
/// The created program pointer (<see cref="IntPtr.Zero"/> if the build or the
/// kernel creation failed).
/// </param>
/// <param name="kernelPtr">
/// The created kernel pointer (<see cref="IntPtr.Zero"/> on failure).
/// </param>
/// <param name="errorLog">The build log if the build failed (null otherwise).</param>
/// <returns>
/// <see cref="CLError.CL_SUCCESS"/> if the program and the kernel could be
/// loaded successfully, otherwise the error code of the failed step.
/// </returns>
internal static CLError LoadKernel(
    CLAccelerator accelerator,
    string source,
    CLCVersion version,
    out IntPtr programPtr,
    out IntPtr kernelPtr,
    out string errorLog)
{
    errorLog = null;
    kernelPtr = IntPtr.Zero;

    var programError = CLAPI.CreateProgram(
        accelerator.ContextPtr,
        source,
        out programPtr);
    if (programError != CLError.CL_SUCCESS)
    {
        return programError;
    }

    // Specify the OpenCL C version.
    string options = "-cl-std=" + version.ToString();

    var buildError = CLAPI.BuildProgram(
        programPtr,
        accelerator.DeviceId,
        options);
    if (buildError != CLError.CL_SUCCESS)
    {
        // Fetch the build log before the program is released
        CLException.ThrowIfFailed(
            CLAPI.GetProgramBuildLog(
                programPtr,
                accelerator.DeviceId,
                out errorLog));
        CLException.ThrowIfFailed(
            CLAPI.ReleaseProgram(programPtr));
        programPtr = IntPtr.Zero;
        return buildError;
    }

    var kernelError = CLAPI.CreateKernel(
        programPtr,
        CLCompiledKernel.EntryName,
        out kernelPtr);
    if (kernelError != CLError.CL_SUCCESS)
    {
        // Do not hand out a program without a kernel: release it here (as in
        // the build-failure path) so that it cannot leak in the caller
        CLException.ThrowIfFailed(
            CLAPI.ReleaseProgram(programPtr));
        programPtr = IntPtr.Zero;
        kernelPtr = IntPtr.Zero;
    }
    return kernelError;
}
/// <summary>
/// Constructs a new OpenCL accelerator reference.
/// </summary>
/// <param name="platformId">The OpenCL platform id.</param>
/// <param name="deviceId">The OpenCL device id.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown if <paramref name="platformId"/> or <paramref name="deviceId"/> is
/// <see cref="IntPtr.Zero"/>.
/// </exception>
public CLAcceleratorId(IntPtr platformId, IntPtr deviceId)
    : base(AcceleratorType.OpenCL)
{
    if (platformId == IntPtr.Zero)
    {
        throw new ArgumentOutOfRangeException(nameof(platformId));
    }
    if (deviceId == IntPtr.Zero)
    {
        throw new ArgumentOutOfRangeException(nameof(deviceId));
    }

    PlatformId = platformId;
    DeviceId = deviceId;

    DeviceType = (CLDeviceType)CLAPI.GetDeviceInfo<long>(
        deviceId,
        CLDeviceInfoType.CL_DEVICE_TYPE);

    // Resolve extensions. The names are normalized with the invariant culture
    // so that the result does not depend on the current thread culture, and
    // empty entries (caused by repeated or trailing blanks) are dropped.
    var extensionString = CLAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_EXTENSIONS);
    extensionSet = new HashSet<string>(
        extensionString.ToLowerInvariant().Split(
            new[] { ' ' },
            StringSplitOptions.RemoveEmptyEntries));
    Extensions = extensionSet.ToImmutableArray();

    // Determine the supported OpenCL C version; fall back to CL10 if the
    // version string cannot be parsed
    var clVersionString = CLAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_OPENCL_C_VERSION);
    if (!CLCVersion.TryParse(clVersionString, out CLCVersion version))
    {
        version = CLCVersion.CL10;
    }
    CVersion = version;

    // Resolve extension method
    getKernelSubGroupInfo =
        CLAPI.GetExtension<clGetKernelSubGroupInfoKHR>(platformId);
}
/// <summary>
/// Loads the given OpenCL kernel.
/// </summary>
/// <param name="accelerator">The associated accelerator.</param>
/// <param name="source">The OpenCL source code.</param>
/// <param name="programPtr">
/// The created program pointer (<see cref="IntPtr.Zero"/> if the build or the
/// kernel creation failed).
/// </param>
/// <param name="kernelPtr">
/// The created kernel pointer (<see cref="IntPtr.Zero"/> on failure).
/// </param>
/// <returns>
/// <see cref="CLError.CL_SUCCESS"/> if the program and the kernel could be
/// loaded successfully, otherwise the error code of the first failed step.
/// </returns>
internal static CLError LoadKernel(
    CLAccelerator accelerator,
    string source,
    out IntPtr programPtr,
    out IntPtr kernelPtr)
{
    kernelPtr = IntPtr.Zero;

    var error = CLAPI.CreateProgram(
        accelerator.ContextPtr,
        source,
        out programPtr);
    if (error != CLError.CL_SUCCESS)
    {
        return error;
    }

    // TODO: OpenCL compiler options
    string options = string.Empty;

    // Run the steps one after another and keep the error code of the first
    // failing step; OR-ing error codes together would produce a value that
    // does not correspond to any actual error.
    error = CLAPI.BuildProgram(
        programPtr,
        accelerator.DeviceId,
        options);
    if (error == CLError.CL_SUCCESS)
    {
        error = CLAPI.CreateKernel(
            programPtr,
            CLCompiledKernel.EntryName,
            out kernelPtr);
    }

    if (error != CLError.CL_SUCCESS)
    {
        CLException.ThrowIfFailed(
            CLAPI.ReleaseProgram(programPtr));
        programPtr = IntPtr.Zero;
        kernelPtr = IntPtr.Zero;
    }
    return error;
}
/// <summary>
/// Queries a piece of device information for this device and returns it as a
/// value of type <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">The target type.</typeparam>
/// <param name="type">The information type.</param>
/// <returns>The resolved value.</returns>
public T GetDeviceInfo<T>(CLDeviceInfoType type)
    where T : unmanaged
{
    return CLAPI.GetDeviceInfo<T>(DeviceId, type);
}
/// <summary cref="AcceleratorStream.Synchronize"/>
public override void Synchronize() =>
    // Finish the command queue of this stream and throw if that call fails
    CLException.ThrowIfFailed(CLAPI.FinishCommandQueue(queuePtr));
/// <summary>
/// Detects all available OpenCL accelerators. Every available device is added
/// to <c>AllCLAccelerators</c>; devices whose OpenCL C version is at least
/// <c>CLBackend.MinimumVersion</c> are also added to <c>CLAccelerators</c>.
/// </summary>
static CLAccelerator()
{
    var accelerators = ImmutableArray.CreateBuilder<CLAcceleratorId>();
    var allAccelerators = ImmutableArray.CreateBuilder<CLAcceleratorId>();
    var devices = new IntPtr[MaxNumDevicesPerPlatform];

    try
    {
        // Resolve all platforms
        if (CLAPI.GetNumPlatforms(out int numPlatforms) !=
            CLError.CL_SUCCESS ||
            numPlatforms < 1)
        {
            return;
        }

        var platforms = new IntPtr[numPlatforms];
        if (CLAPI.GetPlatforms(platforms, out numPlatforms) !=
            CLError.CL_SUCCESS)
        {
            return;
        }

        foreach (var platform in platforms)
        {
            // Resolve all devices
            int numDevices = devices.Length;
            Array.Clear(devices, 0, numDevices);

            if (CLAPI.GetDevices(
                platform,
                CLDeviceType.CL_DEVICE_TYPE_ALL,
                devices,
                out numDevices) != CLError.CL_SUCCESS)
            {
                continue;
            }

            // The reported device count is not bounded by the size of the
            // local buffer; clamp it so that indexing cannot run out of
            // range (which would abort the whole enumeration via the
            // catch block below).
            int numResolvedDevices = Math.Min(numDevices, devices.Length);
            for (int i = 0; i < numResolvedDevices; ++i)
            {
                // Resolve device and ignore invalid devices
                var device = devices[i];
                if (device == IntPtr.Zero)
                {
                    continue;
                }

                // Check for available device
                if (CLAPI.GetDeviceInfo<int>(
                    device,
                    CLDeviceInfoType.CL_DEVICE_AVAILABLE) == 0)
                {
                    continue;
                }

                var acceleratorId = new CLAcceleratorId(platform, device);
                allAccelerators.Add(acceleratorId);
                if (acceleratorId.CVersion >= CLBackend.MinimumVersion)
                {
                    accelerators.Add(acceleratorId);
                }
            }
        }
    }
    catch (Exception)
    {
        // Ignore API-specific exceptions at this point
    }
    finally
    {
        // Publish whatever has been collected, even after a failure
        CLAccelerators = accelerators.ToImmutable();
        AllCLAccelerators = allAccelerators.ToImmutable();
    }
}
/// <summary>
/// Queries a piece of device information for this device and stores it as a
/// value of type <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">The target type.</typeparam>
/// <param name="type">The information type.</param>
/// <param name="value">The resolved value.</param>
/// <returns>The error code.</returns>
public CLError GetDeviceInfo<T>(CLDeviceInfoType type, out T value)
    where T : unmanaged
{
    return CLAPI.GetDeviceInfo(DeviceId, type, out value);
}
/// <summary>
/// Detects all available OpenCL accelerators and stores them in
/// <c>CLAccelerators</c>.
/// </summary>
static CLAccelerator()
{
    var accelerators = ImmutableArray.CreateBuilder<CLAcceleratorId>();

    try
    {
        // Resolve all platforms
        if (CLAPI.GetNumPlatforms(out int numPlatforms) !=
            CLError.CL_SUCCESS ||
            numPlatforms < 1)
        {
            return;
        }

        var platforms = new IntPtr[numPlatforms];
        if (CLAPI.GetPlatforms(platforms, out numPlatforms) !=
            CLError.CL_SUCCESS)
        {
            return;
        }

        foreach (var platform in platforms)
        {
            // Resolve all devices
            if (CLAPI.GetNumDevices(
                platform,
                CLDeviceType.CL_DEVICE_TYPE_ALL,
                out int numDevices) != CLError.CL_SUCCESS)
            {
                continue;
            }

            var devices = new IntPtr[numDevices];
            if (CLAPI.GetDevices(
                platform,
                CLDeviceType.CL_DEVICE_TYPE_ALL,
                devices,
                out numDevices) != CLError.CL_SUCCESS)
            {
                continue;
            }

            foreach (var device in devices)
            {
                // Ignore invalid devices: constructing an accelerator id
                // from a zero handle throws, and the catch block below
                // would then abort the whole enumeration.
                if (device == IntPtr.Zero)
                {
                    continue;
                }

                // Check for available device
                if (CLAPI.GetDeviceInfo<int>(
                    device,
                    CLDeviceInfoType.CL_DEVICE_AVAILABLE) == 0)
                {
                    continue;
                }

                accelerators.Add(new CLAcceleratorId(
                    platform,
                    device));
            }
        }
    }
    catch (Exception)
    {
        // Ignore API-specific exceptions at this point
    }
    finally
    {
        // Publish whatever has been collected, even after a failure
        CLAccelerators = accelerators.ToImmutable();
    }
}
/// <summary>
/// Constructs a new OpenCL accelerator.
/// </summary>
/// <param name="context">The ILGPU context.</param>
/// <param name="acceleratorId">The accelerator id.</param>
/// <exception cref="ArgumentNullException">
/// Thrown if <paramref name="acceleratorId"/> is null.
/// </exception>
public CLAccelerator(Context context, CLAcceleratorId acceleratorId)
    : base(context, AcceleratorType.OpenCL)
{
    if (acceleratorId == null)
    {
        throw new ArgumentNullException(nameof(acceleratorId));
    }

    PlatformId = acceleratorId.PlatformId;
    DeviceId = acceleratorId.DeviceId;

    PlatformName = CLAPI.GetPlatformInfo(
        PlatformId,
        CLPlatformInfoType.CL_PLATFORM_NAME);

    VendorName = CLAPI.GetPlatformInfo(
        PlatformId,
        CLPlatformInfoType.CL_PLATFORM_VENDOR);

    // Create new context
    CLException.ThrowIfFailed(
        CLAPI.CreateContext(DeviceId, out contextPtr));

    // Resolve device info
    Name = CLAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_NAME);

    MemorySize = CLAPI.GetDeviceInfo<long>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_GLOBAL_MEM_SIZE);

    DeviceType = (CLDeviceType)CLAPI.GetDeviceInfo<long>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_TYPE);

    // Max grid size (at least three entries are allocated, since the first
    // three work-item sizes are read below)
    int workItemDimensions = IntrinsicMath.Max(CLAPI.GetDeviceInfo<int>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS), 3);
    var workItemSizes = new IntPtr[workItemDimensions];
    CLAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_WORK_ITEM_SIZES,
        workItemSizes);
    MaxGridSize = new Index3(
        workItemSizes[0].ToInt32(),
        workItemSizes[1].ToInt32(),
        workItemSizes[2].ToInt32());

    // Resolve max threads per group
    MaxNumThreadsPerGroup = CLAPI.GetDeviceInfo<IntPtr>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_WORK_GROUP_SIZE).ToInt32();

    // Resolve max shared memory per block
    MaxSharedMemoryPerGroup = (int)IntrinsicMath.Min(
        CLAPI.GetDeviceInfo<long>(
            DeviceId,
            CLDeviceInfoType.CL_DEVICE_LOCAL_MEM_SIZE),
        int.MaxValue);

    // Resolve total constant memory (clamped to the int range in the same
    // way as the shared memory size above instead of being truncated)
    MaxConstantMemory = (int)IntrinsicMath.Min(
        CLAPI.GetDeviceInfo<long>(
            DeviceId,
            CLDeviceInfoType.CL_DEVICE_MAX_PARAMETER_SIZE),
        int.MaxValue);

    // Resolve clock rate
    ClockRate = CLAPI.GetDeviceInfo<int>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_CLOCK_FREQUENCY);

    // Resolve number of multiprocessors
    NumMultiprocessors = CLAPI.GetDeviceInfo<int>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_COMPUTE_UNITS);

    // Resolve max number of threads per multiprocessor (the vendor-specific
    // initialization below may adjust this value)
    MaxNumThreadsPerMultiprocessor = MaxNumThreadsPerGroup;

    InitVendorFeatures();
    InitSubGroupSupport(acceleratorId);

    Bind();
    DefaultStream = CreateStreamInternal();
    base.Backend = new CLBackend(Context, Backends.Backend.OSPlatform, Vendor);
}