/// <summary cref="MemoryBuffer{T, TIndex}.CopyFromView(AcceleratorStream, ArrayView{T}, Index)"/>
/// <remarks>
/// Views located on the CPU are written into this buffer via a non-blocking
/// buffer write; views located on an OpenCL accelerator are copied
/// buffer-to-buffer. Every other accelerator type is rejected.
/// </remarks>
protected internal unsafe override void CopyFromView(
    AcceleratorStream stream,
    ArrayView<T> source,
    Index targetOffset)
{
    var openCLStream = (CLStream)stream;
    var sourceKind = source.AcceleratorType;

    if (sourceKind == AcceleratorType.CPU)
    {
        // Host memory -> this buffer (the write is enqueued as non-blocking)
        var queue = openCLStream.CommandQueue;
        var destination = NativePtr;
        var destinationOffset = new IntPtr(targetOffset * ElementSize);
        var byteCount = new IntPtr(source.LengthInBytes);
        var hostAddress = new IntPtr(source.LoadEffectiveAddress());
        CLException.ThrowIfFailed(
            CLAPI.WriteBuffer(
                queue,
                destination,
                false,
                destinationOffset,
                byteCount,
                hostAddress));
    }
    else if (sourceKind == AcceleratorType.OpenCL)
    {
        // OpenCL buffer -> this buffer
        var queue = openCLStream.CommandQueue;
        var origin = source.Source.NativePtr;
        var destination = NativePtr;
        var originOffset = new IntPtr(source.Index * ElementSize);
        var destinationOffset = new IntPtr(targetOffset * ElementSize);
        var byteCount = new IntPtr(source.LengthInBytes);
        CLException.ThrowIfFailed(
            CLAPI.CopyBuffer(
                queue,
                origin,
                destination,
                originOffset,
                destinationOffset,
                byteCount));
    }
    else
    {
        throw new NotSupportedException(
            RuntimeErrorMessages.NotSupportedTargetAccelerator);
    }
}
/// <inheritdoc/>
/// <remarks>
/// Waits on the single event referenced by <c>EventPtr</c> and throws if the
/// wait reports an error.
/// </remarks>
public unsafe override void Synchronize()
{
    // A one-element, stack-allocated wait list holding our event handle
    ReadOnlySpan<IntPtr> waitList = stackalloc IntPtr[1] { EventPtr };
    var waitStatus = CurrentAPI.WaitForEvents(waitList);
    CLException.ThrowIfFailed(waitStatus);
}
/// <summary>
/// Detects the device vendor and initializes the vendor-dependent settings
/// (<c>Vendor</c>, <c>WarpSize</c> and, on Nvidia devices with a compute
/// capability below 7.5, a doubled <c>MaxNumThreadsPerMultiprocessor</c>).
/// </summary>
/// <remarks>
/// Nvidia and AMD devices are recognized by successfully querying their
/// vendor-specific warp/wavefront size. For all other devices a dummy kernel
/// is compiled and its preferred work-group size multiple is used as warp
/// size.
/// </remarks>
private void InitVendorFeatures()
{
    // Check major vendor features
    if (CLAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_WARP_SIZE_NV,
        out int warpSize) == CLError.CL_SUCCESS)
    {
        // Nvidia platform
        WarpSize = warpSize;
        Vendor = CLAcceleratorVendor.Nvidia;

        int major = CLAPI.GetDeviceInfo<int>(
            DeviceId,
            CLDeviceInfoType.CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV);
        int minor = CLAPI.GetDeviceInfo<int>(
            DeviceId,
            CLDeviceInfoType.CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV);

        // Compute capability < 7.5
        if (major < 7 || (major == 7 && minor < 5))
        {
            MaxNumThreadsPerMultiprocessor *= 2;
        }
    }
    else if (CLAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_WAVEFRONT_WIDTH_AMD,
        out int wavefrontSize) == CLError.CL_SUCCESS)
    {
        // AMD platform
        WarpSize = wavefrontSize;
        Vendor = CLAcceleratorVendor.AMD;
    }
    else
    {
        Vendor = VendorName.Contains(CLAcceleratorVendor.Intel.ToString())
            ? CLAcceleratorVendor.Intel
            : CLAcceleratorVendor.Other;

        // Compile dummy kernel to resolve additional information
        CLException.ThrowIfFailed(CLKernel.LoadKernel(
            this,
            DummyKernelSource,
            out IntPtr programPtr,
            out IntPtr kernelPtr));
        try
        {
            // Resolve information
            WarpSize = CLAPI.GetKernelWorkGroupInfo<IntPtr>(
                kernelPtr,
                DeviceId,
                CLKernelWorkGroupInfoType
                    .CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE).ToInt32();
        }
        finally
        {
            // Always attempt to release both handles first and verify the
            // results separately afterwards. Combining two error codes with
            // a bitwise OR yields a value that does not correspond to either
            // of the reported errors.
            var kernelReleaseError = CLAPI.ReleaseKernel(kernelPtr);
            var programReleaseError = CLAPI.ReleaseProgram(programPtr);
            CLException.ThrowIfFailed(kernelReleaseError);
            CLException.ThrowIfFailed(programReleaseError);
        }
    }
}
/// <summary>
/// Initializes vendor-dependent features. Nothing is done for Nvidia and AMD
/// devices; for every other vendor the warp size is taken from the preferred
/// work-group size multiple of a compiled dummy kernel.
/// </summary>
private void InitVendorFeatures()
{
    var vendor = Device.Vendor;
    bool needsDummyKernel =
        vendor != CLDeviceVendor.Nvidia &&
        vendor != CLDeviceVendor.AMD;
    if (needsDummyKernel)
    {
        // Build the dummy kernel that is used to query the device
        var loadStatus = CLKernel.LoadKernel(
            this,
            DummyKernelName,
            DummyKernelSource,
            CVersion,
            out IntPtr programPtr,
            out IntPtr kernelPtr,
            out var _);
        CLException.ThrowIfFailed(loadStatus);

        try
        {
            // Query the preferred work-group size multiple
            IntPtr preferredMultiple =
                CurrentAPI.GetKernelWorkGroupInfo<IntPtr>(
                    kernelPtr,
                    DeviceId,
                    CLKernelWorkGroupInfoType
                        .CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE);
            WarpSize = preferredMultiple.ToInt32();
        }
        finally
        {
            // Release the kernel first, then its surrounding program
            CLException.ThrowIfFailed(
                CurrentAPI.ReleaseKernel(kernelPtr));
            CLException.ThrowIfFailed(
                CurrentAPI.ReleaseProgram(programPtr));
        }
    }
}
/// <summary>
/// Creates a new OpenCL kernel by loading the given compiled kernel into the
/// context of the given accelerator.
/// </summary>
/// <param name="accelerator">The associated accelerator.</param>
/// <param name="kernel">The source kernel.</param>
/// <param name="launcher">The launcher method for the given kernel.</param>
/// <remarks>
/// If loading fails, the error log (or a placeholder message if no log is
/// available) is written to the trace output before the failure is thrown.
/// </remarks>
public CLKernel(
    CLAccelerator accelerator,
    CLCompiledKernel kernel,
    MethodInfo launcher)
    : base(accelerator, kernel, launcher)
{
    var loadStatus = LoadKernel(
        accelerator,
        kernel.Name,
        kernel.Source,
        kernel.CVersion,
        out programPtr,
        out kernelPtr,
        out var buildLog);

    bool loadFailed = loadStatus != CLError.CL_SUCCESS;
    if (loadFailed)
    {
        Trace.WriteLine("Kernel loading failed:");
        Trace.WriteLine(
            string.IsNullOrWhiteSpace(buildLog)
            ? ">> No error information available"
            : buildLog);
    }

    // Throws in the case of a failed load operation
    CLException.ThrowIfFailed(loadStatus);
}
/// <inheritdoc/>
/// <remarks>
/// Releases the event referenced by <c>EventPtr</c> and clears the handle.
/// </remarks>
protected override void DisposeAcceleratorObject(bool disposing)
{
    var releaseStatus = CurrentAPI.clReleaseEvent(EventPtr);
    CLException.VerifyDisposed(disposing, releaseStatus);

    // The handle must not be used any more
    EventPtr = IntPtr.Zero;
}
/// <summary>
/// Disposes this OpenCL buffer by releasing the native buffer handle and
/// clearing <c>NativePtr</c>.
/// </summary>
protected override void DisposeAcceleratorObject(bool disposing)
{
    var releaseStatus = CurrentAPI.ReleaseBuffer(NativePtr);
    CLException.VerifyDisposed(disposing, releaseStatus);

    // The handle must not be used any more
    NativePtr = IntPtr.Zero;
}
/// <summary cref="DisposeBase.Dispose(bool)"/>
/// <remarks>
/// Invokes the base implementation first and releases the OpenCL context
/// afterwards. An already released context (zero handle) is skipped so that
/// a repeated dispose does not try to release an invalid handle.
/// </remarks>
protected override void Dispose(bool disposing)
{
    base.Dispose(disposing);

    // Nothing to release (e.g. this object has been disposed before)
    if (contextPtr == IntPtr.Zero)
    {
        return;
    }

    CLException.ThrowIfFailed(
        CLAPI.ReleaseContext(contextPtr));
    contextPtr = IntPtr.Zero;
}
/// <summary cref="MemoryBuffer.MemSetToZero(AcceleratorStream)"/>
/// <remarks>
/// Fills the whole buffer (<c>LengthInBytes</c> bytes starting at offset
/// zero) with zero bytes using the command queue of the given stream.
/// </remarks>
public override void MemSetToZero(AcceleratorStream stream)
{
    var queue = ((CLStream)stream).CommandQueue;
    var fillStatus = CLAPI.FillBuffer<byte>(
        queue,
        NativePtr,
        0,
        IntPtr.Zero,
        new IntPtr(LengthInBytes));
    CLException.ThrowIfFailed(fillStatus);
}
/// <inheritdoc/>
/// <remarks>
/// Binds the associated accelerator for the duration of the call and waits
/// on the single event referenced by <c>EventPtr</c>.
/// </remarks>
public unsafe override void Synchronize()
{
    using (var binding = Accelerator.BindScoped())
    {
        // A one-element, stack-allocated wait list holding our event handle
        ReadOnlySpan<IntPtr> waitList = stackalloc IntPtr[1] { EventPtr };
        var waitStatus = CurrentAPI.WaitForEvents(waitList);
        CLException.ThrowIfFailed(waitStatus);
    }
}
/// <summary>
/// Constructs a new OpenCL stream by creating a command queue for the device
/// and the context of the given accelerator.
/// </summary>
/// <param name="accelerator">The associated accelerator.</param>
internal CLStream(CLAccelerator accelerator)
    : base(accelerator)
{
    var creationStatus = CLAPI.CreateCommandQueue(
        accelerator.DeviceId,
        accelerator.ContextPtr,
        out queuePtr);
    CLException.ThrowIfFailed(creationStatus);
}
/// <summary cref="DisposeBase.Dispose(bool)"/>
/// <remarks>
/// Releases the kernel and its surrounding program. Both release operations
/// are always attempted and both handles are cleared before the individual
/// results are verified, so no stale handle survives a failed release.
/// </remarks>
protected override void Dispose(bool disposing)
{
    // Do not merge the two results via a bitwise OR: the combination of two
    // error codes does not correspond to either of the reported errors.
    var kernelReleaseError = CLAPI.ReleaseKernel(kernelPtr);
    var programReleaseError = CLAPI.ReleaseProgram(programPtr);

    programPtr = IntPtr.Zero;
    kernelPtr = IntPtr.Zero;

    CLException.ThrowIfFailed(kernelReleaseError);
    CLException.ThrowIfFailed(programReleaseError);
}
/// <summary cref="DisposeBase.Dispose(bool)"/>
/// <remarks>
/// Releases the command queue if its handle is non-zero and clears the
/// handle afterwards.
/// </remarks>
protected override void Dispose(bool disposing)
{
    // An empty handle does not require any further work
    if (queuePtr == IntPtr.Zero)
    {
        return;
    }

    var releaseStatus = CLAPI.ReleaseCommandQueue(queuePtr);
    CLException.ThrowIfFailed(releaseStatus);
    queuePtr = IntPtr.Zero;
}
/// <summary>
/// Constructs a new OpenCL stream by creating a command queue for the device
/// and the context of the given accelerator. The stream is marked as being
/// responsible for the created queue handle.
/// </summary>
/// <param name="accelerator">The associated accelerator.</param>
internal CLStream(CLAccelerator accelerator)
    : base(accelerator)
{
    var creationStatus = CurrentAPI.CreateCommandQueue(
        accelerator.DeviceId,
        accelerator.ContextPtr,
        out queuePtr);
    CLException.ThrowIfFailed(creationStatus);

    // The queue has been created by this instance
    responsibleForHandle = true;
}
/// <summary cref="DisposeBase.Dispose(bool)"/>
/// <remarks>
/// Releases the command queue only if this stream is responsible for the
/// handle and the handle is non-zero; the base implementation is invoked in
/// all cases.
/// </remarks>
protected override void Dispose(bool disposing)
{
    bool ownsLiveQueue = responsibleForHandle && queuePtr != IntPtr.Zero;
    if (ownsLiveQueue)
    {
        var releaseStatus = CurrentAPI.ReleaseCommandQueue(queuePtr);
        CLException.ThrowIfFailed(releaseStatus);
        queuePtr = IntPtr.Zero;
    }

    base.Dispose(disposing);
}
/// <summary cref="DisposeBase.Dispose(bool)"/>
/// <remarks>
/// Releases the native buffer if its handle is non-zero and invokes the base
/// implementation afterwards.
/// </remarks>
protected override void Dispose(bool disposing)
{
    bool hasLiveBuffer = NativePtr != IntPtr.Zero;
    if (hasLiveBuffer)
    {
        var releaseStatus = CurrentAPI.ReleaseBuffer(NativePtr);
        CLException.ThrowIfFailed(releaseStatus);
        NativePtr = IntPtr.Zero;
    }

    base.Dispose(disposing);
}
/// <summary>
/// Initializes support for sub groups.
/// </summary>
/// <param name="acceleratorId">The current accelerator id.</param>
/// <remarks>
/// Sub-group support is first derived from the presence of one of the
/// <c>SubGroupExtensions</c>. If present, a small verification kernel is
/// compiled and its sub-group size is queried. <c>WarpSize</c> is updated
/// only if this query succeeds; a failed query merely disables sub-group
/// support and keeps the previously determined warp size.
/// </remarks>
private void InitSubGroupSupport(CLDevice acceleratorId)
{
    // Check sub group support
    Capabilities.SubGroups = acceleratorId.HasAnyExtension(SubGroupExtensions);
    if (!Capabilities.SubGroups)
    {
        return;
    }

    // Verify support using a simple kernel
    // NOTE(review): if the verification kernel cannot be loaded, sub-group
    // support remains enabled (unchanged behavior) - confirm that this is
    // intended.
    if (CLKernel.LoadKernel(
        this,
        DummyKernelName,
        DummySubGroupKernelSource,
        CVersion,
        out IntPtr programPtr,
        out IntPtr kernelPtr,
        out var _) == CLError.CL_SUCCESS)
    {
        // Some drivers return an internal handler delegate
        // that crashes during invocation instead of telling that the
        // sub-group feature is not supported
        try
        {
            var localGroupSizes = new IntPtr[]
            {
                new IntPtr(MaxNumThreadsPerGroup)
            };
            Capabilities.SubGroups = acceleratorId.TryGetKernelSubGroupInfo(
                kernelPtr,
                DeviceId,
                CLKernelSubGroupInfoType
                    .CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,
                localGroupSizes,
                out IntPtr subGroupSize);

            // Use the queried size in the case of a successful query only;
            // otherwise the out value does not carry a meaningful warp size
            if (Capabilities.SubGroups)
            {
                WarpSize = subGroupSize.ToInt32();
            }
        }
        catch (AccessViolationException)
        {
            // This exception can be raised due to driver issues
            // on several platforms -> we will just disable sub-group
            // support for these platforms
            Capabilities.SubGroups = false;
        }
        finally
        {
            CLException.ThrowIfFailed(
                CurrentAPI.ReleaseKernel(kernelPtr));
            CLException.ThrowIfFailed(
                CurrentAPI.ReleaseProgram(programPtr));
        }
    }
}
/// <summary>
/// Creates a new OpenCL kernel by loading the source of the given compiled
/// kernel into the context of the given accelerator.
/// </summary>
/// <param name="accelerator">The associated accelerator.</param>
/// <param name="kernel">The source kernel.</param>
/// <param name="launcher">The launcher method for the given kernel.</param>
internal CLKernel(
    CLAccelerator accelerator,
    CLCompiledKernel kernel,
    MethodInfo launcher)
    : base(accelerator, kernel, launcher)
{
    var loadStatus = LoadKernel(
        accelerator,
        kernel.Source,
        out programPtr,
        out kernelPtr);

    // Throws in the case of a failed load operation
    CLException.ThrowIfFailed(loadStatus);
}
/// <summary>
/// Disposes this OpenCL stream. The command queue is released only if this
/// stream is responsible for the handle and the handle is non-zero.
/// </summary>
protected override void DisposeAcceleratorObject(bool disposing)
{
    bool ownsLiveQueue = responsibleForHandle && queuePtr != IntPtr.Zero;
    if (ownsLiveQueue)
    {
        var releaseStatus = CurrentAPI.ReleaseCommandQueue(queuePtr);
        CLException.VerifyDisposed(disposing, releaseStatus);
        queuePtr = IntPtr.Zero;
    }
}
/// <summary cref="Accelerator.Synchronize"/>
/// <remarks>
/// Enqueues a barrier on the command queue of every child
/// <see cref="CLStream"/> whose queue handle is non-zero, waits for all
/// resulting barrier events and finally releases these events again.
/// </remarks>
protected unsafe override void SynchronizeInternal()
{
    // The stream instances that have been visited and all the events to wait
    // on. Each event represents the completion of all operations queued prior
    // to said event.
    var streamInstances = InlineList <CLStream> .Create(4);
    var streamEvents = InlineList <IntPtr> .Create(4);
    try
    {
        ForEachChildObject <CLStream>(stream =>
        {
            // Ignore disposed command queues at this point (their queue
            // handle has been reset to zero)
            if (stream.CommandQueue == IntPtr.Zero)
            {
                return;
            }

            // Low cost IntPtr* (cl_event*) allocation: a single
            // stack-allocated slot that receives the barrier event
            IntPtr *resultEvent = stackalloc IntPtr[1];
            CLException.ThrowIfFailed(
                CurrentAPI.EnqueueBarrierWithWaitList(
                    stream.CommandQueue, Array.Empty <IntPtr>(), resultEvent));

            // Dereference the pointer so we can store the event handle
            // beyond the lifetime of the stack slot
            streamEvents.Add(*resultEvent);

            // Keep the stream instance alive to avoid automatic disposal
            streamInstances.Add(stream);
        });

        // Wait for all the events to fire, which would mean all operations
        // queued on an accelerator prior to synchronization have finished.
        // The wait is skipped if no barrier has been enqueued at all.
        if (streamEvents.Count > 0)
        {
            CLException.ThrowIfFailed(
                CurrentAPI.WaitForEvents(streamEvents));
        }
    }
    finally
    {
        // Clean up the events we made (also in the case of an exception).
        // Note that a failing release throws and thereby skips the release
        // of the remaining events.
        foreach (var streamEvent in streamEvents)
        {
            CLException.ThrowIfFailed(
                CurrentAPI.ReleaseEvent(streamEvent));
        }
    }
}
/// <summary cref="DisposeBase.Dispose(bool)"/>
/// <remarks>
/// Releases the kernel and afterwards its surrounding program (each only if
/// the corresponding handle is non-zero) and invokes the base
/// implementation.
/// </remarks>
protected override void Dispose(bool disposing)
{
    // Free the kernel
    bool hasKernel = kernelPtr != IntPtr.Zero;
    if (hasKernel)
    {
        var kernelStatus = CurrentAPI.ReleaseKernel(kernelPtr);
        CLException.ThrowIfFailed(kernelStatus);
        kernelPtr = IntPtr.Zero;
    }

    // Free the surrounding program
    bool hasProgram = programPtr != IntPtr.Zero;
    if (hasProgram)
    {
        var programStatus = CurrentAPI.ReleaseProgram(programPtr);
        CLException.ThrowIfFailed(programStatus);
        programPtr = IntPtr.Zero;
    }

    base.Dispose(disposing);
}
/// <summary>
/// Loads the given OpenCL kernel.
/// </summary>
/// <param name="accelerator">The associated accelerator.</param>
/// <param name="name">The name of the entry-point function.</param>
/// <param name="source">The OpenCL source code.</param>
/// <param name="version">The OpenCL C version.</param>
/// <param name="programPtr">
/// The created program pointer. It is reset to <see cref="IntPtr.Zero"/>
/// if the program could not be built or the kernel could not be created.
/// </param>
/// <param name="kernelPtr">
/// The created kernel pointer (<see cref="IntPtr.Zero"/> on failure).
/// </param>
/// <param name="errorLog">
/// The program build log if building the program failed; null otherwise.
/// </param>
/// <returns>
/// <see cref="CLError.CL_SUCCESS"/> if the program and the kernel could be
/// loaded successfully; otherwise the error code of the failed step.
/// </returns>
public static CLError LoadKernel(
    CLAccelerator accelerator,
    string name,
    string source,
    CLCVersion version,
    out IntPtr programPtr,
    out IntPtr kernelPtr,
    out string errorLog)
{
    errorLog = null;
    kernelPtr = IntPtr.Zero;
    var programError = CurrentAPI.CreateProgram(
        accelerator.NativePtr,
        source,
        out programPtr);
    if (programError != CLError.CL_SUCCESS)
    {
        return programError;
    }

    // Specify the OpenCL C version.
    string options = "-cl-std=" + version.ToString();

    var buildError = CurrentAPI.BuildProgram(
        programPtr,
        accelerator.DeviceId,
        options);
    if (buildError != CLError.CL_SUCCESS)
    {
        // Fetch the build log for the caller and free the broken program
        CLException.ThrowIfFailed(
            CurrentAPI.GetProgramBuildLog(
                programPtr,
                accelerator.DeviceId,
                out errorLog));
        CLException.ThrowIfFailed(
            CurrentAPI.ReleaseProgram(programPtr));
        programPtr = IntPtr.Zero;
        return buildError;
    }

    var kernelError = CurrentAPI.CreateKernel(
        programPtr,
        name,
        out kernelPtr);
    if (kernelError != CLError.CL_SUCCESS)
    {
        // Do not leak the successfully built program if the kernel could
        // not be created (mirrors the handling of failed builds above)
        CLException.ThrowIfFailed(
            CurrentAPI.ReleaseProgram(programPtr));
        programPtr = IntPtr.Zero;
        kernelPtr = IntPtr.Zero;
    }
    return kernelError;
}
/// <summary>
/// Constructs a new OpenCL stream by creating a command queue for the given
/// accelerator. Profiling is enabled on the queue if the context properties
/// request it. The stream is marked as being responsible for the created
/// queue handle.
/// </summary>
/// <param name="accelerator">The associated accelerator.</param>
internal CLStream(CLAccelerator accelerator)
    : base(accelerator)
{
    // Start without any special properties and opt into profiling on demand
    CLCommandQueueProperties properties = default;
    if (Accelerator.Context.Properties.EnableProfiling)
    {
        properties = CLCommandQueueProperties.CL_QUEUE_PROFILING_ENABLE;
    }

    var creationStatus = CurrentAPI.CreateCommandQueue(
        accelerator.PlatformVersion,
        accelerator.DeviceId,
        accelerator.NativePtr,
        properties,
        out queuePtr);
    CLException.ThrowIfFailed(creationStatus);

    // The queue has been created by this instance
    responsibleForHandle = true;
}
/// <inheritdoc/>
/// <remarks>
/// Fills <paramref name="lengthInBytes"/> bytes of this buffer, starting at
/// <paramref name="offsetInBytes"/>, with <paramref name="value"/> using the
/// command queue of the given stream. The scoped accelerator binding is
/// recovered even if the fill operation fails.
/// </remarks>
protected internal override unsafe void MemSetInternal(
    AcceleratorStream stream,
    byte value,
    long offsetInBytes,
    long lengthInBytes)
{
    var binding = Accelerator.BindScoped();
    try
    {
        CLException.ThrowIfFailed(
            CurrentAPI.FillBuffer(
                ((CLStream)stream).CommandQueue,
                NativePtr,
                value,
                new IntPtr(offsetInBytes),
                new IntPtr(lengthInBytes)));
    }
    finally
    {
        // Always restore the previous binding, also in the case of an error
        binding.Recover();
    }
}
/// <summary>
/// Prepares the dynamic shared memory arguments of the given kernel prior to
/// its launch.
/// </summary>
/// <param name="stream">The current stream (not used by this method).</param>
/// <param name="kernel">The kernel to configure.</param>
/// <param name="config">The runtime kernel configuration.</param>
/// <returns>The result of setting kernel argument 1.</returns>
public readonly CLError PreLaunchKernel(
    CLStream stream,
    CLKernel kernel,
    RuntimeKernelConfig config)
{
    var dynamicArraySize = config.SharedMemoryConfig.DynamicArraySize;

    // Argument 0: allocate local buffer of desired size (no data pointer).
    var allocationStatus = CurrentAPI.SetKernelArgumentUnsafeWithKernel(
        kernel,
        0,
        dynamicArraySize,
        null);
    CLException.ThrowIfFailed(allocationStatus);

    // Argument 1: the length of the local buffer (in bytes).
    var lengthStatus = CurrentAPI.SetKernelArgument(
        kernel.KernelPtr,
        1,
        dynamicArraySize);
    return lengthStatus;
}
/// <inheritdoc/>
/// <remarks>
/// Enqueues a barrier on the command queue of this stream, wraps the
/// resulting event in a <see cref="CLProfilingMarker"/> and synchronizes the
/// marker before returning it.
/// </remarks>
protected unsafe override ProfilingMarker AddProfilingMarkerInternal()
{
    // Local slot that receives the event of the enqueued barrier
    IntPtr barrierEvent = default;
    var enqueueStatus = CurrentAPI.EnqueueBarrierWithWaitList(
        queuePtr,
        Array.Empty<IntPtr>(),
        &barrierEvent);
    CLException.ThrowIfFailed(enqueueStatus);

    // WORKAROUND: The OpenCL event needs to be awaited now, otherwise
    // it does not contain the correct timing - it appears to have the timing
    // of whenever it gets awaited.
    var profilingMarker = new CLProfilingMarker(barrierEvent);
    profilingMarker.Synchronize();
    return profilingMarker;
}
/// <summary>
/// Disposes this OpenCL kernel by releasing the kernel and afterwards its
/// surrounding program (each only if the corresponding handle is non-zero).
/// </summary>
protected override void DisposeAcceleratorObject(bool disposing)
{
    // Free the kernel
    bool hasKernel = kernelPtr != IntPtr.Zero;
    if (hasKernel)
    {
        var kernelStatus = CurrentAPI.ReleaseKernel(kernelPtr);
        CLException.VerifyDisposed(disposing, kernelStatus);
        kernelPtr = IntPtr.Zero;
    }

    // Free the surrounding program
    bool hasProgram = programPtr != IntPtr.Zero;
    if (hasProgram)
    {
        var programStatus = CurrentAPI.ReleaseProgram(programPtr);
        CLException.VerifyDisposed(disposing, programStatus);
        programPtr = IntPtr.Zero;
    }
}
/// <summary cref="MemoryBuffer{T, TIndex}.CopyToView(
/// AcceleratorStream, ArrayView{T}, LongIndex1)"/>
/// <remarks>
/// Views located on the CPU are filled via a non-blocking buffer read; views
/// located on an OpenCL accelerator are filled via a buffer-to-buffer copy.
/// Every other accelerator type is rejected. The scoped accelerator binding
/// is recovered even if the operation fails.
/// </remarks>
/// <exception cref="NotSupportedException">
/// The target view belongs to an unsupported accelerator type.
/// </exception>
protected internal unsafe override void CopyToView(
    AcceleratorStream stream,
    ArrayView<T> target,
    LongIndex1 sourceOffset)
{
    var binding = Accelerator.BindScoped();
    try
    {
        var clStream = (CLStream)stream;
        switch (target.AcceleratorType)
        {
            case AcceleratorType.CPU:
                // This buffer -> host memory (enqueued as non-blocking read)
                CLException.ThrowIfFailed(
                    CurrentAPI.ReadBuffer(
                        clStream.CommandQueue,
                        NativePtr,
                        false,
                        new IntPtr(sourceOffset * ElementSize),
                        new IntPtr(target.LengthInBytes),
                        new IntPtr(target.LoadEffectiveAddress())));
                break;
            case AcceleratorType.OpenCL:
                // This buffer -> OpenCL buffer
                CLException.ThrowIfFailed(
                    CurrentAPI.CopyBuffer(
                        clStream.CommandQueue,
                        NativePtr,
                        target.Source.NativePtr,
                        new IntPtr(sourceOffset * ElementSize),
                        new IntPtr(target.Index * ElementSize),
                        new IntPtr(target.LengthInBytes)));
                break;
            default:
                throw new NotSupportedException(
                    RuntimeErrorMessages.NotSupportedTargetAccelerator);
        }
    }
    finally
    {
        // Always restore the previous binding, also in the case of an error
        binding.Recover();
    }
}
/// <summary>
/// Loads the given OpenCL kernel.
/// </summary>
/// <param name="accelerator">The associated accelerator.</param>
/// <param name="source">The OpenCL source code.</param>
/// <param name="programPtr">
/// The created program pointer (<see cref="IntPtr.Zero"/> if building the
/// program or creating the kernel failed).
/// </param>
/// <param name="kernelPtr">
/// The created kernel pointer (<see cref="IntPtr.Zero"/> on failure).
/// </param>
/// <returns>
/// <see cref="CLError.CL_SUCCESS"/> if the program and the kernel could be
/// loaded successfully; otherwise the error code of the first failed step.
/// </returns>
internal static CLError LoadKernel(
    CLAccelerator accelerator,
    string source,
    out IntPtr programPtr,
    out IntPtr kernelPtr)
{
    kernelPtr = IntPtr.Zero;
    var error = CLAPI.CreateProgram(
        accelerator.ContextPtr,
        source,
        out programPtr);
    if (error != CLError.CL_SUCCESS)
    {
        return error;
    }

    // TODO: OpenCL compiler options
    string options = string.Empty;

    // Report the error of the first failing step instead of merging several
    // error codes via a bitwise OR (which yields a meaningless value), and
    // do not try to create a kernel from a program that failed to build.
    error = CLAPI.BuildProgram(
        programPtr,
        accelerator.DeviceId,
        options);
    if (error == CLError.CL_SUCCESS)
    {
        error = CLAPI.CreateKernel(
            programPtr,
            CLCompiledKernel.EntryName,
            out kernelPtr);
    }

    if (error != CLError.CL_SUCCESS)
    {
        // Free the program that cannot be used any more
        CLException.ThrowIfFailed(
            CLAPI.ReleaseProgram(programPtr));
        programPtr = IntPtr.Zero;
        kernelPtr = IntPtr.Zero;
    }
    return error;
}
/// <summary cref="MemoryBuffer{T, TIndex}.CopyFromView(
/// AcceleratorStream, ArrayView{T}, LongIndex1)"/>
/// <remarks>
/// Views located on the CPU are written into this buffer via a non-blocking
/// buffer write; views located on an OpenCL accelerator are copied
/// buffer-to-buffer. Every other accelerator type is rejected. The scoped
/// accelerator binding is recovered even if the operation fails.
/// </remarks>
/// <exception cref="NotSupportedException">
/// The source view belongs to an unsupported accelerator type.
/// </exception>
protected internal unsafe override void CopyFromView(
    AcceleratorStream stream,
    ArrayView<T> source,
    LongIndex1 targetOffset)
{
    var binding = Accelerator.BindScoped();
    try
    {
        // The OpenCL API operates on the native command queue of the
        // stream (same convention as used by CopyToView)
        var clStream = (CLStream)stream;
        switch (source.AcceleratorType)
        {
            case AcceleratorType.CPU:
                // Host memory -> this buffer (enqueued as non-blocking write)
                CLException.ThrowIfFailed(
                    CurrentAPI.WriteBuffer(
                        clStream.CommandQueue,
                        NativePtr,
                        false,
                        new IntPtr(targetOffset * ElementSize),
                        new IntPtr(source.LengthInBytes),
                        new IntPtr(source.LoadEffectiveAddress())));
                break;
            case AcceleratorType.OpenCL:
                // OpenCL buffer -> this buffer
                CLException.ThrowIfFailed(
                    CurrentAPI.CopyBuffer(
                        clStream.CommandQueue,
                        source.Source.NativePtr,
                        NativePtr,
                        new IntPtr(source.Index * ElementSize),
                        new IntPtr(targetOffset * ElementSize),
                        new IntPtr(source.LengthInBytes)));
                break;
            default:
                throw new NotSupportedException(
                    RuntimeErrorMessages.NotSupportedTargetAccelerator);
        }
    }
    finally
    {
        // Always restore the previous binding, also in the case of an error
        binding.Recover();
    }
}