Пример #1
0
        /// <summary>
        /// Copies the contents of the given source view into this buffer,
        /// beginning at the given target offset.
        /// </summary>
        /// <param name="stream">
        /// The accelerator stream (cast to <c>CLStream</c> internally).
        /// </param>
        /// <param name="source">The source view (CPU or OpenCL memory).</param>
        /// <param name="targetOffset">The target offset in elements.</param>
        /// <exception cref="NotSupportedException">
        /// Thrown if the accelerator type of the source view is neither CPU nor
        /// OpenCL.
        /// </exception>
        protected internal unsafe override void CopyFromView(
            AcceleratorStream stream,
            ArrayView<T> source,
            Index targetOffset)
        {
            var queueOwner = (CLStream)stream;
            var sourceKind = source.AcceleratorType;

            if (sourceKind == AcceleratorType.CPU)
            {
                // Host memory -> this buffer.
                // NOTE(review): the 'false' flag is presumably the blocking-write
                // switch of the underlying OpenCL call - confirm against CLAPI.
                var writeStatus = CLAPI.WriteBuffer(
                    queueOwner.CommandQueue,
                    NativePtr,
                    false,
                    new IntPtr(targetOffset * ElementSize),
                    new IntPtr(source.LengthInBytes),
                    new IntPtr(source.LoadEffectiveAddress()));
                CLException.ThrowIfFailed(writeStatus);
            }
            else if (sourceKind == AcceleratorType.OpenCL)
            {
                // OpenCL buffer -> this buffer; offsets are converted to bytes
                var copyStatus = CLAPI.CopyBuffer(
                    queueOwner.CommandQueue,
                    source.Source.NativePtr,
                    NativePtr,
                    new IntPtr(source.Index * ElementSize),
                    new IntPtr(targetOffset * ElementSize),
                    new IntPtr(source.LengthInBytes));
                CLException.ThrowIfFailed(copyStatus);
            }
            else
            {
                throw new NotSupportedException(
                    RuntimeErrorMessages.NotSupportedTargetAccelerator);
            }
        }
Пример #2
0
        /// <inheritdoc/>
        /// <remarks>
        /// Waits for the single event stored in <c>EventPtr</c> and throws if the
        /// wait operation reports an error.
        /// </remarks>
        public unsafe override void Synchronize()
        {
            // One-element wait list that lives on the stack
            ReadOnlySpan<IntPtr> waitList = stackalloc IntPtr[1] { EventPtr };

            var waitStatus = CurrentAPI.WaitForEvents(waitList);
            CLException.ThrowIfFailed(waitStatus);
        }
Пример #3
0
        /// <summary>
        /// Detects the device vendor and initializes the vendor-dependent
        /// properties <c>Vendor</c> and <c>WarpSize</c> (and, on Nvidia devices,
        /// <c>MaxNumThreadsPerMultiprocessor</c>).
        /// </summary>
        private void InitVendorFeatures()
        {
            // Check major vendor features
            if (CLAPI.GetDeviceInfo(
                    DeviceId,
                    CLDeviceInfoType.CL_DEVICE_WARP_SIZE_NV,
                    out int warpSize) == CLError.CL_SUCCESS)
            {
                // Nvidia platform
                WarpSize = warpSize;
                Vendor = CLAcceleratorVendor.Nvidia;

                int major = CLAPI.GetDeviceInfo<int>(
                    DeviceId,
                    CLDeviceInfoType.CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV);
                int minor = CLAPI.GetDeviceInfo<int>(
                    DeviceId,
                    CLDeviceInfoType.CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV);

                // Devices with a compute capability below 7.5 get twice the
                // number of threads per multiprocessor
                if (major < 7 || (major == 7 && minor < 5))
                {
                    MaxNumThreadsPerMultiprocessor *= 2;
                }
            }
            else if (CLAPI.GetDeviceInfo(
                         DeviceId,
                         CLDeviceInfoType.CL_DEVICE_WAVEFRONT_WIDTH_AMD,
                         out int wavefrontSize) == CLError.CL_SUCCESS)
            {
                // AMD platform
                WarpSize = wavefrontSize;
                Vendor = CLAcceleratorVendor.AMD;
            }
            else
            {
                Vendor = VendorName.Contains(CLAcceleratorVendor.Intel.ToString()) ?
                         CLAcceleratorVendor.Intel :
                         CLAcceleratorVendor.Other;

                // Compile dummy kernel to resolve additional information
                CLException.ThrowIfFailed(CLKernel.LoadKernel(
                                              this,
                                              DummyKernelSource,
                                              out IntPtr programPtr,
                                              out IntPtr kernelPtr));
                try
                {
                    // Resolve information
                    WarpSize = CLAPI.GetKernelWorkGroupInfo<IntPtr>(
                        kernelPtr,
                        DeviceId,
                        CLKernelWorkGroupInfoType.CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE).ToInt32();
                }
                finally
                {
                    // Release both handles first and check the error codes
                    // separately afterwards: combining two error codes with a
                    // bitwise OR can produce a value that matches neither of them.
                    var kernelReleaseError = CLAPI.ReleaseKernel(kernelPtr);
                    var programReleaseError = CLAPI.ReleaseProgram(programPtr);

                    CLException.ThrowIfFailed(kernelReleaseError);
                    CLException.ThrowIfFailed(programReleaseError);
                }
            }
        }
Пример #4
0
 /// <summary>
 /// Initializes major vendor features.
 /// </summary>
 /// <remarks>
 /// Nothing is done for Nvidia and AMD devices. For every other vendor, a dummy
 /// kernel is compiled and its preferred work-group size multiple is stored in
 /// <c>WarpSize</c>.
 /// </remarks>
 private void InitVendorFeatures()
 {
     var vendor = Device.Vendor;
     bool isMajorVendor =
         vendor == CLDeviceVendor.Nvidia ||
         vendor == CLDeviceVendor.AMD;
     if (isMajorVendor)
     {
         return;
     }

     // Build the dummy kernel that is used to query additional information
     var loadStatus = CLKernel.LoadKernel(
         this,
         DummyKernelName,
         DummyKernelSource,
         CVersion,
         out IntPtr dummyProgram,
         out IntPtr dummyKernel,
         out _);
     CLException.ThrowIfFailed(loadStatus);

     try
     {
         // Query the preferred work-group size multiple of the dummy kernel
         IntPtr sizeMultiple = CurrentAPI.GetKernelWorkGroupInfo<IntPtr>(
             dummyKernel,
             DeviceId,
             CLKernelWorkGroupInfoType
             .CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE);
         WarpSize = sizeMultiple.ToInt32();
     }
     finally
     {
         // Kernel first, then the program it belongs to
         var kernelStatus = CurrentAPI.ReleaseKernel(dummyKernel);
         CLException.ThrowIfFailed(kernelStatus);

         var programStatus = CurrentAPI.ReleaseProgram(dummyProgram);
         CLException.ThrowIfFailed(programStatus);
     }
 }
Пример #5
0
        /// <summary>
        /// Loads a compiled kernel into the given OpenCL context as kernel program.
        /// </summary>
        /// <param name="accelerator">The associated accelerator.</param>
        /// <param name="kernel">The source kernel.</param>
        /// <param name="launcher">The launcher method for the given kernel.</param>
        /// <remarks>
        /// If loading fails, the error log (if any) is written to the trace
        /// output before the error code is turned into an exception.
        /// </remarks>
        public CLKernel(
            CLAccelerator accelerator,
            CLCompiledKernel kernel,
            MethodInfo launcher)
            : base(accelerator, kernel, launcher)
        {
            var loadStatus = LoadKernel(
                accelerator,
                kernel.Name,
                kernel.Source,
                kernel.CVersion,
                out programPtr,
                out kernelPtr,
                out var buildLog);

            if (loadStatus != CLError.CL_SUCCESS)
            {
                // Emit diagnostics before throwing below
                Trace.WriteLine("Kernel loading failed:");
                Trace.WriteLine(
                    string.IsNullOrWhiteSpace(buildLog)
                    ? ">> No error information available"
                    : buildLog);
            }

            CLException.ThrowIfFailed(loadStatus);
        }
Пример #6
0
 /// <inheritdoc/>
 /// <remarks>
 /// Releases the event referenced by <c>EventPtr</c> and resets the handle.
 /// </remarks>
 protected override void DisposeAcceleratorObject(bool disposing)
 {
     var releaseStatus = CurrentAPI.clReleaseEvent(EventPtr);
     CLException.VerifyDisposed(disposing, releaseStatus);

     // The handle must not be used after this point
     EventPtr = IntPtr.Zero;
 }
Пример #7
0
 /// <summary>
 /// Disposes this OpenCL buffer by releasing its native buffer handle.
 /// </summary>
 /// <param name="disposing">
 /// Forwarded to <c>CLException.VerifyDisposed</c> together with the result of
 /// the release operation.
 /// </param>
 protected override void DisposeAcceleratorObject(bool disposing)
 {
     var releaseStatus = CurrentAPI.ReleaseBuffer(NativePtr);
     CLException.VerifyDisposed(disposing, releaseStatus);

     // The handle must not be used after this point
     NativePtr = IntPtr.Zero;
 }
Пример #8
0
        /// <summary>
        /// Disposes the base object and releases the OpenCL context handle.
        /// </summary>
        /// <param name="disposing">Forwarded to the base implementation.</param>
        /// <remarks>
        /// The context is released only while the handle is non-zero, so that
        /// calling this method repeatedly does not try to release an already
        /// released (zero) handle.
        /// </remarks>
        protected override void Dispose(bool disposing)
        {
            base.Dispose(disposing);

            if (contextPtr != IntPtr.Zero)
            {
                CLException.ThrowIfFailed(
                    CLAPI.ReleaseContext(contextPtr));
                contextPtr = IntPtr.Zero;
            }
        }
Пример #9
0
 /// <summary>
 /// Fills the whole buffer with zero bytes using the given stream.
 /// </summary>
 /// <param name="stream">
 /// The accelerator stream (cast to <c>CLStream</c> internally).
 /// </param>
 public override void MemSetToZero(AcceleratorStream stream)
 {
     var commandQueue = ((CLStream)stream).CommandQueue;
     var totalLength = new IntPtr(LengthInBytes);

     // Byte pattern 0, starting at offset 0 and covering the full length
     var fillStatus = CLAPI.FillBuffer<byte>(
         commandQueue,
         NativePtr,
         0,
         IntPtr.Zero,
         totalLength);
     CLException.ThrowIfFailed(fillStatus);
 }
Пример #10
0
        /// <inheritdoc/>
        /// <remarks>
        /// Binds the associated accelerator for the duration of the call and
        /// waits for the single event stored in <c>EventPtr</c>.
        /// </remarks>
        public unsafe override void Synchronize()
        {
            using (Accelerator.BindScoped())
            {
                // One-element wait list that lives on the stack
                ReadOnlySpan<IntPtr> waitList = stackalloc IntPtr[1] { EventPtr };

                var waitStatus = CurrentAPI.WaitForEvents(waitList);
                CLException.ThrowIfFailed(waitStatus);
            }
        }
Пример #11
0
 /// <summary>
 /// Constructs a new OpenCL stream and creates its command queue.
 /// </summary>
 /// <param name="accelerator">The associated accelerator.</param>
 internal CLStream(CLAccelerator accelerator)
     : base(accelerator)
 {
     // The queue is created for the device and context of the accelerator
     var creationStatus = CLAPI.CreateCommandQueue(
         accelerator.DeviceId,
         accelerator.ContextPtr,
         out queuePtr);
     CLException.ThrowIfFailed(creationStatus);
 }
Пример #12
0
        /// <summary>
        /// Releases the OpenCL kernel and its surrounding program.
        /// </summary>
        /// <param name="disposing">Not used by this implementation.</param>
        /// <remarks>
        /// Both handles are released (if non-zero) before any error is reported.
        /// The error codes are checked one by one because a bitwise OR of two
        /// error codes can produce a value that matches neither of them.
        /// </remarks>
        protected override void Dispose(bool disposing)
        {
            var kernelError = CLError.CL_SUCCESS;
            if (kernelPtr != IntPtr.Zero)
            {
                kernelError = CLAPI.ReleaseKernel(kernelPtr);
                kernelPtr = IntPtr.Zero;
            }

            var programError = CLError.CL_SUCCESS;
            if (programPtr != IntPtr.Zero)
            {
                programError = CLAPI.ReleaseProgram(programPtr);
                programPtr = IntPtr.Zero;
            }

            CLException.ThrowIfFailed(kernelError);
            CLException.ThrowIfFailed(programError);
        }
Пример #13
0
 /// <summary>
 /// Releases the command queue of this stream (if there is one).
 /// </summary>
 /// <param name="disposing">Not used by this implementation.</param>
 protected override void Dispose(bool disposing)
 {
     // Nothing to release
     if (queuePtr == IntPtr.Zero)
     {
         return;
     }

     var releaseStatus = CLAPI.ReleaseCommandQueue(queuePtr);
     CLException.ThrowIfFailed(releaseStatus);
     queuePtr = IntPtr.Zero;
 }
Пример #14
0
 /// <summary>
 /// Constructs a new OpenCL stream that creates its own command queue and
 /// marks itself as responsible for the queue handle.
 /// </summary>
 /// <param name="accelerator">The associated accelerator.</param>
 internal CLStream(CLAccelerator accelerator)
     : base(accelerator)
 {
     var creationStatus = CurrentAPI.CreateCommandQueue(
         accelerator.DeviceId,
         accelerator.ContextPtr,
         out queuePtr);
     CLException.ThrowIfFailed(creationStatus);

     // Reached only if the queue has been created successfully
     responsibleForHandle = true;
 }
Пример #15
0
 /// <summary>
 /// Releases the command queue (if this stream is responsible for a non-zero
 /// handle) and disposes the base object afterwards.
 /// </summary>
 /// <param name="disposing">Forwarded to the base implementation.</param>
 protected override void Dispose(bool disposing)
 {
     bool ownsLiveQueue = responsibleForHandle && queuePtr != IntPtr.Zero;
     if (ownsLiveQueue)
     {
         var releaseStatus = CurrentAPI.ReleaseCommandQueue(queuePtr);
         CLException.ThrowIfFailed(releaseStatus);
         queuePtr = IntPtr.Zero;
     }

     base.Dispose(disposing);
 }
Пример #16
0
 /// <summary>
 /// Releases the native buffer (if the handle is non-zero) and disposes the
 /// base object afterwards.
 /// </summary>
 /// <param name="disposing">Forwarded to the base implementation.</param>
 protected override void Dispose(bool disposing)
 {
     var bufferHandle = NativePtr;
     if (bufferHandle != IntPtr.Zero)
     {
         var releaseStatus = CurrentAPI.ReleaseBuffer(bufferHandle);
         CLException.ThrowIfFailed(releaseStatus);
         NativePtr = IntPtr.Zero;
     }

     base.Dispose(disposing);
 }
Пример #17
0
        /// <summary>
        /// Initializes support for sub groups.
        /// </summary>
        /// <param name="acceleratorId">The current accelerator id.</param>
        /// <remarks>
        /// Sub-group support requires one of the extensions in
        /// <c>SubGroupExtensions</c>. If the extension is present and the dummy
        /// sub-group kernel can be loaded, the maximum sub-group size is queried;
        /// <c>WarpSize</c> is updated only if this query succeeds.
        /// </remarks>
        private void InitSubGroupSupport(CLDevice acceleratorId)
        {
            // Check sub group support
            Capabilities.SubGroups = acceleratorId.HasAnyExtension(SubGroupExtensions);
            if (!Capabilities.SubGroups)
            {
                return;
            }

            // Verify support using a simple kernel
            if (CLKernel.LoadKernel(
                    this,
                    DummyKernelName,
                    DummySubGroupKernelSource,
                    CVersion,
                    out IntPtr programPtr,
                    out IntPtr kernelPtr,
                    out var _) == CLError.CL_SUCCESS)
            {
                // Some drivers return an internal handler delegate
                // that crashes during invocation instead of telling that the
                // sub-group feature is not supported
                try
                {
                    var localGroupSizes = new IntPtr[]
                    {
                        new IntPtr(MaxNumThreadsPerGroup)
                    };
                    Capabilities.SubGroups = acceleratorId.TryGetKernelSubGroupInfo(
                        kernelPtr,
                        DeviceId,
                        CLKernelSubGroupInfoType
                        .CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,
                        localGroupSizes,
                        out IntPtr subGroupSize);

                    // Only trust the reported size if the query succeeded;
                    // otherwise keep the previously determined warp size
                    if (Capabilities.SubGroups)
                    {
                        WarpSize = subGroupSize.ToInt32();
                    }
                }
                catch (AccessViolationException)
                {
                    // This exception can be raised due to driver issues
                    // on several platforms -> we will just disable sub-group
                    // support for these platforms
                    Capabilities.SubGroups = false;
                }
                finally
                {
                    CLException.ThrowIfFailed(
                        CurrentAPI.ReleaseKernel(kernelPtr));
                    CLException.ThrowIfFailed(
                        CurrentAPI.ReleaseProgram(programPtr));
                }
            }
        }
Пример #18
0
 /// <summary>
 /// Loads a compiled kernel as an OpenCL program and kernel.
 /// </summary>
 /// <param name="accelerator">The associated accelerator.</param>
 /// <param name="kernel">The compiled kernel providing the source.</param>
 /// <param name="launcher">The launcher method for the given kernel.</param>
 internal CLKernel(
     CLAccelerator accelerator,
     CLCompiledKernel kernel,
     MethodInfo launcher)
     : base(accelerator, kernel, launcher)
 {
     var loadStatus = LoadKernel(
         accelerator,
         kernel.Source,
         out programPtr,
         out kernelPtr);
     CLException.ThrowIfFailed(loadStatus);
 }
Пример #19
0
        /// <summary>
        /// Disposes this OpenCL stream.
        /// </summary>
        /// <param name="disposing">
        /// Forwarded to <c>CLException.VerifyDisposed</c> together with the result
        /// of the release operation.
        /// </param>
        /// <remarks>
        /// The command queue is released only if this stream is responsible for
        /// the handle and the handle is non-zero.
        /// </remarks>
        protected override void DisposeAcceleratorObject(bool disposing)
        {
            bool ownsLiveQueue = responsibleForHandle && queuePtr != IntPtr.Zero;
            if (ownsLiveQueue)
            {
                var releaseStatus = CurrentAPI.ReleaseCommandQueue(queuePtr);
                CLException.VerifyDisposed(disposing, releaseStatus);
                queuePtr = IntPtr.Zero;
            }
        }
Пример #20
0
        /// <summary cref="Accelerator.Synchronize"/>
        /// <remarks>
        /// Enqueues a barrier on the command queue of every child
        /// <c>CLStream</c> with a non-zero command queue, waits for all resulting
        /// events and finally releases these events again (also if waiting
        /// fails).
        /// </remarks>
        protected unsafe override void SynchronizeInternal()
        {
            // All the events to wait on. Each event represents the completion
            // of all operations queued prior to said event.
            // NOTE(review): the argument 4 is presumably the initial capacity of
            // the inline lists - confirm against InlineList.Create.
            var streamInstances = InlineList <CLStream> .Create(4);

            var streamEvents = InlineList <IntPtr> .Create(4);

            try
            {
                // Enqueue one barrier per child stream and collect its event
                ForEachChildObject <CLStream>(stream =>
                {
                    // Ignore disposed command queues at this point
                    if (stream.CommandQueue == IntPtr.Zero)
                    {
                        return;
                    }

                    // Low cost IntPtr* (cl_event*) allocation
                    IntPtr *resultEvent = stackalloc IntPtr[1];
                    CLException.ThrowIfFailed(
                        CurrentAPI.EnqueueBarrierWithWaitList(
                            stream.CommandQueue,
                            Array.Empty <IntPtr>(),
                            resultEvent));

                    // Dereference the pointer so we can store it
                    streamEvents.Add(*resultEvent);

                    // Keep the stream instance alive to avoid automatic disposal
                    streamInstances.Add(stream);
                });

                // Wait for all the events to fire, which would mean all operations
                // queued on an accelerator prior to synchronization have finished
                if (streamEvents.Count > 0)
                {
                    CLException.ThrowIfFailed(
                        CurrentAPI.WaitForEvents(streamEvents));
                }
            }
            finally
            {
                // Clean up the events we made. Note that a failing release
                // throws immediately, which skips the remaining events.
                foreach (var streamEvent in streamEvents)
                {
                    CLException.ThrowIfFailed(
                        CurrentAPI.ReleaseEvent(streamEvent));
                }
            }
        }
Пример #21
0
 /// <summary>
 /// Releases the kernel and its program (each only if its handle is
 /// non-zero) and disposes the base object afterwards.
 /// </summary>
 /// <param name="disposing">Forwarded to the base implementation.</param>
 protected override void Dispose(bool disposing)
 {
     // Free the kernel first ...
     if (kernelPtr != IntPtr.Zero)
     {
         var kernelStatus = CurrentAPI.ReleaseKernel(kernelPtr);
         CLException.ThrowIfFailed(kernelStatus);
         kernelPtr = IntPtr.Zero;
     }

     // ... followed by the program
     if (programPtr != IntPtr.Zero)
     {
         var programStatus = CurrentAPI.ReleaseProgram(programPtr);
         CLException.ThrowIfFailed(programStatus);
         programPtr = IntPtr.Zero;
     }

     base.Dispose(disposing);
 }
Пример #22
0
        /// <summary>
        /// Loads the given OpenCL kernel.
        /// </summary>
        /// <param name="accelerator">The associated accelerator.</param>
        /// <param name="name">The name of the entry-point function.</param>
        /// <param name="source">The OpenCL source code.</param>
        /// <param name="version">The OpenCL C version.</param>
        /// <param name="programPtr">
        /// The created program pointer (reset to <see cref="IntPtr.Zero"/> if
        /// building the program or creating the kernel fails).
        /// </param>
        /// <param name="kernelPtr">The created kernel pointer.</param>
        /// <param name="errorLog">The error log (if any).</param>
        /// <returns>
        /// <c>CLError.CL_SUCCESS</c>, if the program and the kernel could be
        /// loaded successfully. Otherwise, the error code of the first step
        /// that failed.
        /// </returns>
        public static CLError LoadKernel(
            CLAccelerator accelerator,
            string name,
            string source,
            CLCVersion version,
            out IntPtr programPtr,
            out IntPtr kernelPtr,
            out string errorLog)
        {
            errorLog = null;
            kernelPtr = IntPtr.Zero;
            var programError = CurrentAPI.CreateProgram(
                accelerator.NativePtr,
                source,
                out programPtr);

            if (programError != CLError.CL_SUCCESS)
            {
                return programError;
            }

            // Specify the OpenCL C version.
            string options = "-cl-std=" + version.ToString();

            var buildError = CurrentAPI.BuildProgram(
                programPtr,
                accelerator.DeviceId,
                options);

            if (buildError != CLError.CL_SUCCESS)
            {
                // Fetch the build log for diagnostics and drop the program
                CLException.ThrowIfFailed(
                    CurrentAPI.GetProgramBuildLog(
                        programPtr,
                        accelerator.DeviceId,
                        out errorLog));
                CLException.ThrowIfFailed(
                    CurrentAPI.ReleaseProgram(programPtr));
                programPtr = IntPtr.Zero;
                return buildError;
            }

            var kernelError = CurrentAPI.CreateKernel(
                programPtr,
                name,
                out kernelPtr);

            if (kernelError != CLError.CL_SUCCESS)
            {
                // Do not leak the program if the kernel could not be created:
                // callers treat any error code as "nothing to release"
                CLException.ThrowIfFailed(
                    CurrentAPI.ReleaseProgram(programPtr));
                programPtr = IntPtr.Zero;
            }

            return kernelError;
        }
Пример #23
0
        /// <summary>
        /// Constructs a new OpenCL stream that creates its own command queue and
        /// marks itself as responsible for the queue handle.
        /// </summary>
        /// <param name="accelerator">The associated accelerator.</param>
        internal CLStream(CLAccelerator accelerator)
            : base(accelerator)
        {
            // Profiling is enabled on the queue only if the context asks for it
            var queueProperties = default(CLCommandQueueProperties);
            if (Accelerator.Context.Properties.EnableProfiling)
            {
                queueProperties =
                    CLCommandQueueProperties.CL_QUEUE_PROFILING_ENABLE;
            }

            var creationStatus = CurrentAPI.CreateCommandQueue(
                accelerator.PlatformVersion,
                accelerator.DeviceId,
                accelerator.NativePtr,
                queueProperties,
                out queuePtr);
            CLException.ThrowIfFailed(creationStatus);

            // Reached only if the queue has been created successfully
            responsibleForHandle = true;
        }
Пример #24
0
        /// <inheritdoc/>
        /// <remarks>
        /// Fills <paramref name="lengthInBytes"/> bytes of this buffer, starting
        /// at <paramref name="offsetInBytes"/>, with <paramref name="value"/>.
        /// The accelerator binding is recovered even if the operation throws.
        /// </remarks>
        protected internal override unsafe void MemSetInternal(
            AcceleratorStream stream,
            byte value,
            long offsetInBytes,
            long lengthInBytes)
        {
            var binding = Accelerator.BindScoped();
            try
            {
                CLException.ThrowIfFailed(
                    CurrentAPI.FillBuffer(
                        ((CLStream)stream).CommandQueue,
                        NativePtr,
                        value,
                        new IntPtr(offsetInBytes),
                        new IntPtr(lengthInBytes)));
            }
            finally
            {
                // Always restore the previous binding, also on failure
                binding.Recover();
            }
        }
Пример #25
0
 /// <summary>
 /// Sets the dynamic shared-memory arguments of the given kernel prior to its
 /// launch.
 /// </summary>
 /// <param name="stream">The current stream (not used here).</param>
 /// <param name="kernel">The kernel to configure.</param>
 /// <param name="config">
 /// The kernel configuration providing the dynamic array size.
 /// </param>
 /// <returns>The result of setting the second kernel argument.</returns>
 public readonly CLError PreLaunchKernel(
     CLStream stream,
     CLKernel kernel,
     RuntimeKernelConfig config)
 {
     // Argument 0: allocate the local buffer of the desired size
     // (no value pointer is passed)
     var allocationStatus = CurrentAPI.SetKernelArgumentUnsafeWithKernel(
         kernel,
         0,
         config.SharedMemoryConfig.DynamicArraySize,
         null);
     CLException.ThrowIfFailed(allocationStatus);

     // Argument 1: the length of the local buffer (in bytes)
     var lengthStatus = CurrentAPI.SetKernelArgument(
         kernel.KernelPtr,
         1,
         config.SharedMemoryConfig.DynamicArraySize);
     return lengthStatus;
 }
Пример #26
0
        /// <inheritdoc/>
        /// <remarks>
        /// Enqueues a barrier on the command queue of this stream, wraps the
        /// resulting event in a <c>CLProfilingMarker</c> and synchronizes the
        /// marker before returning it.
        /// </remarks>
        protected unsafe override ProfilingMarker AddProfilingMarkerInternal()
        {
            // Stack slot receiving the event created by the barrier
            IntPtr* eventSlot = stackalloc IntPtr[1];

            var barrierStatus = CurrentAPI.EnqueueBarrierWithWaitList(
                queuePtr,
                Array.Empty<IntPtr>(),
                eventSlot);
            CLException.ThrowIfFailed(barrierStatus);

            var result = new CLProfilingMarker(*eventSlot);

            // WORKAROUND: The OpenCL event needs to be awaited now, otherwise
            // it does not contain the correct timing - it appears to have the
            // timing of whenever it gets awaited.
            result.Synchronize();
            return result;
        }
Пример #27
0
        /// <summary>
        /// Disposes this OpenCL kernel.
        /// </summary>
        /// <param name="disposing">
        /// Forwarded to <c>CLException.VerifyDisposed</c> together with the result
        /// of each release operation.
        /// </param>
        /// <remarks>
        /// The kernel is released before its surrounding program; each handle is
        /// released only if it is non-zero and is reset afterwards.
        /// </remarks>
        protected override void DisposeAcceleratorObject(bool disposing)
        {
            // Kernel handle
            if (kernelPtr != IntPtr.Zero)
            {
                var kernelStatus = CurrentAPI.ReleaseKernel(kernelPtr);
                CLException.VerifyDisposed(disposing, kernelStatus);
                kernelPtr = IntPtr.Zero;
            }

            // Program handle
            if (programPtr != IntPtr.Zero)
            {
                var programStatus = CurrentAPI.ReleaseProgram(programPtr);
                CLException.VerifyDisposed(disposing, programStatus);
                programPtr = IntPtr.Zero;
            }
        }
Пример #28
0
        /// <summary>
        /// Copies the contents of this buffer, beginning at the given source
        /// offset, into the given target view.
        /// </summary>
        /// <param name="stream">
        /// The accelerator stream (cast to <c>CLStream</c> internally).
        /// </param>
        /// <param name="target">The target view (CPU or OpenCL memory).</param>
        /// <param name="sourceOffset">The source offset in elements.</param>
        /// <exception cref="NotSupportedException">
        /// Thrown if the accelerator type of the target view is neither CPU nor
        /// OpenCL.
        /// </exception>
        /// <remarks>
        /// The accelerator binding is recovered even if the operation throws.
        /// </remarks>
        protected internal unsafe override void CopyToView(
            AcceleratorStream stream,
            ArrayView<T> target,
            LongIndex1 sourceOffset)
        {
            var binding = Accelerator.BindScoped();
            try
            {
                var clStream = (CLStream)stream;

                switch (target.AcceleratorType)
                {
                    case AcceleratorType.CPU:
                        // This buffer -> host memory
                        CLException.ThrowIfFailed(
                            CurrentAPI.ReadBuffer(
                                clStream.CommandQueue,
                                NativePtr,
                                false,
                                new IntPtr(sourceOffset * ElementSize),
                                new IntPtr(target.LengthInBytes),
                                new IntPtr(target.LoadEffectiveAddress())));
                        break;

                    case AcceleratorType.OpenCL:
                        // This buffer -> OpenCL buffer; offsets are in bytes
                        CLException.ThrowIfFailed(
                            CurrentAPI.CopyBuffer(
                                clStream.CommandQueue,
                                NativePtr,
                                target.Source.NativePtr,
                                new IntPtr(sourceOffset * ElementSize),
                                new IntPtr(target.Index * ElementSize),
                                new IntPtr(target.LengthInBytes)));
                        break;

                    default:
                        throw new NotSupportedException(
                            RuntimeErrorMessages.NotSupportedTargetAccelerator);
                }
            }
            finally
            {
                // Always restore the previous binding, also on failure
                binding.Recover();
            }
        }
Пример #29
0
        /// <summary>
        /// Loads the given OpenCL kernel.
        /// </summary>
        /// <param name="accelerator">The associated accelerator.</param>
        /// <param name="source">The OpenCL source code.</param>
        /// <param name="programPtr">
        /// The created program pointer (reset to <see cref="IntPtr.Zero"/> if
        /// building the program or creating the kernel fails).
        /// </param>
        /// <param name="kernelPtr">The created kernel pointer.</param>
        /// <returns>
        /// <c>CLError.CL_SUCCESS</c>, if the program and the kernel could be
        /// loaded successfully. Otherwise, the error code of the first step
        /// that failed.
        /// </returns>
        internal static CLError LoadKernel(
            CLAccelerator accelerator,
            string source,
            out IntPtr programPtr,
            out IntPtr kernelPtr)
        {
            kernelPtr = IntPtr.Zero;
            var error = CLAPI.CreateProgram(
                accelerator.ContextPtr,
                source,
                out programPtr);

            if (error != CLError.CL_SUCCESS)
            {
                return error;
            }

            // TODO: OpenCL compiler options
            string options = string.Empty;

            error = CLAPI.BuildProgram(
                programPtr,
                accelerator.DeviceId,
                options);

            // Only create the kernel if the program has been built. The error
            // codes are never combined with a bitwise OR since this can produce
            // a value that matches neither of them.
            if (error == CLError.CL_SUCCESS)
            {
                error = CLAPI.CreateKernel(
                    programPtr,
                    CLCompiledKernel.EntryName,
                    out kernelPtr);
            }

            if (error != CLError.CL_SUCCESS)
            {
                // Do not leak the program on failure
                CLException.ThrowIfFailed(
                    CLAPI.ReleaseProgram(programPtr));
                programPtr = IntPtr.Zero;
            }
            return error;
        }
Пример #30
0
        /// <summary>
        /// Copies the contents of the given source view into this buffer,
        /// beginning at the given target offset.
        /// </summary>
        /// <param name="stream">The accelerator stream.</param>
        /// <param name="source">The source view (CPU or OpenCL memory).</param>
        /// <param name="targetOffset">The target offset in elements.</param>
        /// <exception cref="NotSupportedException">
        /// Thrown if the accelerator type of the source view is neither CPU nor
        /// OpenCL.
        /// </exception>
        /// <remarks>
        /// The accelerator binding is recovered even if the operation throws.
        /// </remarks>
        protected internal unsafe override void CopyFromView(
            AcceleratorStream stream,
            ArrayView<T> source,
            LongIndex1 targetOffset)
        {
            var binding = Accelerator.BindScoped();
            try
            {
                switch (source.AcceleratorType)
                {
                    case AcceleratorType.CPU:
                        // Host memory -> this buffer
                        CLException.ThrowIfFailed(
                            CurrentAPI.WriteBuffer(
                                stream,
                                NativePtr,
                                false,
                                new IntPtr(targetOffset * ElementSize),
                                new IntPtr(source.LengthInBytes),
                                new IntPtr(source.LoadEffectiveAddress())));
                        break;

                    case AcceleratorType.OpenCL:
                        // OpenCL buffer -> this buffer; offsets are in bytes
                        CLException.ThrowIfFailed(
                            CurrentAPI.CopyBuffer(
                                stream,
                                source.Source.NativePtr,
                                NativePtr,
                                new IntPtr(source.Index * ElementSize),
                                new IntPtr(targetOffset * ElementSize),
                                new IntPtr(source.LengthInBytes)));
                        break;

                    default:
                        throw new NotSupportedException(
                            RuntimeErrorMessages.NotSupportedTargetAccelerator);
                }
            }
            finally
            {
                // Always restore the previous binding, also on failure
                binding.Recover();
            }
        }