Exemple #1
0
        /// <summary>
        /// Constructs a new OpenCL accelerator reference.
        /// </summary>
        /// <param name="platformId">The OpenCL platform id.</param>
        /// <param name="deviceId">The OpenCL device id.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown if <paramref name="platformId"/> or <paramref name="deviceId"/>
        /// is <see cref="IntPtr.Zero"/>.
        /// </exception>
        public CLAcceleratorId(IntPtr platformId, IntPtr deviceId)
            : base(AcceleratorType.OpenCL)
        {
            if (platformId == IntPtr.Zero)
            {
                throw new ArgumentOutOfRangeException(nameof(platformId));
            }
            if (deviceId == IntPtr.Zero)
            {
                throw new ArgumentOutOfRangeException(nameof(deviceId));
            }

            PlatformId = platformId;
            DeviceId = deviceId;

            DeviceType = (CLDeviceType)CLAPI.GetDeviceInfo<long>(
                deviceId,
                CLDeviceInfoType.CL_DEVICE_TYPE);

            // Resolve extensions
            var extensionString = CLAPI.GetDeviceInfo(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_EXTENSIONS);

            // Normalize with the invariant culture so that the stored names do not
            // depend on the current thread culture, and drop empty entries caused
            // by leading, trailing or repeated separators.
            extensionSet = new HashSet<string>(
                extensionString.ToLowerInvariant().Split(
                    new[] { ' ' },
                    StringSplitOptions.RemoveEmptyEntries));
            Extensions = extensionSet.ToImmutableArray();

            // Resolve extension method
            getKernelSubGroupInfo = CLAPI.GetExtension<clGetKernelSubGroupInfoKHR>(
                platformId);
        }
Exemple #2
0
        /// <summary>
        /// Loads the binary representation of the given OpenCL kernel.
        /// </summary>
        /// <param name="program">The program pointer.</param>
        /// <returns>
        /// The binary representation of the underlying kernel. An empty array is
        /// returned if the reported binary size is zero.
        /// </returns>
        /// <remarks>
        /// Both queries pass a buffer of <see cref="IntPtr.Size"/> bytes, i.e. room
        /// for a single size value and a single binary pointer.
        /// NOTE(review): this presumably assumes a program that is associated with
        /// exactly one device - confirm against the callers.
        /// </remarks>
        public static unsafe byte[] LoadBinaryRepresentation(IntPtr program)
        {
            IntPtr kernelSize = IntPtr.Zero;

            // Query the size (in bytes) of the program binary
            CLException.ThrowIfFailed(
                CLAPI.GetProgramInfo(
                    program,
                    CLProgramInfo.CL_PROGRAM_BINARY_SIZES,
                    new IntPtr(IntPtr.Size),
                    &kernelSize,
                    out var _));

            var programBinary = new byte[kernelSize.ToInt32()];
            if (programBinary.Length < 1)
            {
                // There is nothing to copy, and taking the address of the first
                // element of an empty array would throw
                return programBinary;
            }

            fixed (byte* binPtr = &programBinary[0])
            {
                // Pass the address of the pinned buffer pointer so that the binary
                // is written directly into the managed array
                CLException.ThrowIfFailed(
                    CLAPI.GetProgramInfo(
                        program,
                        CLProgramInfo.CL_PROGRAM_BINARIES,
                        new IntPtr(IntPtr.Size),
                        &binPtr,
                        out var _));
            }

            return programBinary;
        }
Exemple #3
0
        /// <summary cref="MemoryBuffer{T, TIndex}.CopyFromView(AcceleratorStream, ArrayView{T}, Index)"/>
        /// <remarks>
        /// Views that live on the CPU are written into this buffer, views that live
        /// on an OpenCL accelerator are copied buffer-to-buffer. All other source
        /// accelerator types are rejected with a <see cref="NotSupportedException"/>.
        /// </remarks>
        protected internal unsafe override void CopyFromView(
            AcceleratorStream stream,
            ArrayView<T> source,
            Index targetOffset)
        {
            var clStream = (CLStream)stream;
            var sourceType = source.AcceleratorType;

            if (sourceType == AcceleratorType.CPU)
            {
                // Host memory -> this buffer
                var writeStatus = CLAPI.WriteBuffer(
                    clStream.CommandQueue,
                    NativePtr,
                    false,
                    new IntPtr(targetOffset * ElementSize),
                    new IntPtr(source.LengthInBytes),
                    new IntPtr(source.LoadEffectiveAddress()));
                CLException.ThrowIfFailed(writeStatus);
            }
            else if (sourceType == AcceleratorType.OpenCL)
            {
                // OpenCL buffer -> this buffer (offsets are given in bytes)
                var copyStatus = CLAPI.CopyBuffer(
                    clStream.CommandQueue,
                    source.Source.NativePtr,
                    NativePtr,
                    new IntPtr(source.Index * ElementSize),
                    new IntPtr(targetOffset * ElementSize),
                    new IntPtr(source.LengthInBytes));
                CLException.ThrowIfFailed(copyStatus);
            }
            else
            {
                throw new NotSupportedException(
                    RuntimeErrorMessages.NotSupportedTargetAccelerator);
            }
        }
Exemple #4
0
        /// <summary>
        /// Determines the device vendor and the warp size.
        /// </summary>
        /// <remarks>
        /// The NVIDIA- and AMD-specific device queries are probed first. If neither
        /// of them succeeds, the vendor is derived from the vendor name and the
        /// warp size is resolved from the preferred work-group size multiple of a
        /// freshly compiled dummy kernel.
        /// </remarks>
        private void InitVendorFeatures()
        {
            // Check major vendor features
            if (CLAPI.GetDeviceInfo(
                    DeviceId,
                    CLDeviceInfoType.CL_DEVICE_WARP_SIZE_NV,
                    out int warpSize) == CLError.CL_SUCCESS)
            {
                // Nvidia platform
                WarpSize = warpSize;
                Vendor = CLAcceleratorVendor.Nvidia;

                int major = CLAPI.GetDeviceInfo<int>(
                    DeviceId,
                    CLDeviceInfoType.CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV);
                int minor = CLAPI.GetDeviceInfo<int>(
                    DeviceId,
                    CLDeviceInfoType.CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV);

                // Devices with a compute capability below 7.5 get twice the number
                // of threads per multiprocessor
                if (major < 7 || (major == 7 && minor < 5))
                {
                    MaxNumThreadsPerMultiprocessor *= 2;
                }
            }
            else if (CLAPI.GetDeviceInfo(
                         DeviceId,
                         CLDeviceInfoType.CL_DEVICE_WAVEFRONT_WIDTH_AMD,
                         out int wavefrontSize) == CLError.CL_SUCCESS)
            {
                // AMD platform
                WarpSize = wavefrontSize;
                Vendor = CLAcceleratorVendor.AMD;
            }
            else
            {
                Vendor = VendorName.Contains(CLAcceleratorVendor.Intel.ToString())
                    ? CLAcceleratorVendor.Intel
                    : CLAcceleratorVendor.Other;

                // Compile dummy kernel to resolve additional information
                CLException.ThrowIfFailed(CLKernel.LoadKernel(
                    this,
                    DummyKernelSource,
                    out IntPtr programPtr,
                    out IntPtr kernelPtr));
                try
                {
                    // Resolve information
                    WarpSize = CLAPI.GetKernelWorkGroupInfo<IntPtr>(
                        kernelPtr,
                        DeviceId,
                        CLKernelWorkGroupInfoType
                            .CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE).ToInt32();
                }
                finally
                {
                    // Always release both handles first. The results are reported
                    // individually since a bitwise combination of two error codes
                    // does not yield a meaningful error code.
                    var kernelError = CLAPI.ReleaseKernel(kernelPtr);
                    var programError = CLAPI.ReleaseProgram(programPtr);
                    CLException.ThrowIfFailed(kernelError);
                    CLException.ThrowIfFailed(programError);
                }
            }
        }
Exemple #5
0
        /// <summary cref="Accelerator.EstimateGroupSizeInternal(
        /// Kernel, int, int, out int)"/>
        /// <remarks>
        /// Dynamic shared memory is not supported by this estimate: any positive
        /// <paramref name="dynamicSharedMemorySizeInBytes"/> is rejected. A
        /// <paramref name="maxGroupSize"/> smaller than one is replaced by
        /// <c>MaxNumThreadsPerGroup</c>.
        /// </remarks>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown if <paramref name="dynamicSharedMemorySizeInBytes"/> is positive.
        /// </exception>
        /// <exception cref="ArgumentNullException">
        /// Thrown if <paramref name="kernel"/> is null.
        /// </exception>
        protected override int EstimateGroupSizeInternal(
            Kernel kernel,
            int dynamicSharedMemorySizeInBytes,
            int maxGroupSize,
            out int minGridSize)
        {
            if (dynamicSharedMemorySizeInBytes > 0)
            {
                throw new ArgumentOutOfRangeException(
                    nameof(dynamicSharedMemorySizeInBytes));
            }

            if (maxGroupSize < 1)
            {
                maxGroupSize = MaxNumThreadsPerGroup;
            }

            // Fail with a descriptive exception instead of dereferencing the result
            // of an unchecked "as" cast
            if (kernel is null)
            {
                throw new ArgumentNullException(nameof(kernel));
            }
            var clKernel = (CLKernel)kernel;

            var workGroupSizeNative = CLAPI.GetKernelWorkGroupInfo<IntPtr>(
                clKernel.KernelPtr,
                DeviceId,
                CLKernelWorkGroupInfoType.CL_KERNEL_WORK_GROUP_SIZE);
            int workGroupSize = workGroupSizeNative.ToInt32();

            // Clamp to the requested maximum and derive the number of groups that
            // is required to cover MaxNumThreads
            workGroupSize = IntrinsicMath.Min(workGroupSize, maxGroupSize);
            minGridSize = IntrinsicMath.DivRoundUp(MaxNumThreads, workGroupSize);

            return workGroupSize;
        }
Exemple #6
0
        /// <summary cref="DisposeBase.Dispose(bool)"/>
        /// <remarks>
        /// Runs the base implementation first and releases the OpenCL context
        /// afterwards. The context is only released if the handle is still set,
        /// which makes repeated calls harmless.
        /// </remarks>
        protected override void Dispose(bool disposing)
        {
            base.Dispose(disposing);

            if (contextPtr != IntPtr.Zero)
            {
                CLException.ThrowIfFailed(
                    CLAPI.ReleaseContext(contextPtr));
                contextPtr = IntPtr.Zero;
            }
        }
Exemple #7
0
 /// <summary cref="MemoryBuffer.MemSetToZero(AcceleratorStream)"/>
 /// <remarks>
 /// Fills the whole buffer (offset zero, <c>LengthInBytes</c> bytes) with the
 /// byte value zero using the command queue of the given stream.
 /// </remarks>
 public override void MemSetToZero(AcceleratorStream stream)
 {
     var clStream = (CLStream)stream;
     var fillStatus = CLAPI.FillBuffer<byte>(
         clStream.CommandQueue,
         NativePtr,
         0,
         IntPtr.Zero,
         new IntPtr(LengthInBytes));
     CLException.ThrowIfFailed(fillStatus);
 }
Exemple #8
0
 /// <summary cref="DisposeBase.Dispose(bool)"/>
 /// <remarks>
 /// Releases the device handle (if it is still set) and resets it before
 /// delegating to the base implementation. The result of the release call is
 /// not inspected.
 /// </remarks>
 protected override void Dispose(bool disposing)
 {
     var deviceHandle = DeviceId;
     if (deviceHandle != IntPtr.Zero)
     {
         CLAPI.ReleaseDevice(deviceHandle);
         DeviceId = IntPtr.Zero;
     }

     base.Dispose(disposing);
 }
Exemple #9
0
        /// <summary cref="DisposeBase.Dispose(bool)"/>
        /// <remarks>
        /// Releases the kernel and the program handle. Each handle is only released
        /// if it is still set and is reset right after the release call, so that a
        /// repeated call does not release a handle twice. Both releases are always
        /// attempted before the first failure is reported.
        /// </remarks>
        protected override void Dispose(bool disposing)
        {
            var kernelError = CLError.CL_SUCCESS;
            var programError = CLError.CL_SUCCESS;

            if (kernelPtr != IntPtr.Zero)
            {
                kernelError = CLAPI.ReleaseKernel(kernelPtr);
                kernelPtr = IntPtr.Zero;
            }

            if (programPtr != IntPtr.Zero)
            {
                programError = CLAPI.ReleaseProgram(programPtr);
                programPtr = IntPtr.Zero;
            }

            // Report the results individually: a bitwise combination of two error
            // codes does not yield a meaningful error code
            CLException.ThrowIfFailed(kernelError);
            CLException.ThrowIfFailed(programError);
        }
Exemple #10
0
 /// <summary>
 /// Constructs a new OpenCL stream by creating a command queue for the device
 /// and the context of the given accelerator.
 /// </summary>
 /// <param name="accelerator">The associated accelerator.</param>
 internal CLStream(CLAccelerator accelerator)
     : base(accelerator)
 {
     var createStatus = CLAPI.CreateCommandQueue(
         accelerator.DeviceId,
         accelerator.ContextPtr,
         out queuePtr);
     CLException.ThrowIfFailed(createStatus);
 }
Exemple #11
0
 /// <summary cref="DisposeBase.Dispose(bool)"/>
 /// <remarks>
 /// Releases the command queue if its handle is still set and resets the
 /// handle once the release succeeded.
 /// </remarks>
 protected override void Dispose(bool disposing)
 {
     // Nothing to release: the handle has already been reset
     if (queuePtr == IntPtr.Zero)
     {
         return;
     }

     var releaseStatus = CLAPI.ReleaseCommandQueue(queuePtr);
     CLException.ThrowIfFailed(releaseStatus);
     queuePtr = IntPtr.Zero;
 }
Exemple #12
0
 /// <summary cref="DisposeBase.Dispose(bool)"/>
 /// <remarks>
 /// Releases the native buffer if its handle is still set, resets the handle
 /// and delegates to the base implementation afterwards.
 /// </remarks>
 protected override void Dispose(bool disposing)
 {
     var bufferHandle = NativePtr;
     if (bufferHandle != IntPtr.Zero)
     {
         var releaseStatus = CLAPI.ReleaseBuffer(bufferHandle);
         CLException.ThrowIfFailed(releaseStatus);
         NativePtr = IntPtr.Zero;
     }

     base.Dispose(disposing);
 }
Exemple #13
0
 /// <summary>
 /// Constructs a new OpenCL buffer.
 /// </summary>
 /// <param name="accelerator">The accelerator.</param>
 /// <param name="extent">The extent.</param>
 /// <remarks>
 /// The buffer is created in the context of the given accelerator with a size
 /// of <c>extent.Size * ElementSize</c> bytes and without a host pointer.
 /// </remarks>
 internal CLMemoryBuffer(CLAccelerator accelerator, TIndex extent)
     : base(accelerator, extent)
 {
     var context = accelerator.ContextPtr;
     var sizeInBytes = new IntPtr(extent.Size * ElementSize);

     var createStatus = CLAPI.CreateBuffer(
         context,
         CLBufferFlags.CL_MEM_KERNEL_READ_AND_WRITE,
         sizeInBytes,
         IntPtr.Zero,
         out IntPtr bufferHandle);
     CLException.ThrowIfFailed(createStatus);

     NativePtr = bufferHandle;
 }
Exemple #14
0
        /// <summary>
        /// Determines whether sub groups are supported and, if so, updates the
        /// warp size with the resolved sub-group size.
        /// </summary>
        /// <param name="acceleratorId">The accelerator id.</param>
        /// <remarks>
        /// Support is assumed if the accelerator id reports any of the sub-group
        /// extensions. It is then verified by compiling a simple kernel and
        /// querying its maximum sub-group size.
        /// NOTE(review): if the verification kernel cannot be loaded, the
        /// extension-based result is kept as it is - confirm that this is intended.
        /// </remarks>
        private void InitSubGroupSupport(CLAcceleratorId acceleratorId)
        {
            // Check sub group support
            bool hasExtension = acceleratorId.HasAnyExtension(SubGroupExtensions);
            SubGroupSupport = hasExtension;
            if (!hasExtension)
            {
                return;
            }

            // Verify support using a simple kernel
            if (CLKernel.LoadKernel(
                    this,
                    DummySubGroupKernelSource,
                    CVersion,
                    out IntPtr programPtr,
                    out IntPtr kernelPtr,
                    out var _) != CLError.CL_SUCCESS)
            {
                return;
            }

            // Some drivers return an internal handler delegate
            // that crashes during invocation instead of telling that the
            // sub-group feature is not supported
            try
            {
                var localGroupSizes = new IntPtr[]
                {
                    new IntPtr(MaxNumThreadsPerGroup)
                };
                bool resolved = acceleratorId.TryGetKernelSubGroupInfo(
                    kernelPtr,
                    DeviceId,
                    CLKernelSubGroupInfoType
                        .CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,
                    localGroupSizes,
                    out IntPtr subGroupSize);
                SubGroupSupport = resolved;

                // Only trust the resolved size if the query succeeded. Otherwise,
                // keep the warp size that has been determined before.
                if (resolved)
                {
                    WarpSize = subGroupSize.ToInt32();
                }
            }
            catch (AccessViolationException)
            {
                // This exception can be raised due to driver issues
                // on several platforms -> we will just disable sub-group
                // support for these platforms
                SubGroupSupport = false;
            }
            finally
            {
                // Release the program even if releasing the kernel fails
                try
                {
                    CLException.ThrowIfFailed(
                        CLAPI.ReleaseKernel(kernelPtr));
                }
                finally
                {
                    CLException.ThrowIfFailed(
                        CLAPI.ReleaseProgram(programPtr));
                }
            }
        }
Exemple #15
0
        /// <summary>
        /// Loads the given OpenCL kernel.
        /// </summary>
        /// <param name="accelerator">The associated accelerator.</param>
        /// <param name="source">The OpenCL source code.</param>
        /// <param name="version">The OpenCL C version.</param>
        /// <param name="programPtr">
        /// The created program pointer, or <see cref="IntPtr.Zero"/> if building the
        /// program or creating the kernel failed.
        /// </param>
        /// <param name="kernelPtr">
        /// The created kernel pointer, or <see cref="IntPtr.Zero"/> on failure.
        /// </param>
        /// <param name="errorLog">
        /// The build log if building the program failed, null otherwise.
        /// </param>
        /// <returns>
        /// <c>CL_SUCCESS</c> if the program and the kernel could be loaded
        /// successfully, otherwise the error code of the step that failed.
        /// </returns>
        internal static CLError LoadKernel(
            CLAccelerator accelerator,
            string source,
            CLCVersion version,
            out IntPtr programPtr,
            out IntPtr kernelPtr,
            out string errorLog)
        {
            errorLog = null;
            kernelPtr = IntPtr.Zero;
            var programError = CLAPI.CreateProgram(
                accelerator.ContextPtr,
                source,
                out programPtr);

            if (programError != CLError.CL_SUCCESS)
            {
                return programError;
            }

            // Specify the OpenCL C version.
            string options = "-cl-std=" + version.ToString();

            var buildError = CLAPI.BuildProgram(
                programPtr,
                accelerator.DeviceId,
                options);

            if (buildError != CLError.CL_SUCCESS)
            {
                CLException.ThrowIfFailed(
                    CLAPI.GetProgramBuildLog(
                        programPtr,
                        accelerator.DeviceId,
                        out errorLog));
                CLException.ThrowIfFailed(
                    CLAPI.ReleaseProgram(programPtr));
                programPtr = IntPtr.Zero;
                return buildError;
            }

            var kernelError = CLAPI.CreateKernel(
                programPtr,
                CLCompiledKernel.EntryName,
                out kernelPtr);

            if (kernelError != CLError.CL_SUCCESS)
            {
                // Do not leak the program if the kernel could not be created
                CLException.ThrowIfFailed(
                    CLAPI.ReleaseProgram(programPtr));
                programPtr = IntPtr.Zero;
                kernelPtr = IntPtr.Zero;
            }

            return kernelError;
        }
Exemple #16
0
        /// <summary>
        /// Constructs a new OpenCL accelerator reference.
        /// </summary>
        /// <param name="platformId">The OpenCL platform id.</param>
        /// <param name="deviceId">The OpenCL device id.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown if <paramref name="platformId"/> or <paramref name="deviceId"/>
        /// is <see cref="IntPtr.Zero"/>.
        /// </exception>
        public CLAcceleratorId(IntPtr platformId, IntPtr deviceId)
            : base(AcceleratorType.OpenCL)
        {
            if (platformId == IntPtr.Zero)
            {
                throw new ArgumentOutOfRangeException(nameof(platformId));
            }
            if (deviceId == IntPtr.Zero)
            {
                throw new ArgumentOutOfRangeException(nameof(deviceId));
            }

            PlatformId = platformId;
            DeviceId = deviceId;

            DeviceType = (CLDeviceType)CLAPI.GetDeviceInfo<long>(
                deviceId,
                CLDeviceInfoType.CL_DEVICE_TYPE);

            // Resolve extensions
            var extensionString = CLAPI.GetDeviceInfo(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_EXTENSIONS);

            // Normalize with the invariant culture so that the stored names do not
            // depend on the current thread culture, and drop empty entries caused
            // by leading, trailing or repeated separators.
            extensionSet = new HashSet<string>(
                extensionString.ToLowerInvariant().Split(
                    new[] { ' ' },
                    StringSplitOptions.RemoveEmptyEntries));
            Extensions = extensionSet.ToImmutableArray();

            // Determine the supported OpenCL C version
            var clVersionString = CLAPI.GetDeviceInfo(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_OPENCL_C_VERSION);

            // Fall back to CL10 if the version string cannot be parsed
            if (!CLCVersion.TryParse(clVersionString, out CLCVersion version))
            {
                version = CLCVersion.CL10;
            }
            CVersion = version;

            // Resolve extension method
            getKernelSubGroupInfo = CLAPI.GetExtension<clGetKernelSubGroupInfoKHR>(
                platformId);
        }
Exemple #17
0
        /// <summary>
        /// Loads the given OpenCL kernel.
        /// </summary>
        /// <param name="accelerator">The associated accelerator.</param>
        /// <param name="source">The OpenCL source code.</param>
        /// <param name="programPtr">
        /// The created program pointer, or <see cref="IntPtr.Zero"/> if building the
        /// program or creating the kernel failed.
        /// </param>
        /// <param name="kernelPtr">
        /// The created kernel pointer, or <see cref="IntPtr.Zero"/> on failure.
        /// </param>
        /// <returns>
        /// <c>CL_SUCCESS</c> if the program and the kernel could be loaded
        /// successfully, otherwise the error code of the first step that failed.
        /// </returns>
        internal static CLError LoadKernel(
            CLAccelerator accelerator,
            string source,
            out IntPtr programPtr,
            out IntPtr kernelPtr)
        {
            kernelPtr = IntPtr.Zero;
            var error = CLAPI.CreateProgram(
                accelerator.ContextPtr,
                source,
                out programPtr);

            if (error != CLError.CL_SUCCESS)
            {
                return error;
            }

            // TODO: OpenCL compiler options
            string options = string.Empty;

            // Run the remaining steps one after another and keep the error code of
            // the first failing step. Combining several error codes with a bitwise
            // OR would not yield a meaningful error code.
            error = CLAPI.BuildProgram(
                programPtr,
                accelerator.DeviceId,
                options);

            if (error == CLError.CL_SUCCESS)
            {
                error = CLAPI.CreateKernel(
                    programPtr,
                    CLCompiledKernel.EntryName,
                    out kernelPtr);
            }

            if (error != CLError.CL_SUCCESS)
            {
                // Do not leak the program if one of the steps failed
                CLException.ThrowIfFailed(
                    CLAPI.ReleaseProgram(programPtr));
                programPtr = IntPtr.Zero;
                kernelPtr = IntPtr.Zero;
            }

            return error;
        }
Exemple #18
0
 /// <summary>
 /// Queries a piece of device information for the device of this instance and
 /// returns it as a value of type <typeparamref name="T"/>.
 /// </summary>
 /// <typeparam name="T">The target type.</typeparam>
 /// <param name="type">The information type.</param>
 /// <returns>The resolved value.</returns>
 public T GetDeviceInfo<T>(CLDeviceInfoType type)
     where T : unmanaged
 {
     return CLAPI.GetDeviceInfo<T>(DeviceId, type);
 }
Exemple #19
0
 /// <summary cref="AcceleratorStream.Synchronize"/>
 /// <remarks>
 /// Finishes the underlying command queue and throws if this operation fails.
 /// </remarks>
 public override void Synchronize() =>
     CLException.ThrowIfFailed(CLAPI.FinishCommandQueue(queuePtr));
Exemple #20
0
        /// <summary>
        /// Enumerates all available devices of all OpenCL platforms.
        /// </summary>
        /// <remarks>
        /// Every available device is added to <c>AllCLAccelerators</c>. Devices
        /// whose OpenCL C version is at least <c>CLBackend.MinimumVersion</c> are
        /// also added to <c>CLAccelerators</c>. Exceptions are swallowed on purpose
        /// so that the type initializer never fails; both lists then contain what
        /// has been collected up to that point.
        /// </remarks>
        static CLAccelerator()
        {
            var accelerators = ImmutableArray.CreateBuilder<CLAcceleratorId>();
            var allAccelerators = ImmutableArray.CreateBuilder<CLAcceleratorId>();
            var devices = new IntPtr[MaxNumDevicesPerPlatform];

            try
            {
                // Resolve all platforms
                if (CLAPI.GetNumPlatforms(out int numPlatforms) != CLError.CL_SUCCESS ||
                    numPlatforms < 1)
                {
                    return;
                }

                var platforms = new IntPtr[numPlatforms];
                if (CLAPI.GetPlatforms(platforms, out numPlatforms) !=
                    CLError.CL_SUCCESS)
                {
                    return;
                }

                foreach (var platform in platforms)
                {
                    // Resolve all devices
                    int numDevices = devices.Length;
                    Array.Clear(devices, 0, numDevices);

                    if (CLAPI.GetDevices(
                            platform,
                            CLDeviceType.CL_DEVICE_TYPE_ALL,
                            devices,
                            out numDevices) != CLError.CL_SUCCESS)
                    {
                        continue;
                    }

                    // The reported number of devices can exceed the capacity of the
                    // fixed-size buffer. Never read beyond the entries that fit, as
                    // this would abort the enumeration of all remaining platforms.
                    int numResolvedDevices = IntrinsicMath.Min(
                        numDevices,
                        devices.Length);

                    for (int i = 0; i < numResolvedDevices; ++i)
                    {
                        // Resolve device and ignore invalid devices
                        var device = devices[i];
                        if (device == IntPtr.Zero)
                        {
                            continue;
                        }

                        // Check for available device
                        if (CLAPI.GetDeviceInfo<int>(
                                device,
                                CLDeviceInfoType.CL_DEVICE_AVAILABLE) == 0)
                        {
                            continue;
                        }

                        var acceleratorId = new CLAcceleratorId(platform, device);
                        allAccelerators.Add(acceleratorId);
                        if (acceleratorId.CVersion >= CLBackend.MinimumVersion)
                        {
                            accelerators.Add(acceleratorId);
                        }
                    }
                }
            }
            catch (Exception)
            {
                // Ignore API-specific exceptions at this point
            }
            finally
            {
                CLAccelerators = accelerators.ToImmutable();
                AllCLAccelerators = allAccelerators.ToImmutable();
            }
        }
Exemple #21
0
 /// <summary>
 /// Queries a piece of device information for the device of this instance and
 /// stores it as a value of type <typeparamref name="T"/>.
 /// </summary>
 /// <typeparam name="T">The target type.</typeparam>
 /// <param name="type">The information type.</param>
 /// <param name="value">The resolved value.</param>
 /// <returns>The error code.</returns>
 public CLError GetDeviceInfo<T>(CLDeviceInfoType type, out T value)
     where T : unmanaged
 {
     return CLAPI.GetDeviceInfo(DeviceId, type, out value);
 }
Exemple #22
0
        /// <summary>
        /// Enumerates all available devices of all OpenCL platforms and stores them
        /// in <c>CLAccelerators</c>.
        /// </summary>
        /// <remarks>
        /// Exceptions are swallowed on purpose; the list then contains what has
        /// been collected up to that point.
        /// </remarks>
        static CLAccelerator()
        {
            var builder = ImmutableArray.CreateBuilder<CLAcceleratorId>();

            try
            {
                // Determine the number of platforms
                var countStatus = CLAPI.GetNumPlatforms(out int numPlatforms);
                if (countStatus != CLError.CL_SUCCESS || numPlatforms < 1)
                {
                    return;
                }

                // Resolve the platform handles
                var platforms = new IntPtr[numPlatforms];
                var platformStatus = CLAPI.GetPlatforms(platforms, out numPlatforms);
                if (platformStatus != CLError.CL_SUCCESS)
                {
                    return;
                }

                for (int p = 0; p < platforms.Length; ++p)
                {
                    var platform = platforms[p];

                    // Determine the number of devices of this platform
                    var deviceCountStatus = CLAPI.GetNumDevices(
                        platform,
                        CLDeviceType.CL_DEVICE_TYPE_ALL,
                        out int numDevices);
                    if (deviceCountStatus != CLError.CL_SUCCESS)
                    {
                        continue;
                    }

                    // Resolve the device handles of this platform
                    var devices = new IntPtr[numDevices];
                    var deviceStatus = CLAPI.GetDevices(
                        platform,
                        CLDeviceType.CL_DEVICE_TYPE_ALL,
                        devices,
                        out numDevices);
                    if (deviceStatus != CLError.CL_SUCCESS)
                    {
                        continue;
                    }

                    for (int d = 0; d < devices.Length; ++d)
                    {
                        var device = devices[d];

                        // Register available devices only
                        int available = CLAPI.GetDeviceInfo<int>(
                            device,
                            CLDeviceInfoType.CL_DEVICE_AVAILABLE);
                        if (available != 0)
                        {
                            builder.Add(new CLAcceleratorId(platform, device));
                        }
                    }
                }
            }
            catch (Exception)
            {
                // Ignore API-specific exceptions at this point
            }
            finally
            {
                CLAccelerators = builder.ToImmutable();
            }
        }
Exemple #23
0
        /// <summary>
        /// Constructs a new OpenCL accelerator.
        /// </summary>
        /// <param name="context">The ILGPU context.</param>
        /// <param name="acceleratorId">The accelerator id.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown if <paramref name="acceleratorId"/> is null.
        /// </exception>
        /// <remarks>
        /// Creates an OpenCL context for the device, resolves the general device
        /// properties, initializes the vendor and sub-group specific features and
        /// finally creates the default stream and the backend.
        /// </remarks>
        public CLAccelerator(Context context, CLAcceleratorId acceleratorId)
            : base(context, AcceleratorType.OpenCL)
        {
            if (acceleratorId == null)
            {
                throw new ArgumentNullException(nameof(acceleratorId));
            }

            PlatformId = acceleratorId.PlatformId;
            DeviceId = acceleratorId.DeviceId;

            PlatformName = CLAPI.GetPlatformInfo(
                PlatformId,
                CLPlatformInfoType.CL_PLATFORM_NAME);

            VendorName = CLAPI.GetPlatformInfo(
                PlatformId,
                CLPlatformInfoType.CL_PLATFORM_VENDOR);

            // Create new context
            CLException.ThrowIfFailed(
                CLAPI.CreateContext(DeviceId, out contextPtr));

            // Resolve device info
            Name = CLAPI.GetDeviceInfo(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_NAME);

            MemorySize = CLAPI.GetDeviceInfo<long>(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_GLOBAL_MEM_SIZE);

            DeviceType = (CLDeviceType)CLAPI.GetDeviceInfo<long>(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_TYPE);

            // Max grid size (use at least three entries since the first three
            // dimensions are read below)
            int workItemDimensions = IntrinsicMath.Max(
                CLAPI.GetDeviceInfo<int>(
                    DeviceId,
                    CLDeviceInfoType.CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS),
                3);
            var workItemSizes = new IntPtr[workItemDimensions];

            CLAPI.GetDeviceInfo(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_MAX_WORK_ITEM_SIZES,
                workItemSizes);
            MaxGridSize = new Index3(
                workItemSizes[0].ToInt32(),
                workItemSizes[1].ToInt32(),
                workItemSizes[2].ToInt32());

            // Resolve max threads per group
            MaxNumThreadsPerGroup = CLAPI.GetDeviceInfo<IntPtr>(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_MAX_WORK_GROUP_SIZE).ToInt32();

            // Resolve max shared memory per block
            MaxSharedMemoryPerGroup = (int)IntrinsicMath.Min(
                CLAPI.GetDeviceInfo<long>(
                    DeviceId,
                    CLDeviceInfoType.CL_DEVICE_LOCAL_MEM_SIZE),
                int.MaxValue);

            // Resolve total constant memory. The value is saturated at int.MaxValue
            // (like the shared memory size above) instead of being truncated.
            // NOTE(review): this queries CL_DEVICE_MAX_PARAMETER_SIZE - confirm
            // that this is the intended source for the constant memory size.
            MaxConstantMemory = (int)IntrinsicMath.Min(
                CLAPI.GetDeviceInfo<long>(
                    DeviceId,
                    CLDeviceInfoType.CL_DEVICE_MAX_PARAMETER_SIZE),
                int.MaxValue);

            // Resolve clock rate
            ClockRate = CLAPI.GetDeviceInfo<int>(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_MAX_CLOCK_FREQUENCY);

            // Resolve number of multiprocessors
            NumMultiprocessors = CLAPI.GetDeviceInfo<int>(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_MAX_COMPUTE_UNITS);

            // Result max number of threads per multiprocessor
            MaxNumThreadsPerMultiprocessor = MaxNumThreadsPerGroup;

            InitVendorFeatures();
            InitSubGroupSupport(acceleratorId);

            Bind();
            DefaultStream = CreateStreamInternal();
            base.Backend = new CLBackend(Context, Backends.Backend.OSPlatform, Vendor);
        }