コード例 #1
0
ファイル: CLAccelerator.cs プロジェクト: m4rs-mt/ILGPU
 /// <summary>
 /// Initializes major vendor features.
 /// </summary>
 /// <remarks>
 /// Returns immediately for Nvidia and AMD devices. For every other vendor a
 /// dummy kernel is compiled and its preferred work-group size multiple is
 /// stored in <c>WarpSize</c>. The temporary kernel and program are released
 /// before the method returns.
 /// </remarks>
 private void InitVendorFeatures()
 {
     // Nvidia and AMD devices require no further work in this method
     if (Device.Vendor == CLDeviceVendor.Nvidia ||
         Device.Vendor == CLDeviceVendor.AMD)
     {
         return;
     }

     // Compile dummy kernel to resolve additional information
     CLException.ThrowIfFailed(CLKernel.LoadKernel(
         this,
         DummyKernelName,
         DummyKernelSource,
         CVersion,
         out IntPtr programPtr,
         out IntPtr kernelPtr,
         out var _));
     try
     {
         // Resolve information
         WarpSize = CurrentAPI.GetKernelWorkGroupInfo<IntPtr>(
             kernelPtr,
             DeviceId,
             CLKernelWorkGroupInfoType
             .CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE).ToInt32();
     }
     finally
     {
         // Release the program even if releasing the kernel fails; otherwise
         // a throwing kernel release would leak the program handle.
         try
         {
             CLException.ThrowIfFailed(
                 CurrentAPI.ReleaseKernel(kernelPtr));
         }
         finally
         {
             CLException.ThrowIfFailed(
                 CurrentAPI.ReleaseProgram(programPtr));
         }
     }
 }
コード例 #2
0
ファイル: CLAccelerator.cs プロジェクト: m4rs-mt/ILGPU
        /// <summary>
        /// Initializes support for sub groups.
        /// </summary>
        /// <param name="acceleratorId">The current accelerator id.</param>
        /// <remarks>
        /// Sub-group support is first derived from the device extensions and then
        /// verified by compiling a dummy kernel and querying its maximum sub-group
        /// size. <c>WarpSize</c> is only updated when that query succeeds.
        /// </remarks>
        private void InitSubGroupSupport(CLDevice acceleratorId)
        {
            // Check sub group support
            Capabilities.SubGroups = acceleratorId.HasAnyExtension(SubGroupExtensions);
            if (!Capabilities.SubGroups)
            {
                return;
            }

            // Verify support using a simple kernel
            // NOTE(review): if the kernel fails to load, the extension-based
            // result above is kept as is - confirm that this is intended.
            if (CLKernel.LoadKernel(
                    this,
                    DummyKernelName,
                    DummySubGroupKernelSource,
                    CVersion,
                    out IntPtr programPtr,
                    out IntPtr kernelPtr,
                    out var _) == CLError.CL_SUCCESS)
            {
                // Some drivers return an internal handler delegate
                // that crashes during invocation instead of telling that the
                // sub-group feature is not supported
                try
                {
                    var localGroupSizes = new IntPtr[]
                    {
                        new IntPtr(MaxNumThreadsPerGroup)
                    };
                    Capabilities.SubGroups = acceleratorId.TryGetKernelSubGroupInfo(
                        kernelPtr,
                        DeviceId,
                        CLKernelSubGroupInfoType
                        .CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,
                        localGroupSizes,
                        out IntPtr subGroupSize);

                    // Only adopt the reported size if the query succeeded; the
                    // out value of a failed query must not replace WarpSize.
                    if (Capabilities.SubGroups)
                    {
                        WarpSize = subGroupSize.ToInt32();
                    }
                }
                catch (AccessViolationException)
                {
                    // This exception can be raised due to driver issues
                    // on several platforms -> we will just disable sub-group
                    // support for these platforms
                    Capabilities.SubGroups = false;
                }
                finally
                {
                    // Release the program even if releasing the kernel fails;
                    // otherwise the program handle would leak.
                    try
                    {
                        CLException.ThrowIfFailed(
                            CurrentAPI.ReleaseKernel(kernelPtr));
                    }
                    finally
                    {
                        CLException.ThrowIfFailed(
                            CurrentAPI.ReleaseProgram(programPtr));
                    }
                }
            }
        }
コード例 #3
0
 /// <summary cref="DisposeBase.Dispose(bool)"/>
 /// <remarks>
 /// Releases the kernel first and the surrounding program afterwards. The
 /// program release and the base-class disposal are attempted even if an
 /// earlier release step throws.
 /// </remarks>
 protected override void Dispose(bool disposing)
 {
     try
     {
         try
         {
             // Free the kernel
             if (kernelPtr != IntPtr.Zero)
             {
                 CLException.ThrowIfFailed(
                     CurrentAPI.ReleaseKernel(kernelPtr));
                 kernelPtr = IntPtr.Zero;
             }
         }
         finally
         {
             // Free the program even if releasing the kernel failed, so that
             // a throwing kernel release does not leak the program handle
             if (programPtr != IntPtr.Zero)
             {
                 CLException.ThrowIfFailed(
                     CurrentAPI.ReleaseProgram(programPtr));
                 programPtr = IntPtr.Zero;
             }
         }
     }
     finally
     {
         // Always give the base class the chance to dispose its own state
         base.Dispose(disposing);
     }
 }
コード例 #4
0
        /// <summary>
        /// Loads the given OpenCL kernel.
        /// </summary>
        /// <param name="accelerator">The associated accelerator.</param>
        /// <param name="name">The name of the entry-point function.</param>
        /// <param name="source">The OpenCL source code.</param>
        /// <param name="version">The OpenCL C version.</param>
        /// <param name="programPtr">
        /// The created program pointer. Reset to <see cref="IntPtr.Zero"/> if
        /// building the program or creating the kernel fails, since the program
        /// is released in these cases.
        /// </param>
        /// <param name="kernelPtr">The created kernel pointer.</param>
        /// <param name="errorLog">The error log (if any).</param>
        /// <returns>
        /// <c>CL_SUCCESS</c> if the program and the kernel could be loaded
        /// successfully; otherwise the error code of the first failing step.
        /// </returns>
        public static CLError LoadKernel(
            CLAccelerator accelerator,
            string name,
            string source,
            CLCVersion version,
            out IntPtr programPtr,
            out IntPtr kernelPtr,
            out string errorLog)
        {
            errorLog = null;
            kernelPtr = IntPtr.Zero;
            var programError = CurrentAPI.CreateProgram(
                accelerator.NativePtr,
                source,
                out programPtr);

            if (programError != CLError.CL_SUCCESS)
            {
                return programError;
            }

            // Specify the OpenCL C version.
            string options = "-cl-std=" + version.ToString();

            var buildError = CurrentAPI.BuildProgram(
                programPtr,
                accelerator.DeviceId,
                options);

            if (buildError != CLError.CL_SUCCESS)
            {
                try
                {
                    CLException.ThrowIfFailed(
                        CurrentAPI.GetProgramBuildLog(
                            programPtr,
                            accelerator.DeviceId,
                            out errorLog));
                }
                finally
                {
                    // Release the program even if the build log is unavailable
                    CLException.ThrowIfFailed(
                        CurrentAPI.ReleaseProgram(programPtr));
                    programPtr = IntPtr.Zero;
                }
                return buildError;
            }

            var kernelError = CurrentAPI.CreateKernel(
                programPtr,
                name,
                out kernelPtr);

            if (kernelError != CLError.CL_SUCCESS)
            {
                // Callers treat a failed load as "nothing to release", so the
                // already built program has to be released here to avoid a leak
                CLException.ThrowIfFailed(
                    CurrentAPI.ReleaseProgram(programPtr));
                programPtr = IntPtr.Zero;
            }

            return kernelError;
        }
コード例 #5
0
        /// <summary>
        /// Releases the native kernel handle and its owning program handle.
        /// </summary>
        /// <param name="disposing">
        /// Forwarded to <c>CLException.VerifyDisposed</c> together with the
        /// result of each release call.
        /// </param>
        protected override void DisposeAcceleratorObject(bool disposing)
        {
            // The kernel goes first
            if (kernelPtr != IntPtr.Zero)
            {
                var kernelStatus = CurrentAPI.ReleaseKernel(kernelPtr);
                CLException.VerifyDisposed(disposing, kernelStatus);
                kernelPtr = IntPtr.Zero;
            }

            // Afterwards, the program that contains the kernel
            if (programPtr != IntPtr.Zero)
            {
                var programStatus = CurrentAPI.ReleaseProgram(programPtr);
                CLException.VerifyDisposed(disposing, programStatus);
                programPtr = IntPtr.Zero;
            }
        }
コード例 #6
0
        /// <summary>
        /// Initializes major vendor features.
        /// </summary>
        /// <remarks>
        /// Probes the Nvidia warp-size and the AMD wavefront-width device queries
        /// to determine <c>Vendor</c> and <c>WarpSize</c>. If neither query
        /// succeeds, a dummy kernel is compiled and its preferred work-group size
        /// multiple is used as <c>WarpSize</c>.
        /// </remarks>
        private void InitVendorFeatures()
        {
            // Check major vendor features
            if (CurrentAPI.GetDeviceInfo(
                    DeviceId,
                    CLDeviceInfoType.CL_DEVICE_WARP_SIZE_NV,
                    out int warpSize) == CLError.CL_SUCCESS)
            {
                // Nvidia platform
                WarpSize = warpSize;
                Vendor = CLAcceleratorVendor.Nvidia;

                int major = CurrentAPI.GetDeviceInfo<int>(
                    DeviceId,
                    CLDeviceInfoType.CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV);
                int minor = CurrentAPI.GetDeviceInfo<int>(
                    DeviceId,
                    CLDeviceInfoType.CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV);

                // Applies to compute capabilities below 7.5
                if (major < 7 || (major == 7 && minor < 5))
                {
                    MaxNumThreadsPerMultiprocessor *= 2;
                }
            }
            else if (CurrentAPI.GetDeviceInfo(
                         DeviceId,
                         CLDeviceInfoType.CL_DEVICE_WAVEFRONT_WIDTH_AMD,
                         out int wavefrontSize) == CLError.CL_SUCCESS)
            {
                // AMD platform
                WarpSize = wavefrontSize;
                Vendor = CLAcceleratorVendor.AMD;
            }
            else
            {
                Vendor = VendorName.Contains(CLAcceleratorVendor.Intel.ToString()) ?
                         CLAcceleratorVendor.Intel :
                         CLAcceleratorVendor.Other;

                // Compile dummy kernel to resolve additional information
                CLException.ThrowIfFailed(CLKernel.LoadKernel(
                    this,
                    DummyKernelSource,
                    CVersion,
                    out IntPtr programPtr,
                    out IntPtr kernelPtr,
                    out var _));
                try
                {
                    // Resolve information
                    WarpSize = CurrentAPI.GetKernelWorkGroupInfo<IntPtr>(
                        kernelPtr,
                        DeviceId,
                        CLKernelWorkGroupInfoType
                        .CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE).ToInt32();
                }
                finally
                {
                    // Release the program even if releasing the kernel fails;
                    // otherwise the program handle would leak.
                    try
                    {
                        CLException.ThrowIfFailed(
                            CurrentAPI.ReleaseKernel(kernelPtr));
                    }
                    finally
                    {
                        CLException.ThrowIfFailed(
                            CurrentAPI.ReleaseProgram(programPtr));
                    }
                }
            }
        }