Esempio n. 1
0
        /// <summary>
        /// Detects the device vendor and initializes <c>Vendor</c> and
        /// <c>WarpSize</c> accordingly.
        /// </summary>
        /// <remarks>
        /// The detection probes vendor-specific device-info queries in order:
        /// the NVIDIA warp-size query first, then the AMD wavefront-width query.
        /// If neither query succeeds, the vendor is derived from
        /// <c>VendorName</c> (Intel or Other) and the warp size is taken from the
        /// preferred work-group size multiple of a compiled dummy kernel.
        /// </remarks>
        private void InitVendorFeatures()
        {
            // Check major vendor features
            if (CLAPI.GetDeviceInfo(
                    DeviceId,
                    CLDeviceInfoType.CL_DEVICE_WARP_SIZE_NV,
                    out int warpSize) == CLError.CL_SUCCESS)
            {
                // Nvidia platform
                WarpSize = warpSize;
                Vendor = CLAcceleratorVendor.Nvidia;

                int major = CLAPI.GetDeviceInfo<int>(
                    DeviceId,
                    CLDeviceInfoType.CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV);
                int minor = CLAPI.GetDeviceInfo<int>(
                    DeviceId,
                    CLDeviceInfoType.CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV);

                // Devices with a compute capability below 7.5 get twice the
                // previously initialized per-multiprocessor thread limit.
                // NOTE(review): presumably the initial value assumes the 7.5+
                // hardware limit - confirm against where the property is set.
                if (major < 7 || (major == 7 && minor < 5))
                {
                    MaxNumThreadsPerMultiprocessor *= 2;
                }
            }
            else if (CLAPI.GetDeviceInfo(
                         DeviceId,
                         CLDeviceInfoType.CL_DEVICE_WAVEFRONT_WIDTH_AMD,
                         out int wavefrontSize) == CLError.CL_SUCCESS)
            {
                // AMD platform
                WarpSize = wavefrontSize;
                Vendor = CLAcceleratorVendor.AMD;
            }
            else
            {
                Vendor = VendorName.Contains(CLAcceleratorVendor.Intel.ToString()) ?
                         CLAcceleratorVendor.Intel :
                         CLAcceleratorVendor.Other;

                // Compile dummy kernel to resolve additional information
                CLException.ThrowIfFailed(CLKernel.LoadKernel(
                                              this,
                                              DummyKernelSource,
                                              out IntPtr programPtr,
                                              out IntPtr kernelPtr));
                try
                {
                    // Resolve information
                    WarpSize = CLAPI.GetKernelWorkGroupInfo<IntPtr>(
                        kernelPtr,
                        DeviceId,
                        CLKernelWorkGroupInfoType.CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE).ToInt32();
                }
                finally
                {
                    // Attempt both releases before inspecting either status so
                    // that a failing kernel release never skips the program
                    // release.
                    var releaseKernelStatus = CLAPI.ReleaseKernel(kernelPtr);
                    var releaseProgramStatus = CLAPI.ReleaseProgram(programPtr);

                    // Check the statuses individually: OR-ing two different
                    // error codes would yield a value matching neither of
                    // them and report a misleading error.
                    CLException.ThrowIfFailed(releaseKernelStatus);
                    CLException.ThrowIfFailed(releaseProgramStatus);
                }
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Estimates a group size for the given kernel by querying the
        /// <c>CL_KERNEL_WORK_GROUP_SIZE</c> value of the kernel on this device
        /// and clamping it to the requested maximum group size.
        /// </summary>
        /// <param name="kernel">
        /// The kernel to inspect. Must be a <c>CLKernel</c> instance.
        /// </param>
        /// <param name="dynamicSharedMemorySizeInBytes">
        /// The amount of dynamic shared memory in bytes. Positive values are
        /// rejected by this implementation.
        /// </param>
        /// <param name="maxGroupSize">
        /// The upper bound of the returned group size. Values smaller than 1
        /// are replaced by <c>MaxNumThreadsPerGroup</c>.
        /// </param>
        /// <param name="minGridSize">
        /// Receives <c>MaxNumThreads</c> divided by the returned group size, as
        /// computed by <c>IntrinsicMath.DivRoundUp</c>.
        /// </param>
        /// <returns>
        /// The smaller of the kernel's reported work-group size and
        /// <paramref name="maxGroupSize"/>.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="kernel"/> is null.
        /// </exception>
        /// <exception cref="NotSupportedException">
        /// <paramref name="kernel"/> is not a <c>CLKernel</c>.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="dynamicSharedMemorySizeInBytes"/> is greater than 0.
        /// </exception>
        /// <seealso cref="Accelerator.EstimateGroupSizeInternal(
        /// Kernel, int, int, out int)"/>
        protected override int EstimateGroupSizeInternal(
            Kernel kernel,
            int dynamicSharedMemorySizeInBytes,
            int maxGroupSize,
            out int minGridSize)
        {
            if (kernel == null)
            {
                throw new ArgumentNullException(nameof(kernel));
            }

            // Fail with a descriptive exception instead of dereferencing the
            // null result of an unchecked 'as' cast.
            if (!(kernel is CLKernel clKernel))
            {
                throw new NotSupportedException(
                    "The given kernel is not an OpenCL kernel.");
            }

            if (dynamicSharedMemorySizeInBytes > 0)
            {
                throw new ArgumentOutOfRangeException(
                          nameof(dynamicSharedMemorySizeInBytes));
            }

            // A non-positive maximum means "no explicit limit": fall back to
            // the per-group thread limit of this accelerator.
            if (maxGroupSize < 1)
            {
                maxGroupSize = MaxNumThreadsPerGroup;
            }

            var workGroupSizeNative = CLAPI.GetKernelWorkGroupInfo<IntPtr>(
                clKernel.KernelPtr,
                DeviceId,
                CLKernelWorkGroupInfoType.CL_KERNEL_WORK_GROUP_SIZE);
            int workGroupSize = IntrinsicMath.Min(
                workGroupSizeNative.ToInt32(),
                maxGroupSize);

            minGridSize = IntrinsicMath.DivRoundUp(MaxNumThreads, workGroupSize);
            return workGroupSize;
        }