private void InitVendorFeatures()
{
    // Determines the vendor and the warp size of the device by probing
    // vendor-specific device-info queries. A query that succeeds identifies the
    // vendor; if neither the Nvidia nor the AMD query succeeds, the vendor is
    // derived from the vendor name and the warp size is resolved by compiling
    // a dummy kernel.

    // Check major vendor features
    if (CLAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_WARP_SIZE_NV,
        out int warpSize) == CLError.CL_SUCCESS)
    {
        // Nvidia platform
        WarpSize = warpSize;
        Vendor = CLAcceleratorVendor.Nvidia;

        int major = CLAPI.GetDeviceInfo<int>(
            DeviceId,
            CLDeviceInfoType.CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV);
        int minor = CLAPI.GetDeviceInfo<int>(
            DeviceId,
            CLDeviceInfoType.CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV);

        // Devices with a compute capability below 7.5 get twice the
        // number of threads per multiprocessor.
        if (major < 7 || (major == 7 && minor < 5))
        {
            MaxNumThreadsPerMultiprocessor *= 2;
        }
    }
    else if (CLAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_WAVEFRONT_WIDTH_AMD,
        out int wavefrontSize) == CLError.CL_SUCCESS)
    {
        // AMD platform
        WarpSize = wavefrontSize;
        Vendor = CLAcceleratorVendor.AMD;
    }
    else
    {
        // Neither vendor-specific query succeeded: classify by vendor name
        Vendor = VendorName.Contains(CLAcceleratorVendor.Intel.ToString())
            ? CLAcceleratorVendor.Intel
            : CLAcceleratorVendor.Other;

        // Compile dummy kernel to resolve additional information
        CLException.ThrowIfFailed(CLKernel.LoadKernel(
            this,
            DummyKernelSource,
            out IntPtr programPtr,
            out IntPtr kernelPtr));
        try
        {
            // Resolve information
            WarpSize = CLAPI.GetKernelWorkGroupInfo<IntPtr>(
                kernelPtr,
                DeviceId,
                CLKernelWorkGroupInfoType.CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE).ToInt32();
        }
        finally
        {
            // Always attempt to release both handles before checking for
            // errors. The status codes are checked separately instead of
            // being OR-ed together, since OR-ing two different error codes
            // can produce an unrelated code.
            var kernelStatus = CLAPI.ReleaseKernel(kernelPtr);
            var programStatus = CLAPI.ReleaseProgram(programPtr);
            CLException.ThrowIfFailed(kernelStatus);
            CLException.ThrowIfFailed(programStatus);
        }
    }
}
/// <summary cref="Accelerator.EstimateGroupSizeInternal(
/// Kernel, int, int, out int)"/>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown if <paramref name="dynamicSharedMemorySizeInBytes"/> is greater
/// than zero.
/// </exception>
/// <exception cref="ArgumentNullException">
/// Thrown if <paramref name="kernel"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// Thrown if <paramref name="kernel"/> is not a <see cref="CLKernel"/>.
/// </exception>
protected override int EstimateGroupSizeInternal(
    Kernel kernel,
    int dynamicSharedMemorySizeInBytes,
    int maxGroupSize,
    out int minGridSize)
{
    // A positive amount of dynamic shared memory is rejected by this estimate
    if (dynamicSharedMemorySizeInBytes > 0)
    {
        throw new ArgumentOutOfRangeException(
            nameof(dynamicSharedMemorySizeInBytes));
    }

    // Fail with a descriptive exception instead of a NullReferenceException
    // when the kernel is missing or was not created for an OpenCL accelerator.
    if (kernel == null)
    {
        throw new ArgumentNullException(nameof(kernel));
    }
    if (!(kernel is CLKernel clKernel))
    {
        throw new ArgumentException(
            "The given kernel is not an OpenCL kernel.",
            nameof(kernel));
    }

    // A non-positive upper bound means "no explicit limit": fall back to the
    // maximum number of threads per group of this accelerator.
    if (maxGroupSize < 1)
    {
        maxGroupSize = MaxNumThreadsPerGroup;
    }

    // Query the work-group size the runtime reports for this kernel on this
    // device and clamp it to the requested upper bound.
    var workGroupSizeNative = CLAPI.GetKernelWorkGroupInfo<IntPtr>(
        clKernel.KernelPtr,
        DeviceId,
        CLKernelWorkGroupInfoType.CL_KERNEL_WORK_GROUP_SIZE);
    int workGroupSize = IntrinsicMath.Min(
        workGroupSizeNative.ToInt32(),
        maxGroupSize);

    // The grid size is MaxNumThreads divided by the group size, rounded up
    minGridSize = IntrinsicMath.DivRoundUp(MaxNumThreads, workGroupSize);
    return workGroupSize;
}