/// <summary>
/// Estimates a group size for the given kernel by querying
/// <c>CL_KERNEL_WORK_GROUP_SIZE</c> for it on this device.
/// </summary>
/// <param name="kernel">
/// The kernel to query. It has to be a <c>CLKernel</c> instance.
/// </param>
/// <param name="dynamicSharedMemorySizeInBytes">
/// The requested dynamic shared memory size in bytes. Values greater than
/// zero are rejected by this implementation.
/// </param>
/// <param name="maxGroupSize">
/// The upper bound of the returned group size. Values smaller than one are
/// replaced by <c>MaxNumThreadsPerGroup</c>.
/// </param>
/// <param name="minGridSize">
/// Receives <c>IntrinsicMath.DivRoundUp(MaxNumThreads, groupSize)</c>, where
/// <c>groupSize</c> is the returned value.
/// </param>
/// <returns>
/// The result of <c>IntrinsicMath.Min</c> applied to the queried work-group
/// size and <paramref name="maxGroupSize"/>.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="dynamicSharedMemorySizeInBytes"/> is greater than zero.
/// </exception>
/// <exception cref="ArgumentNullException">
/// <paramref name="kernel"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// <paramref name="kernel"/> is not a <c>CLKernel</c>.
/// </exception>
protected override int EstimateGroupSizeInternal(
    Kernel kernel,
    int dynamicSharedMemorySizeInBytes,
    int maxGroupSize,
    out int minGridSize)
{
    if (dynamicSharedMemorySizeInBytes > 0)
    {
        throw new ArgumentOutOfRangeException(
            nameof(dynamicSharedMemorySizeInBytes));
    }

    if (maxGroupSize < 1)
    {
        maxGroupSize = MaxNumThreadsPerGroup;
    }

    // Validate the kernel explicitly instead of dereferencing the result of
    // an unchecked 'as' cast (which surfaced as a NullReferenceException).
    if (kernel == null)
    {
        throw new ArgumentNullException(nameof(kernel));
    }
    var clKernel = kernel as CLKernel;
    if (clKernel == null)
    {
        throw new ArgumentException(
            "The kernel must be a CLKernel instance.",
            nameof(kernel));
    }

    var workGroupSizeNative = CurrentAPI.GetKernelWorkGroupInfo<IntPtr>(
        clKernel.KernelPtr,
        DeviceId,
        CLKernelWorkGroupInfoType.CL_KERNEL_WORK_GROUP_SIZE);

    int workGroupSize = workGroupSizeNative.ToInt32();
    workGroupSize = IntrinsicMath.Min(workGroupSize, maxGroupSize);
    minGridSize = IntrinsicMath.DivRoundUp(MaxNumThreads, workGroupSize);

    return workGroupSize;
}
/// <summary>
/// Gathers a 3D region of the given jagged array into a temporary linear
/// buffer and forwards that buffer to <c>buffer.CopyFrom</c>.
/// </summary>
/// <param name="stream">The stream passed on to <c>buffer.CopyFrom</c>.</param>
/// <param name="source">The jagged source array.</param>
/// <param name="sourceOffset">
/// The offset that is added to every index used to read from
/// <paramref name="source"/>.
/// </param>
/// <param name="targetOffset">
/// The target offset passed on to <c>buffer.CopyFrom</c>.
/// </param>
/// <param name="extent">The number of elements to gather per dimension.</param>
/// <remarks>
/// Sub-arrays that are null, and entries that lie beyond the end of a
/// sub-array, are skipped; the corresponding temporary-buffer slots keep
/// their default value.
/// </remarks>
/// <exception cref="ArgumentNullException">
/// <paramref name="source"/> is null.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="extent"/> or <paramref name="sourceOffset"/> is out of
/// range.
/// </exception>
public void CopyFrom(
    AcceleratorStream stream,
    T[][][] source,
    Index3 sourceOffset,
    Index3 targetOffset,
    Index3 extent)
{
    if (source == null)
    {
        throw new ArgumentNullException(nameof(source));
    }

    if (extent.X < 0 || extent.Y < 0 || extent.Z < 0 ||
        extent.X > source.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(extent));
    }

    // NOTE(review): the offsets are compared against 'extent' rather than
    // the source dimensions; kept as-is to preserve the accepted inputs -
    // confirm whether this is intended.
    if (sourceOffset.X < 0 || sourceOffset.Y < 0 || sourceOffset.Z < 0 ||
        sourceOffset.X >= extent.X ||
        sourceOffset.Y >= extent.Y ||
        sourceOffset.Z >= extent.Z)
    {
        throw new ArgumentOutOfRangeException(nameof(sourceOffset));
    }

    // The outermost slice [sourceOffset.X, sourceOffset.X + extent.X) has
    // to fit into the source array; otherwise source[i + sourceOffset.X]
    // below would run past its end. Use 64-bit math to avoid overflow.
    if ((long)sourceOffset.X + extent.X > source.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(extent));
    }

    var tempBuffer = new T[extent.Size];

    for (int i = 0; i < extent.X; ++i)
    {
        var subData = source[i + sourceOffset.X];
        if (subData == null)
        {
            continue;
        }

        // Skip rows that are out of bounds (the offset has to be taken
        // into account since it is added to the row index below)
        for (
            int j = 0, rowEnd = IntrinsicMath.Min(
                subData.Length - sourceOffset.Y,
                extent.Y);
            j < rowEnd;
            ++j)
        {
            var subSubData = subData[j + sourceOffset.Y];
            if (subSubData == null)
            {
                continue;
            }

            // Skip entries that are out of bounds (again including the
            // offset that is added to the element index below)
            for (
                int k = 0, e = IntrinsicMath.Min(
                    subSubData.Length - sourceOffset.Z,
                    extent.Z);
                k < e;
                ++k)
            {
                var targetIdx = new Index3(i, j, k).ComputeLinearIndex(extent);
                tempBuffer[targetIdx] = subSubData[k + sourceOffset.Z];
            }
        }
    }

    buffer.CopyFrom(
        stream,
        tempBuffer,
        0,
        targetOffset,
        extent.Size);
}
/// <summary>
/// Estimates the number of groups of the given size that can be active at
/// once by dividing <c>MaxNumThreadsPerGroup</c> by the (clamped) group size.
/// </summary>
/// <param name="kernel">The kernel (not used by this implementation).</param>
/// <param name="groupSize">
/// The desired group size. It has to be at least one and is clamped to
/// <c>MaxNumThreadsPerGroup</c>.
/// </param>
/// <param name="dynamicSharedMemorySizeInBytes">
/// The requested dynamic shared memory size in bytes. Values greater than
/// zero are rejected by this implementation.
/// </param>
/// <returns>
/// <c>MaxNumThreadsPerGroup</c> divided by the clamped group size (integer
/// division).
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="dynamicSharedMemorySizeInBytes"/> is greater than zero, or
/// <paramref name="groupSize"/> is smaller than one.
/// </exception>
protected override int EstimateMaxActiveGroupsPerMultiprocessorInternal(
    Kernel kernel,
    int groupSize,
    int dynamicSharedMemorySizeInBytes)
{
    if (dynamicSharedMemorySizeInBytes > 0)
    {
        throw new ArgumentOutOfRangeException(
            nameof(dynamicSharedMemorySizeInBytes));
    }

    // A group size of zero would divide by zero below and a negative one
    // would yield a meaningless (non-positive) result.
    if (groupSize < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(groupSize));
    }

    groupSize = IntrinsicMath.Min(groupSize, MaxNumThreadsPerGroup);
    return MaxNumThreadsPerGroup / groupSize;
}
/// <summary>
/// Initializes <c>MemorySize</c>, <c>MaxSharedMemoryPerGroup</c> and
/// <c>MaxConstantMemory</c> from device information queried for
/// <c>DeviceId</c>.
/// </summary>
private void InitMemoryInfo()
{
    // Resolve memory size
    MemorySize = CurrentAPI.GetDeviceInfo<long>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_GLOBAL_MEM_SIZE);

    // Resolve max shared memory per block (clamped to the int range)
    MaxSharedMemoryPerGroup = (int)IntrinsicMath.Min(
        CurrentAPI.GetDeviceInfo<long>(
            DeviceId,
            CLDeviceInfoType.CL_DEVICE_LOCAL_MEM_SIZE),
        int.MaxValue);

    // Resolve total constant memory. The 64-bit value is clamped to the int
    // range first, so that the narrowing cast cannot wrap around.
    // NOTE(review): this queries CL_DEVICE_MAX_PARAMETER_SIZE; confirm that
    // CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE is not the intended property.
    MaxConstantMemory = (int)IntrinsicMath.Min(
        CurrentAPI.GetDeviceInfo<long>(
            DeviceId,
            CLDeviceInfoType.CL_DEVICE_MAX_PARAMETER_SIZE),
        int.MaxValue);
}
/// <summary>
/// Constructs a new OpenCL accelerator.
/// </summary>
/// <param name="context">The ILGPU context.</param>
/// <param name="acceleratorId">The accelerator id.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="acceleratorId"/> is null.
/// </exception>
public CLAccelerator(Context context, CLAcceleratorId acceleratorId)
    : base(context, AcceleratorType.OpenCL)
{
    if (acceleratorId == null)
    {
        throw new ArgumentNullException(nameof(acceleratorId));
    }

    PlatformId = acceleratorId.PlatformId;
    DeviceId = acceleratorId.DeviceId;

    PlatformName = CLAPI.GetPlatformInfo(
        PlatformId,
        CLPlatformInfoType.CL_PLATFORM_NAME);

    VendorName = CLAPI.GetPlatformInfo(
        PlatformId,
        CLPlatformInfoType.CL_PLATFORM_VENDOR);

    // Create new context
    CLException.ThrowIfFailed(
        CLAPI.CreateContext(DeviceId, out contextPtr));

    // Resolve device info
    Name = CLAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_NAME);

    MemorySize = CLAPI.GetDeviceInfo<long>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_GLOBAL_MEM_SIZE);

    DeviceType = (CLDeviceType)CLAPI.GetDeviceInfo<long>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_TYPE);

    // Max grid size. At least three entries are allocated, since the first
    // three work-item sizes are read unconditionally below.
    int workItemDimensions = IntrinsicMath.Max(
        CLAPI.GetDeviceInfo<int>(
            DeviceId,
            CLDeviceInfoType.CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS),
        3);
    var workItemSizes = new IntPtr[workItemDimensions];
    CLAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_WORK_ITEM_SIZES,
        workItemSizes);
    MaxGridSize = new Index3(
        workItemSizes[0].ToInt32(),
        workItemSizes[1].ToInt32(),
        workItemSizes[2].ToInt32());

    // Resolve max threads per group
    MaxNumThreadsPerGroup = CLAPI.GetDeviceInfo<IntPtr>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_WORK_GROUP_SIZE).ToInt32();

    // Resolve max shared memory per block (clamped to the int range)
    MaxSharedMemoryPerGroup = (int)IntrinsicMath.Min(
        CLAPI.GetDeviceInfo<long>(
            DeviceId,
            CLDeviceInfoType.CL_DEVICE_LOCAL_MEM_SIZE),
        int.MaxValue);

    // Resolve total constant memory. The 64-bit value is clamped to the int
    // range first, so that the narrowing cast cannot wrap around.
    MaxConstantMemory = (int)IntrinsicMath.Min(
        CLAPI.GetDeviceInfo<long>(
            DeviceId,
            CLDeviceInfoType.CL_DEVICE_MAX_PARAMETER_SIZE),
        int.MaxValue);

    // Resolve clock rate
    ClockRate = CLAPI.GetDeviceInfo<int>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_CLOCK_FREQUENCY);

    // Resolve number of multiprocessors
    NumMultiprocessors = CLAPI.GetDeviceInfo<int>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_COMPUTE_UNITS);

    // The max number of threads per multiprocessor is set to the max number
    // of threads per group
    MaxNumThreadsPerMultiprocessor = MaxNumThreadsPerGroup;

    InitVendorFeatures();
    InitSubGroupSupport(acceleratorId);

    Bind();
    DefaultStream = CreateStreamInternal();
    base.Backend = new CLBackend(
        Context,
        Backends.Backend.OSPlatform,
        Vendor);
}
/// <summary>
/// Gathers a 2D region of the given jagged array into a temporary linear
/// buffer and forwards that buffer to <c>buffer.CopyFrom</c>.
/// </summary>
/// <param name="stream">The stream passed on to <c>buffer.CopyFrom</c>.</param>
/// <param name="source">The jagged source array.</param>
/// <param name="sourceOffset">
/// The offset that is added to every index used to read from
/// <paramref name="source"/>.
/// </param>
/// <param name="targetOffset">
/// The target offset; validated against <c>Extent</c> and passed on to
/// <c>buffer.CopyFrom</c>.
/// </param>
/// <param name="extent">The number of elements to gather per dimension.</param>
/// <remarks>
/// Rows that are null, and entries that lie beyond the end of a row, are
/// skipped; the corresponding temporary-buffer slots keep their default
/// value.
/// </remarks>
/// <exception cref="ArgumentNullException">
/// <paramref name="source"/> is null.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="sourceOffset"/>, <paramref name="targetOffset"/> or
/// <paramref name="extent"/> is out of range.
/// </exception>
public void CopyFrom(
    AcceleratorStream stream,
    T[][] source,
    LongIndex2 sourceOffset,
    LongIndex2 targetOffset,
    LongIndex2 extent)
{
    if (source == null)
    {
        throw new ArgumentNullException(nameof(source));
    }

    if (sourceOffset.X < 0 || sourceOffset.Y < 0 ||
        sourceOffset.X >= source.LongLength)
    {
        throw new ArgumentOutOfRangeException(nameof(sourceOffset));
    }

    if (targetOffset.X < 0 || targetOffset.Y < 0 ||
        targetOffset.X >= Extent.X ||
        targetOffset.Y >= Extent.Y)
    {
        throw new ArgumentOutOfRangeException(nameof(targetOffset));
    }

    if (extent.X < 0 || extent.Y < 0 ||
        sourceOffset.X + extent.X > source.LongLength ||
        targetOffset.X + extent.X > Extent.X ||
        targetOffset.Y + extent.Y > Extent.Y)
    {
        throw new ArgumentOutOfRangeException(nameof(extent));
    }

    var tempBuffer = new T[extent.Size];

    for (long i = 0; i < extent.X; ++i)
    {
        var subData = source[i + sourceOffset.X];
        if (subData == null)
        {
            continue;
        }

        // Skip entries that are out of bounds. The row is read starting at
        // sourceOffset.Y, so only (length - offset) entries are available;
        // a non-positive bound simply skips the whole row.
        for (
            long j = 0, e = IntrinsicMath.Min(
                subData.LongLength - sourceOffset.Y,
                extent.Y);
            j < e;
            ++j)
        {
            var targetIdx = new LongIndex2(i, j).
                ComputeLinearIndex(extent);
            tempBuffer[targetIdx] = subData[j + sourceOffset.Y];
        }
    }

    buffer.CopyFrom(
        stream,
        tempBuffer,
        0,
        targetOffset,
        extent.Size);
}
/// <summary>
/// Forwards both 16-bit unsigned operands to <c>IntrinsicMath.Min</c>.
/// </summary>
/// <param name="first">The first operand.</param>
/// <param name="second">The second operand.</param>
/// <returns>
/// The value produced by <c>IntrinsicMath.Min(first, second)</c>.
/// </returns>
public static ushort Min(ushort first, ushort second)
{
    return IntrinsicMath.Min(first, second);
}
/// <summary>
/// Forwards both 8-bit unsigned operands to <c>IntrinsicMath.Min</c>.
/// </summary>
/// <param name="first">The first operand.</param>
/// <param name="second">The second operand.</param>
/// <returns>
/// The value produced by <c>IntrinsicMath.Min(first, second)</c>.
/// </returns>
public static byte Min(byte first, byte second)
{
    return IntrinsicMath.Min(first, second);
}
/// <summary>
/// Forwards both 64-bit signed operands to <c>IntrinsicMath.Min</c>.
/// </summary>
/// <param name="first">The first operand.</param>
/// <param name="second">The second operand.</param>
/// <returns>
/// The value produced by <c>IntrinsicMath.Min(first, second)</c>.
/// </returns>
public static long Min(long first, long second)
{
    return IntrinsicMath.Min(first, second);
}
/// <summary>
/// Forwards both 32-bit signed operands to <c>IntrinsicMath.Min</c>.
/// </summary>
/// <param name="first">The first operand.</param>
/// <param name="second">The second operand.</param>
/// <returns>
/// The value produced by <c>IntrinsicMath.Min(first, second)</c>.
/// </returns>
public static int Min(int first, int second)
{
    return IntrinsicMath.Min(first, second);
}
/// <summary>
/// Forwards both single-precision operands to <c>IntrinsicMath.Min</c>.
/// </summary>
/// <param name="first">The first operand.</param>
/// <param name="second">The second operand.</param>
/// <returns>
/// The value produced by <c>IntrinsicMath.Min(first, second)</c>.
/// </returns>
public static float Min(float first, float second)
{
    return IntrinsicMath.Min(first, second);
}
/// <summary>
/// Forwards both double-precision operands to <c>IntrinsicMath.Min</c>.
/// </summary>
/// <param name="first">The first operand.</param>
/// <param name="second">The second operand.</param>
/// <returns>
/// The value produced by <c>IntrinsicMath.Min(first, second)</c>.
/// </returns>
public static double Min(double first, double second)
{
    return IntrinsicMath.Min(first, second);
}
/// <summary>
/// Forwards both 32-bit unsigned operands to <c>IntrinsicMath.Min</c>.
/// </summary>
/// <param name="first">The first operand.</param>
/// <param name="second">The second operand.</param>
/// <returns>
/// The value produced by <c>IntrinsicMath.Min(first, second)</c>.
/// </returns>
public static uint Min(uint first, uint second)
{
    return IntrinsicMath.Min(first, second);
}