/// <summary>
/// Init grid information.
/// </summary>
private void InitGridInfo()
{
    // The per-dimension query below needs room for at least three entries,
    // since three components are read from the result.
    int workItemDimensions = IntrinsicMath.Max(
        CurrentAPI.GetDeviceInfo<int>(
            DeviceId,
            CLDeviceInfoType.CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS),
        3);

    // OpenCL does not report a maximum grid size. The reachable grid size is
    // ultimately constrained by system and device memory and by how each
    // kernel manages memory, so fixed upper limits are used here.
    MaxGridSize = new Index3D(int.MaxValue, ushort.MaxValue, ushort.MaxValue);

    // Resolve max threads per group
    MaxNumThreadsPerGroup = CurrentAPI.GetDeviceInfo<IntPtr>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_WORK_GROUP_SIZE).ToInt32();

    // CL_DEVICE_MAX_WORK_ITEM_SIZES describes the maximum number of work
    // items per dimension of a single work group. It therefore determines
    // the maximum group size and not the maximum grid size.
    var workItemSizes = new IntPtr[workItemDimensions];
    CurrentAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_WORK_ITEM_SIZES,
        workItemSizes);
    MaxGroupSize = new Index3D(
        workItemSizes[0].ToInt32(),
        workItemSizes[1].ToInt32(),
        workItemSizes[2].ToInt32());

    // Result max number of threads per multiprocessor
    MaxNumThreadsPerMultiprocessor = MaxNumThreadsPerGroup;
}
/// <summary>
/// Initializes the grid and group size information of this accelerator.
/// </summary>
private void InitGridInfo()
{
    // Number of work-item dimensions; at least three entries are required
    // because three components are read from the size query below.
    int reportedDimensions = CurrentAPI.GetDeviceInfo<int>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS);
    int numDimensions = IntrinsicMath.Max(reportedDimensions, 3);

    // OpenCL does not report maximum grid sizes. The MaxGridSize value is
    // consistent with the CPU accelerator and the values returned by CUDA
    // accelerators. MaxGridSize is ultimately constrained by system and
    // device memory and by how each kernel manages memory.
    MaxGridSize = new Index3D(int.MaxValue, ushort.MaxValue, ushort.MaxValue);

    // Maximum number of threads per group
    IntPtr maxWorkGroupSize = CurrentAPI.GetDeviceInfo<IntPtr>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_WORK_GROUP_SIZE);
    MaxNumThreadsPerGroup = maxWorkGroupSize.ToInt32();

    // Maximum work-item sizes per dimension
    var perDimensionLimits = new IntPtr[numDimensions];
    CurrentAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_WORK_ITEM_SIZES,
        perDimensionLimits);

    int groupX = perDimensionLimits[0].ToInt32();
    int groupY = perDimensionLimits[1].ToInt32();
    int groupZ = perDimensionLimits[2].ToInt32();
    MaxGroupSize = new Index3D(groupX, groupY, groupZ);

    // The maximum number of threads per multiprocessor is set to the
    // maximum number of threads per group.
    MaxNumThreadsPerMultiprocessor = MaxNumThreadsPerGroup;
}
/// <summary>
/// Constructs a new use distribution.
/// </summary>
/// <param name="context">The target context.</param>
/// <exception cref="ArgumentNullException">
/// Thrown if <paramref name="context"/> is null.
/// </exception>
public UseDistribution(IRContext context)
{
    if (context == null)
    {
        throw new ArgumentNullException(nameof(context));
    }

    // Maps a use count to the number of values having exactly that count
    var groupedUses = new Dictionary<int, int>();
    // Maps a value type to (maximum use count, number of values of that type)
    var usesPerType = new Dictionary<Type, (int, int)>();

    foreach (var method in context.Methods)
    {
        foreach (Value value in method.Blocks.Values)
        {
            int numUses = value.AllNumUses;

            // TryGetValue leaves the out value at its default (0) on a miss
            groupedUses.TryGetValue(numUses, out int count);
            groupedUses[numUses] = count + 1;

            var type = value.GetType();
            usesPerType.TryGetValue(type, out var typeEntry);
            usesPerType[type] = (
                IntrinsicMath.Max(numUses, typeEntry.Item1),
                typeEntry.Item2 + 1);
        }
    }

    // Use counts in descending order
    var groupedUsesList = new List<(int, int)>(groupedUses.Count);
    foreach (var entry in groupedUses)
    {
        groupedUsesList.Add((entry.Key, entry.Value));
    }
    groupedUsesList.Sort((x, y) => y.Item1.CompareTo(x.Item1));

    // Per-type information in descending order of the maximum use count
    var groupedUsesPerTypeList =
        new List<(int, Type, int)>(usesPerType.Count);
    foreach (var entry in usesPerType)
    {
        groupedUsesPerTypeList.Add(
            (entry.Value.Item1, entry.Key, entry.Value.Item2));
    }
    groupedUsesPerTypeList.Sort((x, y) => y.Item1.CompareTo(x.Item1));

    // Publish each result exactly once
    Uses = groupedUsesList.ToImmutableArray();
    UsesPerType = groupedUsesPerTypeList.ToImmutableArray();
}
/// <summary>
/// Performs an internal shared-memory allocation.
/// </summary>
/// <typeparam name="T">The element type.</typeparam>
/// <param name="extent">The number of elements to allocate.</param>
private void AllocateSharedMemoryInternal<T>(int extent)
    where T : unmanaged
{
    int numBytes = extent * Interop.SizeOf<T>();
    var requiredEnd = sharedMemoryOffset + numBytes;

    if (advancedSharedMemoryBufferIndex >= 0)
    {
        // An advanced buffer is active: try to serve the request from it
        var activeBuffer =
            advancedSharedMemoryBuffer[advancedSharedMemoryBufferIndex];
        if (requiredEnd <= activeBuffer.Length)
        {
            currentSharedMemoryView = activeBuffer.View.GetSubView(
                sharedMemoryOffset,
                numBytes);
            sharedMemoryOffset += numBytes;
            return;
        }
        // The active buffer is too small -> allocate a new buffer below
    }
    else if (requiredEnd <= SharedMemory.Length)
    {
        // The request fits into the SharedMemory view
        currentSharedMemoryView = SharedMemory.GetSubView(
            sharedMemoryOffset,
            numBytes);
        sharedMemoryOffset += numBytes;
        return;
    }

    // Allocate a new chunk that is at least SharedMemoryChunkSize bytes
    // large and serve the request from its beginning.
    var chunk = Accelerator.Allocate<byte>(
        IntrinsicMath.Max(numBytes, SharedMemoryChunkSize));
    advancedSharedMemoryBuffer.Add(chunk);
    currentSharedMemoryView = chunk.View.GetSubView(0, numBytes);
    sharedMemoryOffset = numBytes;
    ++advancedSharedMemoryBufferIndex;
}
/// <summary>
/// Constructs a new OpenCL accelerator.
/// </summary>
/// <param name="context">The ILGPU context.</param>
/// <param name="acceleratorId">The accelerator id.</param>
public CLAccelerator(Context context, CLAcceleratorId acceleratorId)
    : base(context, AcceleratorType.OpenCL)
{
    if (acceleratorId == null)
    {
        throw new ArgumentNullException(nameof(acceleratorId));
    }

    // Platform and device identification
    PlatformId = acceleratorId.PlatformId;
    DeviceId = acceleratorId.DeviceId;

    PlatformName = CLAPI.GetPlatformInfo(
        PlatformId,
        CLPlatformInfoType.CL_PLATFORM_NAME);
    VendorName = CLAPI.GetPlatformInfo(
        PlatformId,
        CLPlatformInfoType.CL_PLATFORM_VENDOR);

    // Create the OpenCL context for the device
    var contextStatus = CLAPI.CreateContext(DeviceId, out contextPtr);
    CLException.ThrowIfFailed(contextStatus);

    // General device information
    Name = CLAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_NAME);
    MemorySize = CLAPI.GetDeviceInfo<long>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_GLOBAL_MEM_SIZE);
    long rawDeviceType = CLAPI.GetDeviceInfo<long>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_TYPE);
    DeviceType = (CLDeviceType)rawDeviceType;

    // Max grid size (at least three entries are read from the size query)
    int reportedDimensions = CLAPI.GetDeviceInfo<int>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS);
    int numDimensions = IntrinsicMath.Max(reportedDimensions, 3);
    var itemSizes = new IntPtr[numDimensions];
    CLAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_WORK_ITEM_SIZES,
        itemSizes);
    int gridX = itemSizes[0].ToInt32();
    int gridY = itemSizes[1].ToInt32();
    int gridZ = itemSizes[2].ToInt32();
    MaxGridSize = new Index3(gridX, gridY, gridZ);

    // Max threads per group
    IntPtr maxWorkGroupSize = CLAPI.GetDeviceInfo<IntPtr>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_WORK_GROUP_SIZE);
    MaxNumThreadsPerGroup = maxWorkGroupSize.ToInt32();

    // Max shared memory per group (clamped to the int range)
    long localMemSize = CLAPI.GetDeviceInfo<long>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_LOCAL_MEM_SIZE);
    MaxSharedMemoryPerGroup =
        (int)IntrinsicMath.Min(localMemSize, int.MaxValue);

    // Total constant memory
    long maxParameterSize = CLAPI.GetDeviceInfo<long>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_PARAMETER_SIZE);
    MaxConstantMemory = (int)maxParameterSize;

    // Clock rate
    ClockRate = CLAPI.GetDeviceInfo<int>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_CLOCK_FREQUENCY);

    // Number of multiprocessors
    NumMultiprocessors = CLAPI.GetDeviceInfo<int>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_COMPUTE_UNITS);

    // The maximum number of threads per multiprocessor is set to the
    // maximum number of threads per group.
    MaxNumThreadsPerMultiprocessor = MaxNumThreadsPerGroup;

    InitVendorFeatures();
    InitSubGroupSupport(acceleratorId);

    Bind();
    DefaultStream = CreateStreamInternal();

    base.Backend = new CLBackend(
        Context,
        Backends.Backend.OSPlatform,
        Vendor);
}
/// <summary>
/// Returns the last inclusive field access.
/// </summary>
/// <returns>The last inclusive field access.</returns>
public FieldAccess GetLastAccess()
{
    // Offset of the last element, clamped so that it never drops below zero
    var lastOffset = IntrinsicMath.Max(Span - 1, 0);
    return Access.Add(lastOffset);
}
/// <summary>
/// Forwards both operands to <c>IntrinsicMath.Max</c>.
/// </summary>
/// <param name="first">The first operand.</param>
/// <param name="second">The second operand.</param>
/// <returns>The value returned by <c>IntrinsicMath.Max</c>.</returns>
public static ulong Max(ulong first, ulong second)
{
    return IntrinsicMath.Max(first, second);
}
/// <summary>
/// Forwards both operands to <c>IntrinsicMath.Max</c>.
/// </summary>
/// <param name="first">The first operand.</param>
/// <param name="second">The second operand.</param>
/// <returns>The value returned by <c>IntrinsicMath.Max</c>.</returns>
public static uint Max(uint first, uint second)
{
    return IntrinsicMath.Max(first, second);
}
/// <summary>
/// Forwards both operands to <c>IntrinsicMath.Max</c>.
/// </summary>
/// <param name="first">The first operand.</param>
/// <param name="second">The second operand.</param>
/// <returns>The value returned by <c>IntrinsicMath.Max</c>.</returns>
public static ushort Max(ushort first, ushort second)
{
    return IntrinsicMath.Max(first, second);
}
/// <summary>
/// Forwards both operands to <c>IntrinsicMath.Max</c>.
/// </summary>
/// <param name="first">The first operand.</param>
/// <param name="second">The second operand.</param>
/// <returns>The value returned by <c>IntrinsicMath.Max</c>.</returns>
public static byte Max(byte first, byte second)
{
    return IntrinsicMath.Max(first, second);
}
/// <summary>
/// Forwards both operands to <c>IntrinsicMath.Max</c>.
/// </summary>
/// <param name="first">The first operand.</param>
/// <param name="second">The second operand.</param>
/// <returns>The value returned by <c>IntrinsicMath.Max</c>.</returns>
public static int Max(int first, int second)
{
    return IntrinsicMath.Max(first, second);
}
/// <summary>
/// Forwards both operands to <c>IntrinsicMath.Max</c>.
/// </summary>
/// <param name="first">The first operand.</param>
/// <param name="second">The second operand.</param>
/// <returns>The value returned by <c>IntrinsicMath.Max</c>.</returns>
public static float Max(float first, float second)
{
    return IntrinsicMath.Max(first, second);
}
/// <summary>
/// Forwards both operands to <c>IntrinsicMath.Max</c>.
/// </summary>
/// <param name="first">The first operand.</param>
/// <param name="second">The second operand.</param>
/// <returns>The value returned by <c>IntrinsicMath.Max</c>.</returns>
public static double Max(double first, double second)
{
    return IntrinsicMath.Max(first, second);
}