/// <summary>
/// Computes the maximum of two signed 8-bit integers.
/// </summary>
/// <param name="first">The first operand.</param>
/// <param name="second">The second operand.</param>
/// <returns>The result of <c>IntrinsicMath.Max(first, second)</c>.</returns>
public static sbyte Max(sbyte first, sbyte second)
{
    return IntrinsicMath.Max(first, second);
}
/// <summary>
/// Computes the absolute value of a signed 16-bit integer.
/// </summary>
/// <param name="value">The input value.</param>
/// <returns>The result of <c>IntrinsicMath.Abs(value)</c>.</returns>
public static short Abs(short value)
{
    return IntrinsicMath.Abs(value);
}
/// <summary>
/// Computes the absolute value of a signed 64-bit integer.
/// </summary>
/// <param name="value">The input value.</param>
/// <returns>The result of <c>IntrinsicMath.Abs(value)</c>.</returns>
public static long Abs(long value)
{
    return IntrinsicMath.Abs(value);
}
/// <summary>
/// Constructs a new OpenCL accelerator.
/// </summary>
/// <remarks>
/// Reads platform and device properties through <c>CLAPI</c>, creates a native
/// context for the device and finally initializes vendor features, sub-group
/// support, the default stream and the OpenCL backend (in that order).
/// </remarks>
/// <param name="context">The ILGPU context.</param>
/// <param name="acceleratorId">The accelerator id.</param>
/// <exception cref="ArgumentNullException">
/// Thrown if <paramref name="acceleratorId"/> is null.
/// </exception>
public CLAccelerator(Context context, CLAcceleratorId acceleratorId)
    : base(context, AcceleratorType.OpenCL)
{
    if (acceleratorId == null)
    {
        throw new ArgumentNullException(nameof(acceleratorId));
    }

    // Take over the identifying handles from the accelerator id
    PlatformId = acceleratorId.PlatformId;
    DeviceId = acceleratorId.DeviceId;
    CVersion = acceleratorId.CVersion;

    // Resolve platform info
    PlatformName = CLAPI.GetPlatformInfo(
        PlatformId,
        CLPlatformInfoType.CL_PLATFORM_NAME);

    VendorName = CLAPI.GetPlatformInfo(
        PlatformId,
        CLPlatformInfoType.CL_PLATFORM_VENDOR);

    // Create new context
    CLException.ThrowIfFailed(
        CLAPI.CreateContext(DeviceId, out contextPtr));

    // Resolve device info
    Name = CLAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_NAME);

    MemorySize = CLAPI.GetDeviceInfo<long>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_GLOBAL_MEM_SIZE);

    DeviceType = (CLDeviceType)CLAPI.GetDeviceInfo<long>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_TYPE);

    // Max grid size
    // At least three entries are allocated so that the accesses to the
    // indices 0, 1 and 2 below are always within the array bounds.
    int workItemDimensions = IntrinsicMath.Max(CLAPI.GetDeviceInfo<int>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS), 3);
    var workItemSizes = new IntPtr[workItemDimensions];
    CLAPI.GetDeviceInfo(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_WORK_ITEM_SIZES,
        workItemSizes);
    MaxGridSize = new Index3(
        workItemSizes[0].ToInt32(),
        workItemSizes[1].ToInt32(),
        workItemSizes[2].ToInt32());

    // Resolve max threads per group
    MaxNumThreadsPerGroup = CLAPI.GetDeviceInfo<IntPtr>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_WORK_GROUP_SIZE).ToInt32();

    // Resolve max shared memory per block (clamped to int.MaxValue before the
    // conversion to int)
    MaxSharedMemoryPerGroup = (int)IntrinsicMath.Min(
        CLAPI.GetDeviceInfo<long>(
            DeviceId,
            CLDeviceInfoType.CL_DEVICE_LOCAL_MEM_SIZE),
        int.MaxValue);

    // Resolve total constant memory
    // NOTE(review): unlike the shared-memory query above, this long value is
    // converted to int without clamping - confirm that this is intended.
    MaxConstantMemory = (int)CLAPI.GetDeviceInfo<long>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_PARAMETER_SIZE);

    // Resolve clock rate
    ClockRate = CLAPI.GetDeviceInfo<int>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_CLOCK_FREQUENCY);

    // Resolve number of multiprocessors
    NumMultiprocessors = CLAPI.GetDeviceInfo<int>(
        DeviceId,
        CLDeviceInfoType.CL_DEVICE_MAX_COMPUTE_UNITS);

    // Resolve max number of threads per multiprocessor (set to the max number
    // of threads per group)
    MaxNumThreadsPerMultiprocessor = MaxNumThreadsPerGroup;

    InitVendorFeatures();
    InitSubGroupSupport(acceleratorId);

    // Bind this accelerator before the default stream is created and the
    // backend is initialized
    Bind();
    DefaultStream = CreateStreamInternal();
    Init(new CLBackend(Context, Vendor));
}
/// <summary>
/// Computes the absolute value of a single-precision floating-point number.
/// </summary>
/// <param name="value">The input value.</param>
/// <returns>The result of <c>IntrinsicMath.Abs(value)</c>.</returns>
public static float Abs(float value)
{
    return IntrinsicMath.Abs(value);
}
/// <summary>
/// Computes the minimum of two unsigned 16-bit integers.
/// </summary>
/// <param name="first">The first operand.</param>
/// <param name="second">The second operand.</param>
/// <returns>The result of <c>IntrinsicMath.Min(first, second)</c>.</returns>
public static ushort Min(ushort first, ushort second)
{
    return IntrinsicMath.Min(first, second);
}
/// <summary>
/// Returns true if the given integer is a power of two.
/// </summary>
/// <remarks>
/// The test is carried out on the absolute value of <paramref name="value"/>.
/// <see cref="long.MinValue"/> always yields false.
/// </remarks>
/// <param name="value">The integer value.</param>
/// <returns>True, if the given integer is a power of two.</returns>
public static bool IsPowerOf2(long value)
{
    // long.MinValue is rejected up front, before the absolute value is taken
    if (value == long.MinValue)
    {
        return false;
    }

    return IsPowerOf2((ulong)IntrinsicMath.Abs(value));
}
/// <summary>
/// Computes the minimum of two double-precision floating-point numbers.
/// </summary>
/// <param name="first">The first operand.</param>
/// <param name="second">The second operand.</param>
/// <returns>The result of <c>IntrinsicMath.Min(first, second)</c>.</returns>
public static double Min(double first, double second)
{
    return IntrinsicMath.Min(first, second);
}
/// <summary>
/// Clamps a signed 32-bit integer to the given bounds.
/// </summary>
/// <param name="value">The value to clamp.</param>
/// <param name="min">The lower bound.</param>
/// <param name="max">The upper bound.</param>
/// <returns>The result of <c>IntrinsicMath.Clamp(value, min, max)</c>.</returns>
public static int Clamp(int value, int min, int max)
{
    return IntrinsicMath.Clamp(value, min, max);
}
/// <summary>
/// Clamps a double-precision floating-point number to the given bounds.
/// </summary>
/// <param name="value">The value to clamp.</param>
/// <param name="min">The lower bound.</param>
/// <param name="max">The upper bound.</param>
/// <returns>The result of <c>IntrinsicMath.Clamp(value, min, max)</c>.</returns>
public static double Clamp(double value, double min, double max)
{
    return IntrinsicMath.Clamp(value, min, max);
}
/// <summary>
/// Clamps a single-precision floating-point number to the given bounds.
/// </summary>
/// <param name="value">The value to clamp.</param>
/// <param name="min">The lower bound.</param>
/// <param name="max">The upper bound.</param>
/// <returns>The result of <c>IntrinsicMath.Clamp(value, min, max)</c>.</returns>
public static float Clamp(float value, float min, float max)
{
    return IntrinsicMath.Clamp(value, min, max);
}
/// <summary>
/// Computes the maximum of two unsigned 64-bit integers.
/// </summary>
/// <param name="first">The first operand.</param>
/// <param name="second">The second operand.</param>
/// <returns>The result of <c>IntrinsicMath.Max(first, second)</c>.</returns>
public static ulong Max(ulong first, ulong second)
{
    return IntrinsicMath.Max(first, second);
}
/// <summary>
/// Computes the maximum of two unsigned 32-bit integers.
/// </summary>
/// <param name="first">The first operand.</param>
/// <param name="second">The second operand.</param>
/// <returns>The result of <c>IntrinsicMath.Max(first, second)</c>.</returns>
public static uint Max(uint first, uint second)
{
    return IntrinsicMath.Max(first, second);
}
/// <summary>
/// Computes the maximum of two signed 16-bit integers.
/// </summary>
/// <param name="first">The first operand.</param>
/// <param name="second">The second operand.</param>
/// <returns>The result of <c>IntrinsicMath.Max(first, second)</c>.</returns>
public static short Max(short first, short second)
{
    return IntrinsicMath.Max(first, second);
}
/// <summary>
/// Computes the minimum of two signed 64-bit integers.
/// </summary>
/// <param name="first">The first operand.</param>
/// <param name="second">The second operand.</param>
/// <returns>The result of <c>IntrinsicMath.Min(first, second)</c>.</returns>
public static long Min(long first, long second)
{
    return IntrinsicMath.Min(first, second);
}
/// <summary>
/// Clamps a signed 64-bit integer to the given bounds.
/// </summary>
/// <param name="value">The value to clamp.</param>
/// <param name="min">The lower bound.</param>
/// <param name="max">The upper bound.</param>
/// <returns>The result of <c>IntrinsicMath.Clamp(value, min, max)</c>.</returns>
public static long Clamp(long value, long min, long max)
{
    return IntrinsicMath.Clamp(value, min, max);
}
/// <summary>
/// Computes the minimum of two unsigned 8-bit integers.
/// </summary>
/// <param name="first">The first operand.</param>
/// <param name="second">The second operand.</param>
/// <returns>The result of <c>IntrinsicMath.Min(first, second)</c>.</returns>
public static byte Min(byte first, byte second)
{
    return IntrinsicMath.Min(first, second);
}
/// <summary>
/// Clamps an unsigned 8-bit integer to the given bounds.
/// </summary>
/// <param name="value">The value to clamp.</param>
/// <param name="min">The lower bound.</param>
/// <param name="max">The upper bound.</param>
/// <returns>The result of <c>IntrinsicMath.Clamp(value, min, max)</c>.</returns>
public static byte Clamp(byte value, byte min, byte max)
{
    return IntrinsicMath.Clamp(value, min, max);
}
/// <summary>
/// Entry point for a single processing thread.
/// </summary>
/// <remarks>
/// The thread installs its thread, warp and group contexts once and then loops:
/// it waits for a task, synchronizes with the other threads on the processor
/// barrier, processes its chunk of the grid and synchronizes again. The loop
/// ends as soon as <c>Accelerator.WaitForTask</c> returns false.
/// </remarks>
/// <param name="arg">The absolute thread index.</param>
private void ExecuteThread(object arg)
{
    // Get the current thread information
    int absoluteThreadIndex = (int)arg;
    int threadIdx = absoluteThreadIndex % MaxNumThreadsPerMultiprocessor;
    bool isMainThread = threadIdx == 0;

    // Setup a new thread context for this thread and initialize the lane index
    int laneIdx = threadIdx % WarpSize;
    int warpIdx = threadIdx / WarpSize;
    var threadContext = new CPURuntimeThreadContext(laneIdx, warpIdx)
    {
        LinearGroupIndex = threadIdx
    };
    threadContext.MakeCurrent();

    // Setup the current warp context as it always stays the same
    var warpContext = warpContexts[warpIdx];
    warpContext.MakeCurrent();

    // Setup the current group context as it always stays the same
    groupContext.MakeCurrent();

    CPUAcceleratorTask task = null;
    for (; ; )
    {
        // Get a new task to execute (if any)
        if (!Accelerator.WaitForTask(ref task))
        {
            break;
        }

        // Setup the current group index
        threadContext.GroupIndex = Index3D.ReconstructIndex(
            threadIdx,
            task.GroupDim);

        // Wait for all threads of all multiprocessors to arrive here
        Thread.MemoryBarrier();
        processorBarrier.SignalAndWait();

        try
        {
            // If we are an active group thread
            int groupSize = task.GroupDim.Size;
            if (threadIdx < groupSize)
            {
                try
                {
                    var launcher = task.KernelExecutionDelegate;

                    // Split the grid into different chunks that will be processed
                    // by the available multiprocessors
                    int linearGridDim = task.GridDim.Size;
                    int gridChunkSize = IntrinsicMath.DivRoundUp(
                        linearGridDim,
                        Accelerator.NumMultiprocessors);
                    int gridOffset = gridChunkSize * ProcessorIndex;
                    int linearUserDim = task.TotalUserDim.Size;
                    for (
                        int i = gridOffset, e = gridOffset + gridChunkSize;
                        i < e;
                        ++i)
                    {
                        BeginThreadProcessing();
                        try
                        {
                            // Setup the current grid index
                            threadContext.GridIndex = Index3D.ReconstructIndex(
                                i,
                                task.GridDim);

                            // Invoke the actual kernel launcher; indices at or
                            // beyond the total user dimension are not launched
                            int globalIndex = i * groupSize + threadIdx;
                            if (globalIndex < linearUserDim)
                            {
                                launcher(task, globalIndex);
                            }
                        }
                        finally
                        {
                            EndThreadProcessing();
                        }
                    }
                }
                finally
                {
                    // This thread has already finished processing
                    FinishThreadProcessing();
                }
            }
        }
        finally
        {
            // Wait for all threads of all multiprocessors to arrive here
            processorBarrier.SignalAndWait();

            // If we reach this point and we are the main thread, notify the
            // parent accelerator instance
            if (isMainThread)
            {
                Accelerator.FinishTaskProcessing();
            }
        }
    }
}
/// <summary>
/// Clamps an unsigned 16-bit integer to the given bounds.
/// </summary>
/// <param name="value">The value to clamp.</param>
/// <param name="min">The lower bound.</param>
/// <param name="max">The upper bound.</param>
/// <returns>The result of <c>IntrinsicMath.Clamp(value, min, max)</c>.</returns>
public static ushort Clamp(ushort value, ushort min, ushort max)
{
    return IntrinsicMath.Clamp(value, min, max);
}
/// <summary>
/// Copies elements from a jagged 3D array into this buffer.
/// </summary>
/// <remarks>
/// The elements are gathered into a temporary dense array with
/// <c>extent.Size</c> entries that is then passed to the underlying buffer.
/// Sub-arrays that are null and entries that lie outside the bounds of a
/// sub-array are skipped; the corresponding entries of the temporary array keep
/// their default value.
/// </remarks>
/// <param name="stream">The accelerator stream passed to the underlying copy.</param>
/// <param name="source">The jagged source array.</param>
/// <param name="sourceOffset">The offset of the first element in the source.</param>
/// <param name="targetOffset">The offset of the first element in this buffer.</param>
/// <param name="extent">The number of elements to copy in each dimension.</param>
/// <exception cref="ArgumentNullException">
/// Thrown if <paramref name="source"/> is null.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown if an offset is negative, if <paramref name="sourceOffset"/> or
/// <paramref name="targetOffset"/> lies outside the source or this buffer, or if
/// <paramref name="extent"/> is negative or exceeds the available range.
/// </exception>
public void CopyFrom(
    AcceleratorStream stream,
    T[][][] source,
    LongIndex3 sourceOffset,
    LongIndex3 targetOffset,
    LongIndex3 extent)
{
    if (source == null)
    {
        throw new ArgumentNullException(nameof(source));
    }

    if (sourceOffset.X < 0 || sourceOffset.Y < 0 || sourceOffset.Z < 0 ||
        sourceOffset.X >= source.LongLength)
    {
        throw new ArgumentOutOfRangeException(nameof(sourceOffset));
    }

    if (targetOffset.X < 0 || targetOffset.Y < 0 || targetOffset.Z < 0 ||
        targetOffset.X >= Extent.X ||
        targetOffset.Y >= Extent.Y ||
        targetOffset.Z >= Extent.Z)
    {
        throw new ArgumentOutOfRangeException(nameof(targetOffset));
    }

    if (extent.X < 0 || extent.Y < 0 || extent.Z < 0 ||
        sourceOffset.X + extent.X > source.LongLength ||
        targetOffset.X + extent.X > Extent.X ||
        targetOffset.Y + extent.Y > Extent.Y ||
        targetOffset.Z + extent.Z > Extent.Z)
    {
        throw new ArgumentOutOfRangeException(nameof(extent));
    }

    var tempBuffer = new T[extent.Size];

    for (long i = 0; i < extent.X; ++i)
    {
        var subData = source[i + sourceOffset.X];
        if (subData == null)
        {
            continue;
        }

        // Only the Y and Z extents of the inner arrays are unknown up front
        // (the X range has been validated above). Reading starts at
        // sourceOffset.Y, so the number of readable rows is the remaining
        // length after the offset; a non-positive value skips the loop.
        long rowCount = IntrinsicMath.Min(
            subData.LongLength - sourceOffset.Y,
            extent.Y);
        for (long j = 0; j < rowCount; ++j)
        {
            var subSubData = subData[j + sourceOffset.Y];
            if (subSubData == null)
            {
                continue;
            }

            // Skip entries that are out of bounds. The offset has to be
            // subtracted since the read index below is k + sourceOffset.Z.
            long columnCount = IntrinsicMath.Min(
                subSubData.LongLength - sourceOffset.Z,
                extent.Z);
            for (long k = 0; k < columnCount; ++k)
            {
                var targetIdx = new LongIndex3(i, j, k).
                    ComputeLinearIndex(extent);
                tempBuffer[targetIdx] = subSubData[k + sourceOffset.Z];
            }
        }
    }

    buffer.CopyFrom(
        stream,
        tempBuffer,
        0,
        targetOffset,
        extent.Size);
}
/// <summary>
/// Clamps an unsigned 32-bit integer to the given bounds.
/// </summary>
/// <param name="value">The value to clamp.</param>
/// <param name="min">The lower bound.</param>
/// <param name="max">The upper bound.</param>
/// <returns>The result of <c>IntrinsicMath.Clamp(value, min, max)</c>.</returns>
public static uint Clamp(uint value, uint min, uint max)
{
    return IntrinsicMath.Clamp(value, min, max);
}
/// <summary>
/// Computes the absolute value of a double-precision floating-point number.
/// </summary>
/// <param name="value">The input value.</param>
/// <returns>The result of <c>IntrinsicMath.Abs(value)</c>.</returns>
public static double Abs(double value)
{
    return IntrinsicMath.Abs(value);
}
/// <summary>
/// Clamps an unsigned 64-bit integer to the given bounds.
/// </summary>
/// <param name="value">The value to clamp.</param>
/// <param name="min">The lower bound.</param>
/// <param name="max">The upper bound.</param>
/// <returns>The result of <c>IntrinsicMath.Clamp(value, min, max)</c>.</returns>
public static ulong Clamp(ulong value, ulong min, ulong max)
{
    return IntrinsicMath.Clamp(value, min, max);
}
/// <summary>
/// Computes the absolute value of a signed 8-bit integer.
/// </summary>
/// <param name="value">The input value.</param>
/// <returns>The result of <c>IntrinsicMath.Abs(value)</c>.</returns>
public static sbyte Abs(sbyte value)
{
    return IntrinsicMath.Abs(value);
}
/// <summary>
/// Computes the minimum of two single-precision floating-point numbers.
/// </summary>
/// <param name="first">The first operand.</param>
/// <param name="second">The second operand.</param>
/// <returns>The result of <c>IntrinsicMath.Min(first, second)</c>.</returns>
public static float Min(float first, float second)
{
    return IntrinsicMath.Min(first, second);
}
/// <summary>
/// Computes the absolute value of a signed 32-bit integer.
/// </summary>
/// <param name="value">The input value.</param>
/// <returns>The result of <c>IntrinsicMath.Abs(value)</c>.</returns>
public static int Abs(int value)
{
    return IntrinsicMath.Abs(value);
}
/// <summary>
/// Computes the minimum of two signed 32-bit integers.
/// </summary>
/// <param name="first">The first operand.</param>
/// <param name="second">The second operand.</param>
/// <returns>The result of <c>IntrinsicMath.Min(first, second)</c>.</returns>
public static int Min(int first, int second)
{
    return IntrinsicMath.Min(first, second);
}
/// <summary>
/// Counts the trailing zero bits of an unsigned 32-bit integer.
/// </summary>
/// <param name="value">The input value.</param>
/// <returns>The result of <c>IntrinsicMath.TrailingZeroCount(value)</c>.</returns>
public static int TrailingZeroCount(uint value)
{
    return IntrinsicMath.TrailingZeroCount(value);
}
/// <summary>
/// Returns true if the given integer is a power of two.
/// </summary>
/// <remarks>
/// The sign of <paramref name="value"/> is ignored since the test is applied to
/// its absolute value. <see cref="long.MinValue"/> always yields false.
/// </remarks>
/// <param name="value">The integer value.</param>
/// <returns>True, if the given integer is a power of two.</returns>
public static bool IsPowerOf2(long value) =>
    // The short-circuiting && keeps long.MinValue away from the Abs call
    value != long.MinValue &&
    IsPowerOf2((ulong)IntrinsicMath.Abs(value));