/// <summary>
/// Entry point for a single processing thread.
/// </summary>
/// <param name="arg">
/// The absolute thread index. It is unboxed directly to <see cref="int"/>, so it
/// must be a boxed <see cref="int"/>.
/// </param>
private void ExecuteThread(object arg)
{
    // Split the absolute thread index into the multiprocessor index and the
    // thread index relative to that multiprocessor
    int absoluteThreadIndex = (int)arg;
    int threadIdx = absoluteThreadIndex % MaxNumThreadsPerMultiprocessor;
    var processorIdx = absoluteThreadIndex / MaxNumThreadsPerMultiprocessor;
    var processorBarrier = processorBarriers[processorIdx];
    bool isMainThread = threadIdx == 0;

    // Setup a new thread context for this thread and initialize the lane index
    int laneIdx = threadIdx % WarpSize;
    var threadContext = new CPURuntimeThreadContext(laneIdx)
    {
        LinearGroupIndex = threadIdx
    };
    threadContext.MakeCurrent();

    // Setup the current warp context as it always stays the same
    int warpIdx = threadIdx / WarpSize;
    var warpContext = warpContexts[processorIdx, warpIdx];
    warpContext.MakeCurrent();

    // Setup the current group context as it always stays the same
    var groupContext = groupContexts[processorIdx];
    groupContext.MakeCurrent();

    CPUAcceleratorTask task = null;
    while (true)
    {
        // Get a new task to execute
        lock (taskSynchronizationObject)
        {
            // Wait until currentTask is non-null and differs from the task that
            // was processed in the previous iteration, or until running is
            // false. The non-short-circuit operators evaluate all operands.
            while ((currentTask == null | currentTask == task) & running)
            {
                Monitor.Wait(taskSynchronizationObject);
            }

            // Leave the processing loop once running has been cleared
            if (!running)
            {
                break;
            }
            task = currentTask;
        }
        Debug.Assert(task != null, "Invalid task");

        // Setup the current group index
        threadContext.GroupIndex = Index3D.ReconstructIndex(
            threadIdx,
            task.GroupDim);

        // Issue a full memory fence and wait at the barrier that was selected
        // for this multiprocessor before starting the actual work
        Thread.MemoryBarrier();
        processorBarrier.SignalAndWait();

        try
        {
            // If we are an active group thread
            int groupSize = task.GroupDim.Size;
            if (threadIdx < groupSize)
            {
                try
                {
                    var launcher = task.KernelExecutionDelegate;

                    // Split the grid into different chunks that will be processed
                    // by the available multiprocessors
                    int linearGridDim = task.GridDim.Size;
                    int gridChunkSize = IntrinsicMath.DivRoundUp(
                        linearGridDim,
                        NumMultiprocessors);
                    int gridOffset = gridChunkSize * processorIdx;
                    int linearUserDim = task.TotalUserDim.Size;

                    // Every thread runs all gridChunkSize iterations and calls
                    // Begin/EndThreadProcessing in each of them, even if the
                    // kernel itself is skipped by the bounds check below
                    for (
                        int i = gridOffset, e = gridOffset + gridChunkSize;
                        i < e;
                        ++i)
                    {
                        groupContext.BeginThreadProcessing();
                        try
                        {
                            // Setup the current grid index
                            threadContext.GridIndex = Index3D.ReconstructIndex(
                                i,
                                task.GridDim);

                            // Compute the global index using 64-bit arithmetic:
                            // the chunk size is rounded up, so i can exceed the
                            // last group index and i * groupSize could wrap
                            // around in 32-bit arithmetic to a negative value
                            // that would incorrectly pass the bounds check
                            long globalIndex = (long)i * groupSize + threadIdx;
                            if (globalIndex < linearUserDim)
                            {
                                // The cast is lossless since
                                // 0 <= globalIndex < linearUserDim
                                launcher(task, (int)globalIndex);
                            }
                        }
                        finally
                        {
                            groupContext.EndThreadProcessing();
                        }
                    }
                }
                finally
                {
                    // This thread has already finished processing
                    groupContext.FinishThreadProcessing();
                    warpContext.FinishThreadProcessing();
                }
            }
        }
        finally
        {
            // Wait at the barrier of this multiprocessor again; this also
            // happens if the kernel has thrown an exception
            processorBarrier.SignalAndWait();

            // If we reach this point and we are the main thread of this
            // multiprocessor, signal (and wait on) the finished event
            if (isMainThread)
            {
                finishedEventPerMultiprocessor.SignalAndWait();
            }
        }
    }
}
/// <summary>
/// Entry point for a single processing thread.
/// </summary>
/// <param name="arg">
/// The absolute thread index. It is unboxed directly to <see cref="int"/>, so it
/// must be a boxed <see cref="int"/>.
/// </param>
private void ExecuteThread(object arg)
{
    // Get the current thread information
    int absoluteThreadIndex = (int)arg;
    int threadIdx = absoluteThreadIndex % MaxNumThreadsPerMultiprocessor;
    bool isMainThread = threadIdx == 0;

    // Setup a new thread context for this thread and initialize the lane index
    // and the warp index
    int laneIdx = threadIdx % WarpSize;
    int warpIdx = threadIdx / WarpSize;
    var threadContext = new CPURuntimeThreadContext(laneIdx, warpIdx)
    {
        LinearGroupIndex = threadIdx
    };
    threadContext.MakeCurrent();

    // Setup the current warp context as it always stays the same
    var warpContext = warpContexts[warpIdx];
    warpContext.MakeCurrent();

    // Setup the current group context as it always stays the same
    groupContext.MakeCurrent();

    CPUAcceleratorTask task = null;
    while (true)
    {
        // Get a new task to execute (if any); a false result terminates
        // the processing loop of this thread
        if (!Accelerator.WaitForTask(ref task))
        {
            break;
        }

        // Setup the current group index
        threadContext.GroupIndex = Stride3D.DenseXY.ReconstructFromElementIndex(
            threadIdx,
            task.GroupDim);

        // Issue a full memory fence and wait at the processor barrier before
        // starting the actual work
        Thread.MemoryBarrier();
        processorBarrier.SignalAndWait();

        try
        {
            // If we are an active group thread
            int groupSize = task.GroupDim.Size;
            if (threadIdx < groupSize)
            {
                try
                {
                    var launcher = task.KernelExecutionDelegate;

                    // Split the grid into different chunks that will be processed
                    // by the available multiprocessors
                    int linearGridDim = task.GridDim.Size;
                    int gridChunkSize = IntrinsicMath.DivRoundUp(
                        linearGridDim,
                        Accelerator.NumMultiprocessors);
                    int gridOffset = gridChunkSize * ProcessorIndex;
                    int linearUserDim = task.TotalUserDim.Size;

                    // Every thread runs all gridChunkSize iterations and calls
                    // Begin/EndThreadProcessing in each of them, even if the
                    // kernel itself is skipped by the bounds check below
                    for (
                        int i = gridOffset, e = gridOffset + gridChunkSize;
                        i < e;
                        ++i)
                    {
                        BeginThreadProcessing();
                        try
                        {
                            // Setup the current grid index
                            threadContext.GridIndex = Stride3D.DenseXY
                                .ReconstructFromElementIndex(
                                    i,
                                    task.GridDim);

                            // Compute the global index using 64-bit arithmetic:
                            // the chunk size is rounded up, so i can exceed the
                            // last group index and i * groupSize could wrap
                            // around in 32-bit arithmetic to a negative value
                            // that would incorrectly pass the bounds check
                            long globalIndex = (long)i * groupSize + threadIdx;
                            if (globalIndex < linearUserDim)
                            {
                                // The cast is lossless since
                                // 0 <= globalIndex < linearUserDim
                                launcher(task, (int)globalIndex);
                            }
                        }
                        finally
                        {
                            EndThreadProcessing();
                        }
                    }
                }
                finally
                {
                    // This thread has already finished processing
                    FinishThreadProcessing();
                }
            }
        }
        finally
        {
            // Wait at the processor barrier again; this also happens if the
            // kernel has thrown an exception
            processorBarrier.SignalAndWait();

            // If we reach this point and we are the main thread, notify the
            // parent accelerator instance
            if (isMainThread)
            {
                Accelerator.FinishTaskProcessing();
            }
        }
    }
}