Example #1
0
        /// <summary>
        /// Worker loop that is run by every processing thread.
        /// </summary>
        /// <remarks>
        /// The thread waits on the task monitor until a task is published that differs
        /// from the one it processed last, executes its share of that task (provided
        /// the thread is in use for the task's group size) and then synchronizes on
        /// the finished event. The loop terminates once <c>running</c> is cleared.
        /// </remarks>
        /// <param name="arg">The relative thread index.</param>
        private void ExecuteThread(object arg)
        {
            int threadIndex = (int)arg;
            CPUAcceleratorTask activeTask = null;

            while (true)
            {
                lock (taskSynchronizationObject)
                {
                    // Sleep until there is a task that this thread has not handled yet
                    // or until the runtime has been asked to stop
                    while ((currentTask == null | currentTask == activeTask) & running)
                    {
                        Monitor.Wait(taskSynchronizationObject);
                    }

                    if (running)
                    {
                        activeTask = currentTask;
                    }
                    else
                    {
                        break;
                    }
                }

                Debug.Assert(activeTask != null, "Invalid task");

                // Map the flat thread index onto a (group, thread-in-group) pair
                var threadsPerGroup = ComputeNumGroupThreads(activeTask.GroupDim.Size);
                var indexInGroup    = threadIndex % threadsPerGroup;
                var groupIndex      = threadIndex / threadsPerGroup;
                var groupCount      = NumThreads / threadsPerGroup;
                var usedThreadCount = groupCount * threadsPerGroup;
                Debug.Assert(usedThreadCount > 0, "Invalid group size");

                // Threads beyond the last complete group have nothing to execute and
                // only take part in the final synchronization
                if (threadIndex < usedThreadCount)
                {
                    // Attach the group context to this thread
                    var context = groupContexts[groupIndex];
                    context.MakeCurrent();

                    // Distribute the runtime dimension evenly over all groups and
                    // round the per-group share up to a multiple of the group width
                    var workItems    = activeTask.RuntimeDimension;
                    var rawChunk     = (workItems + groupCount - 1) / groupCount;
                    var alignedChunk =
                        ((rawChunk + threadsPerGroup - 1) / threadsPerGroup) * threadsPerGroup;
                    var firstItem    = alignedChunk * groupIndex;

                    // Publish the grid and group dimensions of the task
                    CPURuntimeThreadContext.SetupDimensions(
                        activeTask.GridDim,
                        activeTask.GroupDim);

                    // Get ready to run
                    context.WaitForNextThreadIndex();

                    var upperBound = Math.Min(activeTask.UserDimension, workItems);
                    activeTask.Execute(
                        context,
                        indexInGroup,
                        threadsPerGroup,
                        alignedChunk,
                        firstItem,
                        upperBound);
                }

                finishedEvent.SignalAndWait();
            }
        }
Example #2
0
        /// <summary>
        /// Entry point for a single processing thread.
        /// </summary>
        /// <remarks>
        /// The absolute thread index is split into a thread index within a
        /// multiprocessor (<c>arg % MaxNumThreadsPerMultiprocessor</c>) and a
        /// multiprocessor index (<c>arg / MaxNumThreadsPerMultiprocessor</c>).
        /// After binding its thread, warp and group contexts, the thread repeatedly
        /// waits for a new task, processes the grid chunk assigned to its
        /// multiprocessor and synchronizes with the other threads of that
        /// multiprocessor. The loop ends once <c>running</c> is cleared.
        /// </remarks>
        /// <param name="arg">The absolute thread index.</param>
        private void ExecuteThread(object arg)
        {
            // Get the current thread information
            int absoluteThreadIndex = (int)arg;
            int threadIdx           = absoluteThreadIndex % MaxNumThreadsPerMultiprocessor;
            var processorIdx        = absoluteThreadIndex / MaxNumThreadsPerMultiprocessor;

            var  processorBarrier = processorBarriers[processorIdx];
            bool isMainThread     = threadIdx == 0;

            // Setup a new thread context for this thread and initialize the lane index
            int laneIdx       = threadIdx % WarpSize;
            var threadContext = new CPURuntimeThreadContext(laneIdx)
            {
                LinearGroupIndex = threadIdx
            };

            threadContext.MakeCurrent();

            // Setup the current warp context as it always stays the same
            int warpIdx     = threadIdx / WarpSize;
            var warpContext = warpContexts[processorIdx, warpIdx];

            warpContext.MakeCurrent();

            // Setup the current group context as it always stays the same
            var groupContext = groupContexts[processorIdx];

            groupContext.MakeCurrent();

            CPUAcceleratorTask task = null;

            for (; ;)
            {
                // Get a new task to execute: block until a task is available that
                // differs from the previously processed one or until we are stopped
                lock (taskSynchronizationObject)
                {
                    while ((currentTask == null | currentTask == task) & running)
                    {
                        Monitor.Wait(taskSynchronizationObject);
                    }
                    if (!running)
                    {
                        break;
                    }
                    task = currentTask;
                }
                Debug.Assert(task != null, "Invalid task");

                // Setup the current group index
                threadContext.GroupIndex = Index3D.ReconstructIndex(
                    threadIdx,
                    task.GroupDim);

                // Wait for all threads of this multiprocessor to arrive here
                Thread.MemoryBarrier();
                processorBarrier.SignalAndWait();

                try
                {
                    // If we are an active group thread
                    int groupSize = task.GroupDim.Size;
                    if (threadIdx < groupSize)
                    {
                        try
                        {
                            var launcher = task.KernelExecutionDelegate;

                            // Split the grid into different chunks that will be processed
                            // by the available multiprocessors
                            int linearGridDim = task.GridDim.Size;
                            int gridChunkSize = IntrinsicMath.DivRoundUp(
                                linearGridDim,
                                NumMultiprocessors);
                            int gridOffset    = gridChunkSize * processorIdx;
                            int linearUserDim = task.TotalUserDim.Size;
                            for (
                                int i = gridOffset, e = gridOffset + gridChunkSize;
                                i < e;
                                ++i)
                            {
                                groupContext.BeginThreadProcessing();
                                try
                                {
                                    // Setup the current grid index
                                    threadContext.GridIndex = Index3D.ReconstructIndex(
                                        i,
                                        task.GridDim);

                                    // Invoke the actual kernel launcher. Assuming that
                                    // DivRoundUp rounds up, i can run past the last grid
                                    // index when the grid size is not a multiple of the
                                    // number of multiprocessors. The global index is
                                    // therefore computed in 64 bits: a wrapped 32-bit
                                    // product would turn negative and incorrectly pass
                                    // the bounds check below.
                                    long globalIndex = (long)i * groupSize + threadIdx;
                                    if (globalIndex < linearUserDim)
                                    {
                                        // Safe cast: the value is below an int bound
                                        launcher(task, (int)globalIndex);
                                    }
                                }
                                finally
                                {
                                    groupContext.EndThreadProcessing();
                                }
                            }
                        }
                        finally
                        {
                            // This thread has already finished processing
                            groupContext.FinishThreadProcessing();
                            warpContext.FinishThreadProcessing();
                        }
                    }
                }
                finally
                {
                    // Wait for all threads of this multiprocessor to arrive here
                    processorBarrier.SignalAndWait();

                    // If we reach this point and we are the main thread, notify the
                    // parent accelerator instance
                    if (isMainThread)
                    {
                        finishedEventPerMultiprocessor.SignalAndWait();
                    }
                }
            }
        }
 /// <summary>
 /// Activates this context for the calling thread by storing the instance in
 /// <c>currentContext</c>.
 /// </summary>
 internal void MakeCurrent()
 {
     currentContext = this;
 }
Example #4
0
        /// <summary>
        /// Entry point for a single processing thread.
        /// </summary>
        /// <remarks>
        /// The thread index within the multiprocessor is derived from the absolute
        /// thread index (<c>arg % MaxNumThreadsPerMultiprocessor</c>). After binding
        /// its thread, warp and group contexts, the thread repeatedly asks the
        /// accelerator for a new task, processes the grid chunk that belongs to
        /// <c>ProcessorIndex</c> and synchronizes on the processor barrier. The loop
        /// ends as soon as <c>Accelerator.WaitForTask</c> returns false.
        /// </remarks>
        /// <param name="arg">The absolute thread index.</param>
        private void ExecuteThread(object arg)
        {
            // Get the current thread information
            int absoluteThreadIndex = (int)arg;
            int threadIdx           = absoluteThreadIndex % MaxNumThreadsPerMultiprocessor;

            bool isMainThread = threadIdx == 0;

            // Setup a new thread context for this thread and initialize the lane index
            int laneIdx       = threadIdx % WarpSize;
            int warpIdx       = threadIdx / WarpSize;
            var threadContext = new CPURuntimeThreadContext(laneIdx, warpIdx)
            {
                LinearGroupIndex = threadIdx
            };

            threadContext.MakeCurrent();

            // Setup the current warp context as it always stays the same
            var warpContext = warpContexts[warpIdx];

            warpContext.MakeCurrent();

            // Setup the current group context as it always stays the same
            groupContext.MakeCurrent();

            CPUAcceleratorTask task = null;

            for (; ;)
            {
                // Get a new task to execute (if any)
                if (!Accelerator.WaitForTask(ref task))
                {
                    break;
                }

                // Setup the current group index
                threadContext.GroupIndex = Stride3D.DenseXY.ReconstructFromElementIndex(
                    threadIdx,
                    task.GroupDim);

                // Wait for all threads of this multiprocessor to arrive here
                Thread.MemoryBarrier();
                processorBarrier.SignalAndWait();

                try
                {
                    // If we are an active group thread
                    int groupSize = task.GroupDim.Size;
                    if (threadIdx < groupSize)
                    {
                        try
                        {
                            var launcher = task.KernelExecutionDelegate;

                            // Split the grid into different chunks that will be processed
                            // by the available multiprocessors
                            int linearGridDim = task.GridDim.Size;
                            int gridChunkSize = IntrinsicMath.DivRoundUp(
                                linearGridDim,
                                Accelerator.NumMultiprocessors);
                            int gridOffset    = gridChunkSize * ProcessorIndex;
                            int linearUserDim = task.TotalUserDim.Size;
                            for (
                                int i = gridOffset, e = gridOffset + gridChunkSize;
                                i < e;
                                ++i)
                            {
                                BeginThreadProcessing();
                                try
                                {
                                    // Setup the current grid index
                                    threadContext.GridIndex = Stride3D.DenseXY
                                                              .ReconstructFromElementIndex(
                                        i,
                                        task.GridDim);

                                    // Invoke the actual kernel launcher. Assuming that
                                    // DivRoundUp rounds up, i can run past the last grid
                                    // index when the grid size is not a multiple of the
                                    // number of multiprocessors. The global index is
                                    // therefore computed in 64 bits: a wrapped 32-bit
                                    // product would turn negative and incorrectly pass
                                    // the bounds check below.
                                    long globalIndex = (long)i * groupSize + threadIdx;
                                    if (globalIndex < linearUserDim)
                                    {
                                        // Safe cast: the value is below an int bound
                                        launcher(task, (int)globalIndex);
                                    }
                                }
                                finally
                                {
                                    EndThreadProcessing();
                                }
                            }
                        }
                        finally
                        {
                            // This thread has already finished processing
                            FinishThreadProcessing();
                        }
                    }
                }
                finally
                {
                    // Wait for all threads of this multiprocessor to arrive here
                    processorBarrier.SignalAndWait();

                    // If we reach this point and we are the main thread, notify the
                    // parent accelerator instance
                    if (isMainThread)
                    {
                        Accelerator.FinishTaskProcessing();
                    }
                }
            }
        }