Esempio n. 1
0
        /// <summary>
        /// Prepares the <see cref="CPURuntimeGroupContext"/> of every multiprocessor
        /// and all <see cref="CPURuntimeWarpContext"/> instances that are required
        /// to run the given task.
        /// </summary>
        /// <param name="task">The current CPU task.</param>
        /// <exception cref="NotSupportedException">
        /// Thrown if the group size, rounded up to a whole number of warps, exceeds
        /// the maximum number of threads per group.
        /// </exception>
        private void SetupRuntimeClasses(CPUAcceleratorTask task)
        {
            int threadsPerGroup = task.GroupDim.Size;
            int warpCount = IntrinsicMath.DivRoundUp(threadsPerGroup, WarpSize);

            // The group is validated after padding it to full warps
            if (warpCount * WarpSize > MaxNumThreadsPerGroup)
            {
                var message = string.Format(
                    RuntimeErrorMessages.NotSupportedTotalGroupSize,
                    MaxNumThreadsPerGroup);
                throw new NotSupportedException(message);
            }

            // Only the last warp of a group can be partially filled: it receives
            // the remainder of the group size (or a full warp if there is none)
            int remainder        = threadsPerGroup % WarpSize;
            int trailingWarpSize = remainder == 0 ? WarpSize : remainder;
            int trailingWarpIdx  = warpCount - 1;

            int processorCount = NumMultiprocessors;
            for (int processorIdx = 0; processorIdx < processorCount; ++processorIdx)
            {
                // Group context of the current multiprocessor
                groupContexts[processorIdx].Initialize(
                    task.GridDim,
                    task.GroupDim,
                    task.DynamicSharedMemoryConfig);

                // All warps in front of the last one are completely filled
                for (int warpIdx = 0; warpIdx < trailingWarpIdx; ++warpIdx)
                {
                    warpContexts[processorIdx, warpIdx].Initialize(WarpSize);
                }

                warpContexts[processorIdx, trailingWarpIdx].Initialize(trailingWarpSize);
            }
        }
Esempio n. 2
0
 /// <summary cref="DisposeBase.Dispose(bool)"/>
 /// <remarks>
 /// When <paramref name="disposing"/> is true, this clears the running flag and
 /// the current task, pulses every thread waiting on the task synchronization
 /// object, joins all threads, disposes all group contexts and finally disposes
 /// the finished event. The thread and group-context collections are only
 /// touched while they are non-null, so running this method again after they
 /// have been released does not throw a <see cref="NullReferenceException"/>.
 /// </remarks>
 protected override void Dispose(bool disposing)
 {
     if (disposing)
     {
         // Publish the shutdown request under the lock and wake all waiters so
         // that they can observe the cleared running flag
         lock (taskSynchronizationObject)
         {
             running     = false;
             currentTask = null;
             Monitor.PulseAll(taskSynchronizationObject);
         }

         // Wait for every thread to terminate before releasing shared objects.
         // The field is null once a previous dispose pass has completed.
         if (threads != null)
         {
             foreach (var thread in threads)
             {
                 thread.Join();
             }
             threads = null;
         }

         // Release the group contexts exactly once
         if (groupContexts != null)
         {
             foreach (var group in groupContexts)
             {
                 group.Dispose();
             }
             groupContexts = null;
         }

         finishedEvent.Dispose();
     }
     base.Dispose(disposing);
 }
Esempio n. 3
0
        /// <summary>
        /// Worker loop of a single processing thread: waits for the next task,
        /// executes this thread's share of it and then signals completion.
        /// </summary>
        /// <param name="arg">The relative thread index.</param>
        private void ExecuteThread(object arg)
        {
            int threadIndex = (int)arg;
            CPUAcceleratorTask activeTask = null;

            while (true)
            {
                lock (taskSynchronizationObject)
                {
                    // Block while there is no task, or only the task that has
                    // already been processed, and we are still running
                    while ((currentTask == null | currentTask == activeTask) & running)
                    {
                        Monitor.Wait(taskSynchronizationObject);
                    }

                    // Leave the worker loop once running has been cleared
                    if (!running)
                    {
                        return;
                    }
                    activeTask = currentTask;
                }

                Debug.Assert(activeTask != null, "Invalid task");

                // Map this thread onto a runtime group and a slot inside that group
                var threadsPerGroup = ComputeNumGroupThreads(activeTask.GroupDim.Size);
                var groupCount      = NumThreads / threadsPerGroup;
                var usedThreadCount = groupCount * threadsPerGroup;
                var groupIndex      = threadIndex / threadsPerGroup;
                var indexInGroup    = threadIndex % threadsPerGroup;
                Debug.Assert(usedThreadCount > 0, "Invalid group size");

                // Threads beyond the last complete group skip the execution
                if (threadIndex < usedThreadCount)
                {
                    // Attach the group context to this thread
                    var context = groupContexts[groupIndex];
                    context.MakeCurrent();

                    // Distribute the runtime dimension evenly over all groups and
                    // round each chunk up to a multiple of the group thread size
                    var totalWork    = activeTask.RuntimeDimension;
                    var workPerGroup = (totalWork + groupCount - 1) / groupCount;
                    workPerGroup =
                        ((workPerGroup + threadsPerGroup - 1) / threadsPerGroup) *
                        threadsPerGroup;
                    var workOffset = workPerGroup * groupIndex;

                    // Publish the grid and group dimensions
                    CPURuntimeThreadContext.SetupDimensions(
                        activeTask.GridDim,
                        activeTask.GroupDim);

                    // Get ready for execution
                    context.WaitForNextThreadIndex();

                    var upperBound = Math.Min(activeTask.UserDimension, totalWork);
                    Debug.Assert(
                        context.SharedMemory.LengthInBytes == activeTask.SharedMemSize,
                        "Invalid shared-memory initialization");
                    activeTask.Execute(
                        context,
                        indexInGroup,
                        threadsPerGroup,
                        workPerGroup,
                        workOffset,
                        upperBound);
                }

                // Every thread (active or not) takes part in the completion signal
                finishedEvent.SignalAndWait();
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Entry point for a single processing thread.
        /// </summary>
        /// <param name="arg">The absolute thread index.</param>
        /// <remarks>
        /// The absolute thread index is split into a multiprocessor index and a
        /// thread index within that multiprocessor. The thread binds its thread,
        /// warp and group contexts once and then processes tasks in a loop until
        /// the running flag is cleared.
        /// </remarks>
        private void ExecuteThread(object arg)
        {
            // Get the current thread information
            int absoluteThreadIndex = (int)arg;
            int threadIdx           = absoluteThreadIndex % MaxNumThreadsPerMultiprocessor;
            var processorIdx        = absoluteThreadIndex / MaxNumThreadsPerMultiprocessor;

            // The barrier is looked up per multiprocessor index
            var  processorBarrier = processorBarriers[processorIdx];
            bool isMainThread     = threadIdx == 0;

            // Setup a new thread context for this thread and initialize the lane index
            int laneIdx       = threadIdx % WarpSize;
            var threadContext = new CPURuntimeThreadContext(laneIdx)
            {
                LinearGroupIndex = threadIdx
            };

            threadContext.MakeCurrent();

            // Setup the current warp context as it always stays the same
            int warpIdx     = threadIdx / WarpSize;
            var warpContext = warpContexts[processorIdx, warpIdx];

            warpContext.MakeCurrent();

            // Setup the current group context as it always stays the same
            var groupContext = groupContexts[processorIdx];

            groupContext.MakeCurrent();

            CPUAcceleratorTask task = null;

            for (; ;)
            {
                // Get a new task to execute: wait while there is no task, or only
                // the task processed in the previous iteration, and we are running
                lock (taskSynchronizationObject)
                {
                    while ((currentTask == null | currentTask == task) & running)
                    {
                        Monitor.Wait(taskSynchronizationObject);
                    }
                    if (!running)
                    {
                        break;
                    }
                    task = currentTask;
                }
                Debug.Assert(task != null, "Invalid task");

                // Setup the current group index
                threadContext.GroupIndex = Index3D.ReconstructIndex(
                    threadIdx,
                    task.GroupDim);

                // Wait at the barrier of this multiprocessor before processing
                Thread.MemoryBarrier();
                processorBarrier.SignalAndWait();

                try
                {
                    // If we are an active group thread
                    int groupSize = task.GroupDim.Size;
                    if (threadIdx < groupSize)
                    {
                        try
                        {
                            var launcher = task.KernelExecutionDelegate;

                            // Split the grid into different chunks that will be processed
                            // by the available multiprocessors
                            int linearGridDim = task.GridDim.Size;
                            int gridChunkSize = IntrinsicMath.DivRoundUp(
                                linearGridDim,
                                NumMultiprocessors);
                            int gridOffset    = gridChunkSize * processorIdx;
                            int linearUserDim = task.TotalUserDim.Size;
                            for (
                                int i = gridOffset, e = gridOffset + gridChunkSize;
                                i < e;
                                ++i)
                            {
                                groupContext.BeginThreadProcessing();
                                try
                                {
                                    // Setup the current grid index
                                    threadContext.GridIndex = Index3D.ReconstructIndex(
                                        i,
                                        task.GridDim);

                                    // Invoke the actual kernel launcher for indices
                                    // that lie inside the user-defined dimension
                                    int globalIndex = i * groupSize + threadIdx;
                                    if (globalIndex < linearUserDim)
                                    {
                                        launcher(task, globalIndex);
                                    }
                                }
                                finally
                                {
                                    groupContext.EndThreadProcessing();
                                }
                            }
                        }
                        finally
                        {
                            // This thread has already finished processing
                            groupContext.FinishThreadProcessing();
                            warpContext.FinishThreadProcessing();
                        }
                    }
                }
                finally
                {
                    // Wait at the barrier of this multiprocessor, even if the
                    // kernel invocation above has thrown an exception
                    processorBarrier.SignalAndWait();

                    // If we reach this point and we are the main thread, notify the
                    // parent accelerator instance
                    if (isMainThread)
                    {
                        finishedEventPerMultiprocessor.SignalAndWait();
                    }
                }
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Worker loop of a single processing thread: binds the warp context of
        /// the thread, then repeatedly waits for a task, executes this thread's
        /// share of it and signals completion.
        /// </summary>
        /// <param name="arg">The relative thread index.</param>
        private void ExecuteThread(object arg)
        {
            int threadIndex = (int)arg;

            // The warp context is selected once by the relative thread index
            var warp = warpContexts[threadIndex / WarpSize];
            Debug.Assert(warp != null, "Invalid warp context");
            warp.MakeCurrent();

            CPUAcceleratorTask activeTask = null;

            while (true)
            {
                lock (taskSynchronizationObject)
                {
                    // Block while there is no task, or only the task that has
                    // already been processed, and we are still running
                    while ((currentTask == null | currentTask == activeTask) & running)
                    {
                        Monitor.Wait(taskSynchronizationObject);
                    }

                    // Leave the worker loop once running has been cleared
                    if (!running)
                    {
                        return;
                    }
                    activeTask = currentTask;
                }

                Debug.Assert(activeTask != null, "Invalid task");

                // Determine the slot of this thread inside its runtime group and
                // (re-)initialize the warp context for the new task
                var threadsPerGroup = ComputeNumGroupThreads(activeTask.GroupDim.Size);
                var indexInGroup    = threadIndex % threadsPerGroup;
                warp.Initialize(indexInGroup, out int runtimeThreadOffset);

                var groupCount      = NumThreads / threadsPerGroup;
                var usedThreadCount = groupCount * threadsPerGroup;
                var groupIndex      = threadIndex / threadsPerGroup;
                Debug.Assert(usedThreadCount > 0, "Invalid group size");

                // Threads beyond the last complete group skip the execution
                if (threadIndex < usedThreadCount)
                {
                    // Attach the group context to this thread
                    groupContexts[groupIndex].MakeCurrent(
                        out ArrayView<byte> sharedMemory,
                        out Barrier groupBarrier);

                    // Distribute the runtime dimension evenly over all groups and
                    // round each chunk up to a multiple of the group thread size
                    var totalWork    = activeTask.RuntimeDimension;
                    var workPerGroup = (totalWork + groupCount - 1) / groupCount;
                    workPerGroup =
                        ((workPerGroup + threadsPerGroup - 1) / threadsPerGroup) *
                        threadsPerGroup;
                    var workOffset = workPerGroup * groupIndex;

                    var upperBound = Math.Min(activeTask.UserDimension, totalWork);
                    Debug.Assert(
                        sharedMemory.LengthInBytes == activeTask.SharedMemSize,
                        "Invalid shared-memory initialization");
                    activeTask.Execute(
                        groupBarrier,
                        sharedMemory,
                        runtimeThreadOffset,
                        threadsPerGroup,
                        groupCount,
                        usedThreadCount,
                        workPerGroup,
                        workOffset,
                        upperBound);
                }

                // Every thread (active or not) takes part in the completion signal
                finishedEvent.SignalAndWait();
            }
        }
Esempio n. 6
0
 /// <summary>
 /// Begins an accelerator task.
 /// </summary>
 /// <remarks>
 /// This member is abstract; derived classes provide the actual implementation.
 /// </remarks>
 /// <param name="task">The task to launch.</param>
 protected abstract void BeginLaunch(CPUAcceleratorTask task);
Esempio n. 7
0
        /// <summary>
        /// Entry point for a single processing thread.
        /// </summary>
        /// <param name="arg">The absolute thread index.</param>
        /// <remarks>
        /// The thread derives its index within the multiprocessor, its lane index
        /// and its warp index from the absolute thread index, binds the thread,
        /// warp and group contexts once and then processes tasks in a loop until
        /// the accelerator reports that no further task is available.
        /// </remarks>
        private void ExecuteThread(object arg)
        {
            // Get the current thread information
            int absoluteThreadIndex = (int)arg;
            int threadIdx           = absoluteThreadIndex % MaxNumThreadsPerMultiprocessor;

            bool isMainThread = threadIdx == 0;

            // Setup a new thread context for this thread and initialize the lane index
            int laneIdx       = threadIdx % WarpSize;
            int warpIdx       = threadIdx / WarpSize;
            var threadContext = new CPURuntimeThreadContext(laneIdx, warpIdx)
            {
                LinearGroupIndex = threadIdx
            };

            threadContext.MakeCurrent();

            // Setup the current warp context as it always stays the same
            var warpContext = warpContexts[warpIdx];

            warpContext.MakeCurrent();

            // Setup the current group context as it always stays the same
            groupContext.MakeCurrent();

            CPUAcceleratorTask task = null;

            for (; ;)
            {
                // Get a new task to execute (if any); a false result ends the loop
                if (!Accelerator.WaitForTask(ref task))
                {
                    break;
                }

                // Setup the current group index
                threadContext.GroupIndex = Stride3D.DenseXY.ReconstructFromElementIndex(
                    threadIdx,
                    task.GroupDim);

                // Wait at the processor barrier before processing starts
                Thread.MemoryBarrier();
                processorBarrier.SignalAndWait();

                try
                {
                    // If we are an active group thread
                    int groupSize = task.GroupDim.Size;
                    if (threadIdx < groupSize)
                    {
                        try
                        {
                            var launcher = task.KernelExecutionDelegate;

                            // Split the grid into different chunks that will be processed
                            // by the available multiprocessors
                            int linearGridDim = task.GridDim.Size;
                            int gridChunkSize = IntrinsicMath.DivRoundUp(
                                linearGridDim,
                                Accelerator.NumMultiprocessors);
                            int gridOffset    = gridChunkSize * ProcessorIndex;
                            int linearUserDim = task.TotalUserDim.Size;
                            for (
                                int i = gridOffset, e = gridOffset + gridChunkSize;
                                i < e;
                                ++i)
                            {
                                BeginThreadProcessing();
                                try
                                {
                                    // Setup the current grid index
                                    threadContext.GridIndex = Stride3D.DenseXY
                                                              .ReconstructFromElementIndex(
                                        i,
                                        task.GridDim);

                                    // Invoke the actual kernel launcher for indices
                                    // that lie inside the user-defined dimension
                                    int globalIndex = i * groupSize + threadIdx;
                                    if (globalIndex < linearUserDim)
                                    {
                                        launcher(task, globalIndex);
                                    }
                                }
                                finally
                                {
                                    EndThreadProcessing();
                                }
                            }
                        }
                        finally
                        {
                            // This thread has already finished processing
                            FinishThreadProcessing();
                        }
                    }
                }
                finally
                {
                    // Wait at the processor barrier, even if the kernel invocation
                    // above has thrown an exception
                    processorBarrier.SignalAndWait();

                    // If we reach this point and we are the main thread, notify the
                    // parent accelerator instance
                    if (isMainThread)
                    {
                        Accelerator.FinishTaskProcessing();
                    }
                }
            }
        }