Exemple #1
0
        /// <summary>
        /// Launches the grouped 3D entry-point kernel once for every cubic group
        /// size (i, i, i), where i is a power of two and i * i * i does not exceed
        /// <c>Accelerator.MaxNumThreadsPerGroup</c>, and compares the buffer contents
        /// against a reference computed on the host.
        /// </summary>
        /// <param name="length">The edge length of the cubic grid.</param>
        public void GroupedIndex3EntryPoint(int length)
        {
            // Bound the group edge with exact integer arithmetic. Truncating
            // Math.Pow(n, 1.0 / 3.0) is off by one for perfect cubes such as 64 or
            // 512 (the floating-point result is 3.9999999999999996 or
            // 7.999999999999999), which would silently skip the largest valid group.
            // The product is evaluated as long so that it cannot wrap around.
            long maxGroupSize = Accelerator.MaxNumThreadsPerGroup;
            int numGridElements = length * length * length;

            for (int i = 1; (long)i * i * i <= maxGroupSize; i <<= 1)
            {
                int numGroupElements = i * i * i;

                var stride = new Index3(i, i, i);
                var extent = new KernelConfig(
                    new Index3(length, length, length),
                    stride);
                using var buffer = Accelerator.Allocate<int>(extent.Size);
                buffer.MemSetToZero(Accelerator.DefaultStream);
                Execute(extent, buffer.View, stride, extent.GridDim);

                // Host-side reference: every (grid index, group index) pair maps to
                // one linear slot, and that slot stores its own linear index.
                var expected = new int[extent.Size];
                for (int j = 0; j < numGridElements; ++j)
                {
                    var gridIdx = Index3.ReconstructIndex(j, extent.GridDim);
                    for (int k = 0; k < numGroupElements; ++k)
                    {
                        var groupIdx = Index3.ReconstructIndex(k, extent.GroupDim);
                        var idx = (gridIdx * stride + groupIdx).ComputeLinearIndex(
                            extent.GridDim);
                        expected[idx] = idx;
                    }
                }

                Verify(buffer, expected);
            }
        }
Exemple #2
0
             5, 6, 700_000)] // yz > int.MaxValue
        public void ReconstructIndex3(
            long linearIndex, int dimX, int dimY, int dimZ,
            int expectedX, int expectedY, int expectedZ)
        {
            // Arrange: the 3D extent against which the linear index is decomposed
            var dimension = new Index3(dimX, dimY, dimZ);

            // Act: turn the linear index back into its three components
            var reconstructed = Index3.ReconstructIndex(linearIndex, dimension);

            // Assert: every component has to match the expected coordinate
            Assert.Equal(expectedX, reconstructed.X);
            Assert.Equal(expectedY, reconstructed.Y);
            Assert.Equal(expectedZ, reconstructed.Z);
        }
        /// <summary>
        /// Entry point for a single processing thread.
        /// </summary>
        /// <remarks>
        /// The thread installs its thread, warp and group contexts once and then
        /// loops: it waits for a task that differs from the one it processed last,
        /// executes its share of the grid for that task, and synchronizes with the
        /// other threads before waiting again. The loop ends once <c>running</c>
        /// is observed to be <c>false</c>.
        /// </remarks>
        /// <param name="arg">The absolute thread index (a boxed <see cref="int"/>).</param>
        private void ExecuteThread(object arg)
        {
            // Get the current thread information: split the absolute index into the
            // index of the owning multiprocessor and the thread index within it
            int absoluteThreadIndex = (int)arg;
            int threadIdx           = absoluteThreadIndex % MaxNumThreadsPerMultiprocessor;
            var processorIdx        = absoluteThreadIndex / MaxNumThreadsPerMultiprocessor;

            var  processorBarrier = processorBarriers[processorIdx];
            bool isMainThread     = threadIdx == 0;

            // Setup a new thread context for this thread and initialize the lane index
            int laneIdx       = threadIdx % WarpSize;
            var threadContext = new CPURuntimeThreadContext(laneIdx)
            {
                LinearGroupIndex = threadIdx
            };

            threadContext.MakeCurrent();

            // Setup the current warp context as it always stays the same
            // NOTE(review): isMainWarpThread is never read in this method, and it is
            // derived from threadIdx rather than laneIdx - confirm whether it is
            // still needed.
            int  warpIdx          = threadIdx / WarpSize;
            bool isMainWarpThread = threadIdx == 0;
            var  warpContext      = warpContexts[processorIdx, warpIdx];

            warpContext.MakeCurrent();

            // Setup the current group context as it always stays the same
            var groupContext = groupContexts[processorIdx];

            groupContext.MakeCurrent();

            // The task processed in the previous iteration; used below to detect
            // that a different task has been published
            CPUAcceleratorTask task = null;

            for (; ;)
            {
                // Get a new task to execute
                lock (taskSynchronizationObject)
                {
                    // Sleep while there is no task or the visible task is the one
                    // that has just been processed. Note the non-short-circuiting
                    // '|' and '&': every operand is evaluated on each check.
                    // Monitor.Wait releases the lock while blocked; presumably the
                    // code that updates currentTask or running pulses this object.
                    while ((currentTask == null | currentTask == task) & running)
                    {
                        Monitor.Wait(taskSynchronizationObject);
                    }
                    // Shutdown request: leave the processing loop
                    if (!running)
                    {
                        break;
                    }
                    task = currentTask;
                }
                Debug.Assert(task != null, "Invalid task");

                // Setup the current group index
                threadContext.GroupIndex = Index3.ReconstructIndex(
                    threadIdx,
                    task.GroupDim);

                // Wait for all threads of all multiprocessors to arrive here
                // NOTE(review): the barrier is taken from
                // processorBarriers[processorIdx], so it looks like it only covers
                // the threads of this multiprocessor - confirm.
                Thread.MemoryBarrier();
                processorBarrier.SignalAndWait();

                // If we are an active group thread
                // (threads with threadIdx >= groupSize skip the kernel but still
                // take part in the barrier below)
                int groupSize = task.GroupDim.Size;
                if (threadIdx < groupSize)
                {
                    var launcher = task.KernelExecutionDelegate;

                    // Split the grid into different chunks that will be processed by the
                    // available multiprocessors
                    int linearGridDim = task.GridDim.Size;
                    int gridChunkSize = IntrinsicMath.DivRoundUp(
                        linearGridDim,
                        NumMultiprocessors);
                    int gridOffset    = gridChunkSize * processorIdx;
                    int linearUserDim = task.TotalUserDim.Size;
                    // The upper bound is not clamped to linearGridDim; indices that
                    // fall outside the user dimension are filtered by the
                    // globalIndex check below
                    for (int i = gridOffset, e = gridOffset + gridChunkSize; i < e; ++i)
                    {
                        groupContext.BeginThreadProcessing();

                        // Setup the current grid index
                        threadContext.GridIndex = Index3.ReconstructIndex(
                            i,
                            task.GridDim);

                        // Invoke the actual kernel launcher
                        int globalIndex = i * groupSize + threadIdx;
                        if (globalIndex < linearUserDim)
                        {
                            launcher(task, globalIndex);
                        }

                        groupContext.EndThreadProcessing();
                    }

                    // This thread has already finished processing
                    groupContext.FinishThreadProcessing();
                    warpContext.FinishThreadProcessing();
                }

                // Wait for all threads of all multiprocessors to arrive here
                processorBarrier.SignalAndWait();

                // If we reach this point and we are the main thread, notify the parent
                // accelerator instance
                // (only the thread with threadIdx == 0 of each multiprocessor
                // signals finishedEventPerMultiprocessor)
                if (isMainThread)
                {
                    finishedEventPerMultiprocessor.SignalAndWait();
                }
            }
        }
Exemple #4
0
 /// <summary>
 /// Reconstructs a 3D index from a linear index with respect to the given
 /// total dimension.
 /// </summary>
 /// <param name="totalDim">The dimension used for the reconstruction.</param>
 /// <param name="linearIndex">The linear index to reconstruct.</param>
 /// <returns>
 /// The value of <c>totalDim.ReconstructIndex(linearIndex)</c>.
 /// </returns>
 private static Index3 Reconstruct3DIndex(Index3 totalDim, int linearIndex)
 {
     return totalDim.ReconstructIndex(linearIndex);
 }