/// <summary>Sorts an array of unsigned integers in parallel.</summary>
        /// <param name="maxShiftWidth">
        /// Exclusive upper bound for the bit offset handed to the sort kernels. One pass
        /// runs for every even offset 0, 2, 4, ... below this value, so the default of 32
        /// yields 16 passes; zero or a negative value runs no pass at all.
        /// </param>
        /// <remarks>
        /// The profiler sample is closed in a <c>finally</c> block so that an exception
        /// thrown by any dispatch or helper call cannot leave the sample open.
        /// </remarks>
        public void Sort(int maxShiftWidth = 32)
        {
            Profiler.BeginSample("RadixSort");
            try
            {
                // Reset every scratch buffer before the first pass.
                ComputeShaderUtil.ZeroOut(ref cb_sortTemp, _dataSize);
                ComputeShaderUtil.ZeroOut(ref cb_indexTemp, _dataSize);
                ComputeShaderUtil.ZeroOut(ref cb_prefixSums, _dataSize);
                ComputeShaderUtil.ZeroOut(ref cb_blockSums, _blockSumsSize);
                ComputeShaderUtil.ZeroOut(ref cb_scanBlockSums, _blockSumsSize);

                // for every 2 bits from LSB to MSB:
                for (int shiftWidth = 0; shiftWidth < maxShiftWidth; shiftWidth += 2)
                {
                    // block-wise radix sort (write blocks back to global memory)
                    cs_radixSort.SetInt(PropertyID.shiftWidth, shiftWidth);
                    cs_radixSort.Dispatch(kn_radixSortLocal, _sortGridSize, 1, 1);

                    // scan global block sum array; the scan output is cleared first
                    // so each pass starts from a clean destination buffer
                    ComputeShaderUtil.ZeroOut(ref cb_scanBlockSums, _blockSumsSize);
                    _blellochSumScan.Scan(ref cb_blockSums, ref cb_scanBlockSums, _blockSumsSize);

                    // scatter/shuffle block-wise sorted array to final positions
                    cs_radixSort.Dispatch(kn_globalShuffle, _sortGridSize, 1, 1);
                }
            }
            finally
            {
                // Keep BeginSample/EndSample balanced even when a pass throws.
                Profiler.EndSample();
            }
        }
// Example no. 2
// 0
        /// <summary>
        /// Runs the Blelloch sum-scan kernels over <paramref name="cb_in"/> and leaves the
        /// scanned values in <paramref name="cb_out"/>.
        /// </summary>
        /// <param name="cb_in">Buffer bound as the scan input.</param>
        /// <param name="cb_out">Buffer bound as the scan output.</param>
        /// <param name="dataSize">Element count passed to the kernels as <c>len</c>.</param>
        /// <param name="recurseNum">
        /// Recursion level. It selects the grid size and the per-level scratch buffers;
        /// the method increments it when it recurses on the block sums. External callers
        /// presumably leave it at the default of 0 (NOTE(review): confirm).
        /// </param>
        /// <remarks>
        /// The profiler sample is closed in a <c>finally</c> block so that an exception at
        /// any recursion level cannot leave samples open.
        /// </remarks>
        public void Scan(
            ref ComputeBuffer cb_in, ref ComputeBuffer cb_out,
            int dataSize, int recurseNum = 0
            )
        {
            Profiler.BeginSample("BlellochSumScan");
            try
            {
                int blellochGridSize = _gridSizes[recurseNum];

                ComputeBuffer cb_sumScanBlockSum = cb_sumScanBlockSums[recurseNum];

                ComputeShaderUtil.ZeroOut(ref cb_sumScanBlockSum, blellochGridSize);

                // sum scan data allocated to each block
                cs_blellochSumScan.SetInt(PropertyID.len, dataSize);
                cs_blellochSumScan.SetBuffer(kn_preSumScan, BufferID.cb_out, cb_out);
                cs_blellochSumScan.SetBuffer(kn_preSumScan, BufferID.cb_in, cb_in);
                cs_blellochSumScan.SetBuffer(kn_preSumScan, BufferID.cb_blockSums, cb_sumScanBlockSum);
                cs_blellochSumScan.Dispatch(kn_preSumScan, blellochGridSize, 1, 1);

                // sum scan total sums produced by each block
                // use basic implementation if number of total sums is <= Graphics.S_BLOCK_SZ
                // (this requires only one block to do the scan)
                // NOTE(review): this comment previously said "2 * Graphics.M_BLOCK_SZ";
                // confirm that S_BLOCK_SZ is the intended threshold.
                if (blellochGridSize <= Graphics.S_BLOCK_SZ)
                {
                    ComputeShaderUtil.ZeroOut(ref cb_dummyGrpSums, 1);

                    // The block sums are scanned back into their own buffer, so the
                    // kernel reads them from a separate copy.
                    ComputeBuffer cb_preSumScanTemp = cb_preSumScanTemps[recurseNum];
                    ComputeShaderUtil.CopyBuffer(ref cb_sumScanBlockSum, ref cb_preSumScanTemp, blellochGridSize);

                    cs_blellochSumScan.SetInt(PropertyID.len, blellochGridSize);
                    cs_blellochSumScan.SetBuffer(kn_preSumScan, BufferID.cb_out, cb_sumScanBlockSum);
                    cs_blellochSumScan.SetBuffer(kn_preSumScan, BufferID.cb_in, cb_preSumScanTemp);
                    cs_blellochSumScan.SetBuffer(kn_preSumScan, BufferID.cb_blockSums, cb_dummyGrpSums);
                    cs_blellochSumScan.Dispatch(kn_preSumScan, 1, 1, 1);
                }
                else // else, recurse on this same function as you'll need the full-blown scan for the block sums
                {
                    ComputeBuffer cb_inBlockSum = cb_inBlockSums[recurseNum];
                    ComputeShaderUtil.CopyBuffer(ref cb_sumScanBlockSum, ref cb_inBlockSum, blellochGridSize);
                    Scan(ref cb_inBlockSum, ref cb_sumScanBlockSum, blellochGridSize, recurseNum + 1);
                }

                ComputeBuffer cb_addBlockSumsTemp = cb_addBlockSumsTemps[recurseNum];

                // cb_out is both source and destination of the final step, so the
                // add kernel reads the per-block results from a copy.
                ComputeShaderUtil.CopyBuffer(ref cb_out, ref cb_addBlockSumsTemp, dataSize);
                // add each block's total sum to its scan output in order to get the final, global scanned array
                cs_blellochSumScan.SetInt(PropertyID.len, dataSize);
                cs_blellochSumScan.SetBuffer(kn_addBlockSums, BufferID.cb_out, cb_out);
                cs_blellochSumScan.SetBuffer(kn_addBlockSums, BufferID.cb_in, cb_addBlockSumsTemp);
                cs_blellochSumScan.SetBuffer(kn_addBlockSums, BufferID.cb_blockSums, cb_sumScanBlockSum);
                cs_blellochSumScan.Dispatch(kn_addBlockSums, blellochGridSize, 1, 1);
            }
            finally
            {
                // Keep BeginSample/EndSample balanced even when a step throws.
                Profiler.EndSample();
            }
        }
        /// <summary>
        /// Scans <c>ARRAY_COUNT</c> random values with <c>BlellochSumScan</c> and checks
        /// every output element against a serial exclusive sum scan computed on the CPU.
        /// </summary>
        /// <remarks>
        /// The GPU buffers and the scanner are released in a <c>finally</c> block so that a
        /// failed assertion or a throwing GPU call does not leak them.
        /// </remarks>
        public void BlellochSumScanTest()
        {
            uint[] array        = new uint[ARRAY_COUNT];
            uint[] scannedArray = new uint[ARRAY_COUNT];
            for (int i = 0; i < ARRAY_COUNT; i++)
            {
                array[i] = GenerateRandomUInt();
            }

            // Declared outside the try block so the finally block can release
            // whatever was actually created.
            ComputeBuffer cb_in  = null;
            ComputeBuffer cb_out = null;
            BlellochSumScan blellochSumScan = null;

            try
            {
                cb_in  = new ComputeBuffer(ARRAY_COUNT, StrideSize.s_uint);
                cb_out = new ComputeBuffer(ARRAY_COUNT, StrideSize.s_uint);

                ComputeShaderUtil.InitKernels();
                BlellochSumScan.InitKernels();

                cb_in.SetData(array);
                ComputeShaderUtil.ZeroOut(ref cb_out, ARRAY_COUNT);

                blellochSumScan = new BlellochSumScan(ARRAY_COUNT);

                blellochSumScan.Scan(ref cb_in, ref cb_out, ARRAY_COUNT);

                cb_out.GetData(scannedArray);

                // using serial exclusive sum scan method to make sure that the parallel method works
                // NOTE(review): the running sum relies on uint wraparound matching the
                // GPU result; confirm the project is not compiled with checked arithmetic.
                uint sum = 0;

                for (int i = 0; i < ARRAY_COUNT; i++)
                {
                    Assert.AreEqual(sum, scannedArray[i]);
                    sum += array[i];
                }
            }
            finally
            {
                if (cb_in != null)
                {
                    cb_in.Dispose();
                }

                if (cb_out != null)
                {
                    cb_out.Dispose();
                }

                if (blellochSumScan != null)
                {
                    blellochSumScan.Dispose();
                }
            }
        }