/// <summary>
/// Sorts an array of unsigned integers in parallel.
/// </summary>
/// <remarks>
/// Processes the keys two bits at a time, starting from the least significant bit.
/// Each pass dispatches the block-local sort kernel, scans the per-block sums and then
/// dispatches the global shuffle kernel. Buffer bindings for the radix-sort kernels are
/// not set here; presumably they are bound elsewhere (TODO confirm).
/// </remarks>
/// <param name="maxShiftWidth">
/// Exclusive upper bound for the bit offset of a pass. Passes run for shift widths
/// 0, 2, 4, ... while the shift width is below this value, so the default of 32
/// yields 16 passes. A value of 0 or less performs no pass at all.
/// </param>
public void Sort(int maxShiftWidth = 32)
{
    Profiler.BeginSample("RadixSort");
    try
    {
        // Reset all working buffers before the first pass.
        ComputeShaderUtil.ZeroOut(ref cb_sortTemp, _dataSize);
        ComputeShaderUtil.ZeroOut(ref cb_indexTemp, _dataSize);
        ComputeShaderUtil.ZeroOut(ref cb_prefixSums, _dataSize);
        ComputeShaderUtil.ZeroOut(ref cb_blockSums, _blockSumsSize);
        ComputeShaderUtil.ZeroOut(ref cb_scanBlockSums, _blockSumsSize);

        // For every 2 bits from LSB to MSB.
        for (int shiftWidth = 0; shiftWidth < maxShiftWidth; shiftWidth += 2)
        {
            // Block-wise radix sort (write blocks back to global memory).
            cs_radixSort.SetInt(PropertyID.shiftWidth, shiftWidth);
            cs_radixSort.Dispatch(kn_radixSortLocal, _sortGridSize, 1, 1);

            // Scan the global block sum array. The output buffer is cleared first so
            // that results of the previous pass cannot leak into this one.
            ComputeShaderUtil.ZeroOut(ref cb_scanBlockSums, _blockSumsSize);
            _blellochSumScan.Scan(ref cb_blockSums, ref cb_scanBlockSums, _blockSumsSize);

            // Scatter/shuffle the block-wise sorted array to its final positions.
            cs_radixSort.Dispatch(kn_globalShuffle, _sortGridSize, 1, 1);
        }
    }
    finally
    {
        // Always close the profiler sample, even if a dispatch or the scan throws,
        // so that BeginSample/EndSample stay balanced.
        Profiler.EndSample();
    }
}
/// <summary>
/// Runs a parallel sum scan over <paramref name="cb_in"/> and writes the result to
/// <paramref name="cb_out"/>.
/// </summary>
/// <remarks>
/// Three stages: (1) every thread group scans its own slice and records its total in a
/// block-sum buffer, (2) the block sums themselves are scanned, either with a single
/// group or by recursing into this method, and (3) the scanned block sums are added back
/// onto the per-block results. The scan is presumably exclusive (TODO confirm against
/// the kernels).
/// </remarks>
/// <param name="cb_in">Buffer holding the values to scan.</param>
/// <param name="cb_out">Buffer receiving the scanned values.</param>
/// <param name="dataSize">Number of elements to scan; passed to the kernels as <c>len</c>.</param>
/// <param name="recurseNum">
/// Recursion depth. Selects the grid size and the scratch buffers used at this level;
/// external callers normally leave it at 0.
/// </param>
public void Scan(
    ref ComputeBuffer cb_in,
    ref ComputeBuffer cb_out,
    int dataSize,
    int recurseNum = 0
)
{
    Profiler.BeginSample("BlellochSumScan");
    try
    {
        int blellochGridSize = _gridSizes[recurseNum];

        ComputeBuffer cb_sumScanBlockSum = cb_sumScanBlockSums[recurseNum];
        ComputeShaderUtil.ZeroOut(ref cb_sumScanBlockSum, blellochGridSize);

        // Sum scan the data allocated to each block.
        cs_blellochSumScan.SetInt(PropertyID.len, dataSize);
        cs_blellochSumScan.SetBuffer(kn_preSumScan, BufferID.cb_out, cb_out);
        cs_blellochSumScan.SetBuffer(kn_preSumScan, BufferID.cb_in, cb_in);
        cs_blellochSumScan.SetBuffer(kn_preSumScan, BufferID.cb_blockSums, cb_sumScanBlockSum);
        cs_blellochSumScan.Dispatch(kn_preSumScan, blellochGridSize, 1, 1);

        // Sum scan the total sums produced by each block.
        // If the number of block sums fits within Graphics.S_BLOCK_SZ, a single thread
        // group is dispatched to scan them.
        // NOTE(review): the original comment gave this threshold as
        // "2 * Graphics.M_BLOCK_SZ" - confirm that S_BLOCK_SZ equals that value.
        if (blellochGridSize <= Graphics.S_BLOCK_SZ)
        {
            // This one-group scan has no use for block sums, so they go to a dummy buffer.
            ComputeShaderUtil.ZeroOut(ref cb_dummyGrpSums, 1);

            // Scan from a copy so that input and output are distinct buffers.
            ComputeBuffer cb_preSumScanTemp = cb_preSumScanTemps[recurseNum];
            ComputeShaderUtil.CopyBuffer(ref cb_sumScanBlockSum, ref cb_preSumScanTemp, blellochGridSize);

            cs_blellochSumScan.SetInt(PropertyID.len, blellochGridSize);
            cs_blellochSumScan.SetBuffer(kn_preSumScan, BufferID.cb_out, cb_sumScanBlockSum);
            cs_blellochSumScan.SetBuffer(kn_preSumScan, BufferID.cb_in, cb_preSumScanTemp);
            cs_blellochSumScan.SetBuffer(kn_preSumScan, BufferID.cb_blockSums, cb_dummyGrpSums);
            cs_blellochSumScan.Dispatch(kn_preSumScan, 1, 1, 1);
        }
        else
        {
            // Otherwise recurse on this same function, as the block sums need the
            // full-blown scan themselves.
            ComputeBuffer cb_inBlockSum = cb_inBlockSums[recurseNum];
            ComputeShaderUtil.CopyBuffer(ref cb_sumScanBlockSum, ref cb_inBlockSum, blellochGridSize);
            Scan(ref cb_inBlockSum, ref cb_sumScanBlockSum, blellochGridSize, recurseNum + 1);
        }

        // Snapshot the per-block scan results so the add kernel reads from a buffer
        // other than the one it writes to.
        ComputeBuffer cb_addBlockSumsTemp = cb_addBlockSumsTemps[recurseNum];
        ComputeShaderUtil.CopyBuffer(ref cb_out, ref cb_addBlockSumsTemp, dataSize);

        // Add each block's total sum to its scan output in order to get the final,
        // global scanned array. "len" is set again because the block-sum scan above
        // overwrote it.
        cs_blellochSumScan.SetInt(PropertyID.len, dataSize);
        cs_blellochSumScan.SetBuffer(kn_addBlockSums, BufferID.cb_out, cb_out);
        cs_blellochSumScan.SetBuffer(kn_addBlockSums, BufferID.cb_in, cb_addBlockSumsTemp);
        cs_blellochSumScan.SetBuffer(kn_addBlockSums, BufferID.cb_blockSums, cb_sumScanBlockSum);
        cs_blellochSumScan.Dispatch(kn_addBlockSums, blellochGridSize, 1, 1);
    }
    finally
    {
        // Always close the profiler sample (also on every recursion level) so that
        // BeginSample/EndSample stay balanced when an exception is thrown.
        Profiler.EndSample();
    }
}
/// <summary>
/// Verifies the parallel sum scan against a serial exclusive sum scan computed on the CPU
/// over <c>ARRAY_COUNT</c> random unsigned integers.
/// </summary>
public void BlellochSumScanTest()
{
    uint[] array = new uint[ARRAY_COUNT];
    uint[] scannedArray = new uint[ARRAY_COUNT];
    for (int i = 0; i < ARRAY_COUNT; i++)
    {
        array[i] = GenerateRandomUInt();
    }

    // Declared outside the try block so that the finally block can release whatever
    // was successfully created, even when an assertion fails part-way through.
    ComputeBuffer cb_in = null;
    ComputeBuffer cb_out = null;
    BlellochSumScan blellochSumScan = null;

    try
    {
        cb_in = new ComputeBuffer(ARRAY_COUNT, StrideSize.s_uint);
        cb_out = new ComputeBuffer(ARRAY_COUNT, StrideSize.s_uint);

        ComputeShaderUtil.InitKernels();
        BlellochSumScan.InitKernels();

        cb_in.SetData(array);
        ComputeShaderUtil.ZeroOut(ref cb_out, ARRAY_COUNT);

        blellochSumScan = new BlellochSumScan(ARRAY_COUNT);
        blellochSumScan.Scan(ref cb_in, ref cb_out, ARRAY_COUNT);

        cb_out.GetData(scannedArray);

        // Using the serial exclusive sum scan method to make sure that the parallel
        // method works.
        uint sum = 0;
        for (int i = 0; i < ARRAY_COUNT; i++)
        {
            Assert.AreEqual(sum, scannedArray[i]);

            // The running sum of random uints is expected to wrap around; keep that
            // explicit so the test does not throw in a checked build.
            unchecked
            {
                sum += array[i];
            }
        }
    }
    finally
    {
        if (cb_in != null)
        {
            cb_in.Dispose();
        }

        if (cb_out != null)
        {
            cb_out.Dispose();
        }

        if (blellochSumScan != null)
        {
            blellochSumScan.Dispose();
        }
    }
}