public void Finish()
{
    // Fold the per-thread partial results of the whole group together and
    // keep the combined value in ReducedValue.
    ReducedValue = GroupExtensions.Reduce<T, TReduction>(ReducedValue);

    // Only the first thread of the group publishes the group result.
    if (!Group.IsFirstThread)
    {
        return;
    }

    // Merge the group result into the first output element through the
    // reduction's AtomicApply operation.
    var reduction = GetReduction();
    reduction.AtomicApply(ref Output[0], ReducedValue);
}
/// <summary>
/// Kernel body of the reduction: folds every element of the input view into
/// the first element of the output view.
/// </summary>
/// <typeparam name="T">The element type that is reduced.</typeparam>
/// <typeparam name="TReduction">
/// The reduction logic providing the identity value, the combine step and
/// the atomic combine step.
/// </typeparam>
/// <param name="input">The view whose elements are reduced.</param>
/// <param name="output">
/// The view whose first element receives the reduced value.
/// </param>
internal static void ReductionKernel<T, TReduction>(
    ArrayView<T> input,
    ArrayView<T> output)
    where T : unmanaged
    where TReduction : struct, IScanReduceOperation<T>
{
    TReduction operation = default;
    var step = GridExtensions.GridStrideLoopStride;

    // Thread-local phase: start from the identity and combine every element
    // this thread visits, advancing by the grid-stride-loop stride.
    var accumulator = operation.Identity;
    var index = Grid.GlobalIndex.X;
    while (index < input.Length)
    {
        accumulator = operation.Apply(accumulator, input[index]);
        index += step;
    }

    // Group phase: combine the partial results of all threads in the group.
    accumulator = GroupExtensions.Reduce<T, TReduction>(accumulator);

    // Publish phase: only the first thread of each group merges the group
    // result into output[0] via the reduction's AtomicApply operation.
    if (!Group.IsFirstThread)
    {
        return;
    }

    operation.AtomicApply(ref output[0], accumulator);
}
/// <summary> /// Performs the first radix-sort pass. /// </summary> /// <typeparam name="T">The element type.</typeparam> /// <typeparam name="TOperation">The radix-sort operation.</typeparam> /// <typeparam name="TSpecialization">The specialization type.</typeparam> /// <param name="view">The input view to use.</param> /// <param name="counter">The global counter view.</param> /// <param name="groupSize">The number of threads in the group.</param> /// <param name="numGroups">The number of virtually launched groups.</param> /// <param name="paddedLength">The padded length of the input view.</param> /// <param name="shift">The bit shift to use.</param> internal static void RadixSortKernel1 <T, TOperation, TSpecialization>( ArrayView <T> view, ArrayView <int> counter, SpecializedValue <int> groupSize, int numGroups, int paddedLength, int shift) where T : unmanaged where TOperation : struct, IRadixSortOperation <T> where TSpecialization : struct, IRadixSortSpecialization { TSpecialization specialization = default; var scanMemory = SharedMemory.Allocate <int>( groupSize * specialization.UnrollFactor); int gridIdx = Grid.IdxX; for ( int i = Grid.GlobalIndex.X; i < paddedLength; i += GridExtensions.GridStrideLoopStride) { bool inRange = i < view.Length; // Read value from global memory TOperation operation = default; T value = operation.DefaultValue; if (inRange) { value = view[i]; } var bits = operation.ExtractRadixBits( value, shift, specialization.UnrollFactor - 1); for (int j = 0; j < specialization.UnrollFactor; ++j) { scanMemory[Group.IdxX + groupSize * j] = 0; } if (inRange) { scanMemory[Group.IdxX + groupSize * bits] = 1; } Group.Barrier(); for (int j = 0; j < specialization.UnrollFactor; ++j) { var address = Group.IdxX + groupSize * j; scanMemory[address] = GroupExtensions.ExclusiveScan <int, AddInt32>(scanMemory[address]); } Group.Barrier(); if (Group.IdxX == Group.DimX - 1) { // Write counters to global memory for (int j = 0; j < specialization.UnrollFactor; 
++j) { ref var newOffset = ref scanMemory[Group.IdxX + groupSize * j]; newOffset += Utilities.Select(inRange & j == bits, 1, 0); counter[j * numGroups + gridIdx] = newOffset; } } Group.Barrier(); var gridSize = gridIdx * Group.DimX; Index1 pos = gridSize + scanMemory[Group.IdxX + groupSize * bits] - Utilities.Select(inRange & Group.IdxX == Group.DimX - 1, 1, 0); for (int j = 1; j <= bits; ++j) { pos += scanMemory[groupSize * j - 1] + Utilities.Select(j - 1 == bits, 1, 0); } // Pre-sort the current value into the corresponding segment if (inRange) { view[pos] = value; } Group.Barrier(); gridIdx += Grid.DimX; }