/// <summary>
        /// Folds the elements of <paramref name="input"/> with the given reduction,
        /// combines the per-thread results via <c>GroupExtensions.Reduce</c> and hands
        /// the outcome to a default-constructed <c>TReductionFinalizer</c>.
        /// </summary>
        /// <param name="index">The current grouped thread index.</param>
        /// <param name="input">The view whose elements are reduced.</param>
        /// <param name="output">The view that is passed on to the finalizer.</param>
        /// <param name="shuffleDown">The shuffle-down logic forwarded to the group reduction.</param>
        /// <param name="reduction">The reduction logic (neutral element and reduce step).</param>
        /// <param name="sharedMemory">
        /// The shared-memory view (declared with 32 elements) forwarded to the group reduction.
        /// </param>
        private static void Kernel(
            GroupedIndex index,
            ArrayView<T> input,
            ArrayView<T> output,
            TShuffleDown shuffleDown,
            TReduction reduction,

            [SharedMemory(32)]
            ArrayView<T> sharedMemory)
        {
            var step = GridExtensions.GridStrideLoopStride;
            var accumulator = reduction.NeutralElement;

            // Per-thread phase: start at this thread's global index and keep
            // advancing by the stride until the end of the input is passed.
            var position = index.ComputeGlobalIndex();
            while (position < input.Length)
            {
                accumulator = reduction.Reduce(accumulator, input[position]);
                position += step;
            }

            // Group phase: merge the per-thread partial results.
            accumulator = GroupExtensions.Reduce(
                index.GroupIdx,
                accumulator,
                shuffleDown,
                reduction,
                sharedMemory);

            // The finalizer decides how the reduced value ends up in the output view.
            var finalizer = default(TReductionFinalizer);
            finalizer.Finalize(index, output, accumulator, reduction);
        }
Beispiel #2
0
        /// <summary>
        /// Demonstrates a shared-memory array: every thread publishes one value into the
        /// shared array, and afterwards each thread sums up all values of its group.
        /// </summary>
        /// <param name="index">The current thread index.</param>
        /// <param name="dataView">The view providing the input values.</param>
        /// <param name="outputView">The view receiving the computed sum per thread.</param>
        /// <param name="sharedArray">Implicit shared-memory parameter that is handled by the runtime.</param>
        static void SharedMemoryArrayKernel(
            GroupedIndex index,
            ArrayView<int> dataView,
            ArrayView<int> outputView,

            // 128 elements of type int = 4 * 128 = 512 bytes of shared memory per group
            [SharedMemory(128)]
            ArrayView<int> sharedArray)
        {
            var globalIndex = index.ComputeGlobalIndex();

            // Threads beyond the end of the data view contribute a zero
            int loaded = 0;
            if (globalIndex < dataView.Length)
            {
                loaded = dataView[globalIndex];
            }

            // Publish the value in the slot that belongs to this thread
            sharedArray[index.GroupIdx] = loaded;

            // All slots have to be written before any thread starts reading them
            Group.Barrier();

            // Accumulate one slot per thread of the group
            int groupSize = Group.Dimension.X;
            int total = 0;
            for (int slot = 0; slot < groupSize; ++slot)
            {
                total += sharedArray[slot];
            }

            // Only in-range threads store their result
            if (globalIndex < outputView.Length)
            {
                outputView[globalIndex] = total;
            }
        }
Beispiel #3
0
        /// <summary>
        /// Explicitly grouped kernels receive an index type (first parameter) of type:
        /// <see cref="GroupedIndex"/>, <see cref="GroupedIndex2"/> or <see cref="GroupedIndex3"/>.
        /// Shared memory is only supported in explicitly-grouped kernel contexts.
        /// Shared-memory parameters are automatically handled by the runtime and have to be
        /// annotated with the SharedMemoryAttribute. Note that currently, the only supported
        /// shared-memory parameters are VariableViews and ArrayViews.
        /// This kernel computes the maximum of all in-range data values of a group and
        /// writes it to the output view.
        /// </summary>
        /// <param name="index">The current thread index.</param>
        /// <param name="dataView">The view providing the input values.</param>
        /// <param name="outputView">The view receiving the group maximum per thread.</param>
        /// <param name="sharedVariable">Implicit shared-memory parameter that is handled by the runtime.</param>
        static void SharedMemoryVariableKernel(
            GroupedIndex index,                // The grouped thread index (1D in this case)
            ArrayView <int> dataView,          // A view to a chunk of memory (1D in this case)
            ArrayView <int> outputView,        // A view to a chunk of memory (1D in this case)

            [SharedMemory]                     // Declares a single variable of type int in
            VariableView <int> sharedVariable) // shared memory (= 4 bytes)
        {
            // Compute the global 1D index for accessing the data view
            var globalIndex = index.ComputeGlobalIndex();

            // Initialize shared memory with the identity element of the maximum
            // operation. Seeding with 0 would yield 0 for a group that contains
            // negative values only.
            if (index.GroupIdx.IsFirst)
            {
                sharedVariable.Value = int.MinValue;
            }
            // Wait for the initialization to complete
            Group.Barrier();

            if (globalIndex < dataView.Length)
            {
                Atomic.Max(sharedVariable, dataView[globalIndex]);
            }

            // Wait for all threads to complete the maximum computation process
            Group.Barrier();

            // Write the maximum of all values into the output view
            if (globalIndex < outputView.Length)
            {
                outputView[globalIndex] = sharedVariable.Value;
            }
        }
Beispiel #4
0
        /// <summary>
        /// Explicitly grouped kernels receive an index type (first parameter) of type:
        /// <see cref="GroupedIndex"/>, <see cref="GroupedIndex2"/> or <see cref="GroupedIndex3"/>.
        /// Shared memory is only supported in explicitly-grouped kernel contexts and can be accessed
        /// via the static <see cref="ILGPU.SharedMemory"/> class.
        /// </summary>
        /// <param name="index">The current thread index.</param>
        /// <param name="dataView">The view pointing to our memory buffer.</param>
        static void SharedMemoryVariableKernel(
            GroupedIndex index,               // The grouped thread index (1D in this case)
            ArrayView <int> dataView,         // A view to a chunk of memory (1D in this case)
            ArrayView <int> outputView)       // A view to a chunk of memory (1D in this case)
        {
            // Compute the global 1D index for accessing the data view
            var globalIndex = index.ComputeGlobalIndex();

            // 'Allocate' a single shared memory variable of type int (= 4 bytes)
            ref int sharedVariable = ref ILGPU.SharedMemory.Allocate <int>();
Beispiel #5
0
        /// <summary>
        /// Stores each thread's global index at the matching position of
        /// <paramref name="data"/> and then issues <c>MemoryFence.GroupLevel()</c>.
        /// </summary>
        /// <param name="index">The current grouped thread index.</param>
        /// <param name="data">The view that receives the global indices.</param>
        internal static void MemoryFenceGroupLevelKernel(
            GroupedIndex index,
            ArrayView<int> data)
        {
            var globalIndex = index.ComputeGlobalIndex();

            // No range check is performed here: the write assumes the index is valid.
            data[globalIndex] = globalIndex;

            MemoryFence.GroupLevel();
        }
Beispiel #6
0
 /// <summary>
 /// Performs a grid-stride loop for a grouped index by forwarding the
 /// computed global index to the index-based overload.
 /// </summary>
 /// <typeparam name="TLoopBody">The type of the loop body.</typeparam>
 /// <param name="index">The grouped index from which the global start index is computed.</param>
 /// <param name="length">The global length.</param>
 /// <param name="loopBody">The loop body.</param>
 public static void GridStrideLoop<TLoopBody>(
     GroupedIndex index,
     Index length,
     ref TLoopBody loopBody)
     where TLoopBody : struct, IGridStrideLoopBody
 {
     // Translate the grouped index into the global start index first
     var startIndex = index.ComputeGlobalIndex();

     GridStrideLoop(startIndex, length, ref loopBody);
 }
Beispiel #7
0
 /// <summary>
 /// Performs a functional grid-stride loop for a grouped index by forwarding
 /// the computed global index to the index-based overload.
 /// </summary>
 /// <typeparam name="T">The element type of the intermediate values.</typeparam>
 /// <typeparam name="TLoopBody">The type of the loop body.</typeparam>
 /// <param name="index">The grouped index from which the global start index is computed.</param>
 /// <param name="length">The global length.</param>
 /// <param name="input">The initial input value.</param>
 /// <param name="loopBody">The loop body.</param>
 /// <returns>The value returned by the index-based overload.</returns>
 public static T GridStrideLoop<T, TLoopBody>(
     GroupedIndex index,
     Index length,
     T input,
     TLoopBody loopBody)
     where T : struct
     where TLoopBody : struct, IGridStrideLoopBody<T>
 {
     // Translate the grouped index into the global start index first
     var startIndex = index.ComputeGlobalIndex();

     return GridStrideLoop(startIndex, length, input, loopBody);
 }
Beispiel #8
0
        /// <summary>
        /// Explicitly grouped kernels receive an index type (first parameter) of type:
        /// <see cref="GroupedIndex"/>, <see cref="GroupedIndex2"/> or <see cref="GroupedIndex3"/>.
        /// Note that you can use warp-shuffle functionality only within
        /// explicitly-grouped kernels.
        /// This kernel shuffles each thread's group index down by two lanes and stores
        /// the received value.
        /// </summary>
        /// <param name="index">The current thread index.</param>
        /// <param name="dataView">The view receiving the shuffled values.</param>
        static void ShuffleDownKernel(
            GroupedIndex index,
            ArrayView<int> dataView)
        {
            // The global 1D index addresses the target element in the data view
            var globalIndex = index.ComputeGlobalIndex();

            // Each thread contributes its own index within the group
            int laneValue = index.GroupIdx;

            // Native shuffle-down with a delta of 2 lanes
            int shuffled = Warp.ShuffleDown(laneValue, 2);

            dataView[globalIndex] = shuffled;
        }
Beispiel #9
0
        /// <summary>
        /// Demonstrates the use of a group-wide barrier. After the barrier, every
        /// in-range thread stores 1 if its data value exceeds the constant and 0 otherwise.
        /// </summary>
        /// <param name="index">The current thread index.</param>
        /// <param name="dataView">The view providing the input values.</param>
        /// <param name="outputView">The view receiving the comparison results.</param>
        /// <param name="constant">The uniform constant the values are compared against.</param>
        static void GroupedKernelBarrier(
            GroupedIndex index,
            ArrayView<int> dataView,
            ArrayView<int> outputView,
            int constant)
        {
            var globalIndex = index.ComputeGlobalIndex();

            // Every thread of the group has to arrive here before any of them continues
            Group.Barrier();

            if (globalIndex < dataView.Length)
            {
                int flag;
                if (dataView[globalIndex] > constant)
                {
                    flag = 1;
                }
                else
                {
                    flag = 0;
                }

                outputView[globalIndex] = flag;
            }
        }
Beispiel #10
0
        /// <summary>
        /// Explicitly-grouped kernels receive an index type (first parameter) of type:
        /// <see cref="GroupedIndex"/>, <see cref="GroupedIndex2"/> or <see cref="GroupedIndex3"/>.
        /// These kernel types expose the underlying blocking/grouping semantics of a GPU
        /// and allow for highly efficient implementation of kernels for different GPUs.
        /// The semantics of these kernels are equivalent to kernel implementations in CUDA.
        /// An explicitly-grouped kernel can be loaded with:
        /// - LoadImplicitlyGroupedKernel
        /// - LoadAutoGroupedKernel.
        ///
        /// Note that you must not use warp-shuffle functionality within implicitly grouped
        /// kernels since not all lanes of a warp are guaranteed to participate in the warp shuffle.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the loader names listed above look inconsistent with an
        /// explicitly-grouped kernel - confirm them against the accelerator loader API.
        /// </remarks>
        /// <param name="index">The current thread index.</param>
        /// <param name="dataView">The view pointing to our memory buffer.</param>
        /// <param name="constant">A uniform constant that is added to each global index.</param>
        static void GroupedKernel(
            GroupedIndex index,
            ArrayView<int> dataView,
            int constant)
        {
            // This explicitly grouped kernel implements the same functionality
            // as MyKernel in the ImplicitlyGroupedKernels sample.

            // The global 1D index selects the element this thread is responsible for
            var globalIndex = index.ComputeGlobalIndex();

            // Threads beyond the end of the view have nothing to write
            if (globalIndex < dataView.Length)
            {
                dataView[globalIndex] = globalIndex + constant;
            }
        }
Beispiel #11
0
        /// <summary>
        /// Explicitly grouped kernels receive an index type (first parameter) of type:
        /// <see cref="GroupedIndex"/>, <see cref="GroupedIndex2"/> or <see cref="GroupedIndex3"/>.
        /// Note that you can use warp-shuffle functionality only within
        /// explicitly-grouped kernels.
        /// This kernel shuffles each thread's group index down by two lanes using a
        /// custom shuffle operation and stores the received value.
        /// </summary>
        /// <typeparam name="TShuffleOperation">The type of the shuffle operation.</typeparam>
        /// <param name="index">The current thread index.</param>
        /// <param name="dataView">The view receiving the shuffled values.</param>
        static void ShuffleDownKernel<TShuffleOperation>(
            GroupedIndex index,
            ArrayView<long> dataView)
            where TShuffleOperation : struct, IShuffleDown<long>
        {
            // The global 1D index addresses the target element in the data view
            var globalIndex = index.ComputeGlobalIndex();

            // Each thread contributes its own index within the group
            long laneValue = index.GroupIdx;

            // The shuffle logic is a default-constructed instance of the operation type
            var shuffle = default(TShuffleOperation);

            // Custom shuffle-down with a delta of 2 lanes
            long shuffled = shuffle.ShuffleDown(laneValue, 2);

            dataView[globalIndex] = shuffled;
        }
Beispiel #12
0
        /// <summary>
        /// Demonstrates a pre-defined warp-reduction functionality: every thread
        /// contributes the value 1 to an additive warp reduction and stores the result.
        /// </summary>
        /// <param name="index">The current thread index.</param>
        /// <param name="dataView">The view receiving the reduction results.</param>
        static void ReduceKernel(
            GroupedIndex index,
            ArrayView<int> dataView)
        {
            // The global 1D index addresses the target element in the data view
            var globalIndex = index.ComputeGlobalIndex();

            // Building blocks of the reduction: how values travel between
            // lanes and how two values are combined.
            var shuffleDown = new ShuffleDownInt32();
            var addition = new AddInt32();

            // Native warp-reduce over all values in the scope of a single warp.
            // Note that only the first lane of a warp will contain the reduced
            // value. If all lanes should receive the reduced value, use the
            // Warp.AllReduce<...> function.
            var reduced = Warp.Reduce(1, shuffleDown, addition);

            dataView[globalIndex] = reduced;
        }
Beispiel #13
0
        /// <summary>
        /// Demonstrates the use of a group-wide and-barrier: every in-range thread
        /// stores 1 if the predicate "value &gt; constant" held for all threads
        /// of its group and 0 otherwise.
        /// </summary>
        /// <param name="index">The current thread index.</param>
        /// <param name="dataView">The view providing the input values.</param>
        /// <param name="outputView">The view receiving the barrier results.</param>
        /// <param name="constant">The uniform constant the values are compared against.</param>
        static void GroupedKernelAndBarrier(
            GroupedIndex index,
            ArrayView<int> dataView,
            ArrayView<int> outputView,
            int constant)
        {
            var globalIndex = index.ComputeGlobalIndex();

            // Out-of-range threads use a substitute value that satisfies the
            // predicate below, so they do not influence the group-wide result.
            int value = constant + 1;
            if (globalIndex < dataView.Length)
            {
                value = dataView[globalIndex];
            }

            // BarrierAnd waits until all threads in the group reach this point and
            // returns true iff the predicate evaluates to true for all of them.
            bool allAbove = Group.BarrierAnd(value > constant);

            if (globalIndex < outputView.Length)
            {
                int flag = 0;
                if (allAbove)
                {
                    flag = 1;
                }

                outputView[globalIndex] = flag;
            }
        }