예제 #1
0
파일: RNG.cs 프로젝트: m4rs-mt/ILGPU
        /// <summary>
        /// Resolves the entry of <c>randomProviders</c> that is assigned to the
        /// warp of the calling thread.
        /// </summary>
        /// <returns>A reference to the selected provider entry.</returns>
        private readonly ref TRandomProvider GetRandomProvider()
        {
            // Provider slots reserved per group: the group size divided by the
            // warp size (rounded up, going by the helper's name).
            int slotsPerGroup = XMath.DivRoundUp(
                Group.Dimension.Size,
                Warp.WarpSize);

            // Warp index of the calling thread relative to its own group.
            int warpInGroup = Warp.ComputeWarpIdx(Group.LinearIndex);

            // Slots are laid out group after group, one slot per warp.
            int slot = Grid.LinearIndex * slotsPerGroup + warpInGroup;

            Trace.Assert(
                slot < randomProviders.Length,
                "Current warp does not have a valid RNG provider");
            return ref randomProviders[slot];
        }
예제 #2
0
        /// <summary>
        /// Combines the values of the calling threads with the given reduction
        /// and hands the combined result back to every caller.
        /// </summary>
        /// <typeparam name="T">The element type.</typeparam>
        /// <typeparam name="TReduction">The reduction operation.</typeparam>
        /// <param name="value">The contribution of the calling thread.</param>
        /// <returns>
        /// The result of applying the reduction to all four shared-memory banks.
        /// </returns>
        public static T AllReduce<T, TReduction>(T value)
            where T : unmanaged
            where TReduction : IScanReduceOperation<T>
        {
            // The bank count is fixed: the final combination step below is
            // written out by hand for exactly four banks.
            const int NumMemoryBanks = 4;
            var banks = SharedMemory.Allocate<T>(NumMemoryBanks);

            var warp = Warp.ComputeWarpIdx(Group.IdxX);
            var lane = Warp.LaneIdx;

            TReduction op = default;

            // The first warp resets every bank to the identity value.
            if (warp == 0)
            {
                int bank = lane;
                while (bank < NumMemoryBanks)
                {
                    banks[bank] = op.Identity;
                    bank += Warp.WarpSize;
                }
            }
            Group.Barrier();

            // Reduce within the warp first; lane 0 then merges the warp result
            // atomically into the bank selected by the warp index.
            T warpResult = PTXWarpExtensions.Reduce<T, TReduction>(value);
            if (lane == 0)
            {
                op.AtomicApply(ref banks[warp % NumMemoryBanks], warpResult);
            }
            Group.Barrier();

            // Fold the banks left to right: ((b0 . b1) . b2) . b3
            var result = op.Apply(
                op.Apply(
                    op.Apply(banks[0], banks[1]),
                    banks[2]),
                banks[3]);

            // Keep all threads here until everyone has read the banks.
            Group.Barrier();

            return result;
        }
예제 #3
0
        /// <summary>
        /// Implements a basic block-wide reduction algorithm.
        /// The algorithm is based on the one from https://devblogs.nvidia.com/parallelforall/faster-parallel-reductions-kepler/.
        /// </summary>
        /// <remarks>
        /// The reduction runs in two stages: every warp reduces its own values and
        /// lane 0 stores the partial result in <paramref name="sharedMemory"/>;
        /// after a group barrier the partial results are reloaded and reduced once
        /// more by the first warp. Only threads with warp index 0 execute the
        /// second stage, so threads of other warps return an intermediate value
        /// rather than the group-wide result.
        /// <paramref name="sharedMemory"/> needs one element per warp of the group.
        /// </remarks>
        /// <typeparam name="T">The element type.</typeparam>
        /// <typeparam name="TShuffleDown">The type of the shuffle logic.</typeparam>
        /// <typeparam name="TReduction">The type of the reduction logic.</typeparam>
        /// <param name="groupThreadIdx">The current group-thread index.</param>
        /// <param name="value">The current value.</param>
        /// <param name="shuffleDown">The shuffle logic.</param>
        /// <param name="reduction">The reduction logic.</param>
        /// <param name="sharedMemory">A view to a section of group-shared memory.</param>
        /// <returns>The reduced value.</returns>
        public static T Reduce<T, TShuffleDown, TReduction>(
            Index groupThreadIdx,
            T value,
            TShuffleDown shuffleDown,
            TReduction reduction,
            ArrayView<T> sharedMemory)
            where T : struct
            where TShuffleDown : IShuffleDown<T>
            where TReduction : IReduction<T>
        {
            Debug.Assert(Warp.WarpSize > 1, "This algorithm can only be used on architectures with a warp size > 1");

            var warpIdx = Warp.ComputeWarpIdx(groupThreadIdx);
            var laneIdx = Warp.LaneIdx;

            // Stage 1: reduce inside each warp.
            value = Warp.Reduce(value, shuffleDown, reduction);

            // Lane 0 publishes the partial result of its warp.
            if (laneIdx == 0)
            {
                Debug.Assert(warpIdx < sharedMemory.Length, "Shared memory out of range");
                sharedMemory[warpIdx] = value;
            }

            Group.Barrier();

            // Number of warps that published a partial result. The count is
            // rounded up: a truncating division yields 0 for groups smaller than
            // one warp (every thread would then load the neutral element and the
            // actual values would be lost) and drops the partial result of a
            // trailing, partially filled warp.
            var numWarps = (Group.Dimension.X + Warp.WarpSize - 1) / Warp.WarpSize;

            // Reload one partial result per warp; all other threads contribute
            // the neutral element to the second stage.
            if (groupThreadIdx < numWarps)
            {
                value = sharedMemory[laneIdx];
            }
            else
            {
                value = reduction.NeutralElement;
            }

            // Stage 2: the first warp reduces the partial results.
            if (warpIdx == 0)
            {
                value = Warp.Reduce(value, shuffleDown, reduction);
            }

            return value;
        }