Exemplo n.º 1
0
        // ReSharper disable once SuggestBaseTypeForParameter
        /// <summary>
        /// GPU kernel: every block folds up to <c>2 * blockDim.x</c> consecutive elements of
        /// <paramref name="array"/> with <paramref name="op"/> and stores its partial result in
        /// <c>result[blockIdx.x]</c>.
        /// </summary>
        /// <remarks>
        /// Stages: (1) each thread loads one element and, if present, combines it with the element
        /// <c>blockDim.x</c> positions further; (2) a tree reduction in shared memory halves the
        /// number of working threads until only the first warp is left; (3) the first warp finishes
        /// with register shuffles.
        /// Every combine is guarded by the index of the partner's first element, so a block that is
        /// only partially covered by <paramref name="length"/> never folds out-of-range data into
        /// its result. A block that lies completely beyond <paramref name="length"/> writes
        /// <c>default(T)</c>.
        /// Launch requirements visible from the code: the dynamic shared memory must hold at least
        /// <c>blockDim.x</c> elements of <typeparamref name="T"/>; the halving loop and the fixed
        /// shuffle offsets 16..1 assume <c>blockDim.x</c> is a power of two, at least one warp wide,
        /// and a warp of 32 lanes.
        /// </remarks>
        /// <typeparam name="T">Element type being reduced.</typeparam>
        /// <param name="array">Device pointer to the input elements.</param>
        /// <param name="length">Number of valid elements reachable through <paramref name="array"/>.</param>
        /// <param name="result">Receives one partial result per block, indexed by <c>blockIdx.x</c>.</param>
        /// <param name="op">
        /// Binary combine function. Elements are paired out of sequence (tree order), so it should be
        /// associative and commutative.
        /// </param>
        private static void KernelSequentialReduceIdleThreadsWarp <T>(deviceptr <T> array, int length, T[] result, Func <T, T, T> op)
        {
            var shared = __shared__.ExternArray <T>();

            var tid = threadIdx.x;
            var bid = blockIdx.x;
            var bdm = blockDim.x;
            var gid = 2 * bdm * bid + tid;

            // Load stage. Threads past the end of the input must not touch global memory; their
            // placeholder value is never consumed because every later combine is length-guarded.
            var value = default(T);

            if (gid < length)
            {
                value = array[gid];

                if (gid + bdm < length)
                {
                    value = op(value, array[gid + bdm]);
                }
            }

            shared[tid] = value;

            DeviceFunction.SyncThreads();

            // Shared-memory tree: stops while more than one warp of partial results remains.
            for (var s = bdm / 2; s > WarpSize; s >>= 1)
            {
                // 'gid + s' is the first element covered by the partner slot 'tid + s'.
                if (tid < s && gid + s < length)
                {
                    shared[tid] = op(shared[tid], shared[tid + s]);
                }

                // Barrier is outside the branch so that all threads of the block reach it.
                DeviceFunction.SyncThreads();
            }

            if (tid < WarpSize)
            {
                var acc = shared[tid];

                // Fold the second warp's partials in, if the block has one and they are in range.
                if (bdm >= 2 * WarpSize && gid + WarpSize < length)
                {
                    acc = op(acc, shared[tid + WarpSize]);
                }

                // Warp stage: every lane executes each shuffle (they are warp-collective), but a
                // lane only combines when the partner lane holds in-range data.
                var partner = DeviceFunction.ShuffleDown(acc, 16);
                if (gid + 16 < length)
                {
                    acc = op(acc, partner);
                }

                partner = DeviceFunction.ShuffleDown(acc, 8);
                if (gid + 8 < length)
                {
                    acc = op(acc, partner);
                }

                partner = DeviceFunction.ShuffleDown(acc, 4);
                if (gid + 4 < length)
                {
                    acc = op(acc, partner);
                }

                partner = DeviceFunction.ShuffleDown(acc, 2);
                if (gid + 2 < length)
                {
                    acc = op(acc, partner);
                }

                partner = DeviceFunction.ShuffleDown(acc, 1);
                if (gid + 1 < length)
                {
                    acc = op(acc, partner);
                }

                // Lane 0 now holds the block's partial result.
                if (tid == 0)
                {
                    result[bid] = acc;
                }
            }
        }
Exemplo n.º 2
0
        // ReSharper disable once SuggestBaseTypeForParameter
        /// <summary>
        /// GPU kernel: every thread accumulates several elements of <paramref name="array"/>,
        /// stepping by the total number of launched threads (<c>gridDim.x * blockDim.x</c>); the
        /// per-thread values are then reduced per warp with register shuffles and per block through
        /// a small shared buffer. One partial result per block is written to
        /// <c>result[blockIdx.x]</c>.
        /// </summary>
        /// <remarks>
        /// The per-thread accumulator starts at <c>default(T)</c>, so the kernel is only correct when
        /// that value is neutral for <paramref name="op"/> (see the Todo in the body).
        /// The block-level step combines exactly the warps present in the block, using a pairwise
        /// tree; for eight warps this is the association
        /// <c>op(op(op(w0,w1),op(w2,w3)), op(op(w4,w5),op(w6,w7)))</c>.
        /// The fixed shuffle offsets 16..1 assume a warp of 32 lanes; <c>blockDim.x</c> is expected
        /// to be a multiple of the warp size and to span at most 32 warps (the shared buffer size).
        /// </remarks>
        /// <typeparam name="T">Element type being reduced.</typeparam>
        /// <param name="array">Device pointer to the input elements.</param>
        /// <param name="length">Number of valid elements reachable through <paramref name="array"/>.</param>
        /// <param name="result">Device pointer receiving one partial result per block.</param>
        /// <param name="op">
        /// Binary combine function. Elements are combined out of sequence, so it should be
        /// associative and commutative.
        /// </param>
        private static void KernelSequentialReduceIdleThreadsWarpMultiple <T>(deviceptr <T> array, int length, deviceptr <T> result, Func <T, T, T> op)
        {
            var tid = threadIdx.x;
            var bid = blockIdx.x;
            var bdm = blockDim.x;
            var gid = bdm * bid + tid;

            // Distance between two consecutive elements handled by the same thread.
            var stride = gridDim.x * bdm;

            // Todo: 'default(T)' is a bad idea, think of (n * 0) => The accumulator's initial value should be provided by the user!
            var accumulator = default(T);

            while (gid < length)
            {
                accumulator = op(accumulator, array[gid]);
                gid        += stride;
            }

            // Warp stage: after these five steps the first lane of each warp holds the warp's value.
            accumulator = op(accumulator, DeviceFunction.ShuffleDown(accumulator, 16));
            accumulator = op(accumulator, DeviceFunction.ShuffleDown(accumulator, 8));
            accumulator = op(accumulator, DeviceFunction.ShuffleDown(accumulator, 4));
            accumulator = op(accumulator, DeviceFunction.ShuffleDown(accumulator, 2));
            accumulator = op(accumulator, DeviceFunction.ShuffleDown(accumulator, 1));

            // One slot per warp. 32 slots cover a 1024-thread block with 32-lane warps, so the
            // warp leaders below cannot write past the buffer for any such block size.
            var shared = __shared__.Array <T>(32);

            // Number of warps actually present in this block; only these slots get written.
            var warps = (bdm + WarpSize - 1) / WarpSize;

            if (tid % WarpSize == 0)
            {
                shared[tid / WarpSize] = accumulator;
            }

            DeviceFunction.SyncThreads();

            if (tid == 0)
            {
                // Pairwise tree over the written slots only, so unwritten (uninitialised) slots are
                // never read when the block has fewer warps than the buffer has slots.
                for (var step = 1; step < warps; step <<= 1)
                {
                    for (var i = 0; i + step < warps; i += 2 * step)
                    {
                        shared[i] = op(shared[i], shared[i + step]);
                    }
                }

                result[bid] = shared[0];
            }
        }