Exemplo n.º 1
0
        /// <summary>
        /// Launches <paramref name="kernel"/> on <paramref name="gpu"/> over a square grid, waits for it to
        /// finish, reports the timing through <c>Util.PrintPerformance</c> and finally copies the
        /// <paramref name="n"/> x <paramref name="n"/> device result into <paramref name="mSquaredDistances"/>.
        /// </summary>
        /// <param name="gpu">Device used for allocation, kernel launch and synchronisation.</param>
        /// <param name="mSquaredDistances">Host array that receives the result via <c>Gpu.Copy2D</c>.</param>
        /// <param name="mCoordinates">Host array from which the device coordinate buffer is allocated.</param>
        /// <param name="c">Forwarded to the kernel as a constant and to the performance report
        /// (presumably the number of coordinates per point; confirm against the kernels).</param>
        /// <param name="n">Side length of the device result matrix; also drives the grid size.</param>
        /// <param name="name">Label handed to the performance report.</param>
        /// <param name="kernel">Kernel delegate. It is invoked with the output pointer, the coordinate pointer,
        /// the block size as a constant, <paramref name="c"/> as a constant, <paramref name="n"/> and the
        /// output pitch in elements.</param>
        private static void AleaOptimisedImpl(
            Gpu gpu,
            Real[] mSquaredDistances,
            Real[] mCoordinates,
            int c,
            int n,
            string name,
            Action<deviceptr<Real>, deviceptr<Real>, Constant<int>, Constant<int>, int, int> kernel)
        {
            const int threadsPerBlock = 256;

            using var deviceDistances   = gpu.AllocateDevice<Real>(n, n);
            using var deviceCoordinates = gpu.AllocateDevice(mCoordinates);

            // The stopwatch starts only after both device buffers have been allocated.
            var stopwatch = Stopwatch.StartNew();

            // Square grid: the same block count along x and y, one-dimensional blocks.
            var blocksPerAxis = Util.DivUp(n, threadsPerBlock);
            var gridDim       = new dim3(blocksPerAxis, blocksPerAxis, 1);
            var blockDim      = new dim3(threadsPerBlock, 1, 1);
            var launchParam   = new LaunchParam(gridDim, blockDim);
            var rowPitch      = deviceDistances.PitchInElements.ToInt32();

            gpu.Launch(
                kernel,
                launchParam,
                deviceDistances.Ptr,
                deviceCoordinates.Ptr,
                Gpu.Constant(threadsPerBlock),
                Gpu.Constant(c),
                n,
                rowPitch);
            gpu.Synchronize();

            Util.PrintPerformance(stopwatch, name, n, c, n);

            // The device-to-host copy is issued after the performance report.
            Gpu.Copy2D(deviceDistances, mSquaredDistances, n, n);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Launches <paramref name="kernel"/> on the default GPU over a square grid, waits for it to finish,
        /// reports the timing through <c>Util.PrintPerformance</c> and finally copies the
        /// <paramref name="n"/> x <paramref name="n"/> device result into <paramref name="mSquaredDistances"/>.
        /// </summary>
        /// <typeparam name="TInt">Type in which the coordinate count is handed to the kernel; produced from
        /// <paramref name="c"/> by <paramref name="numCoordGetter"/>.</typeparam>
        /// <param name="mSquaredDistances">Host array that receives the result via <c>Gpu.Copy2D</c>.</param>
        /// <param name="mCoordinates">Host array from which the device coordinate buffer is allocated.</param>
        /// <param name="c">Passed through <paramref name="numCoordGetter"/> to the kernel, used to size the third
        /// <c>LaunchParam</c> argument, and forwarded to the performance report.</param>
        /// <param name="n">Side length of the device result matrix; also drives the grid size.</param>
        /// <param name="name">Label handed to the performance report.</param>
        /// <param name="kernel">Kernel delegate. It is invoked with the output pointer, the coordinate pointer,
        /// the converted coordinate count, <paramref name="n"/> and the output pitch in elements. The pointers
        /// are typed with <c>Real</c> so the delegate matches the buffers allocated here.</param>
        /// <param name="numCoordGetter">Converts <paramref name="c"/> into the <typeparamref name="TInt"/> value
        /// the kernel expects.</param>
        private static void CudaOptimisedImpl<TInt>(
            Real[] mSquaredDistances,
            Real[] mCoordinates,
            int c,
            int n,
            string name,
            Action<deviceptr<Real>, deviceptr<Real>, TInt, int, int> kernel,
            Func<int, TInt> numCoordGetter)
        {
            var gpu = Gpu.Default;

            using var cudaSquaredDistance = gpu.AllocateDevice<Real>(n, n);
            using var cudaCoordinates     = gpu.AllocateDevice(mCoordinates);

            // The stopwatch starts only after both device buffers have been allocated.
            var timer = Stopwatch.StartNew();

            const int blockSize = 128;
            var       gridSize  = Util.DivUp(n, blockSize);

            // Third LaunchParam argument: room for 2 * c * blockSize values of type Real
            // (presumably the dynamic shared-memory size in bytes; confirm against the Alea docs).
            var sharedBytes = 2 * c * blockSize * sizeof(Real);
            var lp          = new LaunchParam(new dim3(gridSize, gridSize, 1), new dim3(blockSize, 1, 1), sharedBytes);
            var pitch       = cudaSquaredDistance.PitchInElements.ToInt32();

            gpu.Launch(kernel, lp, cudaSquaredDistance.Ptr, cudaCoordinates.Ptr, numCoordGetter(c), n, pitch);

            gpu.Synchronize();
            Util.PrintPerformance(timer, name, n, c, n);

            // The device-to-host copy is issued after the performance report.
            Gpu.Copy2D(cudaSquaredDistance, mSquaredDistances, n, n);
        }