예제 #1
0
        /// <summary>
        /// Shared host-side driver for the "optimised" squared-distance kernels: creates the
        /// device buffers, launches <paramref name="kernelFunc"/> on a two-dimensional grid with
        /// a dynamic shared-memory request, reports the elapsed time and copies the
        /// <paramref name="n"/> x <paramref name="n"/> result back to the host.
        /// </summary>
        /// <typeparam name="TInt">
        /// Value type in which the kernel receives the value produced by
        /// <paramref name="numCoordGetter"/>.
        /// </typeparam>
        /// <param name="gpu">Accelerator used for allocation, launch and synchronisation.</param>
        /// <param name="mSquaredDistances">Host array that receives the device result.</param>
        /// <param name="mCoordinates">Host array handed to <c>gpu.Allocate</c> to create the coordinate buffer.</param>
        /// <param name="c">Forwarded through <paramref name="numCoordGetter"/>; also scales the shared-memory request.</param>
        /// <param name="n">Extent of both dimensions of the result buffer; also passed to the kernel.</param>
        /// <param name="name">Label passed to <c>Util.PrintPerformance</c>.</param>
        /// <param name="kernelFunc">Kernel to launch.</param>
        /// <param name="numCoordGetter">Converts <paramref name="c"/> into the kernel's <typeparamref name="TInt"/> argument.</param>
        private static void IlGpuOptimisedImpl<TInt>(
            CudaAccelerator gpu,
            Real[] mSquaredDistances,
            Real[] mCoordinates,
            int c,
            int n,
            string name,
            Action<ArrayView2D<Real>, ArrayView<Real>, TInt, int> kernelFunc,
            Func<int, TInt> numCoordGetter)
            where TInt : struct
        {
            const int threadsPerBlock = 128;

            using var deviceDistances   = gpu.Allocate<Real>(n, n);
            using var deviceCoordinates = gpu.Allocate(mCoordinates);

            // The stopwatch starts only after the buffers exist, so buffer creation is not timed.
            var stopwatch = Stopwatch.StartNew();

            var blocksPerAxis = Util.DivUp(n, threadsPerBlock);

            // Dynamic shared memory: 2 * c * threadsPerBlock elements of Real.
            var sharedMemory = SharedMemoryConfig.RequestDynamic<Real>(2 * c * threadsPerBlock);
            var launchConfig = (
                (blocksPerAxis, blocksPerAxis, 1),
                (threadsPerBlock, 1, 1),
                sharedMemory);

            gpu.Launch(
                kernelFunc,
                gpu.DefaultStream,
                launchConfig,
                deviceDistances.View,
                deviceCoordinates.View,
                numCoordGetter(c),
                n);
            gpu.Synchronize();

            // Reported before the copy back, so the device-to-host transfer is not timed.
            Util.PrintPerformance(stopwatch, name, n, c, n);

            deviceDistances.CopyTo(mSquaredDistances, (0, 0), 0, (n, n));
        }
예제 #2
0
        /// <summary>
        /// Launches <c>IlGpuKernel</c> over device buffers created from the four host arrays,
        /// waits for completion, prints the elapsed time and copies the intra-return buffer
        /// back into <paramref name="mIntraReturn"/>.
        /// </summary>
        /// <param name="gpu">Accelerator used for allocation, launch and synchronisation.</param>
        /// <param name="mIntraReturn">Host array used to create the first device buffer; overwritten with that buffer's contents afterwards.</param>
        /// <param name="vClose">Host array used to create the second kernel argument.</param>
        /// <param name="vIsAlive">Host array used to create the third kernel argument.</param>
        /// <param name="vIsValidDay">Host array used to create the fourth kernel argument.</param>
        /// <param name="m">Passed to the kernel; determines the grid extent along Y (8 threads per block).</param>
        /// <param name="n">Passed to the kernel; determines the grid extent along X (32 threads per block).</param>
        public static void IlGpu(
            CudaAccelerator gpu,
            Real[] mIntraReturn,
            Real[] vClose,
            Real[] vIsAlive,
            Real[] vIsValidDay,
            int m,
            int n)
        {
            const int blockDimX = 32;
            const int blockDimY = 8;

            // Using declarations dispose in reverse order at the end of the method.
            using var deviceIntraReturn = gpu.Allocate(mIntraReturn);
            using var deviceClose       = gpu.Allocate(vClose);
            using var deviceIsAlive     = gpu.Allocate(vIsAlive);
            using var deviceIsValidDay  = gpu.Allocate(vIsValidDay);

            // Buffer creation above is deliberately outside the timed region.
            var stopwatch = Stopwatch.StartNew();

            var blocksX      = Util.DivUp(n, blockDimX);
            var blocksY      = Util.DivUp(m, blockDimY);
            var launchConfig = ((blocksX, blocksY, 1), (blockDimX, blockDimY));

            gpu.Launch(
                IlGpuKernel,
                gpu.DefaultStream,
                launchConfig,
                deviceIntraReturn.View,
                deviceClose.View,
                deviceIsAlive.View,
                deviceIsValidDay.View,
                m,
                n);

            gpu.Synchronize();
            Util.PrintPerformance(stopwatch, "IntraReturn.IlGpu", 5, m, n);

            // Copy the whole result buffer back to the host, starting at offset 0 on both sides.
            deviceIntraReturn.CopyTo(mIntraReturn, 0, 0, mIntraReturn.Length);
        }
예제 #3
0
        /// <summary>
        /// Launches <c>IlGpuKernel</c> over device buffers created from
        /// <paramref name="matrix"/> and <paramref name="vector"/>, waits for completion,
        /// prints the elapsed time and copies the matrix buffer back into
        /// <paramref name="matrix"/>.
        /// </summary>
        /// <param name="gpu">Accelerator used for allocation, launch and synchronisation.</param>
        /// <param name="matrix">Host array used to create the first device buffer; overwritten with that buffer's contents afterwards.</param>
        /// <param name="vector">Host array used to create the second kernel argument.</param>
        /// <param name="m">Passed to the kernel; determines the grid extent along Y (8 threads per block).</param>
        /// <param name="n">Passed to the kernel; determines the grid extent along X (32 threads per block).</param>
        public static void IlGpu(CudaAccelerator gpu, Real[] matrix, Real[] vector, int m, int n)
        {
            const int blockDimX = 32;
            const int blockDimY = 8;

            // Using declarations dispose in reverse order at the end of the method.
            using var deviceMatrix = gpu.Allocate(matrix);
            using var deviceVector = gpu.Allocate(vector);

            // Buffer creation above is deliberately outside the timed region.
            var stopwatch = Stopwatch.StartNew();

            var blocksX      = Util.DivUp(n, blockDimX);
            var blocksY      = Util.DivUp(m, blockDimY);
            var launchConfig = ((blocksX, blocksY, 1), (blockDimX, blockDimY));

            gpu.Launch(
                IlGpuKernel,
                gpu.DefaultStream,
                launchConfig,
                deviceMatrix.View,
                deviceVector.View,
                m,
                n);

            gpu.Synchronize();
            Util.PrintPerformance(stopwatch, "AddVector.IlGpu", 3, m, n);

            // Copy the whole matrix buffer back to the host, starting at offset 0 on both sides.
            deviceMatrix.CopyTo(matrix, 0, 0, matrix.Length);
        }
예제 #4
0
        /// <summary>
        /// Launches <c>IlGpuKernel</c> on a one-dimensional grid sized for
        /// <paramref name="n"/> * <paramref name="n"/> threads, waits for completion, prints
        /// the elapsed time and copies the distance buffer back into
        /// <paramref name="mSquaredDistances"/>.
        /// </summary>
        /// <param name="gpu">Accelerator used for allocation, launch and synchronisation.</param>
        /// <param name="mSquaredDistances">Host array used to create the output device buffer; overwritten with that buffer's contents afterwards.</param>
        /// <param name="mCoordinates">Host array used to create the coordinate device buffer.</param>
        /// <param name="c">Passed to the kernel unchanged.</param>
        /// <param name="n">Passed to the kernel; n * n determines how many blocks are launched.</param>
        public static void IlGpu(
            CudaAccelerator gpu,
            Real[] mSquaredDistances,
            Real[] mCoordinates,
            int c,
            int n)
        {
            const int threadsPerBlock = 128;

            using var deviceDistances   = gpu.Allocate(mSquaredDistances);
            using var deviceCoordinates = gpu.Allocate(mCoordinates);

            // Buffer creation above is deliberately outside the timed region.
            var stopwatch = Stopwatch.StartNew();

            var blockCount   = Util.DivUp(n * n, threadsPerBlock);
            var launchConfig = (blockCount, threadsPerBlock);

            gpu.Launch(
                IlGpuKernel,
                gpu.DefaultStream,
                launchConfig,
                deviceDistances.View,
                deviceCoordinates.View,
                c,
                n);
            gpu.Synchronize();

            // Reported before the copy back, so the device-to-host transfer is not timed.
            Util.PrintPerformance(stopwatch, "SquaredDistance.IlGpu", n, c, n);

            deviceDistances.CopyTo(mSquaredDistances, 0, 0, mSquaredDistances.Length);
        }
예제 #5
0
        /// <summary>
        /// Shared host-side driver for squared-distance kernels that take the block size and
        /// <paramref name="c"/> as <c>SpecializedValue</c> arguments: creates the device
        /// buffers, launches <paramref name="kernelFunc"/> on a two-dimensional grid, reports
        /// the elapsed time and copies the <paramref name="n"/> x <paramref name="n"/> result
        /// back to the host.
        /// </summary>
        /// <param name="gpu">Accelerator used for allocation, launch and synchronisation.</param>
        /// <param name="mSquaredDistances">Host array that receives the device result.</param>
        /// <param name="mCoordinates">Host array handed to <c>gpu.Allocate</c> to create the coordinate buffer.</param>
        /// <param name="c">Wrapped in a <c>SpecializedValue</c> and passed to the kernel.</param>
        /// <param name="n">Extent of both dimensions of the result buffer; also passed to the kernel.</param>
        /// <param name="name">Label passed to <c>Util.PrintPerformance</c>.</param>
        /// <param name="kernelFunc">Kernel to launch.</param>
        private static void IlGpuOptimisedImpl(
            CudaAccelerator gpu,
            Real[] mSquaredDistances,
            Real[] mCoordinates,
            int c,
            int n,
            string name,
            Action<ArrayView2D<Real>, ArrayView<Real>, SpecializedValue<int>, SpecializedValue<int>, int> kernelFunc)
        {
            const int threadsPerBlock = 128;

            using var deviceDistances   = gpu.Allocate<Real>(n, n);
            using var deviceCoordinates = gpu.Allocate(mCoordinates);

            // The stopwatch starts only after the buffers exist, so buffer creation is not timed.
            var stopwatch = Stopwatch.StartNew();

            var blocksPerAxis = Util.DivUp(n, threadsPerBlock);
            var launchConfig  = ((blocksPerAxis, blocksPerAxis, 1), (threadsPerBlock, 1, 1));

            gpu.Launch(
                kernelFunc,
                gpu.DefaultStream,
                launchConfig,
                deviceDistances.View,
                deviceCoordinates.View,
                SpecializedValue.New(threadsPerBlock),
                SpecializedValue.New(c),
                n);
            gpu.Synchronize();

            // Reported before the copy back, so the device-to-host transfer is not timed.
            Util.PrintPerformance(stopwatch, name, n, c, n);

            deviceDistances.CopyTo(mSquaredDistances, (0, 0), 0, (n, n));
        }