/// <summary>
/// Launches <paramref name="kernelFunc"/> with a dynamic shared-memory request on a
/// two-dimensional grid, waits for it to finish, reports the elapsed time and copies the
/// <paramref name="n"/> x <paramref name="n"/> device buffer into <paramref name="mSquaredDistances"/>.
/// </summary>
/// <typeparam name="TInt">Value type the kernel receives in place of the raw <paramref name="c"/>.</typeparam>
/// <param name="gpu">Accelerator used for allocation, launch and synchronisation.</param>
/// <param name="mSquaredDistances">Host array that receives the device results.</param>
/// <param name="mCoordinates">Host array from which the coordinate device buffer is created.</param>
/// <param name="c">Converted by <paramref name="numCoordGetter"/> for the kernel and used to size the shared-memory request (presumably coordinates per point; confirm against the kernel).</param>
/// <param name="n">Side length of the output buffer; also drives the grid size.</param>
/// <param name="name">Label forwarded to <c>Util.PrintPerformance</c>.</param>
/// <param name="kernelFunc">Kernel to launch.</param>
/// <param name="numCoordGetter">Turns <paramref name="c"/> into the <typeparamref name="TInt"/> kernel argument.</param>
private static void IlGpuOptimisedImpl<TInt>(
    CudaAccelerator gpu,
    Real[] mSquaredDistances,
    Real[] mCoordinates,
    int c,
    int n,
    string name,
    Action<ArrayView2D<Real>, ArrayView<Real>, TInt, int> kernelFunc,
    Func<int, TInt> numCoordGetter)
    where TInt : struct
{
    const int threadsPerBlock = 128;

    using var deviceDistances = gpu.Allocate<Real>(n, n);
    using var deviceCoordinates = gpu.Allocate(mCoordinates);

    // The stopwatch is started only after both device buffers exist.
    var stopwatch = Stopwatch.StartNew();

    var blocksPerAxis = Util.DivUp(n, threadsPerBlock);
    var gridDim = (blocksPerAxis, blocksPerAxis, 1);
    var groupDim = (threadsPerBlock, 1, 1);
    var sharedMemory = SharedMemoryConfig.RequestDynamic<Real>(2 * c * threadsPerBlock);
    var launchConfig = (gridDim, groupDim, sharedMemory);

    gpu.Launch(
        kernelFunc,
        gpu.DefaultStream,
        launchConfig,
        deviceDistances.View,
        deviceCoordinates.View,
        numCoordGetter(c),
        n);
    gpu.Synchronize();

    // The timing is reported before the results are copied back to the host.
    Util.PrintPerformance(stopwatch, name, n, c, n);
    deviceDistances.CopyTo(mSquaredDistances, (0, 0), 0, (n, n));
}
/// <summary>
/// Creates device buffers from the four host arrays, launches <c>IlGpuKernel</c> on a
/// two-dimensional grid of 32 x 8 groups, waits for completion, reports the elapsed time
/// and copies the first device buffer back into <paramref name="mIntraReturn"/>.
/// </summary>
/// <param name="gpu">Accelerator used for allocation, launch and synchronisation.</param>
/// <param name="mIntraReturn">Host array used to create the device buffer and overwritten with its contents afterwards.</param>
/// <param name="vClose">Host array handed to the kernel as its second view.</param>
/// <param name="vIsAlive">Host array handed to the kernel as its third view.</param>
/// <param name="vIsValidDay">Host array handed to the kernel as its fourth view.</param>
/// <param name="m">Extent covered by the grid's Y dimension (presumably the row count; confirm against the kernel).</param>
/// <param name="n">Extent covered by the grid's X dimension (presumably the column count; confirm against the kernel).</param>
public static void IlGpu(
    CudaAccelerator gpu,
    Real[] mIntraReturn,
    Real[] vClose,
    Real[] vIsAlive,
    Real[] vIsValidDay,
    int m,
    int n)
{
    const int groupWidth = 32;
    const int groupHeight = 8;

    using var deviceIntraReturn = gpu.Allocate(mIntraReturn);
    using var deviceClose = gpu.Allocate(vClose);
    using var deviceIsAlive = gpu.Allocate(vIsAlive);
    using var deviceIsValidDay = gpu.Allocate(vIsValidDay);

    // The stopwatch is started only after all device buffers exist.
    var stopwatch = Stopwatch.StartNew();

    var groupsX = Util.DivUp(n, groupWidth);
    var groupsY = Util.DivUp(m, groupHeight);
    var launchConfig = ((groupsX, groupsY, 1), (groupWidth, groupHeight));

    gpu.Launch(
        IlGpuKernel,
        gpu.DefaultStream,
        launchConfig,
        deviceIntraReturn.View,
        deviceClose.View,
        deviceIsAlive.View,
        deviceIsValidDay.View,
        m,
        n);
    gpu.Synchronize();

    // The literal 5 is interpreted by Util.PrintPerformance; its meaning is not visible here.
    Util.PrintPerformance(stopwatch, "IntraReturn.IlGpu", 5, m, n);
    deviceIntraReturn.CopyTo(mIntraReturn, 0, 0, mIntraReturn.Length);
}
/// <summary>
/// Creates device buffers from <paramref name="matrix"/> and <paramref name="vector"/>,
/// launches <c>IlGpuKernel</c> on a two-dimensional grid of 32 x 8 groups, waits for
/// completion, reports the elapsed time and copies the matrix buffer back to the host.
/// </summary>
/// <param name="gpu">Accelerator used for allocation, launch and synchronisation.</param>
/// <param name="matrix">Host array used to create the device buffer and overwritten with its contents afterwards.</param>
/// <param name="vector">Host array handed to the kernel as its second view.</param>
/// <param name="m">Extent covered by the grid's Y dimension (presumably the row count; confirm against the kernel).</param>
/// <param name="n">Extent covered by the grid's X dimension (presumably the column count; confirm against the kernel).</param>
public static void IlGpu(CudaAccelerator gpu, Real[] matrix, Real[] vector, int m, int n)
{
    const int groupWidth = 32;
    const int groupHeight = 8;

    using var deviceMatrix = gpu.Allocate(matrix);
    using var deviceVector = gpu.Allocate(vector);

    // The stopwatch is started only after both device buffers exist.
    var stopwatch = Stopwatch.StartNew();

    var groupsX = Util.DivUp(n, groupWidth);
    var groupsY = Util.DivUp(m, groupHeight);
    var launchConfig = ((groupsX, groupsY, 1), (groupWidth, groupHeight));

    gpu.Launch(
        IlGpuKernel,
        gpu.DefaultStream,
        launchConfig,
        deviceMatrix.View,
        deviceVector.View,
        m,
        n);
    gpu.Synchronize();

    // The literal 3 is interpreted by Util.PrintPerformance; its meaning is not visible here.
    Util.PrintPerformance(stopwatch, "AddVector.IlGpu", 3, m, n);
    deviceMatrix.CopyTo(matrix, 0, 0, matrix.Length);
}
/// <summary>
/// Creates device buffers from both host arrays, launches <c>IlGpuKernel</c> on a
/// one-dimensional grid sized to cover <c>n * n</c> elements in groups of 128, waits for
/// completion, reports the elapsed time and copies the distance buffer back to the host.
/// </summary>
/// <param name="gpu">Accelerator used for allocation, launch and synchronisation.</param>
/// <param name="mSquaredDistances">Host array used to create the device buffer and overwritten with its contents afterwards.</param>
/// <param name="mCoordinates">Host array from which the coordinate device buffer is created.</param>
/// <param name="c">Forwarded to the kernel (presumably coordinates per point; confirm against the kernel).</param>
/// <param name="n">Forwarded to the kernel; <c>n * n</c> determines the grid size.</param>
public static void IlGpu(
    CudaAccelerator gpu,
    Real[] mSquaredDistances,
    Real[] mCoordinates,
    int c,
    int n)
{
    const int threadsPerBlock = 128;

    using var deviceDistances = gpu.Allocate(mSquaredDistances);
    using var deviceCoordinates = gpu.Allocate(mCoordinates);

    // The stopwatch is started only after both device buffers exist.
    var stopwatch = Stopwatch.StartNew();

    var blockCount = Util.DivUp(n * n, threadsPerBlock);
    var launchConfig = (blockCount, threadsPerBlock);

    gpu.Launch(
        IlGpuKernel,
        gpu.DefaultStream,
        launchConfig,
        deviceDistances.View,
        deviceCoordinates.View,
        c,
        n);
    gpu.Synchronize();

    // The timing is reported before the results are copied back to the host.
    Util.PrintPerformance(stopwatch, "SquaredDistance.IlGpu", n, c, n);
    deviceDistances.CopyTo(mSquaredDistances, 0, 0, mSquaredDistances.Length);
}
/// <summary>
/// Launches <paramref name="kernelFunc"/> on a two-dimensional grid, passing the block size
/// and <paramref name="c"/> wrapped in <c>SpecializedValue</c>, waits for completion,
/// reports the elapsed time and copies the <paramref name="n"/> x <paramref name="n"/>
/// device buffer into <paramref name="mSquaredDistances"/>.
/// </summary>
/// <param name="gpu">Accelerator used for allocation, launch and synchronisation.</param>
/// <param name="mSquaredDistances">Host array that receives the device results.</param>
/// <param name="mCoordinates">Host array from which the coordinate device buffer is created.</param>
/// <param name="c">Wrapped via <c>SpecializedValue.New</c> and passed to the kernel (presumably coordinates per point; confirm against the kernel).</param>
/// <param name="n">Side length of the output buffer; also drives the grid size.</param>
/// <param name="name">Label forwarded to <c>Util.PrintPerformance</c>.</param>
/// <param name="kernelFunc">Kernel to launch.</param>
private static void IlGpuOptimisedImpl(
    CudaAccelerator gpu,
    Real[] mSquaredDistances,
    Real[] mCoordinates,
    int c,
    int n,
    string name,
    Action<ArrayView2D<Real>, ArrayView<Real>, SpecializedValue<int>, SpecializedValue<int>, int> kernelFunc)
{
    const int threadsPerBlock = 128;

    using var deviceDistances = gpu.Allocate<Real>(n, n);
    using var deviceCoordinates = gpu.Allocate(mCoordinates);

    // The stopwatch is started only after both device buffers exist.
    var stopwatch = Stopwatch.StartNew();

    var blocksPerAxis = Util.DivUp(n, threadsPerBlock);
    var gridDim = (blocksPerAxis, blocksPerAxis, 1);
    var groupDim = (threadsPerBlock, 1, 1);
    var launchConfig = (gridDim, groupDim);

    gpu.Launch(
        kernelFunc,
        gpu.DefaultStream,
        launchConfig,
        deviceDistances.View,
        deviceCoordinates.View,
        SpecializedValue.New(threadsPerBlock),
        SpecializedValue.New(c),
        n);
    gpu.Synchronize();

    // The timing is reported before the results are copied back to the host.
    Util.PrintPerformance(stopwatch, name, n, c, n);
    deviceDistances.CopyTo(mSquaredDistances, (0, 0), 0, (n, n));
}