/// <summary>
/// Runs a cuBLAS GEMM on the given accelerator using <paramref name="left"/> and
/// <paramref name="right"/> as operands, reports the elapsed time and copies the
/// device result back into <paramref name="result"/>.
/// </summary>
/// <param name="gpu">Accelerator that owns the device buffers and the cuBLAS instance.</param>
/// <param name="result">Host array that is uploaded as the output operand and receives the product.</param>
/// <param name="left">Host array uploaded as the first GEMM operand.</param>
/// <param name="right">Host array uploaded as the second GEMM operand.</param>
/// <param name="n">Value passed for all three GEMM dimensions and all three leading dimensions.</param>
public static void IlGpu(CudaAccelerator gpu, Real[] result, Real[] left, Real[] right, int n)
{
    // Using declarations are released in reverse order at the end of the method:
    // the cuBLAS instance first, then the right, left and result buffers.
    using var resultBuffer = gpu.Allocate(result);
    using var leftBuffer = gpu.Allocate(left);
    using var rightBuffer = gpu.Allocate(right);
    using var blas = new CuBlas(gpu, CuBlasAPIVersion.V11);

    // The stopwatch starts after the uploads and the cuBLAS setup, so only the
    // GEMM call and the following synchronization are measured.
    var timer = Stopwatch.StartNew();

    blas.Gemm(
        CuBlasOperation.NonTranspose,
        CuBlasOperation.NonTranspose,
        n,
        n,
        n,
        1,
        leftBuffer.View,
        n,
        rightBuffer.View,
        n,
        0,
        resultBuffer.View,
        n);

    // Wait for the accelerator before the timing is reported.
    gpu.Synchronize();
    PrintPerformance(timer, "MatrixMultiplication.IlGpu.cuBLAS", n, n, n);

    // Download the product into the caller's array.
    resultBuffer.CopyTo(result, 0, 0, result.Length);
}
/// <summary>
/// Returns the cached cuBLAS context handle, creating it on the first call.
/// </summary>
/// <returns>The handle stored in <c>cublasHandle</c>.</returns>
/// <remarks>
/// NOTE(review): the initialization flag and the handle are updated without any
/// synchronization visible here — confirm that callers use this from one thread.
/// </remarks>
public static unsafe cublasContext *blas_handle()
{
    // Fast path: the handle was already created by an earlier call.
    if (cublasInit)
    {
        return cublasHandle;
    }

    cublasContext *created;
    SafeCall(CuBlas.cublasCreate_v2(&created));

    // The flag is only set once the create call has passed through SafeCall.
    cublasInit = true;
    cublasHandle = created;
    return cublasHandle;
}
/// <summary>
/// Sample entry point: for every CUDA accelerator, fills two device buffers with
/// 1.0f and calls cuBLAS <c>Nrm2</c> on them, first with manual pointer-mode
/// handling and then with the automatic pointer-mode handler.
/// </summary>
static void Main()
{
    const int DataSize = 1024;
    const CuBlasAPIVersion CuBlasVersion = CuBlasAPIVersion.V10;

    using (var context = new Context())
    {
        // Enable algorithms library
        context.EnableAlgorithms();

        // Check for Cuda support
        foreach (var acceleratorId in CudaAccelerator.CudaAccelerators)
        {
            using (var accelerator = new CudaAccelerator(context, acceleratorId))
            {
                Console.WriteLine($"Performing operations on {accelerator}");

                // Dispose the device buffers deterministically instead of leaving
                // their release to the accelerator teardown. Both are released
                // before the accelerator that owns them.
                using (var buf = accelerator.Allocate<float>(DataSize))
                using (var buf2 = accelerator.Allocate<float>(DataSize))
                {
                    accelerator.Initialize(accelerator.DefaultStream, buf, 1.0f);
                    accelerator.Initialize(accelerator.DefaultStream, buf2.View, 1.0f);

                    // Initialize the CuBlas library using manual pointer mode handling
                    // (default behavior)
                    using (var blas = new CuBlas(accelerator, CuBlasVersion))
                    {
                        // Set pointer mode to Host to enable data transfer to CPU memory
                        blas.PointerMode = CuBlasPointerMode.Host;
                        float output = blas.Nrm2(buf);

                        // Set pointer mode to Device to enable data transfer to GPU memory
                        blas.PointerMode = CuBlasPointerMode.Device;
                        blas.Nrm2(buf, buf2);

                        // Use pointer mode scopes to recover the previous pointer mode
                        using (var scope = blas.BeginPointerScope(CuBlasPointerMode.Host))
                        {
                            float output2 = blas.Nrm2(buf);
                        }
                    }

                    // Initialize the CuBlas<T> library using custom pointer mode handlers
                    using (var blas = new CuBlas<CuBlasPointerModeHandlers.AutomaticMode>(accelerator, CuBlasVersion))
                    {
                        // Automatic transfer to host
                        float output = blas.Nrm2(buf);

                        // Automatic transfer to device
                        blas.Nrm2(buf, buf2);
                    }
                }
            }
        }
    }
}
/// <summary>
/// Sample entry point: for every CUDA device in the context, fills two device
/// buffers with 1.0f and calls cuBLAS <c>Nrm2</c> on them, first with manual
/// pointer-mode handling and then with the automatic pointer-mode handler.
/// </summary>
static void Main()
{
    const int DataSize = 1024;

    using var context = Context.Create(builder => builder.Cuda().EnableAlgorithms());

    // Check for Cuda support
    foreach (var device in context.GetCudaDevices())
    {
        using var accelerator = device.CreateCudaAccelerator(context);
        Console.WriteLine($"Performing operations on {accelerator}");

        // Dispose the device buffers deterministically instead of leaving their
        // release to the accelerator teardown. Using declarations are released in
        // reverse order, so both buffers are freed before the accelerator.
        using var buf = accelerator.Allocate1D<float>(DataSize);
        using var buf2 = accelerator.Allocate1D<float>(DataSize);

        accelerator.Initialize(accelerator.DefaultStream, buf.View, 1.0f);
        accelerator.Initialize(accelerator.DefaultStream, buf2.View, 1.0f);

        // Initialize the CuBlas library using manual pointer mode handling
        // (default behavior)
        using (var blas = new CuBlas(accelerator))
        {
            // Set pointer mode to Host to enable data transfer to CPU memory
            blas.PointerMode = CuBlasPointerMode.Host;
            float output = blas.Nrm2(buf.View.AsGeneral());

            // Set pointer mode to Device to enable data transfer to GPU memory
            blas.PointerMode = CuBlasPointerMode.Device;
            blas.Nrm2(buf.View.AsGeneral(), buf2.View);

            // Use pointer mode scopes to recover the previous pointer mode
            using var scope = blas.BeginPointerScope(CuBlasPointerMode.Host);
            float output2 = blas.Nrm2(buf.View.AsGeneral());
        }

        // Initialize the CuBlas<T> library using custom pointer mode handlers
        using (var blas = new CuBlas<CuBlasPointerModeHandlers.AutomaticMode>(accelerator))
        {
            // Automatic transfer to host
            float output = blas.Nrm2(buf.View.AsGeneral());

            // Automatic transfer to device
            blas.Nrm2(buf.View.AsGeneral(), buf2.View);
        }
    }
}
/// <summary>
/// Uploads <paramref name="a"/>, <paramref name="b"/> and <paramref name="c"/> to the
/// default GPU, runs cuBLAS SGEMM on them and writes the resulting C matrix back
/// into the caller's <paramref name="c"/> array.
/// </summary>
/// <param name="ta">Non-zero selects the transposed operation for A.</param>
/// <param name="tb">Non-zero selects the transposed operation for B.</param>
/// <param name="m">Dimension forwarded to SGEMM as its second size argument.</param>
/// <param name="n">Dimension forwarded to SGEMM as its first size argument.</param>
/// <param name="k">Dimension forwarded to SGEMM as its third size argument.</param>
/// <param name="alpha">Scalar passed to SGEMM as alpha.</param>
/// <param name="a">Host data for A; not modified.</param>
/// <param name="lda">Leading dimension passed for A.</param>
/// <param name="b">Host data for B; not modified.</param>
/// <param name="ldb">Leading dimension passed for B.</param>
/// <param name="beta">Scalar passed to SGEMM as beta.</param>
/// <param name="c">Host data for C; uploaded as the SGEMM output operand and overwritten in place with the result.</param>
/// <param name="ldc">Leading dimension passed for C.</param>
public static unsafe void gemm_ongpu(int ta, int tb, int m, int n, int k, float alpha, float[] a, int lda, float[] b, int ldb, float beta, float[] c, int ldc)
{
    using (var gpuA = Gpu.Default.AllocateDevice(a.ToArray()))
    using (var gpuB = Gpu.Default.AllocateDevice(b.ToArray()))
    using (var gpuC = Gpu.Default.AllocateDevice(c.ToArray()))
    {
        var handle = CudaUtils.blas_handle();

        var opB = tb != 0 ? cublasOperation_t.CUBLAS_OP_T : cublasOperation_t.CUBLAS_OP_N;
        var opA = ta != 0 ? cublasOperation_t.CUBLAS_OP_T : cublasOperation_t.CUBLAS_OP_N;

        // NOTE(review): operands are deliberately passed as (B, A) with sizes (n, m);
        // presumably this maps row-major host matrices onto column-major cuBLAS — confirm.
        CudaUtils.SafeCall(CuBlas.cublasSgemm_v2(
            handle,
            opB,
            opA,
            n, m, k,
            &alpha,
            (float *)gpuB.Handle, ldb,
            (float *)gpuA.Handle, lda,
            &beta,
            (float *)gpuC.Handle, ldc));

        // Reassigning the by-value array parameters never reaches the caller, so
        // the product is copied into the caller's existing C array instead.
        // A and B are inputs only and are not downloaded again.
        float[] product = Gpu.CopyToHost(gpuC);
        System.Array.Copy(product, c, c.Length);
    }
}