/// <summary>
/// Multiplies a 3x2 matrix A by a 2x3 matrix B into a 3x3 matrix C through
/// GpuMatrixOps.Multiply and copies the product back to the host.
/// </summary>
/// <returns>
/// The concatenation of every status string returned by the setup, multiply,
/// copy and teardown calls; empty when all of them reported success.
/// </returns>
static string TestcublasSgemm2()
{
    // Shapes: A is ah x aw, B is bh x bw, C is ch x cw (rows x cols).
    uint aw = 2;
    uint bh = aw;
    uint ah = 3;
    uint bw = 3;
    uint ch = ah;
    uint cw = bw;

    // NOTE(review): assumes AA() yields ah * aw values and BB() yields bh * bw values - confirm.
    var dataA = MatrixUtils.AA();
    var dataB = MatrixUtils.BB();
    var cRes = new float[ch * cw];

    var cuby = new CublasClr.Cublas();
    var aa = new CudaArray();
    var res = aa.ResetDevice();

    // A must be declared with its own shape (ah x aw). It was previously declared with
    // C's shape (ch x cw), which made the inner dimension disagree with B's row count.
    GpuMatrix gpuA;
    res = res + GpuMatrixOps.SetupGpuMatrix(
        out gpuA,
        new Matrix<float>(
            _rows: ah,
            _cols: aw,
            host_data: ImmutableArray.Create(dataA),
            matrixFormat: MatrixFormat.Column_Major));

    GpuMatrix gpuB;
    res = res + GpuMatrixOps.SetupGpuMatrix(
        out gpuB,
        new Matrix<float>(
            _rows: bh,
            _cols: bw,
            host_data: ImmutableArray.Create(dataB),
            matrixFormat: MatrixFormat.Column_Major));

    GpuMatrix gpuC;
    res = res + GpuMatrixOps.SetupGpuMatrix(
        out gpuC,
        new Matrix<float>(
            _rows: ch,
            _cols: cw,
            host_data: ImmutableArray.Create(cRes),
            matrixFormat: MatrixFormat.Column_Major));

    IntPtr cublasHandle = new IntPtr();
    res = res + cuby.MakeCublasHandle(ref cublasHandle);

    GpuMatrix gpuProd;
    res = res + GpuMatrixOps.Multiply(
        gmOut: out gpuProd,
        cublasHandle: cublasHandle,
        gmA: gpuA,
        gmB: gpuB,
        gmC: gpuC);

    GpuMatrix gpuSynched;
    res = res + GpuMatrixOps.CopyToHost(out gpuSynched, gpuProd);

    // Release the cuBLAS handle; skip when creation left it at its initial zero value.
    if (cublasHandle != IntPtr.Zero)
    {
        res = res + cuby.DestroyCublasHandle(cublasHandle);
    }

    // NOTE(review): the product is not checked against a CPU reference. A call of the form
    // GpuMatrixUtils.MatrixMult(C: cRes, A: dataA, B: dataB, wA: aw, hA: ah, wB: bw)
    // was left disabled in the original.

    // Report accumulated errors instead of unconditionally returning an empty string.
    return res;
}
/// <summary>
/// Smoke test for the cuBLAS handle lifecycle: resets the device, creates a
/// handle and destroys it again.
/// </summary>
/// <returns>
/// "TestCublasHandle pass" when every call returned an empty status string;
/// otherwise "TestCublasHandle fail: " followed by the concatenated status
/// strings or, if an exception was thrown, by the exception message.
/// </returns>
static string TestCublasHandle()
{
    string testName = "TestCublasHandle";
    var cuby = new CublasClr.Cublas();
    IntPtr devHandle = new IntPtr();
    var aa = new CudaArray();

    try
    {
        var res = aa.ResetDevice();
        res = res + cuby.MakeCublasHandle(ref devHandle);
        res = res + cuby.DestroyCublasHandle(devHandle);

        if (res != String.Empty)
        {
            return testName + " fail: " + res;
        }

        return testName + " pass";
    }
    catch (Exception ex)
    {
        // Keep the cause of the failure instead of discarding it.
        return testName + " fail: " + ex.Message;
    }
    finally
    {
        // Runs on every path, including the early returns above.
        aa.ResetDevice();
    }
}
/// <summary>
/// Evaluates C = α op(A) op(B) + β C through CublasClr.Cublas.cublasSgemm with
/// α = 1, β = 0 and op = CublasOp.N (no transposition) for both operands.
/// </summary>
/// <param name="gmOut">
/// On success, a new GpuMatrix that reuses gmC's matrix and device pointer and is
/// marked DevHostState.DeviceIsNewer; null on any failure.
/// </param>
/// <param name="cublasHandle">Handle passed through unchanged to cublasSgemm.</param>
/// <param name="gmA">Left operand; its row and column counts supply m and k.</param>
/// <param name="gmB">Right operand; its column count supplies n.</param>
/// <param name="gmC">Destination whose device pointer receives the product.</param>
/// <returns>String.Empty on success; otherwise a description of the failure.</returns>
public static string Multiply(
    out GpuMatrix gmOut,
    IntPtr cublasHandle,
    GpuMatrix gmA,
    GpuMatrix gmB,
    GpuMatrix gmC)
{
    // Every failure path reports null; only the success path at the bottom overwrites it.
    gmOut = null;

    // Report missing arguments the same way as every other failure (an error string)
    // rather than letting a NullReferenceException escape.
    if (object.ReferenceEquals(gmA, null))
    {
        return "Matrix A is null";
    }
    if (object.ReferenceEquals(gmB, null))
    {
        return "Matrix B is null";
    }
    if (object.ReferenceEquals(gmC, null))
    {
        return "Matrix C is null";
    }

    if (gmA.DevHostState == DevHostState.DeviceNotAllocated)
    {
        return "Device data pointer for matrix A not allocated";
    }
    if (gmB.DevHostState == DevHostState.DeviceNotAllocated)
    {
        return "Device data pointer for matrix B not allocated";
    }
    if (gmC.DevHostState == DevHostState.DeviceNotAllocated)
    {
        return "Device data pointer for matrix C not allocated";
    }

    // The product is only defined when A's column count equals B's row count. Without
    // this check a B with more rows than A has columns is accepted and only part of it
    // is used, because ldb (B's row count) is then larger than k.
    if (gmA.Matrix.Cols != gmB.Matrix.Rows)
    {
        return "Inner dimensions of matrix A and matrix B do not match";
    }

    // C has to hold at least Rows(A) x Cols(B) values. A larger C is still accepted,
    // as it was before this check existed.
    // NOTE(review): assumes the buffer behind gmC.DevPtr holds Rows * Cols floats - confirm.
    if (gmC.Matrix.Rows < gmA.Matrix.Rows || gmC.Matrix.Cols < gmB.Matrix.Cols)
    {
        return "Matrix C is too small to hold the product of matrix A and matrix B";
    }

    var cuby = new CublasClr.Cublas();

    // Each leading dimension is the matrix's own row count.
    // NOTE(review): that corresponds to column-major storage - confirm for all callers.
    var strRet = cuby.cublasSgemm(
        cublas_handle: cublasHandle,
        transa: CublasOp.N,
        transb: CublasOp.N,
        m: (int)gmA.Matrix.Rows,
        n: (int)gmB.Matrix.Cols,
        k: (int)gmA.Matrix.Cols,
        alpha: 1,
        dev_A: gmA.DevPtr,
        lda: (int)gmA.Matrix.Rows,
        dev_B: gmB.DevPtr,
        ldb: (int)gmB.Matrix.Rows,
        beta: 0,
        dev_C: gmC.DevPtr,
        ldc: (int)gmC.Matrix.Rows
    );

    if (!String.IsNullOrEmpty(strRet))
    {
        return strRet;
    }

    // The result lives at gmC's device pointer; the state flag is set to DeviceIsNewer.
    gmOut = new GpuMatrix(
        matrix: gmC.Matrix,
        devPtr: gmC.DevPtr,
        devHostState: DevHostState.DeviceIsNewer);

    return String.Empty;
}
/// <summary>
/// Multiplies two 5x5 matrices (built by MatrixUtils.MakeIdentity and
/// MatrixUtils.MakeIdentiPoke) into a zero-initialised 5x5 matrix through
/// GpuMatrixOps.Multiply, copies the product back to the host and computes
/// CPU reference products with MatrixUtils.RowMajorMatrixMult.
/// </summary>
/// <returns>
/// The concatenation of every status string returned by the setup, multiply,
/// copy and teardown calls; empty when all of them reported success.
/// </returns>
static string TestcublasSgemm1()
{
    // Shapes: A is ah x aw, B is bh x bw, C is ch x cw (rows x cols).
    uint aw = 5;
    uint bh = aw;
    uint ah = 5;
    uint bw = 5;
    uint ch = ah;
    uint cw = bw;

    var cuby = new CublasClr.Cublas();
    var aa = new CudaArray();
    var res = aa.ResetDevice();

    var dataA = MatrixUtils.MakeIdentity(rows: ah, cols: aw);
    GpuMatrix gpuA;
    res = res + GpuMatrixOps.SetupGpuMatrix(
        out gpuA,
        new Matrix<float>(
            _rows: ah,
            _cols: aw,
            host_data: ImmutableArray.Create(dataA),
            matrixFormat: MatrixFormat.Column_Major));

    var dataB = MatrixUtils.MakeIdentiPoke(rows: bh, cols: bw);
    GpuMatrix gpuB;
    res = res + GpuMatrixOps.SetupGpuMatrix(
        out gpuB,
        new Matrix<float>(
            _rows: bh,
            _cols: bw,
            host_data: ImmutableArray.Create(dataB),
            matrixFormat: MatrixFormat.Column_Major));

    // Size the host buffer from C's own dimensions so it always agrees with the matrix
    // declared below. It was previously sized from B's dimensions, which only matched
    // because every dimension in this test is 5.
    var dataC = MatrixUtils.MakeZeroes(rows: ch, cols: cw);
    GpuMatrix gpuC;
    res = res + GpuMatrixOps.SetupGpuMatrix(
        out gpuC,
        new Matrix<float>(
            _rows: ch,
            _cols: cw,
            host_data: ImmutableArray.Create(dataC),
            matrixFormat: MatrixFormat.Column_Major));

    IntPtr cublasHandle = new IntPtr();
    res = res + cuby.MakeCublasHandle(ref cublasHandle);

    GpuMatrix gpuProd;
    res = res + GpuMatrixOps.Multiply(
        gmOut: out gpuProd,
        cublasHandle: cublasHandle,
        gmA: gpuA,
        gmB: gpuB,
        gmC: gpuC);

    GpuMatrix gpuSynched;
    res = res + GpuMatrixOps.CopyToHost(out gpuSynched, gpuProd);

    // Release the cuBLAS handle; skip when creation left it at its initial zero value.
    if (cublasHandle != IntPtr.Zero)
    {
        res = res + cuby.DestroyCublasHandle(cublasHandle);
    }

    // CPU reference products: A * B and B * A.
    // NOTE(review): neither result is compared with gpuSynched, so this test only
    // surfaces status-string errors. TODO: add the comparison once the accessor for
    // the host-side data is confirmed.
    var cpuRes = new float[ah * bw];
    MatrixUtils.RowMajorMatrixMult(C: cpuRes, A: dataA, B: dataB, wA: aw, hA: ah, wB: bw);

    var cpuRes2 = new float[bh * aw];
    MatrixUtils.RowMajorMatrixMult(C: cpuRes2, A: dataB, B: dataA, wA: bw, hA: bh, wB: aw);

    return res;
}