/// <summary>
/// Exercises <c>GpuMatrixOps.Multiply</c> on small column-major matrices:
/// resets the device, uploads operands A, B and an output buffer C, creates
/// a cuBLAS handle, runs the multiply, and copies the product back to the host.
/// </summary>
/// <returns>
/// Always <see cref="string.Empty"/>. The accumulated result of the individual
/// calls is not inspected by this method.
/// </returns>
static string TestcublasSgemm2()
{
    // Operand shapes: A is ah x aw, B is bh x bw, C is ch x cw.
    // bh is tied to aw so that A's column count equals B's row count.
    uint aw = 2;
    uint bh = aw;
    uint ah = 3;
    uint bw = 3;
    uint ch = ah;
    uint cw = bw;

    GpuMatrix gpuA;
    GpuMatrix gpuB;
    GpuMatrix gpuC;

    var dataA = MatrixUtils.AA();
    var dataB = MatrixUtils.BB();
    var cRes = new float[ch * cw];

    var cuby = new CublasClr.Cublas();
    var aa = new CudaArray();

    // NOTE(review): res collects the return values of every call below but is
    // never checked or returned - presumably it carries error text; confirm
    // and consider surfacing it to the caller.
    var res = aa.ResetDevice();

    // A must be declared as ah x aw (it was previously declared with C's
    // dimensions, which made A and B non-conformable for a product).
    // NOTE(review): assumes MatrixUtils.AA() yields ah * aw elements - confirm.
    res = res + GpuMatrixOps.SetupGpuMatrix(
        out gpuA,
        new Matrix<float>(
            _rows: ah,
            _cols: aw,
            host_data: ImmutableArray.Create(dataA),
            matrixFormat: MatrixFormat.Column_Major));

    res = res + GpuMatrixOps.SetupGpuMatrix(
        out gpuB,
        new Matrix<float>(
            _rows: bh,
            _cols: bw,
            host_data: ImmutableArray.Create(dataB),
            matrixFormat: MatrixFormat.Column_Major));

    res = res + GpuMatrixOps.SetupGpuMatrix(
        out gpuC,
        new Matrix<float>(
            _rows: ch,
            _cols: cw,
            host_data: ImmutableArray.Create(cRes),
            matrixFormat: MatrixFormat.Column_Major));

    IntPtr cublasHandle = IntPtr.Zero;
    res = res + cuby.MakeCublasHandle(ref cublasHandle);

    GpuMatrix gpuProd;
    res = res + GpuMatrixOps.Multiply(
        gmOut: out gpuProd,
        cublasHandle: cublasHandle,
        gmA: gpuA,
        gmB: gpuB,
        gmC: gpuC);

    GpuMatrix gpuSynched;
    res = res + GpuMatrixOps.CopyToHost(out gpuSynched, gpuProd);

    // Host-side reference multiply, currently disabled:
    // GpuMatrixUtils.MatrixMult(C: cRes, A: dataA, B: dataB, wA: aw, hA: ah, wB: bw);

    return string.Empty;
}