/// <summary>
/// Runs <c>_blas.TRMM</c> for all eight combinations of fill mode (lower/upper),
/// side (left/right) and operation (no transpose/transpose), and compares every
/// element of the device result with a plain CPU triple-loop product.
/// </summary>
public void Test_BLAS3_TRMM()
{
    // Lower fill mode, Side left, No transpose
    // (no optional arguments passed, so this relies on TRMM's own defaults)
    RunTrmmCase(true, false, false,
        () => _blas.TRMM(M, N, Alpha, diMatrixA, diMatrixB, diMatrixC));

    // Lower fill mode, Side left, Transpose
    RunTrmmCase(true, true, false,
        () => _blas.TRMM(M, N, Alpha, diMatrixA, diMatrixB, diMatrixC, cublasSideMode.Left, cublasOperation.T));

    // Lower fill mode, Side right, No transpose
    RunTrmmCase(false, false, false,
        () => _blas.TRMM(M, N, Alpha, diMatrixA, diMatrixB, diMatrixC, cublasSideMode.Right));

    // Lower fill mode, Side right, Transpose
    RunTrmmCase(false, true, false,
        () => _blas.TRMM(M, N, Alpha, diMatrixA, diMatrixB, diMatrixC, cublasSideMode.Right, cublasOperation.T));

    // Upper fill mode, Side left, No transpose
    RunTrmmCase(true, false, true,
        () => _blas.TRMM(M, N, Alpha, diMatrixA, diMatrixB, diMatrixC, cublasSideMode.Left, cublasOperation.N, cublasFillMode.Upper));

    // Upper fill mode, Side left, Transpose
    RunTrmmCase(true, true, true,
        () => _blas.TRMM(M, N, Alpha, diMatrixA, diMatrixB, diMatrixC, cublasSideMode.Left, cublasOperation.T, cublasFillMode.Upper));

    // Upper fill mode, Side right, No transpose
    RunTrmmCase(false, false, true,
        () => _blas.TRMM(M, N, Alpha, diMatrixA, diMatrixB, diMatrixC, cublasSideMode.Right, cublasOperation.N, cublasFillMode.Upper));

    // Upper fill mode, Side right, Transpose
    RunTrmmCase(false, true, true,
        () => _blas.TRMM(M, N, Alpha, diMatrixA, diMatrixB, diMatrixC, cublasSideMode.Right, cublasOperation.T, cublasFillMode.Upper));
}

/// <summary>
/// Prepares the host operands for one TRMM configuration, uploads them, runs the
/// supplied TRMM call, downloads the result and verifies it against the CPU.
/// Device memory is released even when the call or an assertion throws.
/// </summary>
/// <param name="sideLeft">true: A is the M x M left operand (hiMatrixAMM); false: A is the N x N right operand (hiMatrixANN).</param>
/// <param name="transposeA">true when the TRMM call under test uses the transpose of A.</param>
/// <param name="upperFill">true to populate the upper triangle of A, false for the lower triangle.</param>
/// <param name="invokeTrmm">The exact TRMM call to test; invoked after diMatrixA/B/C have been assigned.</param>
private void RunTrmmCase(bool sideLeft, bool transposeA, bool upperFill, System.Action invokeTrmm)
{
    var hostA = sideLeft ? hiMatrixAMM : hiMatrixANN;
    int order = sideLeft ? M : N;

    // The two band arguments are (order - 1, 0) for the "lower" cases and
    // (0, order - 1) for the "upper" cases -- presumably (sub-diagonals, super-diagonals);
    // confirm against CreateBandedMatrix.
    int firstBand = upperFill ? 0 : order - 1;
    int secondBand = upperFill ? order - 1 : 0;

    ClearBuffer(hostA);
    ClearBuffer(hiMatrixBMN);
    CreateBandedMatrix(hostA, order, order, firstBand, secondBand);
    FillBuffer(hiMatrixBMN);
    FillBuffer(hiMatrixCMN);

    try
    {
        diMatrixA = _gpu.CopyToDevice(hostA);
        diMatrixB = _gpu.CopyToDevice(hiMatrixBMN);
        diMatrixC = _gpu.Allocate(hiMatrixCMN);

        invokeTrmm();

        _gpu.CopyFromDevice(diMatrixC, gpuResultMN);
        AssertTrmmMatchesCpu(sideLeft, transposeA);
    }
    finally
    {
        // Always release device allocations so a failing case cannot leak into later tests.
        _gpu.FreeAll();
    }
}

/// <summary>
/// Recomputes Alpha * op(A) * B (left side) or Alpha * B * op(A) (right side) on the
/// CPU and asserts that every element of gpuResultMN matches within a small
/// relative tolerance.
/// </summary>
/// <param name="sideLeft">true when A multiplies B from the left.</param>
/// <param name="transposeA">true when A is read transposed.</param>
private void AssertTrmmMatchesCpu(bool sideLeft, bool transposeA)
{
    var hostA = sideLeft ? hiMatrixAMM : hiMatrixANN;
    int inner = sideLeft ? M : N;

    for (int i = 0; i < M; i++)
    {
        // The TRMM result is a general M x N matrix, so every column is checked
        // (not only the j >= i part).
        for (int j = 0; j < N; j++)
        {
            double cpuResult = 0.0;
            for (int k = 0; k < inner; k++)
            {
                if (sideLeft)
                {
                    var aIndex = transposeA ? GetIndexColumnMajor(k, i, M) : GetIndexColumnMajor(i, k, M);
                    cpuResult += Alpha * hostA[aIndex] * hiMatrixBMN[GetIndexColumnMajor(k, j, M)];
                }
                else
                {
                    var aIndex = transposeA ? GetIndexColumnMajor(j, k, N) : GetIndexColumnMajor(k, j, N);
                    cpuResult += Alpha * hiMatrixBMN[GetIndexColumnMajor(i, k, M)] * hostA[aIndex];
                }
            }

            // CPU and GPU may round or order the summation differently; compare with a
            // relative tolerance instead of exact floating-point equality.
            double tolerance = 1e-10 * System.Math.Max(1.0, System.Math.Abs(cpuResult));
            Assert.AreEqual(cpuResult, gpuResultMN[GetIndexColumnMajor(i, j, M)], tolerance);
        }
    }
}