/// <summary>
/// Exercises <c>_blas.TRSM</c> for every combination of fill mode (Lower, Upper),
/// side (Left, Right) and operation (N, T), in that nesting order, and prints the
/// maximum absolute residual of each solve to the console.
/// </summary>
/// <remarks>
/// The method only reports the error; it does not assert on it.
/// </remarks>
public void Test_BLAS3_TRSM()
{
    var fillModes = new[] { cublasFillMode.Lower, cublasFillMode.Upper };
    var sideModes = new[] { cublasSideMode.Left, cublasSideMode.Right };
    var operations = new[] { cublasOperation.N, cublasOperation.T };

    foreach (var fill in fillModes)
    {
        foreach (var side in sideModes)
        {
            foreach (var trans in operations)
            {
                RunTrsmCase(side, trans, fill);
            }
        }
    }
}

/// <summary>
/// Runs a single TRSM solve on the GPU and prints the maximum absolute residual.
/// </summary>
/// <param name="side">Side on which A is applied; Left uses hiMatrixAMM (order M), otherwise hiMatrixANN (order N).</param>
/// <param name="trans">Operation applied to A; anything other than <c>cublasOperation.N</c> is checked as a transpose.</param>
/// <param name="fill">Fill mode forwarded to TRSM.</param>
private void RunTrsmCase(cublasSideMode side, cublasOperation trans, cublasFillMode fill)
{
    bool leftSide = side == cublasSideMode.Left;
    bool transposed = trans != cublasOperation.N;

    // A is square: order M when it multiplies from the left, order N from the right.
    var hostA = leftSide ? hiMatrixAMM : hiMatrixANN;
    var order = leftSide ? M : N;

    // Reset the host operands. Judging by its name, CreateMainDiagonalOnlyMatrix
    // populates only the main diagonal of A -- TODO confirm against the helper.
    ClearBuffer(hostA);
    ClearBuffer(hiMatrixBMN);
    CreateMainDiagonalOnlyMatrix(hostA, order);
    FillBuffer(hiMatrixBMN);

    try
    {
        diMatrixA = _gpu.CopyToDevice(hostA);
        diMatrixB = _gpu.CopyToDevice(hiMatrixBMN);

        // All three mode arguments are passed explicitly so that the call always
        // matches the printed label instead of depending on TRSM's default values.
        _blas.TRSM(M, N, Alpha, diMatrixA, diMatrixB, side, trans, fill);

        // The result is read back from the B device buffer.
        _gpu.CopyFromDevice(diMatrixB, gpuResultMN);
    }
    finally
    {
        // Release device memory even when a copy or the solve throws, so a failing
        // case does not leave allocations behind for the following ones.
        _gpu.FreeAll();
    }

    // The residual only needs host data, so it is computed after the device is freed.
    double maxError = ComputeTrsmMaxResidual(leftSide, transposed);

    string caption = string.Format(
        "{0} fill mode, Side {1}, {2}",
        fill == cublasFillMode.Lower ? "Lower" : "Upper",
        leftSide ? "left" : "right",
        transposed ? "Transpose" : "No transpose");
    Console.WriteLine("Max error : {0} ({1})", maxError, caption);
}

/// <summary>
/// Computes the largest absolute element of <c>op(A)*X - Alpha*B</c> (left side) or
/// <c>X*op(A) - Alpha*B</c> (right side), where X is <c>gpuResultMN</c> and B is
/// <c>hiMatrixBMN</c>.
/// </summary>
/// <param name="leftSide">True to check <c>op(A)*X</c> with hiMatrixAMM; false to check <c>X*op(A)</c> with hiMatrixANN.</param>
/// <param name="transposed">True when op(A) is the transpose of A, i.e. A is read with swapped indices.</param>
/// <returns>The maximum absolute residual over all M x N elements.</returns>
private double ComputeTrsmMaxResidual(bool leftSide, bool transposed)
{
    var hostA = leftSide ? hiMatrixAMM : hiMatrixANN;
    var order = leftSide ? M : N;

    double worst = 0.0;
    for (int row = 0; row < M; row++)
    {
        for (int col = 0; col < N; col++)
        {
            double product = 0.0;
            for (int k = 0; k < order; k++)
            {
                if (leftSide)
                {
                    // (op(A) * X)[row, col]: op(A)[row, k] is A[k, row] when transposed.
                    var aIndex = transposed
                        ? GetIndexColumnMajor(k, row, order)
                        : GetIndexColumnMajor(row, k, order);
                    product += hostA[aIndex] * gpuResultMN[GetIndexColumnMajor(k, col, M)];
                }
                else
                {
                    // (X * op(A))[row, col]: op(A)[k, col] is A[col, k] when transposed.
                    var aIndex = transposed
                        ? GetIndexColumnMajor(col, k, order)
                        : GetIndexColumnMajor(k, col, order);
                    product += gpuResultMN[GetIndexColumnMajor(row, k, M)] * hostA[aIndex];
                }
            }

            double error = Math.Abs(product - Alpha * hiMatrixBMN[GetIndexColumnMajor(row, col, M)]);
            if (worst < error)
            {
                worst = error;
            }
        }
    }

    return worst;
}