public void Test_BLAS3_SYMM()
{
    // Exercises _blas.SYMM for each side/fill-mode combination and checks the
    // GPU result against a CPU reference:
    //   side left : C = Alpha * A * B + Beta * C   (A indexed as M x M)
    //   side right: C = Alpha * B * A + Beta * C   (A indexed as N x N)
    // B and C are indexed as M x N through GetIndexColumnMajor.
    // Every SYMM call below is written exactly as a caller would write it, so
    // the optional side / fill-mode arguments stay under test.

    // Lower fill mode, Side left
    RunSymmCase(
        cublasSideMode.Left,
        () => _blas.SYMM(M, N, Alpha, diMatrixA, diMatrixB, Beta, diMatrixC));

    // Lower fill mode, Side right
    RunSymmCase(
        cublasSideMode.Right,
        () => _blas.SYMM(M, N, Alpha, diMatrixA, diMatrixB, Beta, diMatrixC, cublasSideMode.Right));

    // Upper fill mode, Side left
    RunSymmCase(
        cublasSideMode.Left,
        () => _blas.SYMM(M, N, Alpha, diMatrixA, diMatrixB, Beta, diMatrixC, cublasSideMode.Left, cublasFillMode.Upper));

    // Upper fill mode, Side right
    RunSymmCase(
        cublasSideMode.Right,
        () => _blas.SYMM(M, N, Alpha, diMatrixA, diMatrixB, Beta, diMatrixC, cublasSideMode.Right, cublasFillMode.Upper));
}

/// <summary>
/// Runs a single SYMM scenario: refills the host buffers, uploads them, invokes
/// the supplied SYMM call, downloads C and compares every element with a CPU
/// reference. Device allocations are released even if a step throws or an
/// assertion fails, so one failing scenario cannot leak into later ones.
/// </summary>
/// <param name="side">
/// Side on which the symmetric matrix A multiplies B in the CPU reference;
/// must match the side used by <paramref name="invokeSymm"/>.
/// </param>
/// <param name="invokeSymm">
/// The SYMM call under test. It is invoked after diMatrixA, diMatrixB and
/// diMatrixC have been assigned by this helper.
/// </param>
private void RunSymmCase(cublasSideMode side, System.Action invokeSymm)
{
    // Relative bound on the CPU/GPU difference, scaled by the summed magnitude
    // of the terms. NOTE(review): assumes the GPU may accumulate in a different
    // order than the CPU loop below; tighten or loosen once confirmed on hardware.
    const double RelativeTolerance = 1e-12;

    bool symmetricOnLeft = side == cublasSideMode.Left;

    // A is the M x M buffer when it multiplies from the left, N x N otherwise.
    var hostA = symmetricOnLeft ? hiMatrixAMM : hiMatrixANN;
    int orderA = symmetricOnLeft ? M : N;

    try
    {
        ClearBuffer(hostA);
        ClearBuffer(hiMatrixBMN);
        ClearBuffer(hiMatrixCMN);

        FillBuffer(hostA);
        ConverToSymmetric(hostA, orderA);
        FillBuffer(hiMatrixBMN);
        FillBuffer(hiMatrixCMN);

        diMatrixA = _gpu.CopyToDevice(hostA);
        diMatrixB = _gpu.CopyToDevice(hiMatrixBMN);
        diMatrixC = _gpu.CopyToDevice(hiMatrixCMN);

        invokeSymm();

        _gpu.CopyFromDevice(diMatrixC, gpuResultMN);

        for (int row = 0; row < M; row++)
        {
            for (int col = 0; col < N; col++)
            {
                double expected = 0.0;
                double magnitude = 0.0;

                for (int k = 0; k < orderA; k++)
                {
                    // Operand order inside each product is kept as
                    // (Alpha * first) * second so the reference value is
                    // computed exactly as before.
                    double term = symmetricOnLeft
                        ? Alpha * hostA[GetIndexColumnMajor(row, k, M)] * hiMatrixBMN[GetIndexColumnMajor(k, col, M)]
                        : Alpha * hiMatrixBMN[GetIndexColumnMajor(row, k, M)] * hostA[GetIndexColumnMajor(k, col, N)];

                    expected += term;
                    magnitude += System.Math.Abs(term);
                }

                double scaledC = Beta * hiMatrixCMN[GetIndexColumnMajor(row, col, M)];
                expected += scaledC;
                magnitude += System.Math.Abs(scaledC);

                // Tolerance is proportional to the total magnitude that was
                // summed, which also covers results near zero caused by
                // cancellation; it is 0 (exact compare) only when every term is 0.
                Assert.AreEqual(
                    expected,
                    gpuResultMN[GetIndexColumnMajor(row, col, M)],
                    RelativeTolerance * magnitude);
            }
        }
    }
    finally
    {
        // Always release device memory, including when an assertion fails.
        _gpu.FreeAll();
    }
}