// Checks _blas.SYRK against a CPU reference in four configurations:
// {lower, upper} fill mode x {no transpose, transpose} of A.
// The CPU reference for each checked element (i, j) is
//     Alpha * sum_k A(i,k) * A(j,k) + Beta * C(i,j)      (no transpose, A stored M x K)
//     Alpha * sum_k A(k,i) * A(k,j) + Beta * C(i,j)      (transpose,    A stored K x M)
// Only the triangle selected by the fill mode is compared.
// Device memory is released in a finally block so that a failing assertion in
// one configuration does not leak allocations into later cases or tests.
public void Test_BLAS3_SYRK()
{
    // Lower fill mode, No transpose (both taken from SYRK's default arguments).
    try
    {
        UploadSyrkOperands(false);
        _blas.SYRK(M, K, Alpha, diMatrixA, Beta, diMatrixC);
        AssertSyrkTriangleMatchesCpu(false, false);
    }
    finally
    {
        _gpu.FreeAll();
    }

    // Lower fill mode (default argument), Transpose.
    try
    {
        UploadSyrkOperands(true);
        _blas.SYRK(M, K, Alpha, diMatrixA, Beta, diMatrixC, cublasOperation.T);
        AssertSyrkTriangleMatchesCpu(true, false);
    }
    finally
    {
        _gpu.FreeAll();
    }

    // Upper fill mode, No transpose.
    try
    {
        UploadSyrkOperands(false);
        _blas.SYRK(M, K, Alpha, diMatrixA, Beta, diMatrixC, cublasOperation.N, cublasFillMode.Upper);
        AssertSyrkTriangleMatchesCpu(false, true);
    }
    finally
    {
        _gpu.FreeAll();
    }

    // Upper fill mode, Transpose.
    try
    {
        UploadSyrkOperands(true);
        _blas.SYRK(M, K, Alpha, diMatrixA, Beta, diMatrixC, cublasOperation.T, cublasFillMode.Upper);
        AssertSyrkTriangleMatchesCpu(true, true);
    }
    finally
    {
        _gpu.FreeAll();
    }
}

/// <summary>
/// Resets and refills the host operands for one SYRK case and copies them to
/// the device, storing the device handles in <c>diMatrixA</c> and <c>diMatrixC</c>.
/// </summary>
/// <param name="transposeA">
/// <c>true</c> to use <c>hiMatrixAKM</c> as the A operand, <c>false</c> to use
/// <c>hiMatrixAMK</c>.
/// </param>
private void UploadSyrkOperands(bool transposeA)
{
    // NOTE(review): assumes both A buffers have the same array type - confirm.
    var hostA = transposeA ? hiMatrixAKM : hiMatrixAMK;

    // Same call order as the original inline code, in case the fill helpers
    // depend on a shared generator state.
    ClearBuffer(hostA);
    ClearBuffer(hiMatrixCMM);
    FillBuffer(hostA);
    FillBuffer(hiMatrixCMM);

    // Presumably mirrors one triangle of C onto the other; helper is defined elsewhere.
    ConverToSymmetric(hiMatrixCMM, M);

    diMatrixA = _gpu.CopyToDevice(hostA);
    diMatrixC = _gpu.CopyToDevice(hiMatrixCMM);
}

/// <summary>
/// Copies <c>diMatrixC</c> back into <c>gpuResultMM</c> and compares one triangle
/// of it, element by element, with a CPU-computed reference.
/// </summary>
/// <param name="transposeA">
/// <c>true</c> when SYRK was invoked with <c>cublasOperation.T</c>; the reference
/// then reads <c>hiMatrixAKM</c> with leading dimension <c>K</c>, otherwise
/// <c>hiMatrixAMK</c> with leading dimension <c>M</c>.
/// </param>
/// <param name="upperTriangle">
/// <c>true</c> to check elements with <c>j &gt;= i</c>, <c>false</c> to check
/// elements with <c>j &lt;= i</c>.
/// </param>
private void AssertSyrkTriangleMatchesCpu(bool transposeA, bool upperTriangle)
{
    // CPU and GPU may accumulate the K products in a different order (or with
    // fused multiply-add), so results can differ in the last few bits.
    const double relativeTolerance = 1e-9;

    _gpu.CopyFromDevice(diMatrixC, gpuResultMM);

    for (int i = 0; i < M; i++)
    {
        int firstColumn = upperTriangle ? i : 0;
        int lastColumn = upperTriangle ? M - 1 : i;

        for (int j = firstColumn; j <= lastColumn; j++)
        {
            double cpuResult = 0.0;

            for (int k = 0; k < K; k++)
            {
                if (transposeA)
                {
                    cpuResult += Alpha * hiMatrixAKM[GetIndexColumnMajor(k, i, K)] * hiMatrixAKM[GetIndexColumnMajor(k, j, K)];
                }
                else
                {
                    cpuResult += Alpha * hiMatrixAMK[GetIndexColumnMajor(i, k, M)] * hiMatrixAMK[GetIndexColumnMajor(j, k, M)];
                }
            }

            cpuResult += Beta * hiMatrixCMM[GetIndexColumnMajor(i, j, M)];

            // Scale the tolerance with the magnitude of the expected value, but
            // never below the absolute floor given by relativeTolerance itself.
            double delta = relativeTolerance * System.Math.Max(1.0, System.Math.Abs(cpuResult));
            Assert.AreEqual(cpuResult, gpuResultMM[GetIndexColumnMajor(i, j, M)], delta);
        }
    }
}