// Checks _blas.SYR2K against a CPU reference computation for four scenarios:
// lower/upper fill mode, each with and without the transpose operation.
// Only the triangle selected by the fill mode is compared in each scenario.
public void Test_BLAS3_SYR2K()
{
    // Lower fill mode, No transpose
    RunSyr2kCase(false, false,
        () => _blas.SYR2K(M, K, Alpha, diMatrixA, diMatrixB, Beta, diMatrixC));

    // Lower fill mode, With Transpose
    RunSyr2kCase(true, false,
        () => _blas.SYR2K(M, K, Alpha, diMatrixA, diMatrixB, Beta, diMatrixC, cublasOperation.T));

    // Upper fill mode, No transpose
    RunSyr2kCase(false, true,
        () => _blas.SYR2K(M, K, Alpha, diMatrixA, diMatrixB, Beta, diMatrixC, cublasOperation.N, cublasFillMode.Upper));

    // Upper fill mode, With Transpose
    RunSyr2kCase(true, true,
        () => _blas.SYR2K(M, K, Alpha, diMatrixA, diMatrixB, Beta, diMatrixC, cublasOperation.T, cublasFillMode.Upper));
}

/// <summary>
/// Runs a single SYR2K scenario: resets and refills the host buffers, copies them to the
/// device, invokes the supplied SYR2K call, copies the result back and compares one
/// triangle of it against a CPU reference.
/// </summary>
/// <param name="transposed">
/// When true the hiMatrixAKM/hiMatrixBKM buffers are used and indexed as (k, i) with K as
/// the third GetIndexColumnMajor argument; otherwise hiMatrixAMK/hiMatrixBMK are used and
/// indexed as (i, k) with M.
/// </param>
/// <param name="upper">
/// When true elements with column >= row are verified; otherwise elements with column &lt;= row.
/// </param>
/// <param name="invokeSyr2k">
/// The SYR2K call under test. It is invoked after diMatrixA, diMatrixB and diMatrixC have
/// been assigned, so it may reference those fields directly.
/// </param>
private void RunSyr2kCase(bool transposed, bool upper, System.Action invokeSyr2k)
{
    var hostA = transposed ? hiMatrixAKM : hiMatrixAMK;
    var hostB = transposed ? hiMatrixBKM : hiMatrixBMK;

    ClearBuffer(hostA);
    ClearBuffer(hostB);
    ClearBuffer(hiMatrixCMM);

    FillBuffer(hostA);
    FillBuffer(hostB);
    FillBuffer(hiMatrixCMM);
    ConverToSymmetric(hiMatrixCMM, M);

    try
    {
        diMatrixA = _gpu.CopyToDevice(hostA);
        diMatrixB = _gpu.CopyToDevice(hostB);
        diMatrixC = _gpu.CopyToDevice(hiMatrixCMM);

        invokeSyr2k();

        _gpu.CopyFromDevice(diMatrixC, gpuResultMM);

        for (int row = 0; row < M; row++)
        {
            // Restrict the comparison to the triangle selected by the fill mode.
            var firstCol = upper ? row : 0;
            var lastCol = upper ? M - 1 : row;

            for (var col = firstCol; col <= lastCol; col++)
            {
                double expected = 0.0;

                for (int k = 0; k < K; k++)
                {
                    var rowIdx = transposed
                        ? GetIndexColumnMajor(k, row, K)
                        : GetIndexColumnMajor(row, k, M);
                    var colIdx = transposed
                        ? GetIndexColumnMajor(k, col, K)
                        : GetIndexColumnMajor(col, k, M);

                    expected += Alpha * (hostA[rowIdx] * hostB[colIdx] + hostB[rowIdx] * hostA[colIdx]);
                }

                expected += Beta * hiMatrixCMM[GetIndexColumnMajor(row, col, M)];

                // Exact equality on floating-point results is brittle; compare with a small
                // relative tolerance instead.
                // NOTE(review): 1e-9 assumes double-precision buffers - confirm and adjust if needed.
                double tolerance = 1e-9 * System.Math.Max(1.0, System.Math.Abs(expected));

                Assert.AreEqual(
                    expected,
                    gpuResultMM[GetIndexColumnMajor(row, col, M)],
                    tolerance,
                    "SYR2K mismatch at (" + row + ", " + col + "), transposed=" + transposed + ", upper=" + upper);
            }
        }
    }
    finally
    {
        // Release device allocations even when an assertion above throws.
        _gpu.FreeAll();
    }
}