/// <summary>
/// Checks the GPU banded matrix-vector product (GBMV) against a plain CPU reference,
/// first for y = Alpha * A * x + Beta * y and then for the transposed form
/// y = Alpha * A^T * x + Beta * y.
/// </summary>
/// <remarks>
/// The reference loops read the uncompressed matrix <c>hiMatrixA</c> through
/// <c>GetIndexColumnMajor</c>, while the device receives the output of
/// <c>CompressBandedMatrixToCBC</c>. Results are compared with a relative tolerance
/// because the GPU is free to accumulate the dot products in a different order than
/// the CPU loop does. Device allocations are released even when an assertion fails.
/// </remarks>
public void Test_BLAS2_GBMV()
{
    // Relative tolerance for CPU-vs-GPU comparison; scaled by the expected magnitude
    // (with a floor of 1.0 so results near zero still get an absolute margin).
    const double relativeTolerance = 1e-10;

    // Reset every host buffer so data from earlier tests cannot leak into this one.
    ClearBuffer(hiMatrixA);
    ClearBuffer(hiMatrixACBC);
    ClearBuffer(hiVectorXM);
    ClearBuffer(hiVectorXN);
    ClearBuffer(hiVectorYM);
    ClearBuffer(hiVectorYN);

    // Build the M x N banded matrix (KL sub-diagonals, KU super-diagonals) and its
    // compressed counterpart that is uploaded to the device.
    CreateBandedMatrix(hiMatrixA, M, N, KL, KU);
    CompressBandedMatrixToCBC(hiMatrixA, hiMatrixACBC, M, N, KL, KU);

    FillBuffer(hiVectorXM);
    FillBuffer(hiVectorXN);
    FillBuffer(hiVectorYM);
    FillBuffer(hiVectorYN);

    try
    {
        diMatrixA = _gpu.CopyToDevice(hiMatrixACBC);
        diVectorXM = _gpu.CopyToDevice(hiVectorXM);
        diVectorXN = _gpu.CopyToDevice(hiVectorXN);

        // Test without transpose: x has N elements, y has M elements.
        diVectorYM = _gpu.CopyToDevice(hiVectorYM);
        _blas.GBMV(M, N, KL, KU, Alpha, diMatrixA, diVectorXN, Beta, diVectorYM);
        _gpu.CopyFromDevice(diVectorYM, gpuResultM);

        for (int i = 0; i < M; i++)
        {
            double cpuResult = 0.0;

            for (int j = 0; j < N; j++)
            {
                cpuResult += Alpha * hiMatrixA[GetIndexColumnMajor(i, j, M)] * hiVectorXN[j];
            }

            // hiVectorYM still holds the pre-call y values; only the device copy was updated.
            cpuResult += Beta * hiVectorYM[i];

            double delta = relativeTolerance * System.Math.Max(1.0, System.Math.Abs(cpuResult));
            Assert.AreEqual(cpuResult, gpuResultM[i], delta);
        }

        // Test with transpose: x has M elements, y has N elements.
        diVectorYN = _gpu.CopyToDevice(hiVectorYN);
        _blas.GBMV(M, N, KL, KU, Alpha, diMatrixA, diVectorXM, Beta, diVectorYN, cublasOperation.T);
        _gpu.CopyFromDevice(diVectorYN, gpuResultN);

        for (int j = 0; j < N; j++)
        {
            double cpuResult = 0.0;

            // Walking down column j of A is the same as walking along row j of A^T.
            for (int i = 0; i < M; i++)
            {
                cpuResult += Alpha * hiMatrixA[GetIndexColumnMajor(i, j, M)] * hiVectorXM[i];
            }

            cpuResult += Beta * hiVectorYN[j];

            double delta = relativeTolerance * System.Math.Max(1.0, System.Math.Abs(cpuResult));
            Assert.AreEqual(cpuResult, gpuResultN[j], delta);
        }
    }
    finally
    {
        // Always release device memory, including when an assertion above throws,
        // so a failure here does not starve subsequent tests of GPU memory.
        _gpu.FreeAll();
    }
}