/// <summary>
/// Checks <c>_blas.TBMV</c> against a CPU reference product computed from the dense
/// matrix <c>hiMatrixANN</c>, for a lower and an upper banded matrix, each without
/// and with <c>cublasOperation.T</c>.
/// </summary>
/// <remarks>
/// The device buffers are released in a <c>finally</c> block so that a failing
/// assertion (or an exception from a copy/BLAS call) does not leave them allocated.
/// </remarks>
public void Test_BLAS2_TBMV()
{
    try
    {
        ClearBuffer(hiMatrixASCBC);
        ClearBuffer(hiVectorXN);
        FillBuffer(hiVectorXN);

        diMatrixA = _gpu.Allocate(hiMatrixASCBC);
        diVectorXN = _gpu.Allocate(hiVectorXN);

        // Lower triangular banded matrix
        // NOTE(review): the last two CreateBandedMatrix arguments look like the
        // sub-/super-diagonal counts (K, 0 here and 0, K below) - confirm against the helper.
        ClearBuffer(hiMatrixANN);
        CreateBandedMatrix(hiMatrixANN, N, N, K, 0);
        CompressSymmetricBandedMatrixToCBC(hiMatrixANN, hiMatrixASCBC, N, K, cublasFillMode.Lower);
        _gpu.CopyToDevice(hiMatrixASCBC, diMatrixA);

        // Test without transpose
        // TBMV is called with the same device buffer for input and output, so the
        // input vector is re-uploaded before every call.
        // NOTE(review): this overload presumably defaults to no transpose and lower
        // fill mode - confirm against the TBMV signature.
        _gpu.CopyToDevice(hiVectorXN, diVectorXN);
        _blas.TBMV(N, K, diMatrixA, diVectorXN);
        _gpu.CopyFromDevice(diVectorXN, gpuResultN);

        // CPU reference: row i of the dense matrix times x.
        // NOTE(review): results are compared for exact equality; consider a tolerance
        // if the device accumulates in a different order than this loop.
        for (int i = 0; i < N; i++)
        {
            double cpuResult = 0.0;

            for (int j = 0; j < N; j++)
            {
                cpuResult += hiMatrixANN[GetIndexColumnMajor(i, j, N)] * hiVectorXN[j];
            }

            Assert.AreEqual(cpuResult, gpuResultN[i]);
        }

        // Test with transpose
        _gpu.CopyToDevice(hiVectorXN, diVectorXN);
        _blas.TBMV(N, K, diMatrixA, diVectorXN, cublasOperation.T);
        _gpu.CopyFromDevice(diVectorXN, gpuResultN);

        // CPU reference: column j of the dense matrix times x (i.e. row j of the transpose).
        for (int j = 0; j < N; j++)
        {
            double cpuResult = 0.0;

            for (int i = 0; i < N; i++)
            {
                cpuResult += hiMatrixANN[GetIndexColumnMajor(i, j, N)] * hiVectorXN[i];
            }

            Assert.AreEqual(cpuResult, gpuResultN[j]);
        }

        // Upper triangular banded matrix
        ClearBuffer(hiMatrixANN);
        CreateBandedMatrix(hiMatrixANN, N, N, 0, K);
        CompressSymmetricBandedMatrixToCBC(hiMatrixANN, hiMatrixASCBC, N, K, cublasFillMode.Upper);
        _gpu.CopyToDevice(hiMatrixASCBC, diMatrixA);

        // Test without transpose
        _gpu.CopyToDevice(hiVectorXN, diVectorXN);
        _blas.TBMV(N, K, diMatrixA, diVectorXN, cublasOperation.N, cublasFillMode.Upper);
        _gpu.CopyFromDevice(diVectorXN, gpuResultN);

        for (int i = 0; i < N; i++)
        {
            double cpuResult = 0.0;

            for (int j = 0; j < N; j++)
            {
                cpuResult += hiMatrixANN[GetIndexColumnMajor(i, j, N)] * hiVectorXN[j];
            }

            Assert.AreEqual(cpuResult, gpuResultN[i]);
        }

        // Test with transpose
        _gpu.CopyToDevice(hiVectorXN, diVectorXN);
        _blas.TBMV(N, K, diMatrixA, diVectorXN, cublasOperation.T, cublasFillMode.Upper);
        _gpu.CopyFromDevice(diVectorXN, gpuResultN);

        for (int j = 0; j < N; j++)
        {
            double cpuResult = 0.0;

            for (int i = 0; i < N; i++)
            {
                cpuResult += hiMatrixANN[GetIndexColumnMajor(i, j, N)] * hiVectorXN[i];
            }

            Assert.AreEqual(cpuResult, gpuResultN[j]);
        }
    }
    finally
    {
        // Release the device allocations even when an assertion above fails.
        _gpu.FreeAll();
    }
}