// Checks _blas.TRMV against a host-side reference for four cases: a lower and an upper
// triangular matrix, each with and without transposition. For every case the input vector is
// re-uploaded, TRMV is run on the device, and the result is read back from the same device
// vector (diVectorXN) and compared element by element with a plain matrix-vector product
// computed on the host.
//
// Device allocations are released in a finally block so that a failing assertion (or any
// other exception) does not leave device memory allocated for subsequent tests.
public void Test_BLAS2_TRMV()
{
    // Prepare the host input vector before touching the device.
    ClearBuffer(hiVectorXN);
    FillBuffer(hiVectorXN);

    try
    {
        diVectorXN = _gpu.Allocate(hiVectorXN);
        diMatrixA = _gpu.Allocate(hiMatrixANN);

        // ---- Lower triangle matrix ----
        // NOTE(review): the (N - 1, 0) arguments are presumably the sub-/super-diagonal counts,
        // which would yield a lower-triangular matrix -- confirm against CreateBandedMatrix.
        ClearBuffer(hiMatrixANN);
        CreateBandedMatrix(hiMatrixANN, N, N, N - 1, 0);
        _gpu.CopyToDevice(hiMatrixANN, diMatrixA);

        // Test without transpose.
        // NOTE(review): relies on TRMV's default operation / fill-mode arguments; presumably
        // "no transpose" and "lower" -- confirm against the TRMV overload.
        _gpu.CopyToDevice(hiVectorXN, diVectorXN);
        _blas.TRMV(N, diMatrixA, diVectorXN);
        _gpu.CopyFromDevice(diVectorXN, gpuResultN);

        // Host reference: result[i] = sum over j of A(i, j) * x[j].
        // NOTE(review): the comparison below is exact; if the buffers hold floating-point
        // data, a tolerance-based overload may be more appropriate -- confirm the intent.
        for (int i = 0; i < N; i++)
        {
            double cpuResult = 0.0;

            for (int j = 0; j < N; j++)
            {
                cpuResult += hiMatrixANN[GetIndexColumnMajor(i, j, N)] * hiVectorXN[j];
            }

            Assert.AreEqual(cpuResult, gpuResultN[i]);
        }

        // Test with transpose.
        _gpu.CopyToDevice(hiVectorXN, diVectorXN);
        _blas.TRMV(N, diMatrixA, diVectorXN, cublasOperation.T);
        _gpu.CopyFromDevice(diVectorXN, gpuResultN);

        // Host reference for the transposed product: result[j] = sum over i of A(i, j) * x[i].
        for (int j = 0; j < N; j++)
        {
            double cpuResult = 0.0;

            for (int i = 0; i < N; i++)
            {
                cpuResult += hiMatrixANN[GetIndexColumnMajor(i, j, N)] * hiVectorXN[i];
            }

            Assert.AreEqual(cpuResult, gpuResultN[j]);
        }

        // ---- Upper triangle matrix ----
        // Same device buffer is reused; only the host matrix contents are regenerated.
        ClearBuffer(hiMatrixANN);
        CreateBandedMatrix(hiMatrixANN, N, N, 0, N - 1);
        _gpu.CopyToDevice(hiMatrixANN, diMatrixA);

        // Test without transpose.
        _gpu.CopyToDevice(hiVectorXN, diVectorXN);
        _blas.TRMV(N, diMatrixA, diVectorXN, cublasOperation.N, cublasFillMode.Upper);
        _gpu.CopyFromDevice(diVectorXN, gpuResultN);

        for (int i = 0; i < N; i++)
        {
            double cpuResult = 0.0;

            for (int j = 0; j < N; j++)
            {
                cpuResult += hiMatrixANN[GetIndexColumnMajor(i, j, N)] * hiVectorXN[j];
            }

            Assert.AreEqual(cpuResult, gpuResultN[i]);
        }

        // Test with transpose.
        _gpu.CopyToDevice(hiVectorXN, diVectorXN);
        _blas.TRMV(N, diMatrixA, diVectorXN, cublasOperation.T, cublasFillMode.Upper);
        _gpu.CopyFromDevice(diVectorXN, gpuResultN);

        for (int j = 0; j < N; j++)
        {
            double cpuResult = 0.0;

            for (int i = 0; i < N; i++)
            {
                cpuResult += hiMatrixANN[GetIndexColumnMajor(i, j, N)] * hiVectorXN[i];
            }

            Assert.AreEqual(cpuResult, gpuResultN[j]);
        }
    }
    finally
    {
        // Always release device memory, including when an assertion above has failed.
        _gpu.FreeAll();
    }
}