/// <summary>
/// Replaces <paramref name="dm"/> with a new matrix of swapped dimensions
/// (ColumnCount x RowCount) whose column-major storage is filled from two GPU passes:
/// one over the top <c>cols x cols</c> block and one over the remaining
/// <c>(rows - cols) x cols</c> block of the input.
/// </summary>
/// <param name="dm">
/// Matrix to process. It must have at least as many rows as columns. The reference is set to
/// <c>null</c> while the GPU work runs (so the original storage can be collected) and is only
/// reassigned on success; if an exception escapes, the caller's reference stays <c>null</c>.
/// </param>
/// <exception cref="System.ArgumentNullException"><paramref name="dm"/> is <c>null</c>.</exception>
/// <exception cref="System.ArgumentOutOfRangeException">The matrix has fewer rows than columns.</exception>
/// <remarks>
/// NOTE(review): the GEMV calls below pass the freshly allocated output buffer as the matrix
/// argument and a one-element buffer as both vector arguments, and the uploaded block
/// (<c>a_d</c>) is never handed to BLAS. GEMV is a matrix-vector product, so this does not look
/// like it can produce a transpose. The calls are kept exactly as they were; confirm the
/// intended routine before relying on the numerical result.
/// </remarks>
public static void cudaTranspose(ref MathNet.Numerics.LinearAlgebra.Double.DenseMatrix dm)
{
    if (dm == null)
    {
        throw new System.ArgumentNullException("dm");
    }

    int cols = dm.ColumnCount, rows = dm.RowCount;
    int restRows = rows - cols;
    if (restRows < 0)
    {
        // Fail before any device work instead of passing a negative row count to SubMatrix.
        throw new System.ArgumentOutOfRangeException(
            "dm", "cudaTranspose requires a matrix with at least as many rows as columns.");
    }

    GPGPU gpu = CudafyHost.GetDevice(eGPUType.Cuda);
    GPGPUBLAS blas = GPGPUBLAS.Create(gpu);

    // Split the input into the square top block and the remaining rows.
    double[] top = dm.SubMatrix(0, cols, 0, cols).Storage.ToColumnMajorArray();
    // A square input has no remainder block, so no empty sub-matrix is requested.
    double[] rest = restRows > 0
        ? dm.SubMatrix(cols, restRows, 0, cols).Storage.ToColumnMajorArray()
        : null;

    // Drop the caller's reference so the source matrix can be collected during the GPU work.
    dm = null;

    double[] result;
    try
    {
        // ---- Pass 1: top cols x cols block ----
        double[] a_d = gpu.CopyToDevice<double>(top); // NOTE(review): uploaded but never passed to BLAS
        top = null;
        double[] c_d = gpu.Allocate<double>(cols * cols);
        double[] x_d = gpu.CopyToDevice<double>(new double[] { 1 });
        blas.GEMV(cols, cols, 1, c_d, x_d, 0, x_d, Cudafy.Maths.BLAS.Types.cublasOperation.T);

        // Host buffer for the whole output; the first cols * cols entries come from pass 1.
        result = new double[cols * rows];
        gpu.CopyFromDevice<double>(c_d, 0, result, 0, cols * cols);

        if (restRows > 0)
        {
            // Release the pass 1 buffers before allocating the pass 2 buffers.
            gpu.FreeAll();

            // ---- Pass 2: remaining restRows x cols block ----
            a_d = gpu.CopyToDevice<double>(rest); // NOTE(review): uploaded but never passed to BLAS
            rest = null;
            c_d = gpu.Allocate<double>(restRows * cols);
            x_d = gpu.CopyToDevice<double>(new double[] { 1 });
            blas.GEMV(restRows, cols, 1, c_d, x_d, 0, x_d, Cudafy.Maths.BLAS.Types.cublasOperation.T);

            // Append after the entries written by pass 1.
            gpu.CopyFromDevice<double>(c_d, 0, result, cols * cols, restRows * cols);
        }
    }
    finally
    {
        // Always release device allocations, including when an upload, BLAS call or download throws.
        gpu.FreeAll();
    }

    dm = new MathNet.Numerics.LinearAlgebra.Double.DenseMatrix(cols, rows, result);
}
/// <summary>
/// Checks the GPU GEMV result against a CPU reference of
/// <c>y = Alpha * op(A) * x + Beta * y</c>, first with <c>op(A) = A</c> (result of length M)
/// and then with <c>op(A) = A^T</c> (result of length N). A is indexed column-major.
/// </summary>
/// <remarks>
/// Results are compared with a relative tolerance rather than exact equality, because the GPU
/// and the CPU loop need not accumulate the products in the same order. Device buffers are
/// released in a <c>finally</c> block so a failed assertion does not leave allocations behind.
/// </remarks>
public void Test_BLAS2_GEMV()
{
    // Relative tolerance for comparing CPU and GPU double results (absolute floor of 1e-9).
    const double relativeTolerance = 1e-9;

    ClearBuffer(hiMatrixA);
    ClearBuffer(hiVectorXM);
    ClearBuffer(hiVectorXN);
    ClearBuffer(hiVectorYM);
    ClearBuffer(hiVectorYN);

    FillBuffer(hiMatrixA);
    FillBuffer(hiVectorXM);
    FillBuffer(hiVectorXN);
    FillBuffer(hiVectorYM);
    FillBuffer(hiVectorYN);

    try
    {
        diMatrixA = _gpu.CopyToDevice(hiMatrixA);
        diVectorXM = _gpu.CopyToDevice(hiVectorXM);
        diVectorXN = _gpu.CopyToDevice(hiVectorXN);

        // Test without transpose
        diVectorYM = _gpu.CopyToDevice(hiVectorYM);
        _blas.GEMV(M, N, Alpha, diMatrixA, diVectorXN, Beta, diVectorYM);
        _gpu.CopyFromDevice(diVectorYM, gpuResultM);

        for (int i = 0; i < M; i++)
        {
            double cpuResult = 0.0;

            for (int j = 0; j < N; j++)
            {
                cpuResult += Alpha * hiMatrixA[GetIndexColumnMajor(i, j, M)] * hiVectorXN[j];
            }

            cpuResult += Beta * hiVectorYM[i];

            Assert.AreEqual(
                cpuResult,
                gpuResultM[i],
                relativeTolerance * System.Math.Max(1.0, System.Math.Abs(cpuResult)));
        }

        // Test with transpose
        diVectorYN = _gpu.CopyToDevice(hiVectorYN);
        _blas.GEMV(M, N, Alpha, diMatrixA, diVectorXM, Beta, diVectorYN, cublasOperation.T);
        _gpu.CopyFromDevice(diVectorYN, gpuResultN);

        for (int j = 0; j < N; j++)
        {
            double cpuResult = 0.0;

            for (int i = 0; i < M; i++)
            {
                cpuResult += Alpha * hiMatrixA[GetIndexColumnMajor(i, j, M)] * hiVectorXM[i];
            }

            cpuResult += Beta * hiVectorYN[j];

            Assert.AreEqual(
                cpuResult,
                gpuResultN[j],
                relativeTolerance * System.Math.Max(1.0, System.Math.Abs(cpuResult)));
        }
    }
    finally
    {
        // Release device memory even when an assertion or a GPU call throws.
        _gpu.FreeAll();
    }
}