//[Test]
/// <summary>
/// Runs the conjugate gradient solver on an N x N system built by
/// <c>CreateDiagonalMatrix</c>, then prints the elapsed time, the iteration count,
/// the solver's residual and the largest component-wise difference between
/// A * x and the original right-hand side.
/// </summary>
/// <remarks>
/// This method makes no assertions; it only writes its measurements to the console.
/// </remarks>
public void TestCGSolver()
{
    Stopwatch sw = new Stopwatch();
    float one = 1.0f;
    float zero = 0.0f;

    // Host-side data: matrix A, right-hand side b (_hiVectorN2) and a zero initial guess (_hiVectorN).
    _hiMatrixMN = new float[N * N];
    _hoVectorN = new float[N];
    CreateDiagonalMatrix(_hiMatrixMN, N, 6);
    _hiVectorN = new float[N];
    _hiVectorN2 = new float[N];
    FillBuffer(_hiVectorN2, 6);

    try
    {
        _diMatrixMN = _gpu.CopyToDevice(_hiMatrixMN);

        // CG reads dx as the initial guess, so upload the zeroed host vector.
        // NOTE(review): Allocate(hostArray) appears to only reserve device memory
        // without copying its contents - confirm against the GPGPU API.
        _diVectorN = _gpu.CopyToDevice(_hiVectorN);
        _diVectorN2 = _gpu.CopyToDevice(_hiVectorN2);
        _diPerRow = _gpu.Allocate <int>(N);
        _diVectorP = _gpu.Allocate <float>(N);
        _diVectorAX = _gpu.Allocate <float>(N);

        // Convert the dense matrix to CSR.
        int nnz = _sparse.NNZ(N, N, _diMatrixMN, _diPerRow);
        _diCSRVals = _gpu.Allocate <float>(nnz);
        _diCSRCols = _gpu.Allocate <int>(nnz);
        _diCSRRows = _gpu.Allocate <int>(N + 1);
        _sparse.Dense2CSR(N, N, _diMatrixMN, _diPerRow, _diCSRVals, _diCSRRows, _diCSRCols);

        sw.Start();
        SolveResult result = _solver.CG(
            N, nnz, _diCSRVals, _diCSRRows, _diCSRCols,
            _diVectorN, _diVectorN2, _diVectorP, _diVectorAX,
            0.01f, 1000);
        long time = sw.ElapsedMilliseconds;

        // Recompute A * x into _diVectorN2 (beta = 0) and compare it with the original b on the host.
        _sparse.CSRMV(N, N, nnz, ref one, _diCSRVals, _diCSRRows, _diCSRCols, _diVectorN, ref zero, _diVectorN2);
        _gpu.CopyFromDevice(_diVectorN2, _hoVectorN);

        float maxError = 0.0f;
        for (int i = 0; i < N; i++)
        {
            float error = Math.Abs(_hoVectorN[i] - _hiVectorN2[i]);
            if (error > maxError)
            {
                maxError = error;
            }
        }

        Console.WriteLine("Time : {0} ms", time);
        Console.WriteLine("Iterate Count : {0}", result.IterateCount);
        Console.WriteLine("Residual : {0}", result.LastError);
        Console.WriteLine("max error : {0}", maxError);
    }
    finally
    {
        // Release device allocations even when the solver or a copy throws.
        _gpu.FreeAll();
    }
}
/// <summary>
/// Solves the symmetric linear system A * x = b with the conjugate gradient method.
/// A is supplied in CSR format.
/// </summary>
/// <remarks>
/// <paramref name="dx"/> is read as the initial guess and updated in place with the solution.
/// <paramref name="db"/> is overwritten: on return it holds the residual b - A * x.
/// </remarks>
/// <param name="n">number of rows and columns of matrix A.</param>
/// <param name="nnz">number of non-zero elements of A.</param>
/// <param name="csrValA">array of nnz elements, where nnz is the number of non-zero elements and can be obtained from csrRowA[n] - csrRowA[0].</param>
/// <param name="csrRowA">array of n+1 index elements.</param>
/// <param name="csrColA">array of nnz column indices.</param>
/// <param name="dx">vector of n elements. (initial guess on input, solution on output)</param>
/// <param name="db">vector of n elements. (right-hand side on input, residual on output)</param>
/// <param name="dp">vector of n elements. (temporary vector)</param>
/// <param name="dAx">vector of n elements. (temporary vector)</param>
/// <param name="tolerence">iteration tolerance; the solver stops once the squared residual norm is at most tolerence * tolerence.</param>
/// <param name="maxIterate">maximum iteration count of the conjugate gradient solver.</param>
/// <returns>
/// A <c>SolveResult</c> whose <c>IsSuccess</c> is true when the residual reached the tolerance and
/// false when the iteration broke down (for example a singular A) or the maximum iteration count
/// was reached. <c>LastError</c> is the last squared residual norm and <c>IterateCount</c> the
/// number of iterations actually performed.
/// </returns>
public SolveResult CG(
    int n, int nnz, float[] csrValA, int[] csrRowA, int[] csrColA,
    float[] dx, float[] db, float[] dp, float[] dAx,
    float tolerence = 0.00001f, int maxIterate = 300)
{
    SolveResult result = new SolveResult();
    float zero = 0.0f;
    float one = 1.0f;

    // b == 0 has the trivial solution x == 0.
    if (blas.DOT(db, db) == 0)
    {
        SetValue(n, dx, 0);
        result.IsSuccess = true;
        return result;
    }

    float threshold = tolerence * tolerence;

    // Initial residual r = b - A * x0, stored in db.
    sparse.CSRMV(n, n, nnz, ref one, csrValA, csrRowA, csrColA, dx, ref zero, dAx);
    blas.AXPY(-1.0f, dAx, db);
    float r1 = blas.DOT(db, db);
    float r0 = 0;
    int iterations = 0;

    // The initial guess may already satisfy the system. Iterating from a zero
    // residual would compute 0 / 0 for the step length and poison dx with NaN.
    if (r1 <= threshold)
    {
        result.IsSuccess = true;
        result.LastError = r1;
        result.IterateCount = iterations;
        return result;
    }

    while (true)
    {
        // A NaN residual never satisfies the tolerance test; stop instead of
        // spinning until maxIterate.
        if (float.IsNaN(r1))
        {
            result.IsSuccess = false;
            result.LastError = r1;
            result.IterateCount = iterations;
            break;
        }

        if (iterations > 0)
        {
            // p = r + (r1 / r0) * p
            float b = r1 / r0;
            blas.SCAL(b, dp);
            blas.AXPY(1.0f, db, dp);
        }
        else
        {
            // First search direction is the residual itself.
            blas.COPY(db, dp);
        }

        sparse.CSRMV(n, n, nnz, ref one, csrValA, csrRowA, csrColA, dp, ref zero, dAx);

        // Breakdown: p' * A * p == 0 (e.g. singular A) or a non-finite value
        // would make the step length infinite or NaN.
        float pAp = blas.DOT(dp, dAx);
        if (pAp == 0 || float.IsNaN(pAp) || float.IsInfinity(pAp))
        {
            result.IsSuccess = false;
            result.LastError = r1;
            result.IterateCount = iterations;
            break;
        }

        float a = r1 / pAp;
        blas.AXPY(a, dp, dx);    // x = x + a * p
        blas.AXPY(-a, dAx, db);  // r = r - a * A * p

        r0 = r1;
        r1 = blas.DOT(db, db);
        iterations++;

        if (r1 <= threshold)
        {
            result.IsSuccess = true;
            result.LastError = r1;
            result.IterateCount = iterations;
            break;
        }

        if (iterations >= maxIterate)
        {
            result.IsSuccess = false;
            result.LastError = r1;
            result.IterateCount = iterations;
            break;
        }
    }

    return result;
}
/// <summary>
/// Verifies the double-precision sparse CSRMV routine, y = Alpha * op(A) * x + Beta * y,
/// against a CPU reference computed from the dense column-major matrix, first with
/// op(A) = A and then with op(A) = transpose(A).
/// </summary>
public void Test_SPARSE2_CSRMV()
{
    int nnz;

    // No transpose: y (M elements) = Alpha * A (M x N) * x (N elements) + Beta * y
    ClearBuffer(hiMatrixMN);
    ClearBuffer(hiVectorXN);
    ClearBuffer(hiVectorYM);

    FillBufferSparse(hiMatrixMN);
    FillBuffer(hiVectorXN);
    FillBuffer(hiVectorYM);

    try
    {
        diMatrixA = _gpu.CopyToDevice(hiMatrixMN);
        diVectorXN = _gpu.CopyToDevice(hiVectorXN);
        diVectorYM = _gpu.CopyToDevice(hiVectorYM);

        // Convert the dense matrix to CSR on the device.
        diNNZRows = _gpu.Allocate <int>(M);
        nnz = _sparse.NNZ(M, N, diMatrixA, diNNZRows);

        diVals = _gpu.Allocate <double>(nnz);
        diRows = _gpu.Allocate <int>(M + 1);
        diCols = _gpu.Allocate <int>(nnz);

        _sparse.Dense2CSR(M, N, diMatrixA, diNNZRows, diVals, diRows, diCols);

        _sparse.CSRMV(M, N, nnz, ref Alpha, diVals, diRows, diCols, diVectorXN, ref Beta, diVectorYM);

        _gpu.CopyFromDevice(diVectorYM, gpuResultM);

        for (int i = 0; i < M; i++)
        {
            double cpuResult = 0.0;
            for (int j = 0; j < N; j++)
            {
                cpuResult += Alpha * hiMatrixMN[GetIndexColumnMajor(i, j, M)] * hiVectorXN[j];
            }

            cpuResult += Beta * hiVectorYM[i];

            // NOTE(review): exact floating-point comparison; this only holds if the
            // fill values make both summation orders exact - consider a tolerance.
            Assert.AreEqual(cpuResult, gpuResultM[i]);
        }
    }
    finally
    {
        // Free device memory even when an assertion above fails.
        _gpu.FreeAll();
    }

    // Transpose: y (N elements) = Alpha * transpose(A) * x (M elements) + Beta * y
    ClearBuffer(hiMatrixMN);
    ClearBuffer(hiVectorXM);
    ClearBuffer(hiVectorYN);

    FillBufferSparse(hiMatrixMN);
    FillBuffer(hiVectorXM);
    FillBuffer(hiVectorYN);

    try
    {
        diMatrixA = _gpu.CopyToDevice(hiMatrixMN);
        diVectorXM = _gpu.CopyToDevice(hiVectorXM);
        diVectorYN = _gpu.CopyToDevice(hiVectorYN);

        diNNZRows = _gpu.Allocate <int>(M);
        nnz = _sparse.NNZ(M, N, diMatrixA, diNNZRows);

        diVals = _gpu.Allocate <double>(nnz);
        diRows = _gpu.Allocate <int>(M + 1);
        diCols = _gpu.Allocate <int>(nnz);

        _sparse.Dense2CSR(M, N, diMatrixA, diNNZRows, diVals, diRows, diCols);

        _sparse.CSRMV(M, N, nnz, ref Alpha, diVals, diRows, diCols, diVectorXM, ref Beta, diVectorYN, SPARSE.cusparseOperation.Transpose);

        _gpu.CopyFromDevice(diVectorYN, gpuResultN);

        for (int j = 0; j < N; j++)
        {
            double cpuResult = 0.0;
            for (int i = 0; i < M; i++)
            {
                cpuResult += Alpha * hiMatrixMN[GetIndexColumnMajor(i, j, M)] * hiVectorXM[i];
            }

            cpuResult += Beta * hiVectorYN[j];

            // NOTE(review): exact floating-point comparison - see the note in the first phase.
            Assert.AreEqual(cpuResult, gpuResultN[j]);
        }
    }
    finally
    {
        // Free device memory even when an assertion above fails.
        _gpu.FreeAll();
    }
}