// Solves the sparse linear system A x = b with the BiCGSTAB iteration.
//
//   n, nnz                      : dimension of the square matrix A and the entry count passed to CSRMV.
//   csrValA, csrRowA, csrColA   : matrix A in CSR form.
//   x                           : initial guess on entry; updated in place.
//   b                           : right-hand side; only read.
//   ax                          : not used by this implementation (kept for signature compatibility).
//   r0, r, v, p, s, t           : caller-provided work vectors, overwritten here
//                                 (presumably of length n - TODO confirm against callers).
//   threshold                   : the solve succeeds once the tested 2-norm is at most this value.
//   maxIterate                  : failure is reported once the iteration counter exceeds this value
//                                 without the convergence test passing.
//
// Returns a SolveResult carrying IsSuccess, IterateCount and LastError (the last tested 2-norm).
public SolveResult BiCGSTAB(int n, int nnz, float[] csrValA, int[] csrRowA, int[] csrColA, float[] x, float[] b, float[] ax, float[] r0, float[] r, float[] v, float[] p, float[] s, float[] t, float threshold = 0.000001f, int maxIterate = 1000)
{
    SolveResult result = new SolveResult();

    float l0 = 1.0f, alpha = 1.0f, w0 = 1.0f;
    float l1, beta, w1;

    int k = 1;

    float minusOne = -1.0f;
    float one = 1.0f;
    float zero = 0.0f;

    // r0 = b - A x, assuming CSRMV follows the cuSPARSE csrmv convention
    // y = alpha * A * x + beta * y (alpha = -1, beta = 1 here).
    blas.COPY(b, r0);
    sparse.CSRMV(n, n, nnz, ref minusOne, csrValA, csrRowA, csrColA, x, ref one, r0);
    blas.COPY(r0, r);

    SetValue(n, v, 0.0f);
    SetValue(n, p, 0.0f);

    // If the initial guess already satisfies the tolerance, stop here: with a zero
    // residual the first iteration would evaluate 0 / 0 and write NaN into x.
    double residual = blas.NRM2(r0);
    if (residual <= threshold)
    {
        result.IterateCount = 0;
        result.IsSuccess = true;
        result.LastError = residual;
        return result;
    }

    while (true)
    {
        l1 = blas.DOT(r0, r);
        beta = (l1 / l0) * (alpha / w0);

        // p = r + beta * (p - w0 * v)
        blas.AXPY(-w0, v, p);
        blas.SCAL(beta, p);
        blas.AXPY(1.0f, r, p);

        // v = A p
        sparse.CSRMV(n, n, nnz, ref one, csrValA, csrRowA, csrColA, p, ref zero, v);

        alpha = l1 / blas.DOT(r0, v);

        // s = r - alpha * v
        blas.COPY(r, s);
        blas.AXPY(-alpha, v, s);

        // t = A s
        sparse.CSRMV(n, n, nnz, ref one, csrValA, csrRowA, csrColA, s, ref zero, t);

        w1 = blas.DOT(t, s) / blas.DOT(t, t);

        // x = x + alpha * p + w1 * s
        blas.AXPY(alpha, p, x);
        blas.AXPY(w1, s, x);

        // r = s - w1 * t
        blas.COPY(s, r);
        blas.AXPY(-w1, t, r);

        // Convergence is tested on the intermediate vector s rather than on r.
        // Since w1 = (t.s)/(t.t) minimises ||s - w t||, ||r|| is at most ||s|| in
        // exact arithmetic, so this test is the more conservative of the two.
        residual = blas.NRM2(s);

        // Test convergence before the iteration limit so that a solve which
        // converges on the final allowed iteration is reported as a success.
        if (residual <= threshold)
        {
            result.IterateCount = k;
            result.IsSuccess = true;
            result.LastError = residual;
            return result;
        }

        if (k > maxIterate)
        {
            result.IterateCount = k;
            result.IsSuccess = false;
            result.LastError = residual;
            return result;
        }

        k++;
        w0 = w1;
        l0 = l1;
    }
}
// Preconditioned conjugate gradient solve for the sparse system A x = b.
//
//   n, nnz                          : dimension of the square matrix A and the entry count passed
//                                     to CSRMV and CSRSV_ANALYSIS.
//   csrValA, csrRowA, csrColA       : matrix A in CSR form.
//   dx                              : solution vector, updated in place.
//   db                              : used directly as the residual and overwritten. No b - A dx is
//                                     formed here, so it equals the true residual only when dx is
//                                     the zero vector on entry - NOTE(review): confirm with callers.
//   csrValICP, csrRowICP, csrColICP : storage for the lower-triangular preconditioner factor, filled
//                                     by the DefineLower / CopyAIntoH / ConstructH kernels.
//   dy, dp, domega, zm1, zm2, rm2   : caller-provided work vectors, overwritten here.
//   tolerence                       : the solve succeeds once the squared residual norm is at most
//                                     tolerence squared.
//   maxIterate                      : failure is reported once the iteration counter exceeds this.
//
// Returns a SolveResult carrying IsSuccess, IterateCount and LastError; LastError is the
// SQUARED 2-norm of the residual (db . db).
public SolveResult CGPreconditioned(
    int n, int nnz, float[] csrValA, int[] csrRowA, int[] csrColA,
    float[] dx, float[] db,
    float[] csrValICP, int[] csrRowICP, int[] csrColICP,
    float[] dy, float[] dp, float[] domega, float[] zm1, float[] zm2, float[] rm2,
    float tolerence = 0.0001f, int maxIterate = 300)
{
    SolveResult result = new SolveResult();

    // Make Incomplete Cholesky Preconditioner.
    gpu.Launch().DefineLower(n, csrRowICP, csrColICP);
    gpu.Launch(n, 1).CopyAIntoH(n, csrValA, csrRowA, csrValICP, csrRowICP);
    gpu.Launch(n, 1).ConstructH(n, csrValICP, csrRowICP);

    // Descriptor for the factor: lower triangular, zero-based, non-unit diagonal.
    cusparseMatDescr descrM = new cusparseMatDescr();
    descrM.MatrixType = cusparseMatrixType.Triangular;
    descrM.FillMode = cusparseFillMode.Lower;
    descrM.IndexBase = cusparseIndexBase.Zero;
    descrM.DiagType = cusparseDiagType.NonUnit;

    // One analysis object per solve direction (factor and its transpose).
    // NOTE(review): these analysis objects are never released in this method;
    // check whether the sparse wrapper exposes a matching destroy call.
    cusparseSolveAnalysisInfo info = new cusparseSolveAnalysisInfo();
    sparse.CreateSolveAnalysisInfo(ref info);
    cusparseSolveAnalysisInfo infoTrans = new cusparseSolveAnalysisInfo();
    sparse.CreateSolveAnalysisInfo(ref infoTrans);

    // NOTE(review): the entry count passed here is nnz of A; confirm it is also
    // valid for the preconditioner arrays.
    sparse.CSRSV_ANALYSIS(n, nnz, csrValICP, csrRowICP, csrColICP, cusparseOperation.NonTranspose, info, descrM);
    sparse.CSRSV_ANALYSIS(n, nnz, csrValICP, csrRowICP, csrColICP, cusparseOperation.Transpose, infoTrans, descrM);

    int k = 0;
    float r1 = blas.DOT(db, db);
    float alpha, beta;
    float identityFloat = 1.0f;
    float zeroFloat = 0.0f;

    // A residual that already meets the tolerance needs no iteration. With an exactly
    // zero residual the first step would evaluate 0 / 0 and write NaN into dx.
    if (r1 <= tolerence * tolerence)
    {
        result.IsSuccess = true;
        result.IterateCount = 0;
        result.LastError = r1;
        return result;
    }

    while (true)
    {
        // Apply the preconditioner with two triangular solves: db -> dy -> zm1.
        sparse.CSRSV_SOLVE(n, ref identityFloat, csrValICP, csrRowICP, csrColICP, db, dy, cusparseOperation.NonTranspose, info, descrM);
        sparse.CSRSV_SOLVE(n, ref identityFloat, csrValICP, csrRowICP, csrColICP, dy, zm1, cusparseOperation.Transpose, infoTrans, descrM);

        k++;

        // (r . z) is needed for both beta and alpha; neither db nor zm1 changes
        // in between, so it is computed once per iteration.
        float rz = blas.DOT(db, zm1);

        if (k == 1)
        {
            blas.COPY(zm1, dp);
        }
        else
        {
            // dp = zm1 + beta * dp, with beta = (r . z) / (r_prev . z_prev)
            beta = rz / blas.DOT(rm2, zm2);
            blas.SCAL(beta, dp);
            blas.AXPY(1.0f, zm1, dp);
        }

        // domega = A dp
        sparse.CSRMV(n, n, nnz, ref identityFloat, csrValA, csrRowA, csrColA, dp, ref zeroFloat, domega);

        alpha = rz / blas.DOT(dp, domega);

        // dx = dx + alpha * dp
        blas.AXPY(alpha, dp, dx);

        // Keep the current residual and preconditioned residual for the next beta.
        blas.COPY(db, rm2);
        blas.COPY(zm1, zm2);

        // db = db - alpha * domega
        blas.AXPY(-alpha, domega, db);

        r1 = blas.DOT(db, db);

        if (r1 <= tolerence * tolerence)
        {
            result.IsSuccess = true;
            result.IterateCount = k;
            result.LastError = r1;
            break;
        }

        if (k > maxIterate)
        {
            result.IsSuccess = false;
            result.IterateCount = k;
            result.LastError = r1;
            break;
        }
    }

    return result;
}
// Conjugate gradient solve (double precision) for the sparse system A x = b.
//
//   n, nnz                    : dimension of the square matrix A and the entry count passed to CSRMV.
//   csrValA, csrRowA, csrColA : matrix A in CSR form.
//   dx                        : initial guess on entry; updated in place.
//   db                        : right-hand side on entry; overwritten with the residual.
//   dp, dAx                   : caller-provided work vectors, overwritten here.
//   tolerence                 : the solve succeeds once the squared residual norm is at most
//                               tolerence squared.
//   maxIterate                : maximum number of iterations performed.
//
// Returns a SolveResult carrying IsSuccess, IterateCount (iterations actually performed) and
// LastError, which is the SQUARED 2-norm of the residual (db . db). When the right-hand side
// is exactly zero, dx is set to zero and only IsSuccess is assigned.
public SolveResult CG(
    int n, int nnz, double[] csrValA, int[] csrRowA, int[] csrColA,
    double[] dx, double[] db, double[] dp, double[] dAx,
    double tolerence = 0.00001f, int maxIterate = 300)
{
    SolveResult result = new SolveResult();

    double a, b, r0, r1;
    double zero = 0.0;
    double one = 1.0;

    // A zero right-hand side has the trivial solution x = 0.
    if (blas.DOT(db, db) == 0.0)
    {
        SetValue(n, dx, 0);
        result.IsSuccess = true;
        return result;
    }

    // db = b - A dx (initial residual)
    sparse.CSRMV(n, n, nnz, ref one, csrValA, csrRowA, csrColA, dx, ref zero, dAx);
    blas.AXPY(-1.0, dAx, db);

    r1 = blas.DOT(db, db);
    double toleranceSquared = tolerence * tolerence;

    // If the initial guess already meets the tolerance, stop here: with a zero
    // residual the first step would evaluate 0 / 0 and write NaN into dx.
    if (r1 <= toleranceSquared)
    {
        result.IsSuccess = true;
        result.LastError = r1;
        result.IterateCount = 0;
        return result;
    }

    int k = 0; // Number of iterations completed so far.
    r0 = 0;

    while (true)
    {
        if (k > 0)
        {
            // dp = db + (r1 / r0) * dp
            b = r1 / r0;
            blas.SCAL(b, dp);
            blas.AXPY(1.0, db, dp);
        }
        else
        {
            // First search direction is the residual itself.
            blas.COPY(db, dp);
        }

        // dAx = A dp
        sparse.CSRMV(n, n, nnz, ref one, csrValA, csrRowA, csrColA, dp, ref zero, dAx);

        a = r1 / blas.DOT(dp, dAx);

        // dx = dx + a * dp ; db = db - a * A dp
        blas.AXPY(a, dp, dx);
        blas.AXPY(-a, dAx, db);

        r0 = r1;
        r1 = blas.DOT(db, db);

        k++;

        if (r1 <= toleranceSquared)
        {
            result.IsSuccess = true;
            result.LastError = r1;
            result.IterateCount = k;
            break;
        }

        // Give up after maxIterate completed iterations.
        if (k >= maxIterate)
        {
            result.IsSuccess = false;
            result.LastError = r1;
            result.IterateCount = k;
            break;
        }
    }

    return result;
}
// Solves the sparse linear system A x = b with the BiCGSTAB iteration.
//
//   n, nnz                      : dimension of the square matrix A and the entry count passed to CSRMV.
//   csrValA, csrRowA, csrColA   : matrix A in CSR form.
//   x                           : initial guess on entry; updated in place.
//   b                           : right-hand side; only read.
//   ax                          : not used by this implementation (kept for signature compatibility).
//   r0, r, v, p, s, t           : caller-provided work vectors, overwritten here
//                                 (presumably of length n - TODO confirm against callers).
//   threshold                   : the solve succeeds once the tested 2-norm is at most this value.
//   maxIterate                  : failure is reported once the iteration counter exceeds this value
//                                 without the convergence test passing.
//
// Returns a SolveResult carrying IsSuccess, IterateCount and LastError (the last tested 2-norm).
public SolveResult BiCGSTAB(int n, int nnz, float[] csrValA, int[] csrRowA, int[] csrColA, float[] x, float[] b, float[] ax, float[] r0, float[] r, float[] v, float[] p, float[] s, float[] t, float threshold = 0.000001f, int maxIterate = 1000)
{
    SolveResult result = new SolveResult();

    float l0 = 1.0f, alpha = 1.0f, w0 = 1.0f;
    float l1, beta, w1;

    int k = 1;

    float minusOne = -1.0f;
    float one = 1.0f;
    float zero = 0.0f;

    // r0 = b - A x, assuming CSRMV follows the cuSPARSE csrmv convention
    // y = alpha * A * x + beta * y (alpha = -1, beta = 1 here).
    blas.COPY(b, r0);
    sparse.CSRMV(n, n, nnz, ref minusOne, csrValA, csrRowA, csrColA, x, ref one, r0);
    blas.COPY(r0, r);

    SetValue(n, v, 0.0f);
    SetValue(n, p, 0.0f);

    // If the initial guess already satisfies the tolerance, stop here: with a zero
    // residual the first iteration would evaluate 0 / 0 and write NaN into x.
    double residual = blas.NRM2(r0);
    if (residual <= threshold)
    {
        result.IterateCount = 0;
        result.IsSuccess = true;
        result.LastError = residual;
        return result;
    }

    while (true)
    {
        l1 = blas.DOT(r0, r);
        beta = (l1 / l0) * (alpha / w0);

        // p = r + beta * (p - w0 * v)
        blas.AXPY(-w0, v, p);
        blas.SCAL(beta, p);
        blas.AXPY(1.0f, r, p);

        // v = A p
        sparse.CSRMV(n, n, nnz, ref one, csrValA, csrRowA, csrColA, p, ref zero, v);

        alpha = l1 / blas.DOT(r0, v);

        // s = r - alpha * v
        blas.COPY(r, s);
        blas.AXPY(-alpha, v, s);

        // t = A s
        sparse.CSRMV(n, n, nnz, ref one, csrValA, csrRowA, csrColA, s, ref zero, t);

        w1 = blas.DOT(t, s) / blas.DOT(t, t);

        // x = x + alpha * p + w1 * s
        blas.AXPY(alpha, p, x);
        blas.AXPY(w1, s, x);

        // r = s - w1 * t
        blas.COPY(s, r);
        blas.AXPY(-w1, t, r);

        // Convergence is tested on the intermediate vector s rather than on r.
        // Since w1 = (t.s)/(t.t) minimises ||s - w t||, ||r|| is at most ||s|| in
        // exact arithmetic, so this test is the more conservative of the two.
        residual = blas.NRM2(s);

        // Test convergence before the iteration limit so that a solve which
        // converges on the final allowed iteration is reported as a success.
        if (residual <= threshold)
        {
            result.IterateCount = k;
            result.IsSuccess = true;
            result.LastError = residual;
            return result;
        }

        if (k > maxIterate)
        {
            result.IterateCount = k;
            result.IsSuccess = false;
            result.LastError = residual;
            return result;
        }

        k++;
        w0 = w1;
        l0 = l1;
    }
}
// Preconditioned conjugate gradient solve for the sparse system A x = b.
//
//   n, nnz                          : dimension of the square matrix A and the entry count passed
//                                     to CSRMV and CSRSV_ANALYSIS.
//   csrValA, csrRowA, csrColA       : matrix A in CSR form.
//   dx                              : solution vector, updated in place.
//   db                              : used directly as the residual and overwritten. No b - A dx is
//                                     formed here, so it equals the true residual only when dx is
//                                     the zero vector on entry - NOTE(review): confirm with callers.
//   csrValICP, csrRowICP, csrColICP : storage for the lower-triangular preconditioner factor, filled
//                                     by the DefineLower / CopyAIntoH / ConstructH kernels.
//   dy, dp, domega, zm1, zm2, rm2   : caller-provided work vectors, overwritten here.
//   tolerence                       : the solve succeeds once the squared residual norm is at most
//                                     tolerence squared.
//   maxIterate                      : failure is reported once the iteration counter exceeds this.
//
// Returns a SolveResult carrying IsSuccess, IterateCount and LastError; LastError is the
// SQUARED 2-norm of the residual (db . db).
public SolveResult CGPreconditioned(
    int n, int nnz, float[] csrValA, int[] csrRowA, int[] csrColA,
    float[] dx, float[] db,
    float[] csrValICP, int[] csrRowICP, int[] csrColICP,
    float[] dy, float[] dp, float[] domega, float[] zm1, float[] zm2, float[] rm2,
    float tolerence = 0.0001f, int maxIterate = 300)
{
    SolveResult result = new SolveResult();

    // Make Incomplete Cholesky Preconditioner.
    gpu.Launch().DefineLower(n, csrRowICP, csrColICP);
    gpu.Launch(n, 1).CopyAIntoH(n, csrValA, csrRowA, csrValICP, csrRowICP);
    gpu.Launch(n, 1).ConstructH(n, csrValICP, csrRowICP);

    // Descriptor for the factor: lower triangular, zero-based, non-unit diagonal.
    cusparseMatDescr descrM = new cusparseMatDescr();
    descrM.MatrixType = cusparseMatrixType.Triangular;
    descrM.FillMode = cusparseFillMode.Lower;
    descrM.IndexBase = cusparseIndexBase.Zero;
    descrM.DiagType = cusparseDiagType.NonUnit;

    // One analysis object per solve direction (factor and its transpose).
    // NOTE(review): these analysis objects are never released in this method;
    // check whether the sparse wrapper exposes a matching destroy call.
    cusparseSolveAnalysisInfo info = new cusparseSolveAnalysisInfo();
    sparse.CreateSolveAnalysisInfo(ref info);
    cusparseSolveAnalysisInfo infoTrans = new cusparseSolveAnalysisInfo();
    sparse.CreateSolveAnalysisInfo(ref infoTrans);

    // NOTE(review): the entry count passed here is nnz of A; confirm it is also
    // valid for the preconditioner arrays.
    sparse.CSRSV_ANALYSIS(n, nnz, csrValICP, csrRowICP, csrColICP, cusparseOperation.NonTranspose, info, descrM);
    sparse.CSRSV_ANALYSIS(n, nnz, csrValICP, csrRowICP, csrColICP, cusparseOperation.Transpose, infoTrans, descrM);

    int k = 0;
    float r1 = blas.DOT(db, db);
    float alpha, beta;
    float identityFloat = 1.0f;
    float zeroFloat = 0.0f;

    // A residual that already meets the tolerance needs no iteration. With an exactly
    // zero residual the first step would evaluate 0 / 0 and write NaN into dx.
    if (r1 <= tolerence * tolerence)
    {
        result.IsSuccess = true;
        result.IterateCount = 0;
        result.LastError = r1;
        return result;
    }

    while (true)
    {
        // Apply the preconditioner with two triangular solves: db -> dy -> zm1.
        sparse.CSRSV_SOLVE(n, ref identityFloat, csrValICP, csrRowICP, csrColICP, db, dy, cusparseOperation.NonTranspose, info, descrM);
        sparse.CSRSV_SOLVE(n, ref identityFloat, csrValICP, csrRowICP, csrColICP, dy, zm1, cusparseOperation.Transpose, infoTrans, descrM);

        k++;

        // (r . z) is needed for both beta and alpha; neither db nor zm1 changes
        // in between, so it is computed once per iteration.
        float rz = blas.DOT(db, zm1);

        if (k == 1)
        {
            blas.COPY(zm1, dp);
        }
        else
        {
            // dp = zm1 + beta * dp, with beta = (r . z) / (r_prev . z_prev)
            beta = rz / blas.DOT(rm2, zm2);
            blas.SCAL(beta, dp);
            blas.AXPY(1.0f, zm1, dp);
        }

        // domega = A dp
        sparse.CSRMV(n, n, nnz, ref identityFloat, csrValA, csrRowA, csrColA, dp, ref zeroFloat, domega);

        alpha = rz / blas.DOT(dp, domega);

        // dx = dx + alpha * dp
        blas.AXPY(alpha, dp, dx);

        // Keep the current residual and preconditioned residual for the next beta.
        blas.COPY(db, rm2);
        blas.COPY(zm1, zm2);

        // db = db - alpha * domega
        blas.AXPY(-alpha, domega, db);

        r1 = blas.DOT(db, db);

        if (r1 <= tolerence * tolerence)
        {
            result.IsSuccess = true;
            result.IterateCount = k;
            result.LastError = r1;
            break;
        }

        if (k > maxIterate)
        {
            result.IsSuccess = false;
            result.IterateCount = k;
            result.LastError = r1;
            break;
        }
    }

    return result;
}
// Conjugate gradient solve (double precision) for the sparse system A x = b.
//
//   n, nnz                    : dimension of the square matrix A and the entry count passed to CSRMV.
//   csrValA, csrRowA, csrColA : matrix A in CSR form.
//   dx                        : initial guess on entry; updated in place.
//   db                        : right-hand side on entry; overwritten with the residual.
//   dp, dAx                   : caller-provided work vectors, overwritten here.
//   tolerence                 : the solve succeeds once the squared residual norm is at most
//                               tolerence squared.
//   maxIterate                : maximum number of iterations performed.
//
// Returns a SolveResult carrying IsSuccess, IterateCount (iterations actually performed) and
// LastError, which is the SQUARED 2-norm of the residual (db . db). When the right-hand side
// is exactly zero, dx is set to zero and only IsSuccess is assigned.
public SolveResult CG(
    int n, int nnz, double[] csrValA, int[] csrRowA, int[] csrColA,
    double[] dx, double[] db, double[] dp, double[] dAx,
    double tolerence = 0.00001f, int maxIterate = 300)
{
    SolveResult result = new SolveResult();

    double a, b, r0, r1;
    double zero = 0.0;
    double one = 1.0;

    // A zero right-hand side has the trivial solution x = 0.
    if (blas.DOT(db, db) == 0.0)
    {
        SetValue(n, dx, 0);
        result.IsSuccess = true;
        return result;
    }

    // db = b - A dx (initial residual)
    sparse.CSRMV(n, n, nnz, ref one, csrValA, csrRowA, csrColA, dx, ref zero, dAx);
    blas.AXPY(-1.0, dAx, db);

    r1 = blas.DOT(db, db);
    double toleranceSquared = tolerence * tolerence;

    // If the initial guess already meets the tolerance, stop here: with a zero
    // residual the first step would evaluate 0 / 0 and write NaN into dx.
    if (r1 <= toleranceSquared)
    {
        result.IsSuccess = true;
        result.LastError = r1;
        result.IterateCount = 0;
        return result;
    }

    int k = 0; // Number of iterations completed so far.
    r0 = 0;

    while (true)
    {
        if (k > 0)
        {
            // dp = db + (r1 / r0) * dp
            b = r1 / r0;
            blas.SCAL(b, dp);
            blas.AXPY(1.0, db, dp);
        }
        else
        {
            // First search direction is the residual itself.
            blas.COPY(db, dp);
        }

        // dAx = A dp
        sparse.CSRMV(n, n, nnz, ref one, csrValA, csrRowA, csrColA, dp, ref zero, dAx);

        a = r1 / blas.DOT(dp, dAx);

        // dx = dx + a * dp ; db = db - a * A dp
        blas.AXPY(a, dp, dx);
        blas.AXPY(-a, dAx, db);

        r0 = r1;
        r1 = blas.DOT(db, db);

        k++;

        if (r1 <= toleranceSquared)
        {
            result.IsSuccess = true;
            result.LastError = r1;
            result.IterateCount = k;
            break;
        }

        // Give up after maxIterate completed iterations.
        if (k >= maxIterate)
        {
            result.IsSuccess = false;
            result.LastError = r1;
            result.IterateCount = k;
            break;
        }
    }

    return result;
}