// Compares the NRM2 result for the whole device vector against the
// square root of the sum of squares computed on the host copy.
public void TestNRM2InVectorWhole()
{
    // Absolute tolerance allowed between the host and device results.
    const double tolerance = 0.1;

    // Fill the host buffer and copy it into the device buffer.
    CreateRandomData(_hostInput1);
    _gpu.CopyToDevice(_hostInput1, _devPtr1);

    // Value under test: NRM2 over the device buffer.
    float actual = _blas.NRM2(_devPtr1);

    // Reference value computed from the host buffer.
    var sumOfSquares = _hostInput1.Sum(value => value * value);
    float expected = (float)Math.Sqrt(sumOfSquares);

    Assert.AreEqual(expected, actual, tolerance);
}
/// <summary>
/// Solve the linear system A * x = b with the biconjugate gradient stabilized method (BiCGSTAB).
/// A is an n-by-n sparse matrix in CSR format; x supplies the initial guess and receives the result.
/// </summary>
/// <param name="n">number of rows and columns of matrix A.</param>
/// <param name="nnz">number of non-zero elements of matrix A.</param>
/// <param name="csrValA">array of nnz elements, where nnz is the number of non-zero elements and can be obtained from csrRowA[n] - csrRowA[0].</param>
/// <param name="csrRowA">array of n+1 index elements.</param>
/// <param name="csrColA">array of nnz column indices.</param>
/// <param name="x">vector of n elements. Initial guess on entry; updated in place while solving.</param>
/// <param name="b">vector of n elements (right-hand side).</param>
/// <param name="ax">temporary memory for BiCGSTAB. Not referenced by this implementation; kept so existing callers still compile.</param>
/// <param name="r0">temporary memory for BiCGSTAB (initial residual b - A * x).</param>
/// <param name="r">temporary memory for BiCGSTAB (current residual).</param>
/// <param name="v">temporary memory for BiCGSTAB (A * p).</param>
/// <param name="p">temporary memory for BiCGSTAB (search direction).</param>
/// <param name="s">temporary memory for BiCGSTAB (half-step residual r - alpha * v).</param>
/// <param name="t">temporary memory for BiCGSTAB (A * s).</param>
/// <param name="threshold">iteration tolerance: the solve succeeds once the 2-norm of the residual is at or below this value.</param>
/// <param name="maxIterate">maximum number of BiCGSTAB iterations to perform.</param>
/// <returns>
/// A result whose IsSuccess tells whether the residual norm reached <paramref name="threshold"/>,
/// whose IterateCount is the number of iterations started (0 when the initial guess already
/// satisfies the threshold), and whose LastError is the last computed residual norm.
/// </returns>
public SolveResult BiCGSTAB(int n, int nnz, double[] csrValA, int[] csrRowA, int[] csrColA, double[] x, double[] b, double[] ax, double[] r0, double[] r, double[] v, double[] p, double[] s, double[] t, double threshold = 1e-10, int maxIterate = 1000)
{
    double l0 = 1.0, alpha = 1.0, w0 = 1.0;
    double minusOne = -1.0;
    double one = 1.0;
    double zero = 0.0;

    // r0 = b - A * x (initial residual), r = r0, v = p = 0.
    blas.COPY(b, r0);
    sparse.CSRMV(n, n, nnz, ref minusOne, csrValA, csrRowA, csrColA, x, ref one, r0);
    blas.COPY(r0, r);
    SetValue(n, v, 0.0);
    SetValue(n, p, 0.0);

    // Test the initial guess first: with a zero residual every inner product
    // below is zero and the recurrence would divide 0 by 0.
    double residual = blas.NRM2(r);
    bool converged = residual <= threshold;
    int k = 0;

    while (!converged && k < maxIterate)
    {
        k++;

        double l1 = blas.DOT(r0, r);
        if (l1 == 0.0)
        {
            // Breakdown: r is orthogonal to r0 although it is not small enough.
            // Continuing would give alpha = 0 now and a division by zero next time.
            break;
        }

        // l0 and w0 are non-zero here: they start at 1 and the loop exits
        // before a zero value could be carried into the next iteration.
        double beta = (l1 / l0) * (alpha / w0);

        // Update p: p = r + beta * (p - w0 * v)
        blas.AXPY(-w0, v, p);
        blas.SCAL(beta, p);
        blas.AXPY(1.0, r, p);

        // Update v: v = A * p
        sparse.CSRMV(n, n, nnz, ref one, csrValA, csrRowA, csrColA, p, ref zero, v);

        double r0v = blas.DOT(r0, v);
        if (r0v == 0.0)
        {
            // Breakdown: alpha is undefined. Stop before x is overwritten with
            // non-finite values; x and residual still describe the last iterate.
            break;
        }

        alpha = l1 / r0v;

        // s = r - alpha * v, t = A * s
        blas.COPY(r, s);
        blas.AXPY(-alpha, v, s);
        sparse.CSRMV(n, n, nnz, ref one, csrValA, csrRowA, csrColA, s, ref zero, t);

        // w1 minimises ||s - w1 * t||. When t is exactly zero there is nothing
        // to minimise, so skip the correction instead of computing 0 / 0.
        double tt = blas.DOT(t, t);
        double w1 = tt == 0.0 ? 0.0 : blas.DOT(t, s) / tt;

        // Update x: x = x + alpha * p + w1 * s
        blas.AXPY(alpha, p, x);
        blas.AXPY(w1, s, x);

        // Update r: r = s - w1 * t, which is the residual of the x just written.
        blas.COPY(s, r);
        blas.AXPY(-w1, t, r);

        // Measure convergence on r (not s): x already contains the w1 * s
        // correction, so r is the residual that matches the returned x.
        residual = blas.NRM2(r);
        converged = residual <= threshold;

        if (w1 == 0.0)
        {
            // Stagnation: the next beta would divide by w0 == 0.
            break;
        }

        w0 = w1;
        l0 = l1;
    }

    SolveResult result = new SolveResult();
    result.IterateCount = k;
    result.IsSuccess = converged;
    result.LastError = residual;
    return result;
}