/// <summary>
/// Binds this preconditioner to the vectors and the matrix of the hosting solver and
/// allocates its work objects;
/// see <see cref="IMonkeyImplicitPrecond.CreateTempObjects"/>.
/// </summary>
/// <param name="x">kept as the output vector of the preconditioner; must be locked</param>
/// <param name="b">kept as the input vector of the preconditioner; must be locked</param>
/// <param name="mtx">
/// kept as the matrix; the communication vector of <paramref name="x"/> is created for it;
/// must be locked
/// </param>
/// <param name="dev">not used here; the work vector is created through <c>m_Device</c></param>
/// <exception cref="ArgumentException">
/// if <paramref name="x"/>, <paramref name="b"/> or <paramref name="mtx"/> is not locked
/// </exception>
public override void CreateTempObjects(VectorBase x, VectorBase b, MatrixBase mtx, Device dev) {
    // argument validation: everything has to arrive in locked state
    if (!x.IsLocked) {
        throw new ArgumentException("x must be locked.", "x");
    }
    if (!b.IsLocked) {
        throw new ArgumentException("b must be locked.", "b");
    }
    if (!mtx.IsLocked) {
        throw new ArgumentException("mtx must be locked.", "mtx");
    }

    // remember the objects of the hosting solver
    m_Matrix = mtx;
    m_PcInput = b;
    m_PcOutput = x;

    // allocate communication vector and work vector
    _xComm = x.CreateCommVector(mtx);
    tmp = m_Device.CreateVector(m_PcOutput.Part);

    // lock the work vector and the inverse diagonal
    tmp.Lock();
    m_InvDiag.Lock();
}
/// <summary>
/// Stores the solver's vectors and matrix and allocates the two temporary vectors
/// (plus a communication vector) this preconditioner works with.
/// </summary>
/// <param name="pc_output">The output vector; must be locked</param>
/// <param name="pc_input">The input vector; must be locked</param>
/// <param name="mtx">The matrix that is to be multiplied; must be locked</param>
/// <param name="dev">not used here; the temporaries are created through <c>m_Device</c></param>
/// <exception cref="ArgumentException">
/// if <paramref name="pc_output"/>, <paramref name="pc_input"/> or <paramref name="mtx"/>
/// is not locked
/// </exception>
public override void CreateTempObjects(VectorBase pc_output, VectorBase pc_input, MatrixBase mtx, Device dev) {
    // argument validation: everything has to arrive in locked state
    if (!pc_output.IsLocked) {
        throw new ArgumentException("pc_output must be locked.", "pc_output");
    }
    if (!pc_input.IsLocked) {
        throw new ArgumentException("pc_input must be locked.", "pc_input");
    }
    if (!mtx.IsLocked) {
        throw new ArgumentException("mtx must be locked.", "mtx");
    }

    // remember the objects of the hosting solver
    mPcInput = pc_input;
    mPcOutput = pc_output;
    m_matrix = mtx;

    // temporaries: both share the partition of the input vector
    yOld = m_Device.CreateVector(mPcInput.Part);
    yNew = m_Device.CreateVector(mPcInput.Part);
    _yComm = yOld.CreateCommVector(m_matrix);

    // lock the temporaries
    yOld.Lock();
    yNew.Lock();
}
/// <summary>
/// performs <paramref name="acc"/> = <paramref name="acc"/>*<paramref name="beta"/> + <paramref name="alpha"/>*this*<paramref name="a"/>;
/// </summary>
/// <typeparam name="VectorType1">list type of the input vector</typeparam>
/// <typeparam name="VectorType2">list type of the accumulator</typeparam>
/// <param name="alpha">scaling of the matrix/vector product</param>
/// <param name="a">
/// input vector; needs at least as many entries as the local length of the column partition
/// </param>
/// <param name="beta">scaling applied to <paramref name="acc"/> before accumulation</param>
/// <param name="acc">
/// accumulator; needs at least as many entries as the local length of the row partition;
/// must not be the same object as <paramref name="a"/>
/// </param>
/// <exception cref="ArgumentNullException">
/// if <paramref name="a"/> or <paramref name="acc"/> is null
/// </exception>
/// <exception cref="ArgumentException">
/// if an argument is too short, if both arguments are the same object, or if the
/// partition of a created vector does not match the matrix
/// </exception>
/// <remarks>
/// works only in unlocked matrix state (see <see cref="LockAbleObject.Lock"/>, <see cref="LockAbleObject.Unlock"/>);
/// the matrix and the temporary vectors are unlocked again even if the multiplication throws.
/// </remarks>
public virtual void SpMV<VectorType1, VectorType2>(double alpha, VectorType1 a, double beta, VectorType2 acc)
    where VectorType1 : IList<double>
    where VectorType2 : IList<double> {
    if (a == null) {
        throw new ArgumentNullException("a");
    }
    if (acc == null) {
        throw new ArgumentNullException("acc");
    }
    if (acc.Count < this.RowPartitioning.LocalLength) {
        throw new ArgumentException("array is too short - must be as least as big as the local length of the row partition.", "acc");
    }
    if (a.Count < this.ColPartition.LocalLength) {
        throw new ArgumentException("array is too short - must be as least as big as the local length of the column partition.", "a");
    }
    if (object.ReferenceEquals(a, acc)) {
        throw new ArgumentException("in-place computation is not supported.", "a,acc");
    }

    // create vector objects
    bool dummy;
    VectorBase _a = CreateVec(a, this.ColPartition, out dummy);
    using (VectorBase.CommVector _a_comm = _a.CreateCommVector(this)) {
        bool notWriteBackReq;
        VectorBase _acc = CreateVec(acc, this.RowPartitioning, out notWriteBackReq);

        // check args BEFORE anything gets locked, so that a failed check
        // cannot leave the matrix or the vectors in locked state
        if (!_a.Part.Equals(this.ColPartition)) {
            throw new ArgumentException("mismatch between column partition and partition of a.", "a");
        }
        if (!_acc.Part.Equals(this.RowPartitioning)) {
            throw new ArgumentException("mismatch between row partition and partition of acc.", "acc");
        }

        // lock objects, do the real work, and unlock in any case;
        // each object is only unlocked if its own Lock() succeeded
        this.Lock();
        try {
            _a.Lock();
            try {
                _acc.Lock();
                try {
                    SpMV_Expert(alpha, _a_comm, beta, _acc);
                } finally {
                    _acc.Unlock();
                }
            } finally {
                _a.Unlock();
            }
        } finally {
            this.Unlock();
        }

        // copy back result (if required)
        if (!notWriteBackReq) {
            _acc.GetValues(acc, 0, 0, this.RowPartitioning.LocalLength);
        }
    }
}
/// <summary>
/// sparse matrix/vector product on already locked objects
/// (<see cref="LockAbleObject.Lock"/>); this is the variant the iterative solvers
/// are supposed to call.
/// </summary>
/// <param name="alpha">passed on to the local and external multiplication stages</param>
/// <param name="_a_comm">
/// communication vector of the input vector; must have been created for this matrix
/// </param>
/// <param name="beta">passed on to the local and external multiplication stages</param>
/// <param name="_acc">accumulator vector; must be locked</param>
/// <exception cref="ApplicationException">
/// if this matrix, the owner of <paramref name="_a_comm"/> or <paramref name="_acc"/> is not locked
/// </exception>
/// <exception cref="ArgumentException">
/// if <paramref name="_a_comm"/> belongs to another matrix
/// </exception>
public void SpMV_Expert(double alpha, VectorBase.CommVector _a_comm, double beta, VectorBase _acc) {
    bool everythingLocked = this.IsLocked && _a_comm.Owner.IsLocked && _acc.IsLocked;
    if (!everythingLocked) {
        throw new ApplicationException("objects must be locked.");
    }
    if (!Object.ReferenceEquals(this, _a_comm.Mtx)) {
        throw new ArgumentException("input vector was not specified for this matrix.", "_a_comm");
    }

    VectorBase input = _a_comm.Owner;

    // MPI: fill send buffer
    _a_comm.FillSendBuffer();

    // Start of the inner (local) part: a GPU implementation would launch its
    // computation here; a CPU implementation, which blocks the main processor,
    // leaves this stage empty so that the transmission can be started first.
    SpMV_Local_Start(alpha, input, beta, _acc);

    // MPI communication: start transmission (both calls return immediately)
    _a_comm.StartTransmissionImReturn();
    _a_comm.InitReceiveImReturn();

    // On a CPU implementation this is the place for the real local workload:
    // communication is under way and external data has not arrived yet.
    SpMV_Local_Middle(alpha, input, beta, _acc);

    // Wait for the transmission to finish and process the external parts;
    // 'SpMV_External_RecvCallBack' is invoked every time another processor
    // delivers data.
    SpMV_External_Begin(alpha, beta, _acc);
    _a_comm.WaitCommFinish(SpMV_External_RecvCallBack);

    // Blocks until the computation of the local part is finished;
    // a CPU implementation will typically leave this stage empty.
    SpMV_Local_End(alpha, input, beta, _acc);

    // Here, a GPU implementation combines the locally computed part (done on GPU)
    // with the external parts (done on CPU).
    SpMV_External_Finalize();
}
/// <summary>
/// Creates the vector of "one over diagonal elements" of <paramref name="Matrix"/>
/// for the rows stored on this processor.
/// </summary>
/// <param name="Matrix">matrix whose diagonal is inverted entry by entry</param>
/// <returns>
/// a new vector on the row partition of <paramref name="Matrix"/>; entry <em>i</em> is
/// 1.0 divided by the diagonal element of local row <em>i</em>
/// </returns>
VectorBase CreateInvDiag(IMutableMatrixEx Matrix) {
    VectorBase reciprocals = m_Device.CreateVector(Matrix.RowPartitioning);

    int firstRow = (int)Matrix.RowPartitioning.i0;
    int noOfRows = (int)Matrix.RowPartitioning.LocalLength;

    for (int iLoc = 0; iLoc < noOfRows; iLoc++) {
        // the matrix is addressed by global indices, the vector by local ones
        int iGlob = iLoc + firstRow;
        reciprocals[iLoc] = 1.0 / Matrix[iGlob, iGlob];
    }

    return reciprocals;
}
/// <summary>
/// Creates the vector of "one over diagonal elements" of the solver matrix
/// for the rows stored on this processor.
/// </summary>
/// <returns>
/// a new vector on the row partition of the matrix; entry <em>i</em> is
/// 1.0 divided by the diagonal element of local row <em>i</em>
/// </returns>
VectorBase CreateInvDiag() {
    VectorBase reciprocals = Device.CreateVector(m_Matrix.RowPartitioning);

    int firstRow = (int)m_Matrix.RowPartitioning.i0;
    int noOfRows = (int)m_Matrix.RowPartitioning.LocalLength;

    for (int iLoc = 0; iLoc < noOfRows; iLoc++) {
        // the diagonal is queried by global row index, the vector is written by local index
        int iGlob = iLoc + firstRow;
        reciprocals[iLoc] = 1.0 / m_Matrix.GetDiagonalElement(iGlob);
    }

    return reciprocals;
}
/// <summary>
/// Unlocks and disposes the work objects of this preconditioner;
/// see <see cref="IMonkeyImplicitPrecond.ReleaseTempObjects"/>.
/// </summary>
/// <remarks>
/// The inverse diagonal is only unlocked, it is neither disposed nor cleared here.
/// </remarks>
public override void ReleaseTempObjects() {
    // work vector: unlock, free, forget
    tmp.Unlock();
    tmp.Dispose();
    tmp = null;

    // inverse diagonal: unlock only
    m_InvDiag.Unlock();

    // communication vector: free, forget
    _xComm.Dispose();
    _xComm = null;
}
/// <summary>
/// The used temporary objects are unlocked, disposed and set to null.
/// </summary>
/// <remarks>
/// The communication vector is disposed before the vector that owns it; this is the
/// same order the solvers in this file use for their own communication vectors.
/// </remarks>
public override void ReleaseTempObjects() {
    // the owner must be unlocked first, then the communication vector created
    // from it is released, and only after that the owner itself
    yOld.Unlock();
    _yComm.Dispose();
    _yComm = null;
    yOld.Dispose();
    yOld = null;

    yNew.Unlock();
    yNew.Dispose();
    yNew = null;
}
/// <summary>
/// Creates the communication vector of <paramref name="owner"/> for the matrix
/// <paramref name="M"/> and allocates one pinned receive buffer per sending process.
/// </summary>
/// <param name="M">matrix whose SpMV communication pattern determines the receive buffers</param>
/// <param name="owner">vector this object belongs to</param>
/// <exception cref="ArgumentException">
/// if the partition of <paramref name="owner"/> differs from the column partition of <paramref name="M"/>
/// </exception>
internal CommVector(MatrixBase M, VectorBase owner) {
    if (!owner.Part.Equals(M.ColPartition)) {
        throw new ArgumentException("column partition of matrix must be equal to partion of vector", "M");
    }

    m_Mtx = M;
    m_Owner = owner;

    // allocate receive buffers: one array per process, sized by the number of
    // entries received from it, and pinned through a GC handle
    var receiveCounts = m_Mtx._SpmvCommPattern.NoOfReceivedEntries;
    RecvBuffersLock = new GCHandle[receiveCounts.Keys.Count];

    int slot = 0;
    foreach (int proc in receiveCounts.Keys) {
        double[] buffer = new double[receiveCounts[proc]];
        RecvBuffers.Add(proc, buffer);
        RecvBuffersLock[slot] = GCHandle.Alloc(buffer, GCHandleType.Pinned);
        slot++;
    }
}
/// <summary>
/// Exchanges the content of this vector and of <paramref name="other"/>.
/// </summary>
/// <param name="other">the vector to exchange content with</param>
/// <remarks>
/// works only in locked mode
/// </remarks>
public abstract void Swap(VectorBase other);
/// <summary>
/// Inner product of this vector and <paramref name="other"/>.
/// </summary>
/// <param name="other">second operand of the inner product</param>
/// <returns>the inner product</returns>
/// <remarks>
/// this is a MPI-collective operation;
/// works only in locked mode
/// </remarks>
public abstract double InnerProd(VectorBase other);
/// <summary>
/// Accumulation: this = this + <paramref name="alpha"/>*<paramref name="other"/>;
/// </summary>
/// <param name="alpha">scaling applied to <paramref name="other"/></param>
/// <param name="other">the vector that is added to this one</param>
public abstract void Acc(double alpha, VectorBase other);
/// <summary>
/// Called by the hosting solver before it starts, to provide configuration
/// and the possibility to create temporary objects.
/// </summary>
/// <param name="pc_output">
/// Here, the hosting solver expects the result of the preconditioning;
/// see <see cref="DoPrecond"/>;
/// </param>
/// <param name="pc_input">
/// Here, the hosting solver will place the input vector for the preconditioning;
/// see <see cref="DoPrecond"/>;
/// </param>
/// <param name="mtx">matrix handed over by the hosting solver</param>
/// <param name="dev">device handed over by the hosting solver</param>
public abstract void CreateTempObjects(VectorBase pc_output, VectorBase pc_input, MatrixBase mtx, Device dev);
/// <summary>
/// implementation of the CG algorithm with an optional nested preconditioner
/// </summary>
/// <param name="x">initial guess on entry, approximate solution on exit</param>
/// <param name="rhs">
/// right-hand-side; it is overwritten, because it is re-used as storage for the residual
/// </param>
/// <param name="stats">
/// <see cref="SolverResult.Converged"/> and <see cref="SolverResult.NoOfIterations"/> are set
/// </param>
/// <exception cref="NotImplementedException">if the configured convergence type is unknown</exception>
/// <remarks>
/// All temporary vectors created here are unlocked and disposed at the end of a regular run.
/// </remarks>
protected override void CallSolver(VectorBase x, VectorBase rhs, ref SolverResult stats) {
    VectorBase P = Device.CreateVector(x.Part);
    VectorBase.CommVector commP = P.CreateCommVector(m_Matrix);

    VectorBase R = rhs; // rhs is only needed once, so we can use it to store residuals
    VectorBase V = Device.CreateVector(x.Part);
    VectorBase Z = Device.CreateVector(x.Part);

    // lock objects
    // ============
    x.Lock();
    P.Lock();
    R.Lock();
    V.Lock();
    Z.Lock();
    m_Matrix.Lock();

    // configure Precond
    // =================
    if (m_NestedPrecond != null) {
        // the preconditioner reads R and writes Z
        m_NestedPrecond.CreateTempObjects(Z, R, m_Matrix, this.Device);
    }

    // compute P0, R0
    // ==============
    // commP belongs to P, so x is swapped into P for the single product with x
    P.Swap(x);
    m_Matrix.SpMV_Expert(-1.0, commP, 1.0, R); // R = rhs - M*x
    P.Swap(x);
    if (m_NestedPrecond != null) {
        m_NestedPrecond.DoPrecond();
        P.CopyFrom(Z);
    } else {
        P.CopyFrom(R);
    }

    double alpha = R.InnerProd(P);
    double ResNorm;
    if (m_ConvergenceType == ConvergenceTypes.Absolute) {
        ResNorm = Math.Sqrt(alpha);
    } else if (m_ConvergenceType == ConvergenceTypes.Relative) {
        ResNorm = 1.0;
    } else {
        throw new NotImplementedException("unknown convergence type: " + m_ConvergenceType.ToString());
    }

    // iterate
    // =======
    stats.Converged = false;
    stats.NoOfIterations = 1; // one iteration has already been performed (P0, R0)
    for (int n = m_MaxIterations - 2; n >= 0; n--) {
        if (ResNorm <= m_Tolerance && stats.NoOfIterations >= base.m_MinIterations) {
            stats.Converged = true;
            break;
        }

        if (Math.Abs(alpha) <= double.Epsilon) {
            // numerical breakdown
            break;
        }

        m_Matrix.SpMV_Expert(1.0, commP, 0, V);
        double lambda = alpha / V.InnerProd(P);

        x.Acc(lambda, P);
        R.Acc(-lambda, V);

        if (m_IterationCallback != null) {
            // pass approx. sol and residual to callback function;
            // the vectors are unlocked while their values are copied out
            x.Unlock();
            R.Unlock();

            double[] x_approx = new double[x.Part.LocalLength];
            x.CopyTo(x_approx, 0);

            double[] R_curr = new double[R.Part.LocalLength];
            R.CopyTo(R_curr, 0);

            m_IterationCallback(stats.NoOfIterations, x_approx, R_curr);

            x.Lock();
            R.Lock();
        }

        if (m_NestedPrecond != null) {
            Z.Clear();
            m_NestedPrecond.DoPrecond();
        } else {
            Z.CopyFrom(R);
        }

        double alpha_neu = R.InnerProd(Z);

        // compute residual norm
        // NOTE(review): inside the loop the 2-norm of the residual is used for BOTH
        // convergence types. The former computation from 'alpha' was overwritten on
        // the next statement and therefore never had any effect; it was removed
        // without changing behavior. Confirm whether 'Relative' should divide by the
        // initial residual norm here.
        ResNorm = Math.Sqrt(R.TwoNormSquare());

        P.Scale(alpha_neu / alpha);
        P.Acc(1.0, Z);

        alpha = alpha_neu;
        stats.NoOfIterations++;
    }

    // unlock objects
    // ==============
    if (m_NestedPrecond != null) {
        m_NestedPrecond.ReleaseTempObjects();
    }

    x.Unlock();
    P.Unlock();
    R.Unlock();
    V.Unlock();
    Z.Unlock(); // was locked above but never released before
    m_Matrix.Unlock();

    // release everything that was created in this method (R is the caller's rhs)
    commP.Dispose();
    P.Dispose();
    V.Dispose();
    Z.Dispose();
}
/// <summary>
/// Runs the base-class initialization and afterwards stores the vector of inverse
/// diagonal entries of <paramref name="OrigMatrix"/>;
/// see <see cref="IMonkeyImplicitPrecond.Initialize"/>.
/// </summary>
/// <param name="OrigMatrix">matrix the preconditioner is built for</param>
/// <param name="dev">forwarded to the base implementation</param>
/// <param name="mt">forwarded to the base implementation</param>
public override void Initialize(IMutableMatrixEx OrigMatrix, Device dev, MatrixType mt) {
    // the base class has to be set up first
    base.Initialize(OrigMatrix, dev, mt);

    // one over diagonal elements
    m_InvDiag = CreateInvDiag(OrigMatrix);
}
/// <summary>
/// executes the Jacobi iteration
/// x = x + omega * D^-1 * (rhs - M*x),
/// where omega is the under-relaxation factor and D the diagonal of the matrix.
/// </summary>
/// <param name="x">initial guess on entry, approximate solution on exit</param>
/// <param name="rhs">right-hand-side</param>
/// <param name="stats">
/// <see cref="SolverResult.Converged"/> and <see cref="SolverResult.NoOfIterations"/> are set
/// </param>
/// <remarks>
/// The convergence check uses the 2-norm of the unscaled residual rhs - M*x, either
/// directly (absolute) or divided by the residual norm of the first iteration (relative).
/// </remarks>
protected override void CallSolver(VectorBase x, VectorBase rhs, ref SolverResult stats) {
    // create objects
    // ==============
    VectorBase.CommVector _xComm = x.CreateCommVector(m_Matrix);
    VectorBase tmp = Device.CreateVector(x.Part);
    VectorBase InvDiag = CreateInvDiag();

    // lock objects
    // ============
    x.Lock();
    m_Matrix.Lock();
    rhs.Lock();
    tmp.Lock();
    InvDiag.Lock();

    // iterate
    // =======
    stats.Converged = false;
    stats.NoOfIterations = 0;
    double residualNorm = double.MaxValue;
    double r_0 = double.NaN;
    while (true) {
        // loop termination
        // ================
        if (stats.NoOfIterations >= m_MinIterations) {
            // do at least the minimum number of iterations
            if (residualNorm <= m_Tolerance) {
                // success
                stats.Converged = true;
                break;
            }

            if (stats.NoOfIterations >= m_MaxIterations) {
                // terminate
                break;
            }
        }

        // Jacobi iteration
        // ================
        m_Matrix.SpMV_Expert(-1.0, _xComm, 0.0, tmp); // tmp = -M*x
        tmp.Acc(1.0, rhs);                            // tmp = -M*x + rhs

        // residual norm, taken BEFORE the relaxation factor is applied
        double r = Math.Sqrt(tmp.TwoNormSquare());
        if (stats.NoOfIterations == 0) {
            r_0 = r;
        }
        if (m_ConvergenceType == ConvergenceTypes.Absolute) {
            residualNorm = r;
        } else {
            // a zero initial residual would give 0/0 = NaN and the loop would run
            // until the maximum number of iterations; use the absolute norm then
            residualNorm = (r_0 > 0.0) ? (r / r_0) : r;
        }

        // under-relaxation: applied exactly once per iteration
        // (it used to be applied twice, which resulted in a factor of omega^2)
        if (m_UnderRelaxationFactor != 1.0) {
            tmp.Scale(m_UnderRelaxationFactor);
        }
        tmp.MultiplyElementWise(InvDiag); // tmp = omega * D^-1 * (rhs - M*x)
        x.Acc(1.0, tmp);

        stats.NoOfIterations++;
    }

    // unlock
    // ======
    x.Unlock();
    m_Matrix.Unlock();
    rhs.Unlock();
    tmp.Unlock();
    InvDiag.Unlock();

    // release everything that was created in this method
    _xComm.Dispose();
    tmp.Dispose();
    InvDiag.Dispose();
}
/// <summary>
/// implementation of the CG algorithm
/// </summary>
/// <param name="x">initial guess on entry, approximate solution on exit</param>
/// <param name="rhs">
/// right-hand-side; it is overwritten, because it is re-used as storage for the residual
/// </param>
/// <param name="stats">
/// <see cref="SolverResult.Converged"/> and <see cref="SolverResult.NoOfIterations"/> are set
/// </param>
/// <exception cref="NotImplementedException">if the configured convergence type is unknown</exception>
protected override void CallSolver(VectorBase x, VectorBase rhs, ref SolverResult stats) {
    VectorBase P = Device.CreateVector(x.Part);
    VectorBase.CommVector commP = P.CreateCommVector(m_Matrix);

    VectorBase R = rhs; // rhs is only needed once, so we can use it to store residuals
    VectorBase V = Device.CreateVector(x.Part);

    // lock objects
    // ============
    x.Lock();
    P.Lock();
    R.Lock();
    V.Lock();
    m_Matrix.Lock();

    // compute P0, R0
    // ==============
    // we only need to multiply x once by the matrix, so we don't want to create
    // a separate VectorBase.CommVector - object for x;
    // instead, we're temporarily exchanging the roles of x and P;
    P.Swap(x);
    x.CopyFrom(rhs);                           // x = rhs
    m_Matrix.SpMV_Expert(-1.0, commP, 1.0, x); // x = rhs - M*x
    P.Swap(x);
    R.CopyFrom(P);

    double alpha = R.TwoNormSquare();
    double alpha_0 = alpha;
    double ResNorm;
    if (m_ConvergenceType == ConvergenceTypes.Absolute) {
        ResNorm = Math.Sqrt(alpha);
    } else if (m_ConvergenceType == ConvergenceTypes.Relative) {
        ResNorm = 1.0;
    } else {
        throw new NotImplementedException("unknown convergence type: " + m_ConvergenceType.ToString());
    }

    // iterate
    // =======
    stats.Converged = false;
    stats.NoOfIterations = 1; // one iteration has already been performed (P0, R0)
    for (int n = m_MaxIterations - 2; n >= 0; n--) {
        if (ResNorm <= m_Tolerance && stats.NoOfIterations >= base.m_MinIterations) {
            stats.Converged = true;
            break;
        }

        if (Math.Abs(alpha) <= double.Epsilon) {
            // numerical breakdown
            break;
        }

        m_Matrix.SpMV_Expert(1.0, commP, 0, V);
        double lambda = alpha / V.InnerProd(P);

        x.Acc(lambda, P);
        R.Acc(-lambda, V);

        double alpha_neu = R.TwoNormSquare();

        // compute residual norm from the residual that was just updated
        // (it used to be taken from 'alpha', i.e. from the previous iteration,
        // so that convergence was detected one iteration too late)
        if (m_ConvergenceType == ConvergenceTypes.Absolute) {
            ResNorm = Math.Sqrt(alpha_neu);
        } else {
            // alpha_0 cannot be zero here: a zero 'alpha' leaves the loop above
            ResNorm = Math.Sqrt(alpha_neu / alpha_0);
        }

        P.Scale(alpha_neu / alpha);
        P.Acc(1.0, R);

        alpha = alpha_neu;
        stats.NoOfIterations++;
    }

    // unlock objects
    // ==============
    x.Unlock();
    P.Unlock();
    R.Unlock();
    V.Unlock();
    m_Matrix.Unlock();

    // release everything that was created in this method (R is the caller's rhs)
    commP.Dispose();
    P.Dispose();
    V.Dispose();
}
/// <summary>
/// Creates the object for the vector <paramref name="owner"/>.
/// </summary>
/// <param name="owner">the vector that is stored as owner of this object</param>
public MyEnum(VectorBase owner) {
    m_owner = owner;
}
/// <summary>
/// The actual solver; to be provided by the derived class.
/// </summary>
/// <param name="x">on exit, (hopefully) the solution to the equation</param>
/// <param name="rhs">right-hand-side</param>
/// <param name="stats">
/// an implementor should at least set <see cref="SolverResult.Converged"/>
/// and <see cref="SolverResult.NoOfIterations"/>
/// </param>
protected abstract void CallSolver(VectorBase x, VectorBase rhs, ref SolverResult stats);
/// <summary>
/// Entry-wise product: for each <em>j</em>, <br/>
/// this[j] = this[j]*<paramref name="other"/>[j]
/// </summary>
/// <param name="other">the vector whose entries are the factors</param>
public abstract void MultiplyElementWise(VectorBase other);
/// <summary>
/// see <see cref="ISparseSolverExt.Solve{Tdiag, Tunknowns, Trhs}(double,Tdiag,Tunknowns,Trhs)"/>;
/// </summary>
/// <typeparam name="Tdiag">list type of the diagonal modification</typeparam>
/// <typeparam name="Tunknowns">list type of the unknowns</typeparam>
/// <typeparam name="Trhs">list type of the right-hand-side</typeparam>
/// <param name="Scale">scaling of the matrix; must be a non-zero number</param>
/// <param name="d">
/// optional (may be null) values for the diagonal; the length must be equal to or a factor
/// of the number of rows stored on this processor, the values are repeated periodically
/// </param>
/// <param name="x">initial guess on entry, approximate solution on exit</param>
/// <param name="rhs">right-hand-side</param>
/// <returns>convergence state, number of iterations and run time</returns>
/// <exception cref="ArgumentException">
/// if <paramref name="Scale"/> is zero or NaN, or if the length of <paramref name="d"/>
/// does not fit the number of locally stored rows
/// </exception>
/// <remarks>
/// The diagonal of the matrix is modified for the duration of the solver run; afterwards
/// the original diagonal values are written back exactly.
/// NOTE(review): if the solver throws, the matrix keeps the modified diagonal, because the
/// solver may have left the matrix locked - confirm whether that case needs handling.
/// </remarks>
public SolverResult Solve<Tdiag, Tunknowns, Trhs>(double Scale, Tdiag d, Tunknowns x, Trhs rhs)
    where Tdiag : IList<double>
    where Tunknowns : IList<double>
    where Trhs : IList<double> {
    using (var tr = new ilPSP.Tracing.FuncTrace()) {
        // 1/Scale is used below; zero would put infinity onto the diagonal
        // (and NaN after the restore), NaN would spoil everything
        if (Scale == 0.0 || double.IsNaN(Scale)) {
            throw new ArgumentException("scaling factor must be a non-zero number.", "Scale");
        }

        SolverResult res = new SolverResult();
        Stopwatch st = Stopwatch.StartNew();

        // modify diagonal
        // ===============

        // truly, we're not solving (diag(d) + Scale*M)*x = rhs,
        // but ((1.0/Scale)*diag(d) + M)*x = (1.0/Scale)*rhs
        double ooScale = 1.0 / Scale;

        int N = 0;
        int i0 = 0;
        double[] origDiag = null; // original diagonal values of the modified rows
        bool[] modified = null;   // marks the rows whose diagonal was changed
        if (d != null) {
            i0 = (int)m_Matrix.RowPartitioning.i0;
            N = m_Matrix.RowPartitioning.LocalLength;
            int Nd = d.Count;

            // an empty 'd' is only acceptable if there are no local rows at all;
            // checking it here also avoids a division by zero in 'N % Nd'
            bool lengthOk = (Nd == 0) ? (N == 0) : (Nd <= N && N % Nd == 0);
            if (!lengthOk) {
                throw new ArgumentException("length must be equal to or a factor of the number of rows stored on this processor", "d");
            }

            origDiag = new double[N];
            modified = new bool[N];
            for (int i = 0; i < N; i++) {
                double vadd = d[i % Nd]; // 'd' is repeated periodically
                if (vadd != 0.0) {
                    int iglob = i + i0;
                    double v = m_Matrix.GetDiagonalElement(iglob);
                    origDiag[i] = v;
                    modified[i] = true;
                    m_Matrix.SetDiagonalElement(iglob, v + ooScale * vadd);
                }
            }
        }

        // pass values to monkey
        // =====================
        bool shallow, dummy2;
        VectorBase X = Device.CreateVector(m_Matrix.ColPartition, x, out shallow);
        VectorBase Rhs = Device.CreateVector(m_Matrix.RowPartitioning, rhs, out dummy2);

        // scale rhs
        // =========
        if (ooScale != 1.0) {
            Rhs.Lock();
            Rhs.Scale(ooScale);
            Rhs.Unlock();
        }

        // call Solver
        // ===========
        CallSolver(X, Rhs, ref res);

        if (res.Converged != true) {
            Logger.Warn("Solver did NOT CONVERGE: " + res.ToString());
        }

        // return
        // ======

        // restore the saved diagonal values; subtracting the added term again
        // would not reproduce them exactly in floating point arithmetic
        if (modified != null) {
            for (int i = 0; i < N; i++) {
                if (modified[i]) {
                    m_Matrix.SetDiagonalElement(i + i0, origDiag[i]);
                }
            }
        }

        // copy back the solution, unless X works directly on the memory of x
        if (!shallow) {
            X.GetValues(x, 0, 0, m_Matrix.ColPartition.LocalLength);
        }
        X.Dispose();
        Rhs.Dispose();

        st.Stop();
        res.RunTime = st.Elapsed;
        return res;
    }
}
/// <summary>
/// Initializes this vector to be a copy of <paramref name="other"/>.
/// </summary>
/// <param name="other">the vector to copy from</param>
/// <remarks>
/// works only in locked mode
/// </remarks>
public abstract void CopyFrom(VectorBase other);
/// <summary>
/// Last stage of the local part of the sparse matrix/vector product; invoked by
/// <c>SpMV_Expert</c> after the communication has finished and before
/// <c>SpMV_External_Finalize</c>.
/// </summary>
/// <param name="alpha">scaling factor as passed to <c>SpMV_Expert</c></param>
/// <param name="a">input vector (the owner of the communication vector)</param>
/// <param name="beta">scaling factor as passed to <c>SpMV_Expert</c></param>
/// <param name="acc">accumulator vector</param>
/// <remarks>
/// According to the notes in <c>SpMV_Expert</c>, this call blocks until the computation of
/// the local part is finished; a CPU implementation will typically leave it empty.
/// </remarks>
internal abstract void SpMV_Local_End(double alpha, VectorBase a, double beta, VectorBase acc);
/// <summary>
/// Copies entries of <paramref name="_src"/>, selected by <paramref name="IdxSrc"/>,
/// to the entries of this vector selected by <paramref name="IdxThis"/>.
/// </summary>
/// <param name="_src">
/// source vector to copy data from
/// </param>
/// <param name="IdxThis">
/// indices into this vector, where to copy to
/// </param>
/// <param name="PerThis">
/// must be a divisor of the local length of this vector
/// </param>
/// <param name="IdxSrc">
/// indices into vector <paramref name="_src"/>, where to copy from;
/// length of this array must be equal to length of <paramref name="IdxThis"/>
/// </param>
/// <param name="PerSrc">
/// must be a divisor of the local length of <paramref name="_src"/>
/// </param>
public abstract void CopyPartlyFrom(
    VectorBase _src,
    int[] IdxThis,
    int PerThis,
    int[] IdxSrc,
    int PerSrc);
/// <summary>
/// Opens the processing of the external parts of the sparse matrix/vector product;
/// invoked by <c>SpMV_Expert</c> right before it waits for the communication to finish.
/// </summary>
/// <param name="alpha">scaling factor as passed to <c>SpMV_Expert</c></param>
/// <param name="beta">scaling factor as passed to <c>SpMV_Expert</c></param>
/// <param name="acc">accumulator vector</param>
internal abstract void SpMV_External_Begin(double alpha, double beta, VectorBase acc);