/// <summary>
/// The Infinity-Norm (maximum absolute row sum norm) of this matrix.
/// </summary>
/// <param name="M">matrix to operate on</param>
/// <returns>
/// the largest absolute row sum, reduced (MPI maximum) over all processes
/// of the communicator of <paramref name="M"/>
/// </returns>
/// <remarks>
/// Contains an MPI all-reduce, i.e. it must be called by every process of
/// the matrix communicator.
/// </remarks>
static public double InfNorm(this IMutableMatrixEx M) {
    using (new FuncTrace()) {
        // largest absolute row sum among the locally stored rows
        double normLoc = 0;
        int L = M.RowPartitioning.LocalLength;
        int[] col = null;
        double[] val = null;

        for (int i = 0; i < L; i++) {
            int Lr = M.GetRow(i + M.RowPartitioning.i0, ref col, ref val);

            double rownrm = 0;
            for (int j = 0; j < Lr; j++) {
                rownrm += Math.Abs(val[j]);
            }
            normLoc = Math.Max(normLoc, rownrm);
        }

        // Reduce over the communicator the matrix itself is defined on;
        // reducing over the world communicator would involve processes which
        // do not own a part of the matrix when it lives on a sub-communicator.
        double normGlob = double.NaN;
        unsafe {
            csMPI.Raw.Allreduce(
                (IntPtr)(&normLoc),
                (IntPtr)(&normGlob),
                1,
                csMPI.Raw._DATATYPE.DOUBLE,
                csMPI.Raw._OP.MAX,
                M.MPI_Comm);
        }
        return normGlob;
    }
}
/// <summary>
/// Counts the entries of row <paramref name="iRow"/> which are neither
/// located on the diagonal nor equal to zero.
/// </summary>
/// <param name="M">matrix to operate on</param>
/// <param name="iRow">
/// row index; it is handed unchanged to <c>GetRow</c> and compared against
/// the column indices of that row in order to identify the diagonal entry.
/// NOTE(review): earlier documentation called this a local index -- confirm
/// which index space <c>GetRow</c> expects.
/// </param>
/// <returns>number of off-diagonal entries with a value different from 0.0</returns>
static public int GetNoOfOffDiagonalNonZerosPerRow(this IMutableMatrixEx M, int iRow) {
    int[] columns = null;
    double[] values = null;
    int rowLength = M.GetRow(iRow, ref columns, ref values);

    int count = 0;
    for (int k = rowLength - 1; k >= 0; k--) {
        bool onDiagonal = (columns[k] == iRow);
        if (onDiagonal || values[k] == 0.0) {
            continue;
        }
        count++;
    }
    return count;
}
/// <summary>
/// Writes the locally stored rows of the matrix (including all zeros) into a
/// string (for debugging purposes).
/// Basically, it uses a tabulator separated format (which can e.g. be
/// imported into Matlab via <code>matrix = dlmread(path)</code>).
/// </summary>
/// <param name="tis">
/// the matrix to save
/// </param>
/// <returns>
/// one line (terminated by a line feed) per locally stored row; each line
/// contains one tabulator-separated, 14 characters wide cell per matrix column.
/// </returns>
/// <remarks>
/// The gap filling assumes that <c>GetRow</c> delivers the column indices of
/// a row in ascending order.
/// </remarks>
static public string ToStringDense(this IMutableMatrixEx tis) {
    // a builder avoids re-copying the whole output for every single cell
    var output = new System.Text.StringBuilder();

    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    string zeroCell = (0.0).ToString("F0", invariant).PadLeft(14);

    double[] val = null;
    int[] col = null;

    for (int i = 0; i < tis.RowPartitioning.LocalLength; i++) {
        int L = tis.GetRow(i + tis.RowPartitioning.i0, ref col, ref val);

        int currentColumn = -1;
        string separator = ""; // empty in front of the first cell of a line, tabulator afterwards

        for (int j = 0; j < L; j++) {
            // Add zeros for missing columns (the sparse format does
            // not store zero values)
            for (int k = currentColumn + 1; k < col[j]; k++) {
                output.Append(separator).Append(zeroCell);
                separator = "\t";
            }

            // Enforce use of . as decimal separator in scientific format
            output.Append(separator).Append(val[j].ToString("E", invariant).PadLeft(14));
            currentColumn = col[j];
            separator = "\t";
        }

        // Add zeros for columns following after the last entry; the separator
        // has to be switched here too, otherwise the cells of a completely
        // empty row would be written without any tabulator in between.
        for (int j = currentColumn + 1; j < tis.NoOfCols; j++) {
            output.Append(separator).Append(zeroCell);
            separator = "\t";
        }

        output.Append("\n");
    }

    return output.ToString();
}
/// <summary>
/// Overwrites every entry which is stored in row <paramref name="i"/> with 0.
/// </summary>
/// <param name="M">matrix to operate on</param>
/// <param name="i">row index in global indices</param>
static public void ClearRow(this IMutableMatrixEx M, int i) {
    int[] columnIndices = null;
    double[] rowValues = null;
    int noOfEntries = M.GetRow(i, ref columnIndices, ref rowValues);

    // keep the column pattern, zero the values
    Array.Clear(rowValues, 0, noOfEntries);

    Debug.Assert(columnIndices.Length >= noOfEntries);
    Debug.Assert(rowValues.Length >= noOfEntries);

    // the buffers may be longer than the row: cut them to the exact length
    // before writing the row back
    if (columnIndices.Length > noOfEntries) {
        Array.Resize(ref columnIndices, noOfEntries);
    }
    if (rowValues.Length > noOfEntries) {
        Array.Resize(ref rowValues, noOfEntries);
    }

    M.SetValues(i, columnIndices, rowValues);
}
/// <summary>
/// Creates an <see cref="MsrMatrix"/> with the row- and column-partitioning
/// of <paramref name="M"/> and copies all locally stored rows into it.
/// </summary>
/// <param name="M">the matrix to copy; not modified</param>
/// <returns>the newly created matrix</returns>
static public MsrMatrix ToMsrMatrix(this IMutableMatrixEx M) {
    using (new FuncTrace()) {
        var result = new MsrMatrix(M.RowPartitioning, M.ColPartition);

        int firstRow = (int)result.RowPartitioning.i0;
        int endRow = firstRow + result.RowPartitioning.LocalLength;

        // buffers are re-used for all rows
        int[] columnBuffer = null;
        double[] valueBuffer = null;

        for (int iRow = firstRow; iRow < endRow; iRow++) {
            int rowLength = M.GetRow(iRow, ref columnBuffer, ref valueBuffer);
            result.SetRow(iRow, columnBuffer, valueBuffer, rowLength);
        }

        return result;
    }
}
/// <summary>
/// Accumulation of a scaled matrix:
/// <paramref name="Acc"/> = <paramref name="Acc"/> + <paramref name="alpha"/>*<paramref name="M"/>
/// </summary>
/// <param name="Acc">
/// Input/Output: the accumulator
/// </param>
/// <param name="alpha">
/// scaling factor which is applied to the entries of <paramref name="M"/>
/// </param>
/// <param name="M">
/// Input: the matrix that is accumulated; only read.
/// </param>
/// <exception cref="ArgumentException">
/// if the number of rows, the number of columns or the row partitioning of
/// both matrices differ.
/// </exception>
public static void Acc(this IMutableMatrix Acc, double alpha, IMutableMatrixEx M) {
    if (Acc.NoOfCols != M.NoOfCols) {
        throw new ArgumentException("mismatch in number of columns");
    }
    if (Acc.NoOfRows != M.NoOfRows) {
        throw new ArgumentException("mismatch in number of rows");
    }
    if (!Acc.RowPartitioning.EqualsPartition(M.RowPartitioning)) {
        throw new ArgumentException("unable to perform Acc - operation: matrices must have equal row partition.");
    }

    int firstRow = (int)Acc.RowPartitioning.i0;
    int endRow = firstRow + Acc.RowPartitioning.LocalLength;

    int[] columns = null;
    double[] values = null;

    // row by row, entry by entry
    for (int iRow = firstRow; iRow < endRow; iRow++) {
        int rowLength = M.GetRow(iRow, ref columns, ref values);
        for (int k = 0; k < rowLength; k++) {
            int jCol = columns[k];
            Acc[iRow, jCol] += alpha * values[k];
        }
    }
}
/// <summary>
/// Gathers the entries of all rows of <paramref name="M"/> which are stored
/// on the local process.
/// </summary>
/// <param name="M">matrix to operate on</param>
/// <returns>
/// 1st index: local row index; 2nd index: enumeration of the entries of the
/// respective row, in the order in which <c>GetRow</c> delivers them.
/// </returns>
static public MsrMatrix.MatrixEntry[][] GetAllEntries(this IMutableMatrixEx M) {
    int firstRow = (int)(M.RowPartitioning.i0);
    int noOfLocalRows = M.RowPartitioning.LocalLength;

    var allRows = new MsrMatrix.MatrixEntry[noOfLocalRows][];

    // buffers are re-used for all rows
    int[] columns = null;
    double[] values = null;

    int iLocal = 0;
    while (iLocal < noOfLocalRows) {
        int rowLength = M.GetRow(firstRow + iLocal, ref columns, ref values);

        var entries = new MsrMatrix.MatrixEntry[rowLength];
        for (int k = 0; k < rowLength; k++) {
            entries[k].m_ColIndex = columns[k];
            entries[k].Value = values[k];
        }

        allRows[iLocal] = entries;
        iLocal++;
    }

    return allRows;
}
/// <summary>
/// only for debugging and testing; converts the matrix to a full matrix;
/// when running in parallel, the matrix is collected on process 0.
/// </summary>
/// <param name="tis">the matrix to convert</param>
/// <returns>
/// null on all MPI processes, except on rank 0;
/// </returns>
/// <exception cref="ApplicationException">
/// if the number of rows received from a process does not match the row
/// partitioning, or if a process other than rank 0 receives data.
/// </exception>
static public MultidimensionalArray ToFullMatrixOnProc0(this IMutableMatrixEx tis) {
    var comm = tis.MPI_Comm;
    int Rank = tis.RowPartitioning.MpiRank;

    // the dense storage exists only on rank 0
    double[,] ret = null;
    if (Rank == 0) {
        ret = new double[tis.NoOfRows, tis.NoOfCols];
    }

    SerialisationMessenger sms = new SerialisationMessenger(comm);
    try {
        // every process except rank 0 sends to rank 0
        if (Rank > 0) {
            sms.SetCommPath(0);
        }
        sms.CommitCommPaths();

        // collect the local rows: (row length, column indices, values)
        Tuple<int, int[], double[]>[] data;
        {
            int L = tis.RowPartitioning.LocalLength;
            int iFirst = (int)tis.RowPartitioning.i0;
            data = new Tuple<int, int[], double[]>[L];

            for (int i = 0; i < L; i++) {
                // these buffers must be allocated for every row and cannot be
                // re-used, because all rows are needed later on.
                double[] val = null;
                int[] col = null;
                int Lr = tis.GetRow(i + iFirst, ref col, ref val);
                data[i] = new Tuple<int, int[], double[]>(Lr, col, val);
            }
        }

        if (Rank > 0) {
            sms.Transmitt(0, data);
        }

        int rcvProc = 0;
        if (Rank == 0) {
            // first pass: own rows (rcvProc == 0); further passes: rows
            // received from the other processes
            do {
                int rowOffset = (int)tis.RowPartitioning.GetI0Offest(rcvProc);
                if (data.Length != tis.RowPartitioning.GetLocalLength(rcvProc)) {
                    throw new ApplicationException("internal error");
                }

                for (int i = 0; i < data.Length; i++) {
                    int Lr = data[i].Item1;
                    int[] col = data[i].Item2;
                    double[] val = data[i].Item3;

                    for (int lr = 0; lr < Lr; lr++) {
                        ret[i + rowOffset, col[lr]] = val[lr];
                    }
                }
            } while (sms.GetNext(out rcvProc, out data));
        } else {
            // nobody is supposed to send anything to ranks other than 0
            if (sms.GetNext(out rcvProc, out data)) {
                throw new ApplicationException("internal error");
            }
        }
    } finally {
        // release the messenger in any case
        sms.Dispose();
    }

    if (Rank != 0) {
        return null;
    }

    var _ret = MultidimensionalArray.Create(ret.GetLength(0), ret.GetLength(1));
    for (int i = 0; i < _ret.NoOfRows; i++) {
        for (int j = 0; j < _ret.NoOfCols; j++) {
            _ret[i, j] = ret[i, j];
        }
    }
    return _ret;
}
/// <summary>
/// accumulates a dense matrix <paramref name="FullMtx"/> to a sparse matrix
/// -- certainly, only advisable for small matrices.
/// </summary>
/// <param name="tis">Input/Output: the sparse matrix which is modified</param>
/// <param name="alpha">scaling which is applied to <paramref name="FullMtx"/></param>
/// <param name="FullMtx">
/// dense matrix; the number of rows must be equal to the number of locally
/// stored rows of <paramref name="tis"/>, the number of columns must be equal
/// to the total number of columns of <paramref name="tis"/>.
/// </param>
/// <exception cref="ArgumentException">on a mismatch of the matrix dimensions</exception>
/// <exception cref="ApplicationException">if a row of <paramref name="tis"/> contains a negative column index</exception>
static public void AccDenseMatrix(this IMutableMatrixEx tis, double alpha, IMatrix FullMtx) {
    if (tis.RowPartitioning.LocalLength != FullMtx.NoOfRows) {
        throw new ArgumentException("Mismatch in number of rows.");
    }
    if (tis.ColPartition.TotalLength != FullMtx.NoOfCols) {
        throw new ArgumentException("Mismatch in number of columns.");
    }

    int i0 = tis.RowPartitioning.i0;
    int I = tis.RowPartitioning.LocalLength;
    int J = tis.ColPartition.TotalLength;

    int[] col = null;
    double[] val = null;

    for (int i = 0; i < I; i++) {
        // copy of the existing sparse row
        int Lr = tis.GetRow(i + i0, ref col, ref val);
        var oldRow = new MsrMatrix.MatrixEntry[Lr];
        for (int lr = 0; lr < Lr; lr++) {
            // must be indexed by the entry counter 'lr' (not by the row
            // counter 'i'), otherwise all entries land in one slot or the
            // access runs out of range.
            oldRow[lr].m_ColIndex = col[lr];
            oldRow[lr].Value = val[lr];
        }

        // non-zero entries of the scaled dense row, ascending in the column index
        List<int> NewColIdx = new List<int>(J);
        List<double> NewVals = new List<double>(J);
        for (int j = 0; j < J; j++) {
            double FMij = FullMtx[i, j];
            if (FMij != 0.0) {
                NewVals.Add(alpha * FMij);
                NewColIdx.Add(j);
            }
        }

        // merge: where both rows have an entry in the same column, the old
        // value is added to the new one
        Array.Sort<MsrMatrix.MatrixEntry>(oldRow);
        int k1 = 0, k2 = 0, K1 = oldRow.Length, K2 = NewVals.Count;
        while (k1 < K1 && k2 < K2) {
            int j1 = oldRow[k1].m_ColIndex;
            int j2 = NewColIdx[k2];
            if (j1 < 0) {
                // should also crash in RELEASE, therefore -> Exception.
                throw new ApplicationException("expecting a row without un-allocated entries.");
            }

            if (j1 > j2) {
                // new row needs to catch up
                k2++;
            } else if (j1 < j2) {
                // old entry without a counterpart in the dense row: skipped here,
                // it is not part of the values handed to SetValues
                k1++;
            } else {
                NewVals[k2] += oldRow[k1].Value;
                k1++;
                k2++;
            }
        }

        tis.SetValues(i + i0, NewColIdx.ToArray(), NewVals.ToArray());
    }
}
/// <summary>
/// Searches the smallest and the largest stored entry of a matrix,
/// restricted to the rows which are stored on the local MPI process.
/// </summary>
/// <param name="M">input; the matrix to work on</param>
/// <param name="Min">output: the minimum entry of <paramref name="M"/> within the local MPI process</param>
/// <param name="MinRow">output: the row index, where <paramref name="Min"/> is located</param>
/// <param name="MinCol">output: the column index, where <paramref name="Min"/> is located</param>
/// <param name="Max">output: the maximum entry of <paramref name="M"/> within the local MPI process</param>
/// <param name="MaxRow">output: the row index, where <paramref name="Max"/> is located</param>
/// <param name="MaxCol">output: the column index, where <paramref name="Max"/> is located</param>
/// <remarks>
/// If no entry is stored in the local rows at all, minimum and maximum are
/// reported as 0.0, located at (0,0).
/// </remarks>
public static void GetMinimumAndMaximum_MPILocal(this IMutableMatrixEx M, out double Min, out int MinRow, out int MinCol, out double Max, out int MaxRow, out int MaxCol) {
    // start values: any stored entry replaces them
    Min = double.MaxValue;
    MinRow = int.MinValue;
    MinCol = int.MinValue;
    Max = double.MinValue;
    MaxRow = int.MinValue;
    MaxCol = int.MinValue;

    int firstRow = (int)M.RowPartitioning.i0;
    int endRow = firstRow + M.RowPartitioning.LocalLength;

    int[] columns = null;
    double[] values = null;
    bool anyEntryFound = false;

    for (int iRow = firstRow; iRow < endRow; iRow++) {
        int rowLength = M.GetRow(iRow, ref columns, ref values);

        for (int k = 0; k < rowLength; k++) {
            anyEntryFound = true;
            double entry = values[k];

            if (entry > Max) {
                Max = entry;
                MaxRow = iRow;
                MaxCol = columns[k];
            }
            if (entry < Min) {
                Min = entry;
                MinRow = iRow;
                MinCol = columns[k];
            }
        }
    }

    if (anyEntryFound) {
        return;
    }

    // matrix is completely empty -> min and max is 0.0, 1st occurrence per def. @ (0,0)
    Min = 0;
    MinCol = 0;
    MinRow = 0;
    Max = 0;
    MaxCol = 0;
    MaxRow = 0;
}
/// <summary>
/// matrix assembly; must be called by each implementation.
/// </summary>
/// <param name="M">the matrix whose locally stored rows are packed</param>
/// <remarks>
/// Splits every locally stored row of <paramref name="M"/> into the entries
/// whose column belongs to the local part of the column partitioning (stored
/// in <c>m_LocalMtx</c>) and the entries whose column belongs to another
/// process (stored per process in <c>ExtMatrix</c>). The lists of required
/// external column indices are exchanged between the processes and stored
/// in <c>m_SpmvCommPattern</c>.
/// Contains MPI communication, i.e. it has to be called on all processes.
/// </remarks>
/// <exception cref="ApplicationException">
/// if a received index list contains an index outside of the local column range.
/// </exception>
protected void PackMatrix(IMutableMatrixEx M) {
    ilPSP.MPICollectiveWatchDog.Watch();

    IPartitioning rp = M.RowPartitioning;
    IPartitioning cp = m_ColPart;

    // define Comm List
    // ================

    // keys: processor rank p
    // values: List of global indices, which processor p needs to send to this processor
    SortedDictionary<int, List<int>> CommLists = new SortedDictionary<int, List<int>>();

    // same content as 'CommLists', but as hash sets: membership test in O(1)
    // instead of a linear search through the list for every matrix entry
    Dictionary<int, HashSet<int>> CommSets = new Dictionary<int, HashSet<int>>();

    int Lr;
    double[] val = null;
    int[] col = null;
    int L = rp.LocalLength;
    int i0 = (int)rp.i0;

    for (int iLoc = 0; iLoc < L; iLoc++) { // loop over all matrix rows...
        int iGlob = i0 + iLoc;

        Lr = M.GetOccupiedColumnIndices(iGlob, ref col);

        for (int j = 0; j < Lr; j++) { // loop over all occupied columns in row 'iGlob'
            int jGlob = col[j];

            if (cp.i0 <= jGlob && jGlob < (cp.i0 + cp.LocalLength)) {
                // Entry on current processor
                continue;
            }

            // Entry on Processor proc
            int proc = cp.FindProcess(jGlob);

            List<int> CommList_proc;
            if (!CommLists.TryGetValue(proc, out CommList_proc)) {
                CommList_proc = new List<int>();
                CommLists.Add(proc, CommList_proc);
                CommSets.Add(proc, new HashSet<int>());
            }

            // every global index is recorded only once per process
            if (CommSets[proc].Add(jGlob)) {
                CommList_proc.Add(jGlob);
            }
        }
    }

    // sort com list
    // =============
    foreach (List<int> cl in CommLists.Values) {
        cl.Sort();
    }

    // position of each global column index within the sorted list of its
    // process (replaces a linear 'IndexOf' for every external entry)
    Dictionary<int, Dictionary<int, int>> CommListPositions = new Dictionary<int, Dictionary<int, int>>();
    foreach (KeyValuePair<int, List<int>> kv in CommLists) {
        List<int> sorted = kv.Value;
        Dictionary<int, int> positions = new Dictionary<int, int>(sorted.Count);
        for (int k = 0; k < sorted.Count; k++) {
            positions.Add(sorted[k], k);
        }
        CommListPositions.Add(kv.Key, positions);
    }

    // define matrix
    // =============
    {
        TempCSR intTmp = new TempCSR();
        SortedDictionary<int, ExternalTmp> extTmp = new SortedDictionary<int, ExternalTmp>();
        foreach (int proc in CommLists.Keys) {
            extTmp.Add(proc, new ExternalTmp());
        }

        for (int iLoc = 0; iLoc < L; iLoc++) {
            int iGlob = i0 + iLoc;

            Lr = M.GetRow(iGlob, ref col, ref val);

            for (int j = 0; j < Lr; j++) {
                int jGlob = col[j];
                double Value = val[j];

                bool bIsDiag = (iGlob == jGlob);

                if (cp.i0 <= jGlob && jGlob < (cp.i0 + cp.LocalLength)) {
                    // Entry on current processor
                    intTmp.AddEntry(jGlob - (int)cp.i0, Value, bIsDiag);
                } else {
                    // Entry on Processor proc
                    int proc = cp.FindProcess(jGlob);

                    // -1 for an index which was not recorded above: that is
                    // what a failed 'IndexOf' search would deliver too
                    int jloc;
                    if (!CommListPositions[proc].TryGetValue(jGlob, out jloc)) {
                        jloc = -1;
                    }

                    ExternalTmp et = extTmp[proc];
                    et.AddEntry(jloc, jGlob, Value);
                }
            }

            intTmp.NextRow();
            foreach (ExternalTmp et in extTmp.Values) {
                et.NextRow();
            }
        }

        m_LocalMtx = AssembleFinalFormat(intTmp);

        ExtMatrix = new Dictionary<int, External>();
        foreach (KeyValuePair<int, ExternalTmp> kv in extTmp) {
            ExtMatrix.Add(kv.Key, kv.Value.GetFinalObj());
        }
    }

    // send/receive & transform Comm lists
    // ====================================
    {
        SerialisationMessenger sms = new SerialisationMessenger(csMPI.Raw._COMM.WORLD);
        SortedDictionary<int, int[]> CommListsTo = new SortedDictionary<int, int[]>();

        foreach (int proc in CommLists.Keys) {
            sms.SetCommPath(proc);
        }
        sms.CommitCommPaths();

        // tell every process which of its column indices are required here
        foreach (int proc in CommLists.Keys) {
            sms.Transmitt(proc, CommLists[proc].ToArray());
        }

        int _proc;
        int[] CommListReceived;
        sms.GetNext(out _proc, out CommListReceived);
        int Lcol = m_ColPart.LocalLength;
        int i0col = (int)m_ColPart.i0;
        while (CommListReceived != null) {
            // convert indices to local coordinates
            for (int i = 0; i < CommListReceived.Length; i++) {
                CommListReceived[i] -= i0col;

                // check:
                if (CommListReceived[i] < 0 || CommListReceived[i] >= Lcol) {
                    throw new ApplicationException("internal error: something wrong with received Comm List.");
                }
            }

            CommListsTo.Add(_proc, CommListReceived);

            sms.GetNext(out _proc, out CommListReceived);
        }

        sms.Dispose();

        m_SpmvCommPattern = new SpmvCommPattern();
        m_SpmvCommPattern.ComLists = CommListsTo;
    }

    // record the number of elements which we receive
    // ==============================================
    {
        m_SpmvCommPattern.NoOfReceivedEntries = new Dictionary<int, int>();
        foreach (KeyValuePair<int, List<int>> kv in CommLists) {
            m_SpmvCommPattern.NoOfReceivedEntries.Add(kv.Key, kv.Value.Count);
        }
    }
}
/// <summary>
/// Initializes this matrix as a copy of the matrix <paramref name="M"/>,
/// stored in triplet format (row index, column index, value) with 1-based
/// (Fortran) indices.
/// </summary>
/// <param name="M">
/// the matrix to copy; must be quadratic and its partitionings must not be mutable.
/// If it is an <see cref="MsrMatrix"/> which assumes symmetry, only the
/// diagonal and the upper triangle are stored.
/// </param>
/// <exception cref="NotSupportedException">if the row or column partitioning of <paramref name="M"/> is mutable</exception>
/// <exception cref="ArgumentException">if <paramref name="M"/> is not quadratic</exception>
/// <exception cref="ApplicationException">
/// if the number of collected entries does not match the predicted number,
/// or if the total number of entries exceeds the 32-bit signed integer range.
/// </exception>
public Matrix(IMutableMatrixEx M) {
    if (M.RowPartitioning.IsMutable) {
        throw new NotSupportedException();
    }
    if (M.ColPartition.IsMutable) {
        throw new NotSupportedException();
    }

    if (M.NoOfCols != M.NoOfRows) {
        throw new ArgumentException("Matrix must be quadratic.", "M");
    }
    if ((M is MsrMatrix) && ((MsrMatrix)M).AssumeSymmetric) {
        this.Symmetric = 2;
    }

    RowPart = M.RowPartitioning;

    int size = M.RowPartitioning.MpiSize;
    Debug.Assert(M.RowPartitioning.MpiRank == M.ColPartition.MpiRank);
    Debug.Assert(M.RowPartitioning.MpiSize == M.ColPartition.MpiSize);
    var comm = M.MPI_Comm;

    int LR;
    int[] col = null;
    double[] val = null;

    // NOTE(review): the PARDISO counterparts test 'size == 1' here; an MPI size
    // of 0 presumably never occurs, so this branch looks unreachable and the
    // '*_loc' arrays are always filled. Left unchanged, since the solver may
    // rely on that -- confirm.
    if (size == 0) {
        // serial init on one processor
        // ++++++++++++++++++++++++++++

        n = (int)M.RowPartitioning.TotalLength;

        int len;
        if (Symmetric == 2) {
            // upper triangle + diagonal (diagonal entries are
            // always required, even if 0.0, for symmetric matrices)
            len = M.GetGlobalNoOfUpperTriangularNonZeros() + n;
        } else {
            len = M.GetTotalNoOfNonZerosPerProcess();
        }
        int Nrows = M.RowPartitioning.LocalLength;
        nz = len;
        int cnt = 0;
        irn = new int[len];
        jrn = new int[len];
        a = new double[len];
        for (int i = 0; i < Nrows; i++) {
            int iRow = M.RowPartitioning.i0 + i;

            LR = M.GetRow(iRow, ref col, ref val);

            double diagelem = M[iRow, iRow];
            if (Symmetric == 2 && diagelem == 0) {
                // in the symmetric case, we always need to provide the diagonal element;
                // a triplet needs row AND column index, both 1-based
                irn[cnt] = iRow + 1; // fortran indexing
                jrn[cnt] = iRow + 1; // fortran indexing
                a[cnt] = 0.0;
                cnt++;
            }
            for (int j = 0; j < LR; j++) {
                if (val[j] != 0.0) {
                    if (Symmetric == 2 && col[j] < iRow) {
                        // entry is in lower triangular matrix -> ignore (for symmetric mtx.)
                        continue;
                    } else {
                        irn[cnt] = iRow + 1; // fortran indexing
                        jrn[cnt] = col[j] + 1; // fortran indexing
                        a[cnt] = val[j];
                        cnt++;
                    }
                }
            }
        }

        if (len != cnt) {
            throw new ApplicationException("internal error.");
        }
    } else {
        // collect local matrices
        // +++++++++++++++++++++++

        // Number of elements, start indices for index pointers
        // ====================================================

        int len_loc;
        if (Symmetric == 2) {
            // number of entries is:
            // upper triangle + diagonal (diagonal entries are
            // always required, even if 0.0, for symmetric matrices)
            len_loc = M.GetLocalNoOfUpperTriangularNonZeros() + M.RowPartitioning.LocalLength;
        } else {
            len_loc = M.GetTotalNoOfNonZerosPerProcess();
        }

        Partitioning part = new Partitioning(len_loc, comm);
        if (part.TotalLength > int.MaxValue) {
            throw new ApplicationException("too many matrix entries for MUMPS - more than maximum 32-bit signed integer");
        }

        // local matrix assembly
        // =====================

        int n_loc = M.RowPartitioning.LocalLength;
        this.nz_loc = len_loc;
        int[] ia_loc = new int[len_loc];
        int[] ja_loc = new int[len_loc];
        double[] a_loc = new double[len_loc];
        {
            int cnt = 0;

            for (int i = 0; i < n_loc; i++) {
                int iRow = i + (int)M.RowPartitioning.i0;

                LR = M.GetRow(iRow, ref col, ref val);

                double diagelem = M[iRow, iRow];
                if (Symmetric == 2 && diagelem == 0) {
                    // in the symmetric case, we always need to provide the diagonal element.
                    // The row index must be written too: without it, the triplet
                    // carries the row index 0, which is not a valid 1-based index.
                    ia_loc[cnt] = iRow + 1; // fortran indexing
                    ja_loc[cnt] = iRow + 1; // fortran indexing
                    a_loc[cnt] = 0.0;
                    cnt++;
                }
                for (int j = 0; j < LR; j++) {
                    if (val[j] != 0.0) {
                        if (Symmetric == 2 && col[j] < iRow) {
                            // entry is in lower triangular matrix -> ignore (for symmetric mtx.)
                            continue;
                        } else {
                            ia_loc[cnt] = iRow + 1; // fortran indexing
                            ja_loc[cnt] = col[j] + 1; // fortran indexing
                            a_loc[cnt] = val[j];
                            cnt++;
                        }
                    }
                }
            }

            if (cnt != len_loc) {
                throw new ApplicationException("internal error.");
            }
        }

        nz = part.TotalLength;
        n = (int)M.RowPartitioning.TotalLength;
        this.a_loc = a_loc;
        this.irn_loc = ia_loc;
        this.jrn_loc = ja_loc;
        // NOTE(review): the field receives the TOTAL number of rows, not the
        // local one (the local variable 'n_loc' above) -- kept as it was, confirm intent.
        this.n_loc = (int)M.RowPartitioning.TotalLength;

        GC.Collect();
    }
}
/// <summary>
/// initializes this matrix as a copy of the matrix <paramref name="M"/>,
/// stored in compressed row format (<c>ia</c>: row pointers, <c>ja</c>:
/// column indices, <c>a</c>: values) with 1-based (Fortran) indices.
/// </summary>
/// <param name="M">
/// the matrix to copy; must be quadratic and its partitionings must not be mutable.
/// If it is an <see cref="MsrMatrix"/> which assumes symmetry, only the
/// diagonal and the upper triangle are stored.
/// </param>
/// <remarks>
/// When running on more than one MPI process, the complete matrix is
/// collected on rank 0; <c>ia</c>, <c>ja</c> and <c>a</c> are only assigned there.
/// </remarks>
/// <exception cref="NotSupportedException">if the row or column partitioning of <paramref name="M"/> is mutable</exception>
/// <exception cref="ArgumentException">if <paramref name="M"/> is not quadratic</exception>
/// <exception cref="ApplicationException">
/// if the number of collected entries does not match the predicted number,
/// or if the total number of entries exceeds the 32-bit signed integer range.
/// </exception>
public Matrix(IMutableMatrixEx M) {
    if (M.RowPartitioning.IsMutable) {
        throw new NotSupportedException();
    }
    if (M.ColPartition.IsMutable) {
        throw new NotSupportedException();
    }

    if (M.NoOfCols != M.NoOfRows) {
        throw new ArgumentException("Matrix must be quadratic.", "M");
    }
    this.Symmetric = (M is MsrMatrix) && ((MsrMatrix)M).AssumeSymmetric;
    RowPart = M.RowPartitioning;

    int size = M.RowPartitioning.MpiSize, rank = M.RowPartitioning.MpiRank;
    Debug.Assert(M.RowPartitioning.MpiRank == M.ColPartition.MpiRank);
    Debug.Assert(M.RowPartitioning.MpiSize == M.ColPartition.MpiSize);
    var comm = M.MPI_Comm;

    int LR;
    int[] col = null;
    double[] val = null;

    if (size == 1) {
        // serial init on one processor
        // ++++++++++++++++++++++++++++

        n = (int)M.RowPartitioning.TotalLength;

        int len;
        if (Symmetric) {
            // upper triangle + diagonal (diagonal entries are
            // always required, even if 0.0, for symmetric matrices in PARDISO)
            len = M.GetGlobalNoOfUpperTriangularNonZeros() + n;
        } else {
            len = M.GetTotalNoOfNonZerosPerProcess();
        }
        int Nrows = M.RowPartitioning.LocalLength;

        int cnt = 0;
        ia = new int[n + 1];
        ja = new int[len];
        a = new double[len];
        for (int i = 0; i < Nrows; i++) {
            ia[i] = cnt + 1; // fortran indexing
            int iRow = M.RowPartitioning.i0 + i;

            LR = M.GetRow(iRow, ref col, ref val);

            double diagelem = M[iRow, iRow];
            if (Symmetric && diagelem == 0) {
                // in the symmetric case, we always need to provide the diagonal element
                ja[cnt] = iRow + 1; // fortran indexing
                a[cnt] = 0.0;
                cnt++;
            }
            for (int j = 0; j < LR; j++) {
                if (val[j] != 0.0) {
                    if (Symmetric && col[j] < iRow) {
                        // entry is in lower triangular matrix -> ignore (for symmetric mtx.)
                        continue;
                    } else {
                        ja[cnt] = col[j] + 1; // fortran indexing
                        a[cnt] = val[j];
                        cnt++;
                    }
                }
            }
        }
        ia[Nrows] = cnt + 1; // fortran indexing

        if (len != cnt) {
            throw new ApplicationException("internal error.");
        }
    } else {
        // collect matrix on processor 0
        // +++++++++++++++++++++++++++++

        // Number of elements, start indices for index pointers
        // ====================================================

        int len_loc;
        if (Symmetric) {
            // number of entries is:
            // upper triangle + diagonal (diagonal entries are
            // always required, even if 0.0, for symmetric matrices in PARDISO)
            len_loc = M.GetLocalNoOfUpperTriangularNonZeros() + M.RowPartitioning.LocalLength;
        } else {
            len_loc = M.GetTotalNoOfNonZerosPerProcess();
        }

        Partitioning part = new Partitioning(len_loc, comm);
        if (part.TotalLength > int.MaxValue) {
            throw new ApplicationException("too many matrix entries for PARDISO - more than maximum 32-bit signed integer");
        }

        // local matrix assembly
        // =====================

        int n_loc = M.RowPartitioning.LocalLength;
        int[] ia_loc = new int[n_loc];
        int[] ja_loc = new int[len_loc];
        double[] a_loc = new double[len_loc];
        {
            int cnt = 0;
            int i0 = (int)part.i0; // offset of the local entries within the global entry enumeration

            for (int i = 0; i < n_loc; i++) {
                ia_loc[i] = cnt + 1 + i0; // fortran indexing
                int iRow = i + (int)M.RowPartitioning.i0;

                LR = M.GetRow(iRow, ref col, ref val);

                double diagelem = M[iRow, iRow];
                if (Symmetric && diagelem == 0) {
                    // in the symmetric case, we always need to provide the diagonal element
                    ja_loc[cnt] = iRow + 1; // fortran indexing
                    a_loc[cnt] = 0.0;
                    cnt++;
                }
                for (int j = 0; j < LR; j++) {
                    if (val[j] != 0.0) {
                        if (Symmetric && col[j] < iRow) {
                            // entry is in lower triangular matrix -> ignore (for symmetric mtx.)
                            continue;
                        } else {
                            ja_loc[cnt] = col[j] + 1; // fortran indexing
                            a_loc[cnt] = val[j];
                            cnt++;
                        }
                    }
                }
            }

            if (cnt != len_loc) {
                throw new ApplicationException("internal error.");
            }
        }

        // assemble complete matrix on proc. 0
        // ===================================
        if (rank == 0) {
            n = M.RowPartitioning.TotalLength;

            // process 0: collect data from other processors
            // +++++++++++++++++++++++++++++++++++++++++++++

            this.ia = new int[M.RowPartitioning.TotalLength + 1];
            this.ja = new int[part.TotalLength];
            this.a = new double[part.TotalLength];

            Array.Copy(ia_loc, 0, this.ia, 0, ia_loc.Length);
            Array.Copy(ja_loc, 0, this.ja, 0, ja_loc.Length);
            Array.Copy(a_loc, 0, this.a, 0, a_loc.Length);

            unsafe {
                // pinning the array itself (instead of '&array[0]') also works
                // for arrays of length 0
                fixed (int* pia = this.ia, pja = this.ja) {
                    fixed (double* pa = this.a) {
                        for (int rcv_rank = 1; rcv_rank < size; rcv_rank++) {
                            MPI_Status status;
                            csMPI.Raw.Recv((IntPtr)(pa + part.GetI0Offest(rcv_rank)), part.GetLocalLength(rcv_rank), csMPI.Raw._DATATYPE.DOUBLE, rcv_rank, 321555 + rcv_rank, comm, out status);
                            csMPI.Raw.Recv((IntPtr)(pja + part.GetI0Offest(rcv_rank)), part.GetLocalLength(rcv_rank), csMPI.Raw._DATATYPE.INT, rcv_rank, 32155 + rcv_rank, comm, out status);
                            csMPI.Raw.Recv((IntPtr)(pia + M.RowPartitioning.GetI0Offest(rcv_rank)), M.RowPartitioning.GetLocalLength(rcv_rank), csMPI.Raw._DATATYPE.INT, rcv_rank, 3215 + rcv_rank, comm, out status);
                        }
                    }
                }
            }

            this.ia[M.RowPartitioning.TotalLength] = (int)part.TotalLength + 1;
        } else {
            // send data to process 0
            // ++++++++++++++++++++++

            unsafe {
                fixed (int* pia = ia_loc, pja = ja_loc) {
                    fixed (double* pa = a_loc) {
                        // must use the same communicator as the receives on rank 0,
                        // otherwise send and receive do not match
                        csMPI.Raw.Send((IntPtr)pa, a_loc.Length, csMPI.Raw._DATATYPE.DOUBLE, 0, 321555 + rank, comm);
                        csMPI.Raw.Send((IntPtr)pja, ja_loc.Length, csMPI.Raw._DATATYPE.INT, 0, 32155 + rank, comm);
                        csMPI.Raw.Send((IntPtr)pia, ia_loc.Length, csMPI.Raw._DATATYPE.INT, 0, 3215 + rank, comm);
                    }
                }
            }
        }

        ia_loc = null;
        ja_loc = null;
        a_loc = null;
        GC.Collect();
    }
}
/// <summary>
/// initializes this matrix as a copy of the matrix <paramref name="M"/>,
/// stored in compressed row format with 1-based (Fortran) indices:
/// <c>ia</c> holds the row pointers, <c>ja</c> the column indices and the
/// unmanaged buffer <c>aPtr</c> the values, either as double or as float.
/// </summary>
/// <param name="M">
/// the matrix to copy; must be quadratic and its partitionings must not be mutable.
/// If it is an <see cref="MsrMatrix"/> which assumes symmetry, only the
/// diagonal and the upper triangle are stored.
/// </param>
/// <param name="__UseDoublePrecision">
/// true: values are stored as double; false: values are converted to float.
/// </param>
/// <remarks>
/// When running on more than one MPI process, the matrix is gathered on rank 0;
/// on all other ranks <c>aPtr</c> is set to <see cref="IntPtr.Zero"/>.
/// The value buffer is allocated with <c>Marshal.AllocHGlobal</c>; it is not
/// released within this constructor.
/// </remarks>
public Matrix(IMutableMatrixEx M, bool __UseDoublePrecision) {
    using (var tr = new FuncTrace()) {
        this.UseDoublePrecision = __UseDoublePrecision;

        // argument checks
        if (M.RowPartitioning.IsMutable) { throw new NotSupportedException(); }
        if (M.ColPartition.IsMutable) { throw new NotSupportedException(); }
        if (M.NoOfCols != M.NoOfRows) { throw new ArgumentException("Matrix must be quadratic.", "M"); }

        this.Symmetric = (M is MsrMatrix) && ((MsrMatrix)M).AssumeSymmetric;
        RowPart = M.RowPartitioning;

        int size = M.RowPartitioning.MpiSize, rank = M.RowPartitioning.MpiRank;
        Debug.Assert(M.RowPartitioning.MpiRank == M.ColPartition.MpiRank);
        Debug.Assert(M.RowPartitioning.MpiSize == M.ColPartition.MpiSize);
        m_comm = M.MPI_Comm;

        // buffers for 'GetRow', re-used for all rows
        int LR;
        int[] col = null;
        double[] val = null;

        if (size == 1) {
            // serial init on one processor
            // ++++++++++++++++++++++++++++
            using (new BlockTrace("serial init", tr)) {
                n = (int)M.RowPartitioning.TotalLength;

                // predicted number of stored entries
                int len;
                if (Symmetric) {
                    // upper triangle + diagonal (diagonal entries are
                    // always required, even if 0.0, for symmetric matrices in PARDISO)
                    len = M.GetGlobalNoOfUpperTriangularNonZeros() + n;
                } else {
                    len = M.GetTotalNoOfNonZerosPerProcess();
                }
                int Nrows = M.RowPartitioning.LocalLength;

                int cnt = 0; // number of entries written so far
                ia = new int[n + 1];
                ja = new int[len];

                // size of the value buffer in bytes; computed in 64 bit
                IntPtr ObjectSize;
                if (UseDoublePrecision) { ObjectSize = (IntPtr)(((long)len) * sizeof(double)); } else { ObjectSize = (IntPtr)(((long)len) * sizeof(float)); }
                this.aPtr = Marshal.AllocHGlobal(ObjectSize);

                unsafe {
                    // two views onto the same buffer; both are advanced with
                    // every entry, but only the one matching the selected
                    // precision is written to.
                    float * a_S = (float *)aPtr;
                    double *a_D = (double *)aPtr;

                    for (int i = 0; i < Nrows; i++) {
                        ia[i] = cnt + 1; // fortran indexing
                        int iRow = M.RowPartitioning.i0 + i;

                        LR = M.GetRow(iRow, ref col, ref val);

                        double diagelem = M[iRow, iRow];
                        if (Symmetric && diagelem == 0) {
                            // in the symmetric case, we always need to provide the diagonal element
                            ja[cnt] = iRow + 1; // fortran indexing
                            if (UseDoublePrecision) {
                                *a_D = 0.0;
                            } else {
                                *(a_S) = 0.0f;
                            }
                            cnt++;
                            a_D++;
                            a_S++;
                        }
                        for (int j = 0; j < LR; j++) {
                            // stored zeros are skipped
                            if (val[j] != 0.0) {
                                if (Symmetric && col[j] < iRow) {
                                    // entry is in lower triangular matrix -> ignore (for symmetric mtx.)
                                    continue;
                                } else {
                                    ja[cnt] = col[j] + 1; // fortran indexing
                                    if (UseDoublePrecision) {
                                        *a_D = val[j];
                                    } else {
                                        *a_S = (float)(val[j]);
                                    }
                                    cnt++;
                                    a_D++;
                                    a_S++;
                                }
                            }
                        }
                    }
                    ia[Nrows] = cnt + 1; // fortran indexing; closing row pointer

                    // the number of written entries must match the prediction
                    if (len != cnt) { throw new ApplicationException("internal error."); }
                }
            }
        } else {
            // collect matrix on processor 0
            // +++++++++++++++++++++++++++++
            using (new BlockTrace("Collect matrix on proc 0", tr)) {
                // Number of elements, start indices for index pointers
                // ====================================================

                int len_loc;
                if (Symmetric) {
                    // number of entries is:
                    // upper triangle + diagonal (diagonal entries are
                    // always required, even if 0.0, for symmetric matrices in PARDISO)
                    len_loc = M.GetLocalNoOfUpperTriangularNonZeros() + M.RowPartitioning.LocalLength;
                } else {
                    len_loc = M.GetTotalNoOfNonZerosPerProcess();
                }

                // partitioning of the stored entries over the MPI processes
                Partitioning part = new Partitioning(len_loc, m_comm);
                if (part.TotalLength > int.MaxValue) { throw new ApplicationException("too many matrix entries for PARDISO - more than maximum 32-bit signed integer"); }

                // local matrix assembly
                // =====================

                int n_loc = M.RowPartitioning.LocalLength;
                int[] ia_loc = new int[n_loc];
                int[] ja_loc = new int[len_loc];
                // only the array matching the selected precision is allocated
                double[] a_loc_D = null;
                float[] a_loc_S = null;
                using (new BlockTrace("local matrix assembly", tr)) {
                    if (UseDoublePrecision) { a_loc_D = new double[len_loc]; } else { a_loc_S = new float[len_loc]; }
                    {
                        int cnt = 0;
                        int i0 = (int)part.i0; // offset of the local entries within the global entry enumeration

                        for (int i = 0; i < n_loc; i++) {
                            ia_loc[i] = cnt + 1 + i0; // fortran indexing
                            int iRow = i + (int)M.RowPartitioning.i0;

                            LR = M.GetRow(iRow, ref col, ref val);

                            double diagelem = M[iRow, iRow];
                            if (Symmetric && diagelem == 0) {
                                // in the symmetric case, we always need to provide the diagonal element
                                ja_loc[cnt] = iRow + 1; // fortran indexing
                                if (UseDoublePrecision) { a_loc_D[cnt] = 0.0; } else { a_loc_S[cnt] = 0.0f; }
                                cnt++;
                            }
                            for (int j = 0; j < LR; j++) {
                                // stored zeros are skipped
                                if (val[j] != 0.0) {
                                    if (Symmetric && col[j] < iRow) {
                                        // entry is in lower triangular matrix -> ignore (for symmetric mtx.)
                                        continue;
                                    } else {
                                        ja_loc[cnt] = col[j] + 1; // fortran indexing
                                        if (UseDoublePrecision) { a_loc_D[cnt] = val[j]; } else { a_loc_S[cnt] = (float)val[j]; }
                                        cnt++;
                                    }
                                }
                            }
                        }

                        // the number of written entries must match the prediction
                        if (cnt != len_loc) { throw new ApplicationException("internal error."); }
                    }
                }

                // assemble complete matrix on proc. 0
                // ===================================
                if (rank == 0) {
                    n = M.RowPartitioning.TotalLength;

                    // process 0: allocate the receive buffers for the complete matrix
                    // +++++++++++++++++++++++++++++++++++++++++++++

                    this.ia = new int[M.RowPartitioning.TotalLength + 1];
                    this.ja = new int[part.TotalLength];

                    int partLeng = part.TotalLength;

                    // size of the value buffer in bytes; computed in 64 bit
                    IntPtr ObjectSize;
                    if (UseDoublePrecision) { ObjectSize = (IntPtr)(((long)partLeng) * sizeof(double)); } else { ObjectSize = (IntPtr)(((long)partLeng) * sizeof(float)); }
                    aPtr = Marshal.AllocHGlobal(ObjectSize);
                } else {
                    // no value buffer on the other ranks
                    aPtr = IntPtr.Zero;
                }

                // NOTE(review): looks like a leftover of the removed debug output -- confirm whether it is still needed
                Console.Out.Flush();

                using (new BlockTrace("UNSAFE", tr)) {
                    unsafe {
                        float * pa_S = (float *)aPtr;
                        double *pa_D = (double *)aPtr;

                        int *displs = stackalloc int[size];
                        int *recvcounts = stackalloc int[size];

                        // counts/offsets with respect to the stored entries
                        // (used for the values and the column indices)
                        for (int i = 0; i < size; i++) {
                            recvcounts[i] = part.GetLocalLength(i);
                            displs[i] = part.GetI0Offest(i);
                        }

                        // gather the values on rank 0
                        fixed(void *pa_loc_D = a_loc_D, pa_loc_S = a_loc_S) {
                            if (UseDoublePrecision) {
                                csMPI.Raw.Gatherv(
                                    (IntPtr)pa_loc_D,
                                    a_loc_D.Length,
                                    csMPI.Raw._DATATYPE.DOUBLE,
                                    (IntPtr)pa_D,
                                    (IntPtr)recvcounts,
                                    (IntPtr)displs,
                                    csMPI.Raw._DATATYPE.DOUBLE,
                                    0,
                                    m_comm);
                            } else {
                                csMPI.Raw.Gatherv(
                                    (IntPtr)pa_loc_S,
                                    a_loc_S.Length,
                                    csMPI.Raw._DATATYPE.FLOAT,
                                    (IntPtr)pa_S,
                                    (IntPtr)recvcounts,
                                    (IntPtr)displs,
                                    csMPI.Raw._DATATYPE.FLOAT,
                                    0,
                                    m_comm);
                            }
                        }

                        // gather the column indices on rank 0
                        fixed(void *pja_loc = ja_loc, pja = ja) {
                            csMPI.Raw.Gatherv(
                                (IntPtr)pja_loc,
                                ja_loc.Length,
                                csMPI.Raw._DATATYPE.INT,
                                (IntPtr)pja,
                                (IntPtr)recvcounts,
                                (IntPtr)displs,
                                csMPI.Raw._DATATYPE.INT,
                                0,
                                m_comm);
                        }

                        // counts/offsets with respect to the matrix rows
                        // (used for the row pointers)
                        for (int i = 0; i < size; i++) {
                            displs[i] = M.RowPartitioning.GetI0Offest(i);
                            recvcounts[i] = M.RowPartitioning.GetLocalLength(i);
                        }

                        // gather the row pointers on rank 0
                        fixed(void *pia_loc = ia_loc, pia = ia) {
                            csMPI.Raw.Gatherv(
                                (IntPtr)pia_loc,
                                ia_loc.Length,
                                csMPI.Raw._DATATYPE.INT,
                                (IntPtr)pia,
                                (IntPtr)recvcounts,
                                (IntPtr)displs,
                                csMPI.Raw._DATATYPE.INT,
                                0,
                                m_comm);
                        }
                    }
                }

                // closing row pointer (fortran indexing)
                if (rank == 0) { this.ia[M.RowPartitioning.TotalLength] = (int)part.TotalLength + 1; }

                // drop the references to the local temporaries before forcing a collection
                ia_loc = null;
                ja_loc = null;
                a_loc_S = null;
                a_loc_D = null;
                GC.Collect();
            }
        }
    }
}
/// <summary>
/// initializes this matrix to be a copy of <paramref name="mtx"/>;
/// </summary>
/// <param name="mtx">
/// the matrix to copy; must be quadratic, row- and column-partitioning must
/// share the same MPI communicator and must not be mutable. Entries which
/// are equal to 0.0 are not transferred.
/// </param>
/// <exception cref="ArgumentException">
/// if one of the requirements on <paramref name="mtx"/> is violated, or if
/// some locally stored row contains no non-zero entry.
/// </exception>
/// <exception cref="ApplicationException">
/// if the number of rows or columns exceeds the range of the HYPRE index type.
/// </exception>
public IJMatrix(IMutableMatrixEx mtx) {
    if (mtx.RowPartitioning.MPI_Comm != mtx.ColPartition.MPI_Comm) {
        throw new ArgumentException();
    }
    if (mtx.RowPartitioning.IsMutable) {
        throw new ArgumentException();
    }
    if (mtx.ColPartition.IsMutable) {
        throw new ArgumentException();
    }

    m_RowPartition = mtx.RowPartitioning;
    m_ColPartition = mtx.ColPartition;

    if (mtx.NoOfRows != mtx.NoOfCols) {
        throw new ArgumentException("matrix must be quadratic.", "mtx");
    }

    if (mtx.NoOfRows > (int.MaxValue - 2)) {
        throw new ApplicationException("unable to create HYPRE matrix: no. of matrix rows is larger than HYPRE index type (32 bit signed int);");
    }
    if (mtx.NoOfCols > (int.MaxValue - 2)) {
        throw new ApplicationException("unable to create HYPRE matrix: no. of matrix columns is larger than HYPRE index type (32 bit signed int);");
    }

    // matrix: init
    // NOTE(review): the world communicator is used here, not the communicator
    // of the partitionings checked above -- kept as it was, confirm intent.
    MPI_Comm comm = csMPI.Raw._COMM.WORLD;
    int ilower = (int)mtx.RowPartitioning.i0;
    int iupper = (int)ilower + mtx.RowPartitioning.LocalLength - 1;
    int jlower = (int)mtx.ColPartition.i0;
    int jupper = (int)jlower + mtx.ColPartition.LocalLength - 1;

    HypreException.Check(Wrappers.IJMatrix.Create(comm, ilower, iupper, jlower, jupper, out m_IJMatrix));
    HypreException.Check(Wrappers.IJMatrix.SetObjectType(m_IJMatrix, Wrappers.Constants.HYPRE_PARCSR));
    HypreException.Check(Wrappers.IJMatrix.Initialize(m_IJMatrix));

    // matrix: set values, row by row ...
    int nrows = 1; // one row per call
    int lmax = mtx.GetMaxNoOfNonZerosPerRow();
    int[] rows = new int[1], cols = new int[lmax], ncols = new int[1];
    double[] values = new double[lmax];

    int LR;
    int[] col = null;
    double[] val = null;
    for (int i = 0; i < mtx.RowPartitioning.LocalLength; i++) {
        int iRowGlob = i + mtx.RowPartitioning.i0;
        LR = mtx.GetRow(iRowGlob, ref col, ref val);

        rows[0] = iRowGlob;

        // copy only the non-zero entries
        int cnt = 0;
        for (int j = 0; j < LR; j++) {
            if (val[j] != 0.0) {
                cols[cnt] = col[j];
                values[cnt] = val[j];
                cnt++;
            }
        }
        if (cnt <= 0) {
            throw new ArgumentException(string.Format("Zero matrix row detected (local row index: {0}, global row index: {1}).", i, iRowGlob));
        }

        // number of entries which have actually been copied: if the number of
        // stored entries (LR) were passed instead, the slots cnt..LR-1 -- which
        // still hold data of previously processed rows -- would be set as well.
        ncols[0] = cnt;

        HypreException.Check(Wrappers.IJMatrix.SetValues(m_IJMatrix, nrows, ncols, rows, cols, values));
    }

    // matrix: assembly
    HypreException.Check(Wrappers.IJMatrix.Assemble(m_IJMatrix));
    HypreException.Check(Wrappers.IJMatrix.GetObject(m_IJMatrix, out m_ParCSR_matrix));
}