/// <summary>
/// initializes this matrix as a copy of the matrix <paramref name="M"/>.
/// </summary>
/// <remarks>
/// The matrix is stored in compressed-row form with 1-based (Fortran) indices:
/// row pointers in <c>ia</c>, column indices in <c>ja</c>, and the values in an
/// unmanaged buffer (<c>aPtr</c>) holding either <c>double</c> or <c>float</c>
/// entries, depending on <paramref name="__UseDoublePrecision"/>.
/// With one MPI process the arrays are filled directly; with more than one
/// process every rank assembles its local rows and the result is gathered on
/// rank 0 (on all other ranks <c>aPtr</c> is <see cref="IntPtr.Zero"/>).
/// For symmetric matrices only the upper triangle is stored and a diagonal
/// entry is always emitted, even if it is 0.0.
/// If construction fails after the value buffer was allocated, the buffer is
/// released again before the exception is re-thrown.
/// </remarks>
/// <param name="M">quadratic matrix with immutable row- and column-partitioning</param>
/// <param name="__UseDoublePrecision">
/// true: values are stored as <c>double</c>; false: values are converted to <c>float</c>
/// </param>
/// <exception cref="NotSupportedException">row- or column-partitioning of <paramref name="M"/> is mutable</exception>
/// <exception cref="ArgumentException"><paramref name="M"/> is not quadratic</exception>
/// <exception cref="ApplicationException">
/// the number of assembled entries does not match the expected count,
/// or the matrix has too many entries
/// </exception>
public Matrix(IMutableMatrixEx M, bool __UseDoublePrecision) {
    using (var tr = new FuncTrace()) {
        try {
            this.UseDoublePrecision = __UseDoublePrecision;

            if (M.RowPartitioning.IsMutable) {
                throw new NotSupportedException();
            }
            if (M.ColPartition.IsMutable) {
                throw new NotSupportedException();
            }
            if (M.NoOfCols != M.NoOfRows) {
                throw new ArgumentException("Matrix must be quadratic.", "M");
            }

            this.Symmetric = (M is MsrMatrix) && ((MsrMatrix)M).AssumeSymmetric;
            RowPart = M.RowPartitioning;

            int size = M.RowPartitioning.MpiSize, rank = M.RowPartitioning.MpiRank;
            Debug.Assert(M.RowPartitioning.MpiRank == M.ColPartition.MpiRank);
            Debug.Assert(M.RowPartitioning.MpiSize == M.ColPartition.MpiSize);
            m_comm = M.MPI_Comm;

            int LR;
            int[] col = null;
            double[] val = null;

            if (size == 1) {
                // serial init on one processor
                // ++++++++++++++++++++++++++++
                using (new BlockTrace("serial init", tr)) {
                    n = (int)M.RowPartitioning.TotalLength;

                    int len;
                    if (Symmetric) {
                        // upper triangle + diagonal (diagonal entries are
                        // always required, even if 0.0, for symmetric matrices in PARDISO)
                        len = M.GetGlobalNoOfUpperTriangularNonZeros() + n;
                    } else {
                        len = M.GetTotalNoOfNonZerosPerProcess();
                    }

                    int Nrows = M.RowPartitioning.LocalLength;
                    int cnt = 0;
                    ia = new int[n + 1];
                    ja = new int[len];

                    // size of the value buffer in bytes, computed in 64 bit
                    long bytesPerEntry = UseDoublePrecision ? sizeof(double) : sizeof(float);
                    IntPtr ObjectSize = (IntPtr)(((long)len) * bytesPerEntry);
                    this.aPtr = Marshal.AllocHGlobal(ObjectSize);

                    unsafe {
                        // two views onto the same buffer; only the one matching
                        // the selected precision is ever dereferenced
                        float* a_S = (float*)aPtr;
                        double* a_D = (double*)aPtr;

                        for (int i = 0; i < Nrows; i++) {
                            ia[i] = cnt + 1; // fortran indexing
                            int iRow = M.RowPartitioning.i0 + i;

                            LR = M.GetRow(iRow, ref col, ref val);

                            double diagelem = M[iRow, iRow];
                            if (Symmetric && diagelem == 0) {
                                // in the symmetric case, we always need to provide the diagonal element
                                // (the bounds-checked write to 'ja' comes first, so an
                                //  unexpected entry count fails before the raw buffer is touched)
                                ja[cnt] = iRow + 1; // fortran indexing
                                if (UseDoublePrecision) {
                                    *a_D = 0.0;
                                } else {
                                    *a_S = 0.0f;
                                }
                                cnt++;
                                a_D++;
                                a_S++;
                            }

                            for (int j = 0; j < LR; j++) {
                                if (val[j] != 0.0) {
                                    if (Symmetric && col[j] < iRow) {
                                        // entry is in lower triangular matrix -> ignore (for symmetric mtx.)
                                        continue;
                                    } else {
                                        ja[cnt] = col[j] + 1; // fortran indexing
                                        if (UseDoublePrecision) {
                                            *a_D = val[j];
                                        } else {
                                            *a_S = (float)(val[j]);
                                        }
                                        cnt++;
                                        a_D++;
                                        a_S++;
                                    }
                                }
                            }
                        }
                        ia[Nrows] = cnt + 1; // fortran indexing

                        if (len != cnt) {
                            throw new ApplicationException("internal error.");
                        }
                    }
                }
            } else {
                // collect matrix on processor 0
                // +++++++++++++++++++++++++++++
                using (new BlockTrace("Collect matrix on proc 0", tr)) {

                    // Number of elements, start indices for index pointers
                    // ====================================================

                    int len_loc;
                    if (Symmetric) {
                        // number of entries is:
                        // upper triangle + diagonal (diagonal entries are
                        // always required, even if 0.0, for symmetric matrices in PARDISO)
                        len_loc = M.GetLocalNoOfUpperTriangularNonZeros() + M.RowPartitioning.LocalLength;
                    } else {
                        len_loc = M.GetTotalNoOfNonZerosPerProcess();
                    }

                    Partitioning part = new Partitioning(len_loc, m_comm);
                    // NOTE(review): 'part.TotalLength' is assigned to an 'int' without a cast
                    // further below, so it appears to be a 32-bit value and this comparison
                    // can presumably never be true - confirm against Partitioning.
                    if (part.TotalLength > int.MaxValue) {
                        throw new ApplicationException("too many matrix entries for PARDISO - more than maximum 32-bit signed integer");
                    }

                    // local matrix assembly
                    // =====================

                    int n_loc = M.RowPartitioning.LocalLength;
                    int[] ia_loc = new int[n_loc];
                    int[] ja_loc = new int[len_loc];
                    double[] a_loc_D = null;
                    float[] a_loc_S = null;
                    using (new BlockTrace("local matrix assembly", tr)) {
                        if (UseDoublePrecision) {
                            a_loc_D = new double[len_loc];
                        } else {
                            a_loc_S = new float[len_loc];
                        }

                        int cnt = 0;
                        int i0 = (int)part.i0; // global index of the first local entry

                        for (int i = 0; i < n_loc; i++) {
                            ia_loc[i] = cnt + 1 + i0; // fortran indexing
                            int iRow = i + (int)M.RowPartitioning.i0;

                            LR = M.GetRow(iRow, ref col, ref val);

                            double diagelem = M[iRow, iRow];
                            if (Symmetric && diagelem == 0) {
                                // in the symmetric case, we always need to provide the diagonal element
                                ja_loc[cnt] = iRow + 1; // fortran indexing
                                if (UseDoublePrecision) {
                                    a_loc_D[cnt] = 0.0;
                                } else {
                                    a_loc_S[cnt] = 0.0f;
                                }
                                cnt++;
                            }

                            for (int j = 0; j < LR; j++) {
                                if (val[j] != 0.0) {
                                    if (Symmetric && col[j] < iRow) {
                                        // entry is in lower triangular matrix -> ignore (for symmetric mtx.)
                                        continue;
                                    } else {
                                        ja_loc[cnt] = col[j] + 1; // fortran indexing
                                        if (UseDoublePrecision) {
                                            a_loc_D[cnt] = val[j];
                                        } else {
                                            a_loc_S[cnt] = (float)val[j];
                                        }
                                        cnt++;
                                    }
                                }
                            }
                        }

                        if (cnt != len_loc) {
                            throw new ApplicationException("internal error.");
                        }
                    }

                    // assemble complete matrix on proc. 0
                    // ===================================
                    if (rank == 0) {
                        n = M.RowPartitioning.TotalLength;

                        // process 0: collect data from other processors
                        // +++++++++++++++++++++++++++++++++++++++++++++
                        this.ia = new int[M.RowPartitioning.TotalLength + 1];
                        this.ja = new int[part.TotalLength];

                        int partLeng = part.TotalLength;

                        // size of the value buffer in bytes, computed in 64 bit
                        long bytesPerEntry = UseDoublePrecision ? sizeof(double) : sizeof(float);
                        IntPtr ObjectSize = (IntPtr)(((long)partLeng) * bytesPerEntry);
                        aPtr = Marshal.AllocHGlobal(ObjectSize);
                    } else {
                        // only rank 0 holds the gathered values
                        aPtr = IntPtr.Zero;
                    }

                    Console.Out.Flush();

                    using (new BlockTrace("UNSAFE", tr)) {
                        unsafe {
                            float* pa_S = (float*)aPtr;
                            double* pa_D = (double*)aPtr;

                            int* displs = stackalloc int[size];
                            int* recvcounts = stackalloc int[size];

                            // counts/offsets with respect to the matrix entries
                            for (int i = 0; i < size; i++) {
                                recvcounts[i] = part.GetLocalLength(i);
                                displs[i] = part.GetI0Offest(i);
                            }

                            // values: exactly one of the two local arrays is non-null
                            fixed (void* pa_loc_D = a_loc_D, pa_loc_S = a_loc_S) {
                                if (UseDoublePrecision) {
                                    csMPI.Raw.Gatherv(
                                        (IntPtr)pa_loc_D, a_loc_D.Length, csMPI.Raw._DATATYPE.DOUBLE,
                                        (IntPtr)pa_D, (IntPtr)recvcounts, (IntPtr)displs, csMPI.Raw._DATATYPE.DOUBLE,
                                        0, m_comm);
                                } else {
                                    csMPI.Raw.Gatherv(
                                        (IntPtr)pa_loc_S, a_loc_S.Length, csMPI.Raw._DATATYPE.FLOAT,
                                        (IntPtr)pa_S, (IntPtr)recvcounts, (IntPtr)displs, csMPI.Raw._DATATYPE.FLOAT,
                                        0, m_comm);
                                }
                            }

                            // column indices ('ja' is only allocated on rank 0)
                            fixed (void* pja_loc = ja_loc, pja = ja) {
                                csMPI.Raw.Gatherv(
                                    (IntPtr)pja_loc, ja_loc.Length, csMPI.Raw._DATATYPE.INT,
                                    (IntPtr)pja, (IntPtr)recvcounts, (IntPtr)displs, csMPI.Raw._DATATYPE.INT,
                                    0, m_comm);
                            }

                            // counts/offsets with respect to the matrix rows
                            for (int i = 0; i < size; i++) {
                                displs[i] = M.RowPartitioning.GetI0Offest(i);
                                recvcounts[i] = M.RowPartitioning.GetLocalLength(i);
                            }

                            // row pointers ('ia' is only allocated on rank 0)
                            fixed (void* pia_loc = ia_loc, pia = ia) {
                                csMPI.Raw.Gatherv(
                                    (IntPtr)pia_loc, ia_loc.Length, csMPI.Raw._DATATYPE.INT,
                                    (IntPtr)pia, (IntPtr)recvcounts, (IntPtr)displs, csMPI.Raw._DATATYPE.INT,
                                    0, m_comm);
                            }
                        }
                    }

                    if (rank == 0) {
                        // closing entry of the row-pointer array
                        this.ia[M.RowPartitioning.TotalLength] = (int)part.TotalLength + 1;
                    }

                    // drop the local copies
                    ia_loc = null;
                    ja_loc = null;
                    a_loc_S = null;
                    a_loc_D = null;
                    GC.Collect();
                }
            }
        } catch {
            // do not leak the unmanaged value buffer if construction fails
            if (aPtr != IntPtr.Zero) {
                Marshal.FreeHGlobal(aPtr);
                aPtr = IntPtr.Zero;
            }
            throw;
        }
    }
}
/// <summary>
/// initializes this matrix as a copy of the matrix <paramref name="M"/>.
/// </summary>
/// <remarks>
/// The matrix is stored in compressed-row form with 1-based (Fortran) indices:
/// row pointers in <c>ia</c>, column indices in <c>ja</c>, values in <c>a</c>.
/// With one MPI process the arrays are filled directly; with more than one
/// process every rank assembles its local rows and sends them to rank 0,
/// which is the only rank that allocates <c>ia</c>, <c>ja</c> and <c>a</c>.
/// For symmetric matrices only the upper triangle is stored and a diagonal
/// entry is always emitted, even if it is 0.0.
/// </remarks>
/// <param name="M">quadratic matrix with immutable row- and column-partitioning</param>
/// <exception cref="NotSupportedException">row- or column-partitioning of <paramref name="M"/> is mutable</exception>
/// <exception cref="ArgumentException"><paramref name="M"/> is not quadratic</exception>
/// <exception cref="ApplicationException">
/// the number of assembled entries does not match the expected count,
/// or the matrix has too many entries
/// </exception>
public Matrix(IMutableMatrixEx M) {
    if (M.RowPartitioning.IsMutable) {
        throw new NotSupportedException();
    }
    if (M.ColPartition.IsMutable) {
        throw new NotSupportedException();
    }
    if (M.NoOfCols != M.NoOfRows) {
        throw new ArgumentException("Matrix must be quadratic.", "M");
    }

    this.Symmetric = (M is MsrMatrix) && ((MsrMatrix)M).AssumeSymmetric;
    RowPart = M.RowPartitioning;

    int size = M.RowPartitioning.MpiSize, rank = M.RowPartitioning.MpiRank;
    Debug.Assert(M.RowPartitioning.MpiRank == M.ColPartition.MpiRank);
    Debug.Assert(M.RowPartitioning.MpiSize == M.ColPartition.MpiSize);
    var comm = M.MPI_Comm;

    if (size == 1) {
        // serial init on one processor
        // ++++++++++++++++++++++++++++
        n = (int)M.RowPartitioning.TotalLength;

        int len;
        if (Symmetric) {
            // upper triangle + diagonal (diagonal entries are
            // always required, even if 0.0, for symmetric matrices in PARDISO)
            len = M.GetGlobalNoOfUpperTriangularNonZeros() + n;
        } else {
            len = M.GetTotalNoOfNonZerosPerProcess();
        }

        int Nrows = M.RowPartitioning.LocalLength;
        ia = new int[n + 1];
        ja = new int[len];
        a = new double[len];

        int cnt = AssembleLocalRows(M, Symmetric, (int)M.RowPartitioning.i0, Nrows, 0, ia, ja, a);
        ia[Nrows] = cnt + 1; // fortran indexing

        if (len != cnt) {
            throw new ApplicationException("internal error.");
        }
    } else {
        // collect matrix on processor 0
        // +++++++++++++++++++++++++++++

        // Number of elements, start indices for index pointers
        // ====================================================

        int len_loc;
        if (Symmetric) {
            // number of entries is:
            // upper triangle + diagonal (diagonal entries are
            // always required, even if 0.0, for symmetric matrices in PARDISO)
            len_loc = M.GetLocalNoOfUpperTriangularNonZeros() + M.RowPartitioning.LocalLength;
        } else {
            len_loc = M.GetTotalNoOfNonZerosPerProcess();
        }

        Partitioning part = new Partitioning(len_loc, comm);
        if (part.TotalLength > int.MaxValue) {
            throw new ApplicationException("too many matrix entries for PARDISO - more than maximum 32-bit signed integer");
        }

        // local matrix assembly
        // =====================

        int n_loc = M.RowPartitioning.LocalLength;
        int[] ia_loc = new int[n_loc];
        int[] ja_loc = new int[len_loc];
        double[] a_loc = new double[len_loc];
        {
            // row pointers are shifted by the global index of the first local entry
            int cnt = AssembleLocalRows(M, Symmetric, (int)M.RowPartitioning.i0, n_loc, (int)part.i0, ia_loc, ja_loc, a_loc);

            if (cnt != len_loc) {
                throw new ApplicationException("internal error.");
            }
        }

        // assemble complete matrix on proc. 0
        // ===================================
        if (rank == 0) {
            n = M.RowPartitioning.TotalLength;

            // process 0: collect data from other processors
            // +++++++++++++++++++++++++++++++++++++++++++++
            this.ia = new int[M.RowPartitioning.TotalLength + 1];
            this.ja = new int[part.TotalLength];
            this.a = new double[part.TotalLength];

            // own contribution
            Array.Copy(ia_loc, 0, this.ia, 0, ia_loc.Length);
            Array.Copy(ja_loc, 0, this.ja, 0, ja_loc.Length);
            Array.Copy(a_loc, 0, this.a, 0, a_loc.Length);

            unsafe {
                // pinning the arrays themselves (instead of '&array[0]') also
                // works when an array is empty
                fixed (int* pia = this.ia, pja = this.ja) {
                    fixed (double* pa = this.a) {
                        for (int rcv_rank = 1; rcv_rank < size; rcv_rank++) {
                            MPI_Status status;
                            csMPI.Raw.Recv((IntPtr)(pa + part.GetI0Offest(rcv_rank)), part.GetLocalLength(rcv_rank), csMPI.Raw._DATATYPE.DOUBLE, rcv_rank, 321555 + rcv_rank, comm, out status);
                            csMPI.Raw.Recv((IntPtr)(pja + part.GetI0Offest(rcv_rank)), part.GetLocalLength(rcv_rank), csMPI.Raw._DATATYPE.INT, rcv_rank, 32155 + rcv_rank, comm, out status);
                            csMPI.Raw.Recv((IntPtr)(pia + M.RowPartitioning.GetI0Offest(rcv_rank)), M.RowPartitioning.GetLocalLength(rcv_rank), csMPI.Raw._DATATYPE.INT, rcv_rank, 3215 + rcv_rank, comm, out status);
                        }
                    }
                }
            }

            // closing entry of the row-pointer array
            this.ia[M.RowPartitioning.TotalLength] = (int)part.TotalLength + 1;
        } else {
            // send data to process 0
            // ++++++++++++++++++++++
            unsafe {
                fixed (int* pia = ia_loc, pja = ja_loc) {
                    fixed (double* pa = a_loc) {
                        // must use the same communicator as the receives on rank 0,
                        // otherwise the messages never match
                        csMPI.Raw.Send((IntPtr)pa, a_loc.Length, csMPI.Raw._DATATYPE.DOUBLE, 0, 321555 + rank, comm);
                        csMPI.Raw.Send((IntPtr)pja, ja_loc.Length, csMPI.Raw._DATATYPE.INT, 0, 32155 + rank, comm);
                        csMPI.Raw.Send((IntPtr)pia, ia_loc.Length, csMPI.Raw._DATATYPE.INT, 0, 3215 + rank, comm);
                    }
                }
            }
        }

        // drop the local copies
        ia_loc = null;
        ja_loc = null;
        a_loc = null;
        GC.Collect();
    }
}

/// <summary>
/// Writes <paramref name="noOfRows"/> consecutive rows of <paramref name="M"/>, starting at
/// global row <paramref name="firstRow"/>, in compressed-row form with 1-based (Fortran) indices.
/// Zero values are skipped; if <paramref name="symmetric"/> is set, entries left of the diagonal
/// are skipped and a diagonal entry is emitted even if it is 0.0.
/// </summary>
/// <param name="M">source matrix</param>
/// <param name="symmetric">true: store only upper triangle plus diagonal</param>
/// <param name="firstRow">global index of the first row to assemble</param>
/// <param name="noOfRows">number of rows to assemble</param>
/// <param name="entryOffset">offset added to every row pointer (0 for a purely local numbering)</param>
/// <param name="rowStart">output, row pointers; the first <paramref name="noOfRows"/> entries are written</param>
/// <param name="colIdx">output, column indices</param>
/// <param name="values">output, matrix values</param>
/// <returns>number of entries written to <paramref name="colIdx"/> and <paramref name="values"/></returns>
private static int AssembleLocalRows(IMutableMatrixEx M, bool symmetric, int firstRow, int noOfRows, int entryOffset, int[] rowStart, int[] colIdx, double[] values) {
    int cnt = 0;
    int[] col = null;
    double[] val = null;

    for (int i = 0; i < noOfRows; i++) {
        rowStart[i] = cnt + 1 + entryOffset; // fortran indexing
        int iRow = firstRow + i;

        int LR = M.GetRow(iRow, ref col, ref val);

        double diagelem = M[iRow, iRow];
        if (symmetric && diagelem == 0) {
            // in the symmetric case, we always need to provide the diagonal element
            colIdx[cnt] = iRow + 1; // fortran indexing
            values[cnt] = 0.0;
            cnt++;
        }

        for (int j = 0; j < LR; j++) {
            if (val[j] == 0.0) {
                continue;
            }
            if (symmetric && col[j] < iRow) {
                // entry is in lower triangular matrix -> ignore (for symmetric mtx.)
                continue;
            }

            colIdx[cnt] = col[j] + 1; // fortran indexing
            values[cnt] = val[j];
            cnt++;
        }
    }

    return cnt;
}
/// <summary>
/// performs a parallel inversion of this permutation
/// </summary>
/// <remarks>
/// Entries whose permutation value falls into the index range of the calling
/// process are inverted locally; all other entries are sent to the process
/// that owns the target index (found via <c>m_Partition.FindProcess</c>) and
/// are inverted there.
/// </remarks>
/// <returns>
/// a new permutation <c>inv</c> with <c>inv[this[k]] = k</c> for every global index <c>k</c>
/// </returns>
public Permutation Invert() {
    using (new FuncTrace()) {
        Permutation inv = new Permutation(this);
        int localLength = m_Partition.LocalLength;
        int size;
        MPI.Wrappers.csMPI.Raw.Comm_Size(m_Comm, out size);

        inv.m_Values = new long[localLength];
        long myi0Offset = m_Partition.i0; // first global index owned by this process

        // NOTE(review): the messenger is only disposed on the success path (see end of method).
        Many2ManyMessenger<PermutationEntry> m2m = new Many2ManyMessenger<PermutationEntry>(m_Comm);

        // do local inversion and collect items
        // ====================================

        List<PermutationEntry>[] itemsToSend = new List<PermutationEntry>[size];
        for (int p = 0; p < size; p++) {
            itemsToSend[p] = new List<PermutationEntry>();
        }

        for (int i = 0; i < localLength; i++) {
            long permVal = m_Values[i];

            // decide whether inversion of entry i is local or has to be transmitted ...
            if (permVal >= myi0Offset && permVal < (myi0Offset + localLength)) {
                // local
                inv.m_Values[permVal - myi0Offset] = i + myi0Offset;
            } else {
                // transmit: find target processor and collect item
                int targProc = m_Partition.FindProcess(permVal);

                PermutationEntry pe;
                pe.Index = myi0Offset + i;
                pe.PermVal = permVal;
                itemsToSend[targProc].Add(pe);
            }
        }

        // setup messenger
        // ===============

        for (int p = 0; p < size; p++) {
            int noOfItems = itemsToSend[p].Count;
            if (noOfItems > 0) {
                m2m.SetCommPath(p, noOfItems);
            }
        }
        m2m.CommitCommPaths();

        // transmit data
        // =============

        m2m.StartTransmission(1);

        for (int p = 0; p < size; p++) {
            if (itemsToSend[p].Count <= 0) {
                continue; // nothing to send to process no. p
            }

            PermutationEntry[] items = itemsToSend[p].ToArray();
            m2m.SendBuffers(p).CopyFrom(items, 0);
            m2m.TransmittData(p);
        }

        m2m.FinishBlocking();

        // invert received items
        // =====================

        for (int p = 0; p < size; p++) {
            Many2ManyMessenger<PermutationEntry>.Buffer rcvbuf = m2m.ReceiveBuffers(p);
            if (rcvbuf == null) {
                continue; // no data from process no. p
            }

            int cnt = rcvbuf.Count;
            PermutationEntry[] items = new PermutationEntry[cnt];
            rcvbuf.CopyTo(items, 0);

            for (int i = 0; i < cnt; i++) {
                inv.m_Values[items[i].PermVal - myi0Offset] = items[i].Index;
            }
        }

        // finalize
        // ========

        m2m.Dispose();
        return inv;
    }
}