/// <summary> /// Only for debugging and testing; converts the matrix to a full (dense) matrix. /// When running in parallel, the matrix is collected on MPI rank 0. /// </summary> /// <returns> /// The collected matrix on rank 0; null on all other MPI processes. /// </returns> static public MultidimensionalArray ToFullMatrixOnProc0(this IMutableMatrixEx tis) { var comm = tis.MPI_Comm; int Rank = tis.RowPartitioning.MpiRank; int Size = tis.RowPartitioning.MpiSize; double[,] ret = null; if (Rank == 0) { ret = new double[tis.NoOfRows, tis.NoOfCols]; } SerialisationMessenger sms = new SerialisationMessenger(comm); if (Rank > 0) { sms.SetCommPath(0); } sms.CommitCommPaths(); Tuple <int, int[], double[]>[] data; { int L = tis.RowPartitioning.LocalLength; data = new Tuple <int, int[], double[]> [L]; int i0 = (int)tis.RowPartitioning.i0; for (int i = 0; i < L; i++) { double[] val = null; // this memory must be allocated anew for every row i and cannot be reused, because all rows are needed later. int[] col = null; int Lr = tis.GetRow(i + i0, ref col, ref val); data[i] = new Tuple <int, int[], double[]>(Lr, col, val); } } if (Rank > 0) { sms.Transmitt(0, data); } int rcvProc = 0; if (Rank == 0) { do { int i0 = (int)tis.RowPartitioning.GetI0Offest(rcvProc); if (data.Length != tis.RowPartitioning.GetLocalLength(rcvProc)) { throw new ApplicationException("internal error"); } for (int i = 0; i < data.Length; i++) { int Lr = data[i].Item1; int[] col = data[i].Item2; double[] val = data[i].Item3; for (int lr = 0; lr < Lr; lr++) { ret[i + i0, col[lr]] = val[lr]; } } } while (sms.GetNext(out rcvProc, out data)); } else { if (sms.GetNext(out rcvProc, out data)) { throw new ApplicationException("internal error"); } } if (Rank == 0) { var _ret = MultidimensionalArray.Create(ret.GetLength(0), ret.GetLength(1)); for (int i = 0; i < _ret.NoOfRows; i++) { for (int j = 0; j < _ret.NoOfCols; j++) { _ret[i, j] = ret[i, j]; } } return(_ret); } else { return(null); } }
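A minimal usage sketch for the method above; `mtx` (some `IMutableMatrixEx` instance) is an assumption of this example, not part of the code shown here:

// Collect a distributed matrix on rank 0 for inspection (debugging only).
MultidimensionalArray full = mtx.ToFullMatrixOnProc0();
if (mtx.RowPartitioning.MpiRank == 0) {
    Console.WriteLine("dense size: " + full.NoOfRows + " x " + full.NoOfCols);
} else {
    Debug.Assert(full == null); // every rank except 0 receives null
}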
/// <summary> /// Synchronizes the edge-tag-name-to-value mapping over all MPI processes: rank 0 collects all names, assigns new values where the suggested ones collide, and the merged table is broadcast to all ranks. /// </summary> /// <returns> /// A translation table (index: old edge tag, content: new edge tag) if any tag value changed on any process; otherwise null. /// </returns> private static byte[] SyncEdgeTagsOverMPI(Dictionary <string, byte> EdgeTagNames_Reverse) { csMPI.Raw.Comm_Rank(csMPI.Raw._COMM.WORLD, out int MyRank); var To0 = new Dictionary <int, KeyValuePair <string, byte>[]>(); if (MyRank > 0) { To0.Add(0, EdgeTagNames_Reverse.ToArray()); } var allData = SerialisationMessenger.ExchangeData(To0); bool[] usedEdgeTags = new bool[byte.MaxValue + 1]; foreach (var et in EdgeTagNames_Reverse.Values) { usedEdgeTags[et] = true; } byte GetNewEt() { for (int i = 1; i < usedEdgeTags.Length; i++) { if (i >= GridCommons.FIRST_PERIODIC_BC_TAG) { throw new ApplicationException("Running out of edge tags."); } if (usedEdgeTags[i] == false) { usedEdgeTags[i] = true; return((byte)i); } } throw new ApplicationException("Running out of edge tags."); } if (MyRank == 0) { foreach (var kv in allData) { var backData = kv.Value; for (int i = 0; i < backData.Length; i++) { if (!EdgeTagNames_Reverse.ContainsKey(backData[i].Key)) { byte sugKey = backData[i].Value; if (usedEdgeTags[sugKey]) { sugKey = GetNewEt(); } EdgeTagNames_Reverse.Add(backData[i].Key, sugKey); } } } } var AllEts = EdgeTagNames_Reverse.ToArray().MPIBroadcast(0); byte[] EtTranslations = (byte.MaxValue + 1).ForLoop(i => (byte)i); bool AnyTranslation = false; if (MyRank > 0) { foreach (var kv in AllEts) { if (EdgeTagNames_Reverse.ContainsKey(kv.Key)) { byte oldVal = EdgeTagNames_Reverse[kv.Key]; byte newVal = kv.Value; AnyTranslation = AnyTranslation | (oldVal != newVal); EdgeTagNames_Reverse[kv.Key] = newVal; EtTranslations[oldVal] = newVal; } } } AnyTranslation = AnyTranslation.MPIOr(); if (AnyTranslation) { return(EtTranslations); } else { return(null); } }
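The method returns a translation table, or null if nothing changed; a hedged sketch of how a caller might apply it, where `edgeTags` is a hypothetical per-edge tag array:

byte[] translations = SyncEdgeTagsOverMPI(EdgeTagNames_Reverse);
if (translations != null) {
    for (int e = 0; e < edgeTags.Length; e++) {
        // the old tag value indexes the table, yielding the synchronized value
        edgeTags[e] = translations[edgeTags[e]];
    }
}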
/// <summary> /// Computes, for each cell of the new grid, the subdivision leaf indices of the old-grid cells it emerges from; see the returns clause for details. /// </summary> /// <param name="outputPartitioning"> /// Partitioning of the new grid, resp. of the returned array. /// </param> /// <returns> /// - 1st index: cell index in new grid, correlates with <paramref name="outputPartitioning"/>. /// - 2nd index: enumeration over cells (in the old grid) which are combined in the new grid. /// For cells with refinement, always one entry; for cells which are coarsened, a greater number of entries. /// If null, the cell is not changed. /// - content: subdivision leaf index, correlates with 2nd index of <see cref="KrefS_SubdivLeaves"/>, can be used as an input to <see cref="GetSubdivBasisTransform(int, int, int)"/>. /// </returns> public int[][] GetTargetMappingIndex(IPartitioning outputPartitioning) { using (new FuncTrace()) { Debug.Assert(DestGlobalId.Length == MappingIndex.Length); Debug.Assert(OldGlobalId.Length == MappingIndex.Length); int oldJ = DestGlobalId.Length; // Caching // ======= if (m_TargetMappingIndex != null) { if (m_TargetMappingIndex.Length != outputPartitioning.LocalLength) { throw new ArgumentException("Length mismatch of output list and output partition."); } return(m_TargetMappingIndex); } // local evaluation, prepare communication // ======================================= m_TargetMappingIndex = new int[outputPartitioning.LocalLength][]; int j0Dest = outputPartitioning.i0; // keys: processors which should receive data from this processor Dictionary <int, GetTargetMapping_Helper> AllSendData = new Dictionary <int, GetTargetMapping_Helper>(); for (int j = 0; j < oldJ; j++) { int[] MappingIndex_j = MappingIndex[j]; if (MappingIndex_j != null) { Debug.Assert(TargetIdx[j].Length == MappingIndex_j.Length); int L = MappingIndex_j.Length; for (int l = 0; l < L; l++) { int jDest = TargetIdx[j][l]; int MapIdx = MappingIndex_j[l]; if (outputPartitioning.IsInLocalRange(jDest)) { int[] destCollection = m_TargetMappingIndex[jDest - j0Dest]; ArrayTools.AddToArray(MapIdx, ref destCollection); m_TargetMappingIndex[jDest - j0Dest] = destCollection; } else { int targProc = outputPartitioning.FindProcess(jDest); GetTargetMapping_Helper dataTargPrc; if (!AllSendData.TryGetValue(targProc, out dataTargPrc)) { dataTargPrc = new GetTargetMapping_Helper(); AllSendData.Add(targProc, dataTargPrc); } dataTargPrc.TargetIndices.Add(jDest); dataTargPrc.Items.Add(MapIdx); } } } else { Debug.Assert(TargetIdx[j].Length == 1); } } // communication // ============= var AllRcvData = SerialisationMessenger.ExchangeData(AllSendData, outputPartitioning.MPI_Comm); foreach (var kv in AllRcvData) { // received items target cells in the local range of this process, so the local offset 'j0Dest' applies (not the offset of the sending rank) var TIdxs = kv.Value.TargetIndices; var TVals = kv.Value.Items; Debug.Assert(TIdxs.Count == TVals.Count); int L = TIdxs.Count; for (int l = 0; l < L; l++) { Debug.Assert(outputPartitioning.IsInLocalRange(TIdxs[l])); int idx = TIdxs[l] - j0Dest; int[] destCollection = m_TargetMappingIndex[idx]; // bug fix: accumulate the l-th received item; 'idx' is the local output index, not an index into TVals ArrayTools.AddToArray(TVals[l], ref destCollection); m_TargetMappingIndex[idx] = destCollection; } } // return // ====== return(m_TargetMappingIndex); } }
/// <summary> /// Saves the matrix in a custom sparse text format; /// Mainly for importing into MATLAB; /// </summary> /// <param name="path">Path to the text file</param> /// <remarks> /// MATLAB code for importing the matrix (save as file 'ReadMsr.m'): /// <code> /// function Mtx = ReadMsr(filename) /// /// fid = fopen(filename); /// % matrix dimensions /// % ----------------- /// NoOfRows = fscanf(fid,'%d',1); /// NoOfCols = fscanf(fid,'%d',1); /// NonZeros = fscanf(fid,'%d',1); /// cnt = 1; /// % read row and column array /// % ------------------------- /// iCol = zeros(NonZeros,1); /// iRow = zeros(NonZeros,1); /// entries = zeros(NonZeros,1); /// l0 = 0; /// str = char(zeros(1,6)); /// for i = 1:NoOfRows /// NonZerosInRow = fscanf(fid,'%d',1); /// if(l0 ~= NonZerosInRow) /// str = char(zeros(1,NonZerosInRow*6)); /// for j = 1:NonZerosInRow /// i0 = 1+(j-1)*6; /// str(i0:i0+1) = '%f'; /// str(i0+3:i0+4) = '%f'; /// end /// end /// R = fscanf(fid,str,2*NonZerosInRow); /// R2 = reshape(R',2,NonZerosInRow); /// ind = cnt:(cnt+NonZerosInRow-1); /// iCol(ind) = R2(1,:); /// iRow(ind) = i; /// entries(ind) = R2(2,:); /// /// cnt = cnt + NonZerosInRow; /// end /// fclose(fid); /// /// if (cnt-1) < NonZeros /// iCol = iCol(1:(cnt-1),1); /// iRow = iRow(1:(cnt-1),1); /// entries = entries(1:(cnt-1),1); /// end /// /// % create sparse matrix /// % -------------------- /// Mtx = sparse(iRow,iCol+1,entries,NoOfRows,NoOfCols,NonZeros); /// /// </code> /// </remarks> /// <param name="M"> /// this pointer of extension method /// </param> static public void SaveToTextFileSparse(this IMutableMatrixEx M, string path) { using (new FuncTrace()) { int rank, size; csMPI.Raw.Comm_Rank(M.MPI_Comm, out rank); csMPI.Raw.Comm_Size(M.MPI_Comm, out size); SerialisationMessenger sms = new SerialisationMessenger(M.MPI_Comm); int NoOfNonZeros = M.GetTotalNoOfNonZeros(); if (rank == 0) { sms.CommitCommPaths(); // receive data from other processors MsrMatrix.MatrixEntry[][] entries = M.GetAllEntries(); if (size > 1) { Array.Resize(ref entries, (int)(M.RowPartitioning.TotalLength)); } Helper rcvdata; int rcvRank; while (sms.GetNext(out rcvRank, out rcvdata)) { Array.Copy(rcvdata.entries, 0, entries, (int)M.RowPartitioning.GetI0Offest(rcvRank), rcvdata.entries.Length); } // open file StreamWriter stw = new StreamWriter(path); // serialize matrix data stw.WriteLine(M.RowPartitioning.TotalLength); // number of rows stw.WriteLine(M.NoOfCols); // number of columns stw.WriteLine(NoOfNonZeros); // number of non-zero entries in Matrix (over all MPI-processors) for (int i = 0; i < entries.Length; i++) { MsrMatrix.MatrixEntry[] row = entries[i]; int NonZPRow = 0; foreach (MsrMatrix.MatrixEntry e in row) { if (e.ColIndex >= 0 && e.Value != 0.0) { NonZPRow++; } } stw.Write(NonZPRow); stw.Write(" "); foreach (MsrMatrix.MatrixEntry e in row) { if (e.ColIndex >= 0 && e.Value != 0.0) { stw.Write(e.ColIndex); stw.Write(" "); stw.Write(e.Value.ToString("E16", NumberFormatInfo.InvariantInfo)); stw.Write(" "); } } stw.WriteLine(); } // finalize stw.Flush(); stw.Close(); } else { sms.SetCommPath(0); sms.CommitCommPaths(); var entries = M.GetAllEntries(); var c = new Helper(); c.entries = entries; sms.Transmitt(0, c); MsrMatrix.MatrixEntry[][] dummy; int dummy_; if (sms.GetNext <MsrMatrix.MatrixEntry[][]>(out dummy_, out dummy)) { throw new ApplicationException("error in app"); } } sms.Dispose(); } }
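For orientation, the round trip might look as follows; the file name is arbitrary and `M` stands for any `IMutableMatrixEx`:

// .NET side: write the matrix in the sparse text format described above.
M.SaveToTextFileSparse("Mtx.txt");
// MATLAB side (with the ReadMsr.m from the remarks saved on the MATLAB path):
//   Mtx = ReadMsr('Mtx.txt');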
/// <summary> /// Like <see cref="Evaluate(double, IEnumerable{DGField}, MultidimensionalArray, double, MultidimensionalArray, BitArray, int[])"/>, /// but with MPI-Exchange /// </summary> public int EvaluateParallel(double alpha, IEnumerable <DGField> Flds, MultidimensionalArray Points, double beta, MultidimensionalArray Result, BitArray UnlocatedPoints = null) { using (new FuncTrace()) { MPICollectiveWatchDog.Watch(); int L = Points != null ? Points.NoOfRows : 0; int MPIsize = m_Context.MpiSize; int D = m_Context.SpatialDimension; if (UnlocatedPoints != null) { if (UnlocatedPoints.Length != L) { throw new ArgumentException("Length mismatch"); } } // evaluate locally // ================ var unlocated = new System.Collections.BitArray(L); int NoOfUnlocated; if (L > 0) { NoOfUnlocated = this.Evaluate(alpha, Flds, Points, beta, Result, unlocated); } else { NoOfUnlocated = 0; } // return, if there are no unlocalized points // ========================================== int TotNoOfUnlocated = NoOfUnlocated.MPISum(); if (TotNoOfUnlocated <= 0) { if (UnlocatedPoints != null) { UnlocatedPoints.SetAll(false); } return(0); } // copy unlocalized to separate array // ================================== double[,] localUnlocated = new double[NoOfUnlocated, D]; // MultidimensionalArray does not allow zero length -- so use double[,] instead int[] IndexToOrgIndex = new int[NoOfUnlocated]; int u = 0; for (int i = 0; i < L; i++) { if (unlocated[i]) { localUnlocated.SetRowPt(u, Points.GetRowPt(i)); IndexToOrgIndex[u] = i; u++; } } Debug.Assert(u == NoOfUnlocated); // collect on all ranks -- this won't scale well, but it may work // ============================================================== MultidimensionalArray globalUnlocated; int[] WhoIsInterestedIn; // index: point index, corresponds with 'globalUnlocated' rows; content: rank which needs the result int[] OriginalIndex; // index: detto; content: index which the point had on the processor that sent it. double[][,] __globalUnlocated; int LL; { __globalUnlocated = localUnlocated.MPIAllGatherO(); Debug.Assert(__globalUnlocated.Length == MPIsize); Debug.Assert(__globalUnlocated.Select(aa => aa.GetLength(0)).Sum() == TotNoOfUnlocated); Debug.Assert(__globalUnlocated[m_Context.MpiRank].GetLength(0) == NoOfUnlocated); LL = TotNoOfUnlocated - NoOfUnlocated; if (LL > 0) { globalUnlocated = MultidimensionalArray.Create(LL, D); } else { globalUnlocated = null; } WhoIsInterestedIn = new int[LL]; OriginalIndex = new int[LL]; int g = 0; for (int r = 0; r < MPIsize; r++) // concat all point arrays from all processors { if (r == m_Context.MpiRank) { continue; } double[,] __globalPart = __globalUnlocated[r]; int Lr = __globalPart.GetLength(0); if (Lr > 0) { globalUnlocated.ExtractSubArrayShallow(new[] { g, 0 }, new[] { g + Lr - 1, D - 1 }).Acc2DArray(1.0, __globalPart); } for (int i = 0; i < Lr; i++) { WhoIsInterestedIn[i + g] = r; OriginalIndex[i + g] = i; } g += Lr; } } // try to evaluate the so-far-unlocalized points // --------------------------------------------- var unlocated2 = new System.Collections.BitArray(LL); var Result2 = LL > 0 ? MultidimensionalArray.Create(LL, Flds.Count()) : null; int NoOfUnlocated2 = LL > 0 ? 
this.Evaluate(1.0, Flds, globalUnlocated, 0.0, Result2, unlocated2) : 0; // backward MPI sending // -------------------- IDictionary <int, EvaluateParallelHelper> resultFromOtherProcs; { var backSend = new Dictionary <int, EvaluateParallelHelper>(); for (int ll = 0; ll < LL; ll++) { if (!unlocated2[ll]) { int iTarget = WhoIsInterestedIn[ll]; Debug.Assert(iTarget != m_Context.MpiRank); if (!backSend.TryGetValue(iTarget, out EvaluateParallelHelper eph)) { eph = new EvaluateParallelHelper(); backSend.Add(iTarget, eph); } eph.OriginalIndices.Add(OriginalIndex[ll]); eph.Results.Add(Result2.GetRow(ll)); } } resultFromOtherProcs = SerialisationMessenger.ExchangeData(backSend); } // fill the results from other processors // ====================================== foreach (var res in resultFromOtherProcs.Values) { int K = res.OriginalIndices.Count(); Debug.Assert(res.OriginalIndices.Count == res.Results.Count); for (int k = 0; k < K; k++) { int iOrg = IndexToOrgIndex[res.OriginalIndices[k]]; if (unlocated[iOrg] == true) { NoOfUnlocated--; } unlocated[iOrg] = false; Result.AccRow(iOrg, alpha, res.Results[k]); } } // Return // ====== if (UnlocatedPoints != null) { for (int l = 0; l < L; l++) { UnlocatedPoints[l] = unlocated[l]; } } return(NoOfUnlocated); } }
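A hedged sketch of a typical call; `evaluator` (the object exposing this method), `fields`, `points` (N-by-D), and `result` (N-by-number-of-fields) are assumptions of this example:

// Evaluate DG fields at arbitrary points, with MPI fallback for points that
// lie outside the local subdomain; 'missed' flags points that no rank found.
var missed = new System.Collections.BitArray(points.NoOfRows);
int notFound = evaluator.EvaluateParallel(1.0, fields, points, 0.0, result, missed);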
/// <summary> /// Applies this mapping to a vector: the item of each old cell is appended to the output entries of all its target cells (<see cref="TargetIdx"/>); items whose targets lie on other MPI processes are exchanged accordingly. /// </summary> public void ApplyToVector <I>(IList <I> input, IList <I[]> output, IPartitioning outputPartitioning) { using (new FuncTrace()) { Debug.Assert(DestGlobalId.Length == MappingIndex.Length); Debug.Assert(OldGlobalId.Length == MappingIndex.Length); int oldJ = DestGlobalId.Length; if (input.Count != oldJ) { throw new ArgumentException("Mismatch between input vector length and current data length."); } if (output.Count != outputPartitioning.LocalLength) { throw new ArgumentException("Length mismatch of output list and output partition."); } int j0Dest = outputPartitioning.i0; // keys: processors which should receive data from this processor Dictionary <int, ApplyToVector_Helper <I> > AllSendData = new Dictionary <int, ApplyToVector_Helper <I> >(); for (int j = 0; j < oldJ; j++) { I data_j = input[j]; foreach (int jDest in TargetIdx[j]) { if (outputPartitioning.IsInLocalRange(jDest)) { I[] destCollection = output[jDest - j0Dest]; ArrayTools.AddToArray(data_j, ref destCollection); output[jDest - j0Dest] = destCollection; } else { int targProc = outputPartitioning.FindProcess(jDest); ApplyToVector_Helper <I> dataTargPrc; if (!AllSendData.TryGetValue(targProc, out dataTargPrc)) { dataTargPrc = new ApplyToVector_Helper <I>(); AllSendData.Add(targProc, dataTargPrc); } dataTargPrc.TargetIndices.Add(jDest); dataTargPrc.Items.Add(data_j); } } } var AllRcvData = SerialisationMessenger.ExchangeData(AllSendData, outputPartitioning.MPI_Comm); foreach (var kv in AllRcvData) { // received items target cells in the local range of this process, so the local offset 'j0Dest' applies (not the offset of the sending rank) var TIdxs = kv.Value.TargetIndices; var TVals = kv.Value.Items; Debug.Assert(TIdxs.Count == TVals.Count); int L = TIdxs.Count; for (int l = 0; l < L; l++) { Debug.Assert(outputPartitioning.IsInLocalRange(TIdxs[l])); int idx = TIdxs[l] - j0Dest; I[] destCollection = output[idx]; // bug fix: append the l-th received item, not 'TVals[idx]' ArrayTools.AddToArray(TVals[l], ref destCollection); output[idx] = destCollection; } } } }
/// <summary> /// MPI update of a bit-array: the flags of locally owned cells are sent to neighboring processes, where they are stored in the corresponding external (ghost) cells. /// </summary> /// <param name="b">Cell mask to synchronize; its length must match <c>GridData.iLogicalCells.NoOfCells</c>.</param> /// <param name="GridData">The grid object on which <paramref name="b"/> is defined.</param> static public void MPIExchange(this BitArray b, IGridData GridData) { if (b.Length != GridData.iLogicalCells.NoOfCells) { throw new ArgumentException("length must be equal to number of cells.", "b"); } if (GridData.CellPartitioning.MpiSize > 1) { // external { int rank, size; csMPI.Raw.Comm_Rank(csMPI.Raw._COMM.WORLD, out rank); csMPI.Raw.Comm_Size(csMPI.Raw._COMM.WORLD, out size); // setup messenger SerialisationMessenger sms = new SerialisationMessenger(csMPI.Raw._COMM.WORLD); sms.SetCommPathsAndCommit(GridData.iParallel.ProcessesToSendTo); // send data for (int p = 0; p < size; p++) { int[] sendlist = GridData.iParallel.SendCommLists[p]; if (sendlist == null) { continue; } int L = sendlist.Length; System.Collections.BitArray packet_for_p = new System.Collections.BitArray(L, false); for (int l = 0; l < L; l++) { packet_for_p[l] = b[sendlist[l]]; } sms.Transmitt(p, packet_for_p); } // receive data System.Collections.BitArray rcv_dat; int rcv_rank; while (sms.GetNext(out rcv_rank, out rcv_dat)) { int insertAt = GridData.iParallel.RcvCommListsInsertIndex[rcv_rank]; if (GridData.iParallel.RcvCommListsNoOfItems[rcv_rank] != rcv_dat.Count) { throw new ApplicationException("internal error."); } int C = rcv_dat.Count; for (int i = 0; i < C; i++) { b[insertAt + i] = rcv_dat[i]; } } // dispose sms.Dispose(); } } }
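Usage sketch: flag some locally owned cells, then make those flags visible in the external (ghost) cells of the neighboring ranks; `gridData` and `locallyMarkedCells` are assumptions of this example:

BitArray marked = new BitArray(gridData.iLogicalCells.NoOfCells);
foreach (int j in locallyMarkedCells)
    marked[j] = true;            // set flags in the locally owned range
marked.MPIExchange(gridData);    // ghost-cell entries now mirror their owners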
/// <summary> /// matrix assembly; must be called by each implementation, /// </summary> /// <param name="M"></param> protected void PackMatrix(IMutableMatrixEx M) { ilPSP.MPICollectiveWatchDog.Watch(); IPartitioning rp = M.RowPartitioning; IPartitioning cp = m_ColPart; // define Comm List // ================ SortedDictionary <int, List <int> > CommLists = new SortedDictionary <int, List <int> >(); // keys: processor rank p // values: List of global indices, which processor p needs to send to this processor int Lr; double[] val = null; int[] col = null; int L = rp.LocalLength; int i0 = (int)rp.i0; for (int iLoc = 0; iLoc < L; iLoc++) // loop over all matrix rows... { int iGlob = i0 + iLoc; //MsrMatrix.MatrixEntry[] row = (asMsr==null) ? M.GetRow(iGlob) : asMsr.GetRowShallow(iGlob); Lr = M.GetOccupiedColumnIndices(iGlob, ref col); for (int j = 0; j < Lr; j++) // loop over all nonzero entries in row 'iGlob' { int jGlob = col[j]; if (cp.i0 <= jGlob && jGlob < (cp.i0 + cp.LocalLength)) { // Entry on current processor } else { int proc = cp.FindProcess(jGlob); // Entry on Processor proc if (!CommLists.ContainsKey(proc)) { CommLists.Add(proc, new List <int>()); } List <int> CommList_proc = CommLists[proc]; if (!CommList_proc.Contains(jGlob)) // a lot of room for optimization { CommList_proc.Add(jGlob); } } } } // sort com list // ============= { foreach (List <int> cl in CommLists.Values) { cl.Sort(); } } // define matrix // ============= { TempCSR intTmp = new TempCSR(); SortedDictionary <int, ExternalTmp> extTmp = new SortedDictionary <int, ExternalTmp>(); foreach (int proc in CommLists.Keys) { extTmp.Add(proc, new ExternalTmp()); } for (int iLoc = 0; iLoc < L; iLoc++) { int iGlob = i0 + iLoc; Lr = M.GetRow(iGlob, ref col, ref val); for (int j = 0; j < Lr; j++) { int jGlob = col[j]; double Value = val[j]; bool bIsDiag = (iGlob == jGlob); if (cp.i0 <= jGlob && jGlob < (cp.i0 + cp.LocalLength)) { // Entry on current processor intTmp.AddEntry(jGlob - (int)cp.i0, Value, bIsDiag); } else { int proc = cp.FindProcess(jGlob); // Entry on Processor proc List <int> CommList_proc = CommLists[proc]; int jloc = CommList_proc.IndexOf(jGlob); ExternalTmp et = extTmp[proc]; et.AddEntry(jloc, jGlob, Value); } } intTmp.NextRow(); foreach (ExternalTmp et in extTmp.Values) { et.NextRow(); } } m_LocalMtx = AssembleFinalFormat(intTmp); ExtMatrix = new Dictionary <int, External>(); foreach (int proc in extTmp.Keys) { ExtMatrix.Add(proc, extTmp[proc].GetFinalObj()); } } // send/receive & transform Comm lists // ==================================== { SerialisationMessenger sms = new SerialisationMessenger(csMPI.Raw._COMM.WORLD); SortedDictionary <int, int[]> CommListsTo = new SortedDictionary <int, int[]>(); foreach (int proc in CommLists.Keys) { sms.SetCommPath(proc); } sms.CommitCommPaths(); foreach (int proc in CommLists.Keys) { sms.Transmitt(proc, CommLists[proc].ToArray()); } int _proc; int[] CommListReceived; sms.GetNext(out _proc, out CommListReceived); int Lcol = m_ColPart.LocalLength; int i0col = (int)m_ColPart.i0; while (CommListReceived != null) { // convert indices to local coordinates for (int i = 0; i < CommListReceived.Length; i++) { CommListReceived[i] -= i0col; // check: if (CommListReceived[i] < 0 || CommListReceived[i] >= Lcol) { throw new ApplicationException("internal error: something wrong with received Comm List."); } } CommListsTo.Add(_proc, CommListReceived); sms.GetNext(out _proc, out CommListReceived); } sms.Dispose(); m_SpmvCommPattern = new SpmvCommPattern(); m_SpmvCommPattern.ComLists = 
CommListsTo; } // record the number of elements which we receive // ============================================== { m_SpmvCommPattern.NoOfReceivedEntries = new Dictionary <int, int>(); foreach (int p in CommLists.Keys) { m_SpmvCommPattern.NoOfReceivedEntries.Add(p, CommLists[p].Count); } } }
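The send/receive handshake in the last block above is a recurring pattern in this code base; reduced to its core it reads as the sketch below, where `myCommLists` (a dictionary of index lists per target rank) is a hypothetical stand-in for `CommLists`:

var sms = new SerialisationMessenger(csMPI.Raw._COMM.WORLD);
foreach (int proc in myCommLists.Keys)
    sms.SetCommPath(proc);       // announce the receivers...
sms.CommitCommPaths();           // ...collectively, so every rank knows its senders
foreach (int proc in myCommLists.Keys)
    sms.Transmitt(proc, myCommLists[proc].ToArray());
int sender; int[] received;
while (sms.GetNext(out sender, out received)) {
    // 'received' holds the indices which rank 'sender' requests from us
}
sms.Dispose();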
/// <summary> /// Computes a grid partitioning (which cell should be on which processor) /// using the serial METIS library -- work is only done on MPI rank 0. /// </summary> /// <param name="cellWeightsLocal"> /// If not null, defines the weight associated with each cell on the current process. /// </param> /// <param name="noOfPartitioningsToChooseFrom"> /// Tells METIS to compute the given number of independent partitionings and to return the one with the best edge cut (cf. the METIS option NCUTS). /// </param> /// <returns> /// Index: local cell index; content: MPI processor rank.<br/> /// This is the suggestion /// of METIS for the grid partitioning: /// For each local cell index, the returned array contains the MPI /// process rank where the cell should be placed. /// </returns> public int[] ComputePartitionMETIS(int[] cellWeightsLocal = null, int noOfPartitioningsToChooseFrom = 1) { using (new FuncTrace()) { int size = this.Size; int rank = this.MyRank; if (size == 1) { return(new int[NoOfUpdateCells]); } if (this.NumberOfCells_l > int.MaxValue) { throw new Exception(String.Format( "Grid contains more than {0} cells and can thus not be partitioned using METIS. Use ParMETIS instead.", int.MaxValue)); } int J = (rank == 0) ? this.NumberOfCells : 0; // Setup communication; all send to rank 0 SerialisationMessenger sms = new SerialisationMessenger(csMPI.Raw._COMM.WORLD); if (rank != 0) { sms.SetCommPath(0); } sms.CommitCommPaths(); // Assemble adjacency lists on rank 0 IEnumerable <Neighbour>[] neighboursGlobal = new IEnumerable <Neighbour> [J]; { IEnumerable <Neighbour>[] neighboursLocal = GetCellNeighbourship(IncludeBcCells: false).Take(NoOfUpdateCells).ToArray(); if (rank == 0) { int localOffset = m_CellPartitioning.GetI0Offest(rank); int localLength = m_CellPartitioning.GetLocalLength(rank); for (int i = 0; i < localLength; i++) { neighboursGlobal[localOffset + i] = neighboursLocal[i]; } } else { sms.Transmitt(0, neighboursLocal); } while (sms.GetNext(out int senderRank, out IEnumerable <Neighbour>[] neighbours)) { int localOffset = m_CellPartitioning.GetI0Offest(senderRank); int localLength = m_CellPartitioning.GetLocalLength(senderRank); if (neighbours.Length != localLength) { throw new Exception(); } for (int i = 0; i < localLength; i++) { neighboursGlobal[localOffset + i] = neighbours[i]; } } } // Gather global weights on rank 0 int[] cellWeightsGlobal = null; if (cellWeightsLocal != null) { cellWeightsGlobal = new int[J]; if (rank == 0) { int localOffset = m_CellPartitioning.GetI0Offest(rank); int localLength = m_CellPartitioning.GetLocalLength(rank); for (int i = 0; i < localLength; i++) { cellWeightsGlobal[localOffset + i] = cellWeightsLocal[i]; } } else { sms.Transmitt(0, cellWeightsLocal); } while (sms.GetNext(out int senderRank, out int[] cellWeights)) { int localOffset = m_CellPartitioning.GetI0Offest(senderRank); int localLength = m_CellPartitioning.GetLocalLength(senderRank); if (cellWeights.Length != localLength) { throw new Exception(); } for (int i = 0; i < localLength; i++) { cellWeightsGlobal[localOffset + i] = cellWeights[i]; } } } int[] globalResult = new int[J]; if (rank == 0) { int[] xadj = new int[J + 1]; List <int> adjncy = new List <int>(J * m_RefElements[0].NoOfFaces); for (int j = 0; j < J; j++) { var cNj = neighboursGlobal[j]; int E = cNj.Count(); for (int e = 0; e < E; e++) { var NN = cNj.ElementAt(e); if (NN.Neighbour_GlobalIndex >= 0 && !NN.IsPeriodicNeighbour) { adjncy.Add((int)NN.Neighbour_GlobalIndex); } } xadj[j + 1] = adjncy.Count; } // Call METIS int nparts = size; Debug.Assert((cellWeightsGlobal == null) == (cellWeightsLocal == null)); int ncon = 1; // one weight per vertex/cell
int objval = -1; // value of the objective function (edge cut) at return time int[] Options = new int[METIS.METIS_NOPTIONS]; Options[(int)METIS.OptionCodes.METIS_OPTION_NCUTS] = noOfPartitioningsToChooseFrom; // number of alternative partitionings to compute; the best edge cut wins Options[(int)METIS.OptionCodes.METIS_OPTION_NITER] = 10; // default number of refinement iterations Options[(int)METIS.OptionCodes.METIS_OPTION_UFACTOR] = 30; // maximum allowed imbalance of 3 percent (default for k-way) METIS.ReturnCodes status = (METIS.ReturnCodes)METIS.PartGraphKway( nvtxs: ref J, ncon: ref ncon, xadj: xadj, adjncy: adjncy.ToArray(), vwgt: cellWeightsGlobal, // if null, METIS assumes all have weight 1 vsize: null, // No information about communication size adjwgt: null, // No edge weights nparts: ref nparts, tpwgts: null, // No weights for partition constraints ubvec: null, // No imbalance tolerance for constraints options: Options, objval: ref objval, part: globalResult); if (status != METIS.ReturnCodes.METIS_OK) { throw new Exception(String.Format( "Error partitioning the mesh. METIS reported {0}", status)); } int[] CountCheck = new int[size]; int J2 = this.NumberOfCells; for (int i = 0; i < J2; i++) { CountCheck[globalResult[i]]++; } for (int rnk = 0; rnk < size; rnk++) { if (CountCheck[rnk] <= 0) { throw new ApplicationException("METIS produced illegal partitioning - 0 cells on process " + rnk + "."); } } } int[] localLengths = new int[size]; for (int p = 0; p < localLengths.Length; p++) { localLengths[p] = this.CellPartitioning.GetLocalLength(p); } int[] localResult = globalResult.MPIScatterv(localLengths); return(localResult); } }
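A hedged usage sketch; `grid` stands for the surrounding grid object and is an assumption of this example:

// Let METIS choose among 4 candidate partitionings, then move the cells.
int[] part = grid.ComputePartitionMETIS(cellWeightsLocal: null, noOfPartitioningsToChooseFrom: 4);
grid.RedistributeGrid(part); // see the next method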
/// <summary> /// redistributes the grid, i.e. sends cells to different processors /// </summary> /// <param name="part"> /// MPI processor rank for each cell; index: local cell index; /// </param> public void RedistributeGrid(int[] part) { int Size; int MyRank; csMPI.Raw.Comm_Rank(csMPI.Raw._COMM.WORLD, out MyRank); csMPI.Raw.Comm_Size(csMPI.Raw._COMM.WORLD, out Size); // the cached cell partitioning is no longer valid! m_CellPartitioning = null; int J = NoOfUpdateCells; if (part.Length < J) { throw new ArgumentException(); } // send cells to other processors // ============================== // count number of items for each processor int[] ItmCnt = new int[Size]; for (int j = 0; j < J; j++) { ItmCnt[part[j]]++; } // create messenger SerialisationMessenger sm = new SerialisationMessenger(csMPI.Raw._COMM.WORLD); for (int p = 0; p < Size; p++) { if (ItmCnt[p] > 0 && p != MyRank) { sm.SetCommPath(p); } } sm.CommitCommPaths(); // sort cells according to processors (part[j] gives the rank that will receive cell j) List <Cell>[] exch = new List <Cell> [Size]; for (int p = 0; p < Size; p++) { if (ItmCnt[p] > 0 || p == MyRank) { exch[p] = new List <Cell>(ItmCnt[p]); } } for (int j = 0; j < J; j++) { exch[part[j]].Add(Cells[j]); } // start transmission for (int p = 0; p < Size; p++) { if (exch[p] != null && p != MyRank) { sm.Transmitt(p, exch[p]); } } // receive cells from other processors var myown = exch[MyRank]; List <Cell> o; int prc; while (sm.GetNext <List <Cell> >(out prc, out o)) { myown.AddRange(o); } // write back data to arrays this.Cells = myown.ToArray(); }
/// <summary> /// detects how the MPI processes are distributed over physical compute nodes (SMP nodes) /// </summary> private void SMPEvaluation() { //int ht = m_Context.IOMaster.tracer.EnterFunction("BoSSS.Foundation.Comm.DatabaseDriver.SMPEvaluation"); using (new FuncTrace()) { ilPSP.MPICollectiveWatchDog.Watch(MPI.Wrappers.csMPI.Raw._COMM.WORLD); // define SMP rank; // for each MPI process, the SMP node index // index: MPI rank; content: SMP rank; int[] SMPRank = null; int NoOfSMPs = -1; { // we are using the computer name to determine // which MPI processes run on the same physical machine // send host name to proc 0. SerialisationMessenger sms = new SerialisationMessenger(csMPI.Raw._COMM.WORLD); if (MyRank > 0) { sms.SetCommPath(0); } sms.CommitCommPaths(); if (MyRank > 0) { sms.Transmitt(0, m_hostname); } int recvRnk; string nmn; sms.GetNext(out recvRnk, out nmn); if (MyRank == 0) { // receiving names from all processors List <string> hosts_unique = new List <string>(); hosts_unique.Add(m_hostname); string[] hosts = new string[Size]; SMPRank = new int[Size]; ArrayTools.SetAll(SMPRank, int.MinValue); hosts[0] = m_hostname; SMPRank[0] = hosts_unique.IndexOf(m_hostname); while (nmn != null) { if (hosts[recvRnk] != null) { throw new ApplicationException("should not happen."); } hosts[recvRnk] = nmn; int smpRnk = hosts_unique.IndexOf(nmn); if (smpRnk < 0) { hosts_unique.Add(nmn); smpRnk = hosts_unique.Count - 1; } SMPRank[recvRnk] = smpRnk; sms.GetNext(out recvRnk, out nmn); } NoOfSMPs = hosts_unique.Count; for (int i = 0; i < Size; i++) { if (hosts[i] == null || SMPRank[i] < 0) { throw new ApplicationException("fatal error in algorithm."); } } } else { // don't receive anything if (nmn != null) { // fatal error in algorithm throw new ApplicationException("internal error: unexpected message received on non-root rank."); } } sms.Dispose(); } m_SMPSize = NoOfSMPs.MPIBroadcast(0, csMPI.Raw._COMM.WORLD); m_SMPRanks = SMPRank.MPIBroadcast(0, csMPI.Raw._COMM.WORLD); { // number of MPI processes per SMP rank; index: SMP rank m_MPIProcessesPerSMP = new int[m_SMPSize]; int[] _MPIProcessesPerSMP = new int[m_SMPSize]; _MPIProcessesPerSMP[m_SMPRanks[m_MyRank]]++; unsafe { fixed(int *pSnd = &_MPIProcessesPerSMP[0], pRcv = &m_MPIProcessesPerSMP[0]) { csMPI.Raw.Allreduce((IntPtr)pSnd, (IntPtr)pRcv, m_SMPSize, csMPI.Raw._DATATYPE.INT, csMPI.Raw._OP.SUM, csMPI.Raw._COMM.WORLD); } } } //m_Context.IOMaster.tracer.LeaveFunction(ht); } }
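The rank-0 reduction above boils down to mapping host names to SMP indices; a self-contained sketch of that step, with a hypothetical `hosts` array holding one name per MPI rank:

var uniqueHosts = new List<string>();
int[] smpRank = new int[hosts.Length];
for (int r = 0; r < hosts.Length; r++) {
    int idx = uniqueHosts.IndexOf(hosts[r]);
    if (idx < 0) { uniqueHosts.Add(hosts[r]); idx = uniqueHosts.Count - 1; }
    smpRank[r] = idx;            // ranks on the same host share one SMP index
}
int noOfSMPs = uniqueHosts.Count;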
/// <summary> /// Computes the neighbor cells globally (i.e. over all MPI processors) for each local cell. /// </summary> /// <param name="IncludeBcCells"> /// If true, the boundary condition cells (<see cref="BcCells"/>) will also be included in the output array. /// </param> /// <returns> /// Cell-wise neighborship information: /// - index: local cell index <em>j</em>, i.e. correlates with <see cref="Cells"/>; if <paramref name="IncludeBcCells"/> is true, /// the information for boundary cells is added after the information for cells. /// - content: for the index <em>j</em> the set of neighbor cells. If the global index (<see cref="Neighbour.Neighbour_GlobalIndex"/>) /// is greater than or equal to the global number of cells (<see cref="NumberOfCells"/>), the neighbor is a boundary condition cell /// (<see cref="BcCells"/>). /// </returns> public IEnumerable <Neighbour>[] GetCellNeighbourship(bool IncludeBcCells) { ilPSP.MPICollectiveWatchDog.Watch(); using (new FuncTrace()) { var ftNeigh = GetFaceTagsNeigbourIndices(IncludeBcCells); var NPart = this.NodePartitioning; int K = NPart.LocalLength; int k0 = NPart.i0; int J = this.NoOfUpdateCells; int J_BC = IncludeBcCells ? this.NoOfBcCells : 0; int j0 = this.CellPartitioning.i0; int Jglob = this.CellPartitioning.TotalLength; int j0Bc = this.BcCellPartitioning.i0; // Which cells make use of a particular node? //------------------------------------------- // Index: Node index // Entry: Enumeration of global indices of cells that use this // particular node List <int>[] Nodes2Cells = new List <int> [K]; { for (int k = 0; k < K; k++) { Nodes2Cells[k] = new List <int>(); } // key: MPI processor rank // value: information packet Dictionary <int, List <NodeCellIndexPair> > Y = new Dictionary <int, List <NodeCellIndexPair> >(); for (int j = 0; j < (J + J_BC); j++) { Element Cell_j; RefElement Kref; int jCell_glob; if (j < J) { Cell_j = this.Cells[j]; Kref = this.m_RefElements.Single(KK => KK.SupportedCellTypes.Contains(Cell_j.Type)); jCell_glob = j + j0; } else { Cell_j = this.BcCells[j - J]; Kref = this.m_EdgeRefElements.Single(KK => KK.SupportedCellTypes.Contains(Cell_j.Type)); jCell_glob = (j - J) + j0Bc + Jglob; } var CellNodes = Cell_j.NodeIndices; if (CellNodes.Length != Kref.NoOfVertices) { throw new ApplicationException("error in data structure."); } foreach (int NodeId in CellNodes) { int target_prozi = NPart.FindProcess(NodeId); if (target_prozi == MyRank) { Nodes2Cells[NodeId - k0].Add(jCell_glob); } else { NodeCellIndexPair Packet; Packet.NodeId = NodeId; Packet.GlobalCellIndex = jCell_glob; List <NodeCellIndexPair> Z; if (!Y.TryGetValue(target_prozi, out Z)) { Z = new List <NodeCellIndexPair>(); Y.Add(target_prozi, Z); } Z.Add(Packet); } } } var W = SerialisationMessenger.ExchangeData(Y, csMPI.Raw._COMM.WORLD); foreach (var wp in W.Values) { foreach (NodeCellIndexPair Packet in wp) { Nodes2Cells[Packet.NodeId - k0].Add(Packet.GlobalCellIndex); } } } // For every cell, for every vertex in this cell: // Which other cells also use this node?
//----------------------------------------------- // 1st index: Local cell index // 2nd index: Cell vertex index // 3rd index: Collection of 'peer' cells int[][][] NodePeers = new int[J + J_BC][][]; { for (int j = 0; j < J + J_BC; j++) { Element Cell_j; if (j < J) { Cell_j = this.Cells[j]; } else { Cell_j = this.BcCells[j - J]; } NodePeers[j] = new int[Cell_j.NodeIndices.Length][]; } Dictionary <int, List <NodeCellListPair> > Y = new Dictionary <int, List <NodeCellListPair> >(); var CPart = this.CellPartitioning; var BcPart = this.BcCellPartitioning; for (int k = 0; k < K; k++) // loop over locally assigned nodes { int k_node = k + k0; var cell_list = Nodes2Cells[k].ToArray(); foreach (int jCell in cell_list) // loop over all cells that use node 'k' { int cell_proc; int local_offset; if (jCell < Jglob) { // normal cell cell_proc = CPart.FindProcess(jCell); local_offset = j0; } else { // boundary condition cell cell_proc = BcPart.FindProcess(jCell - Jglob); local_offset = Jglob + j0Bc; } if (cell_proc == MyRank) { int jCell_loc = jCell - local_offset; int kC; bool bfound = false; Element Cell_j; int oo; if (jCell < Jglob) { // normal cell Cell_j = this.Cells[jCell_loc]; oo = 0; } else { // boundary condition cell Cell_j = this.BcCells[jCell_loc]; oo = J; } for (kC = 0; kC < Cell_j.NodeIndices.Length; kC++) { if (Cell_j.NodeIndices[kC] == k_node) { bfound = true; break; } } if (!bfound) { throw new ApplicationException("error in algorithm."); } NodePeers[jCell_loc + oo][kC] = cell_list; } else { NodeCellListPair A; A.NodeId = k_node; A.CellList = cell_list; List <NodeCellListPair> Z; if (!Y.TryGetValue(cell_proc, out Z)) { Z = new List <NodeCellListPair>(); Y.Add(cell_proc, Z); } Z.Add(A); } } } var W = SerialisationMessenger.ExchangeData(Y, csMPI.Raw._COMM.WORLD); foreach (var wp in W.Values) { foreach (var P in wp) { int k_node = P.NodeId; int[] cell_list = P.CellList; foreach (int jCell in cell_list) { int cell_proc; int local_offset; if (jCell < Jglob) { // normal cell cell_proc = CPart.FindProcess(jCell); local_offset = j0; } else { // boundary condition cell cell_proc = BcPart.FindProcess(jCell - Jglob); local_offset = Jglob + j0Bc; } if (cell_proc == MyRank) { int jCell_loc = jCell - local_offset; Element Cell_j; int oo; if (jCell < Jglob) { // normal cell Cell_j = this.Cells[jCell_loc]; oo = 0; } else { // boundary condition cell Cell_j = this.BcCells[jCell_loc]; oo = J; } int kC; bool bfound = false; for (kC = 0; kC < Cell_j.NodeIndices.Length; kC++) { if (Cell_j.NodeIndices[kC] == k_node) { bfound = true; break; } } if (!bfound) { throw new ApplicationException("error in algorithm."); } NodePeers[jCell_loc + oo][kC] = cell_list; } } } } } // Assemble final result // --------------------- IEnumerable <Neighbour>[] CellNeighbours; { CellNeighbours = new IEnumerable <Neighbour> [J + J_BC]; for (int j = 0; j < J + J_BC; j++) // loop over cells //var Cell_j = this.Cells[j]; //int jCellGlob = j + j0; //var Kref = this.m_GridSimplices.Single(KK => KK.SupportedTypes.Contains(Cell_j.Type)); { Element Cell_j; RefElement Kref; int jCellGlob; if (j < J) { Cell_j = this.Cells[j]; Kref = this.m_RefElements.Single(KK => KK.SupportedCellTypes.Contains(Cell_j.Type)); jCellGlob = j + j0; } else { Cell_j = this.BcCells[j - J]; Kref = this.m_EdgeRefElements.Single(KK => KK.SupportedCellTypes.Contains(Cell_j.Type)); jCellGlob = (j - J) + j0Bc + Jglob; } var Cell_j_Neighs = new List <Neighbour>(); CellNeighbours[j] = Cell_j_Neighs; // find neighbor cells connected via grid nodes // 
// -------------------------------------------- if (j < J) { // normal cells: match faces var faceVtx = Kref.FaceToVertexIndices; int[][] B = new int[faceVtx.GetLength(1)][]; for (int _iface = 0; _iface < Kref.NoOfFaces; _iface++) // loop over faces of cell 'j' (local index) resp. 'jCellGlob' (global index) { for (int iv = 0; iv < B.Length; iv++) { B[iv] = NodePeers[j][faceVtx[_iface, iv]]; } int NeighIdx = Intersect(B, jCellGlob); if (NeighIdx >= 0) { Neighbour nCN = default(Neighbour); nCN.Neighbour_GlobalIndex = NeighIdx; nCN.CellFaceTag.FaceIndex = _iface; nCN.CellFaceTag.ConformalNeighborship = true; Cell_j_Neighs.Add(nCN); } } } else { // boundary-condition cell: match the whole element int[][] B = new int[Kref.NoOfVertices][]; for (int iv = 0; iv < B.Length; iv++) { B[iv] = NodePeers[j][iv]; } int NeighIdx = Intersect(B, jCellGlob); if (NeighIdx >= 0) { Neighbour nCN = default(Neighbour); nCN.Neighbour_GlobalIndex = NeighIdx; nCN.CellFaceTag.FaceIndex = -1; nCN.CellFaceTag.ConformalNeighborship = true; Cell_j_Neighs.Add(nCN); } } // find neighbor cells connected via CellFaceTags // ----------------------------------------------- var otherNeighbours = ftNeigh[j]; // ftNeigh is the result of CellFaceTag-based connectivity if (j < J) { var _Cell_j = (Cell)Cell_j; Debug.Assert(((otherNeighbours == null ? 0 : otherNeighbours.Length) == ((_Cell_j.CellFaceTags == null) ? 0 : _Cell_j.CellFaceTags.Length))); if (otherNeighbours != null) { for (int w = 0; w < otherNeighbours.Length; w++) { Debug.Assert(_Cell_j.CellFaceTags[w].NeighCell_GlobalID < 0 == otherNeighbours[w] < 0); if (_Cell_j.CellFaceTags[w].NeighCell_GlobalID >= 0) { if (Cell_j_Neighs.Where(neigh => neigh.Neighbour_GlobalIndex == otherNeighbours[w]).Count() <= 0) // filter duplicates { Cell_j_Neighs.Add(new Neighbour() { Neighbour_GlobalIndex = otherNeighbours[w], CellFaceTag = _Cell_j.CellFaceTags[w], }); } } } } } else { var BcCell_j = (BCElement)Cell_j; Debug.Assert(((otherNeighbours == null ? 0 : otherNeighbours.Length) == ((BcCell_j.NeighCell_GlobalIDs == null) ? 0 : BcCell_j.NeighCell_GlobalIDs.Length))); if (otherNeighbours != null) { for (int w = 0; w < otherNeighbours.Length; w++) { Cell_j_Neighs.Add(new Neighbour() { Neighbour_GlobalIndex = otherNeighbours[w], CellFaceTag = new CellFaceTag() { EdgeTag = BcCell_j.EdgeTag, FaceIndex = int.MinValue, NeighCell_GlobalID = BcCell_j.NeighCell_GlobalIDs[w], ConformalNeighborship = BcCell_j.Conformal } }); } } } } } return(CellNeighbours); } }
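The face matching above relies on `Intersect(B, jCellGlob)`, which must return the single global cell index, other than the cell itself, that occurs in every peer list of the face's vertices, or a negative value if no such cell exists. A sketch of that contract (not the library's actual implementation, which may differ):

static int Intersect(int[][] B, int self) {
    foreach (int candidate in B[0]) {
        if (candidate == self)
            continue;            // a cell is not its own neighbor
        bool inAll = true;
        for (int i = 1; i < B.Length && inAll; i++)
            inAll = Array.IndexOf(B[i], candidate) >= 0;
        if (inAll)
            return candidate;    // shared by all vertices of the face
    }
    return int.MinValue;         // no conformal neighbor across this face
}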
/// <summary> /// ctor. /// </summary> /// <param name="M"></param> /// <param name="ExtCol"> /// key: processor rank 'p' <br/> /// value: a list of column indices (within the local range of columns of <paramref name="M"/>), /// which should be editable at rank 'p'. /// </param> public MsrExtMatrix(IMutableMatrixEx M, IDictionary <int, int[]> ExtCol) { this.ColPart = M.ColPartition; int i0Row = (int)M.RowPartitioning.i0, I = M.RowPartitioning.LocalLength, i0Col = (int)this.ColPart.i0, J = this.ColPart.LocalLength; Mtx = M; // init // ==== ColToRowLocal = new List <int> [ColPart.LocalLength]; for (int j = 0; j < ColToRowLocal.Length; j++) { ColToRowLocal[j] = new List <int>(); } // build Column to row - mapping // ============================= ColToRowExternal = new Dictionary <int, List <int> >(); // key: global column index j, within the range of processor 'p' // values: global row indices SortedDictionary <int, List <int> > ColForProc = new SortedDictionary <int, List <int> >(); // key: MPI processor index 'p' // values: a set of global column indices, within the range of processor 'p', // that contain nonzero entries on this processor int[] col = null; int L; // loop over all rows... for (int i = 0; i < I; i++) { L = M.GetOccupiedColumnIndices(i + i0Row, ref col); // loop over all nonzero entries in the row... for (int l = 0; l < L; l++) { int ColIndex = col[l]; int localColInd = ColIndex - i0Col; if (localColInd >= 0 && localColInd < J) { // column of 'entry' is within the local range of this processor // + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ColToRowLocal[localColInd].Add(i + i0Row); } else { // column of 'entry' belongs to external processor 'proc' // + + + + + + + + + + + + + + + + + + + + + + + + + + + + int proc = this.ColPart.FindProcess(ColIndex); { //SortedDictionary<int, List<int>> ColToRowExt_proc; //if (!ColToRowExternal.ContainsKey(proc)) { // ColToRowExt_proc = new SortedDictionary<int, List<int>>(); // ColToRowExternal.Add(proc, ColToRowExt_proc); //} else { // ColToRowExt_proc = ColToRowExternal[proc]; //} int j = ColIndex; List <int> Rows4Col; if (!ColToRowExternal.ContainsKey(j)) { Rows4Col = new List <int>(); ColToRowExternal.Add(j, Rows4Col); } else { Rows4Col = ColToRowExternal[j]; } Rows4Col.Add(i + i0Row); } { List <int> ColForProc_proc; if (!ColForProc.ContainsKey(proc)) { ColForProc_proc = new List <int>(); ColForProc.Add(proc, ColForProc_proc); } else { ColForProc_proc = ColForProc[proc]; } if (!ColForProc_proc.Contains(ColIndex)) { ColForProc_proc.Add(ColIndex); } } } } } // communicate // =========== //SerialisationMessenger sms = new SerialisationMessenger(csMPI.Raw.MPI_COMM_WORLD); //{ // foreach (int proc in ColToRowExternal.Keys) { // sms.SetCommPath(proc); // } // sms.CommitCommPaths(); // // send // foreach (int proc in ColToRowExternal.Keys) { // SortedDictionary<int, List<int>> ColToRowExt_proc = ColToRowExternal[proc]; // SortedList _ColToRowExt_proc = new SortedList(); // foreach (int iCol in ColToRowExt_proc.Keys) // _ColToRowExt_proc.Add(iCol, ColToRowExt_proc[iCol].ToArray()); // } // // receive // int p; SortedList rcv; // sms.GetNext(out p, out rcv); // while (rcv != null) { // foreach (int col in rcv.Keys) { // int[] rowList = (int[])rcv[col]; // ColToRowLocal[col].AddRange(rowList); // } // sms.GetNext(out p, out rcv); // } //} //sms.Dispose(); // build 'ColProcessors' // ===================== //ColProcessors = new List<int>[ColToRowLocal.Length]; //for (int j = 0; j < ColToRowLocal.Length; j++) { // List<int> mpiRank 
// = null; // foreach (int rowind in ColToRowLocal[j]) { // int riloc = rowind - i0Row; // if (riloc < 0 || riloc >= I) { // if (mpiRank == null) // mpiRank = new List<int>(); // mpiRank.Add(M.RowPartiton.FindProcess(rowind)); // } // ColProcessors[j] = mpiRank; // } //} // communicate: build 'ColProcessors' // ================================== { ColProcessors = new List <int> [ColPart.LocalLength]; SerialisationMessenger sms = new SerialisationMessenger(csMPI.Raw._COMM.WORLD); sms.SetCommPathsAndCommit(ColForProc.Keys); foreach (int proc in ColForProc.Keys) { sms.Transmit(proc, ColForProc[proc].ToArray()); } int i0Loc = (int)ColPart.i0; int rcvproc; int[] ColIndices; while (sms.GetNext(out rcvproc, out ColIndices)) { int Rank; { csMPI.Raw.Comm_Rank(csMPI.Raw._COMM.WORLD, out Rank); //Console.WriteLine("P# " + Rank + ": receiving from P# " + rcvproc); } foreach (int ColInd in ColIndices) { int localColInd = ColInd - i0Loc; if (localColInd < 0 || localColInd >= ColPart.LocalLength) { throw new IndexOutOfRangeException("internal error"); } if (ColProcessors[localColInd] == null) { ColProcessors[localColInd] = new List <int>(); } if (ColProcessors[localColInd].Contains(rcvproc)) { throw new ApplicationException("internal error."); } ColProcessors[localColInd].Add(rcvproc); } } sms.Dispose(); } if (ExtCol != null) { var send = new Dictionary <int, List <Tuple <int, List <int> > > >(); int myRank = M.RowPartitioning.MpiRank; foreach (var kv in ExtCol) { int rank = kv.Key; int[] ColIdx = kv.Value; var sendToRank = new List <Tuple <int, List <int> > >(); foreach (int iCol in ColIdx) { List <int> c2p = ColProcessors[iCol - i0Col]; var t = new Tuple <int, List <int> >(iCol, c2p != null ? new List <int>(c2p) : new List <int>()); t.Item2.Add(myRank); sendToRank.Add(t); } send.Add(rank, sendToRank); } var receive = SerialisationMessenger.ExchangeData(send, csMPI.Raw._COMM.WORLD); ColProcessorsExternal = new Dictionary <int, List <int> >(); foreach (var kv in receive) { var val = kv.Value; foreach (var t in val) { int iCol = t.Item1; List <int> ranks = t.Item2; int iMyRank = ranks.IndexOf(myRank); if (iMyRank >= 0) { ranks.RemoveAt(iMyRank); } ColProcessorsExternal.Add(t.Item1, t.Item2); Debug.Assert(this.ColPart.FindProcess(t.Item1) == kv.Key); } } #if DEBUG foreach (var procList in ColProcessorsExternal.Values) { Debug.Assert(procList.Contains(myRank) == false); } #endif } }
/// <summary> /// An MPI-collective call, which executes all column operations. /// </summary> public void CompleteColOperation() { int j0Loc = (int)m_Matrix.ColPart.i0; int LenLoc = m_Matrix.ColPart.LocalLength; // sort operations according to processor // ====================================== // keys: MPI processor rank p // values: list of operations to execute on p SortedDictionary <int, List <ColOp> > OperationsPerProcessor = new SortedDictionary <int, List <ColOp> >(); List <int> InvokedProc; for (int i = 0; i < DeferredColOpList.Count; i++) { ColOp op = DeferredColOpList[i]; bool skip = false; ColAddition ca = op as ColAddition; List <int> InvokesProcSrc = null; if (ca != null) { // we have a column addition - this requires some special treatments // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // problem 1: if // * the destination col. (aka. accumulator column), i.e. column no. 'op.jCol' // is zero, // * and the source row is nonzero // // then we need to send the command not only to processors which contain // nonzeros in destination row, but also the source row. // // problem 2: for subsequent operations, we may expect that some column which has // originally been zero now contains nonzero elements. // So, therefore, we have to add the processor set of the source row // (i.e. 'm_Matrix.ColProcessors[ca.iSrc]' to the processor set of the // destination row. if (m_Matrix.ColPart.IsInLocalRange(ca.iSrc)) { InvokesProcSrc = m_Matrix.ColProcessors[ca.iSrc - j0Loc]; } else { if (!this.m_Matrix.ColProcessorsExternal.TryGetValue(ca.iSrc, out InvokesProcSrc)) { throw new IndexOutOfRangeException("manipulation operation not available on current processor"); } } if (InvokesProcSrc != null) { if (m_Matrix.ColProcessors[op.jCol - j0Loc] == null) { m_Matrix.ColProcessors[op.jCol - j0Loc] = InvokesProcSrc; } else { InvokedProc = m_Matrix.ColProcessors[op.jCol - j0Loc]; foreach (int ps in InvokesProcSrc) { if (!InvokedProc.Contains(ps)) { InvokedProc.Add(ps); } } } } else { // optimization: source column is zero (on other processors) -> nothing to do skip = true; } } if (m_Matrix.ColPart.IsInLocalRange(op.jCol)) { InvokedProc = m_Matrix.ColProcessors[op.jCol - j0Loc]; } else { if (!this.m_Matrix.ColProcessorsExternal.TryGetValue(op.jCol, out InvokedProc)) { throw new IndexOutOfRangeException("manipulation operation not available on current processor"); } } if (InvokedProc != null && !skip) { foreach (int proc in InvokedProc) { bool skip2 = false; if (ca != null) { // optimization: don't need to send column addition if // the source row is zero if (!InvokesProcSrc.Contains(proc)) { skip2 = true; } } if (!skip2) { List <ColOp> DeferredOp_proc; if (OperationsPerProcessor.ContainsKey(proc)) { DeferredOp_proc = OperationsPerProcessor[proc]; } else { DeferredOp_proc = new List <ColOp>(); OperationsPerProcessor.Add(proc, DeferredOp_proc); } DeferredOp_proc.Add(op); } } } } // transmit to other processors // ============================ SerialisationMessenger sms = new SerialisationMessenger(csMPI.Raw._COMM.WORLD); foreach (int proc in OperationsPerProcessor.Keys) { sms.SetCommPath(proc); } sms.CommitCommPaths(); foreach (int proc in OperationsPerProcessor.Keys) { sms.Transmit(proc, OperationsPerProcessor[proc].ToArray()); } int rcvp; ColOp[] rcv; while (sms.GetNext(out rcvp, out rcv)) { DeferredColOpList.AddRange(rcv); // operations from different processors // commute (because they are bound to the column partition) // therefore, it doesn't matter how they are added //#if DEBUG 
// { // int Rank; // csMPI.Raw.Comm_Rank(csMPI.Raw._COMM.WORLD, out Rank); // //Console.WriteLine("P# " + Rank + ": " + rcv.Length + " operation(s) received from P# " + rcvp); // foreach (var op in rcv) { // if ((op.jCol >= m_Matrix.ColPart.i0 && op.jCol < (m_Matrix.ColPart.i0 + m_Matrix.ColPart.LocalLength))) // throw new ApplicationException("internal error"); // } // } //#endif } // execute operations // ================== int L = DeferredColOpList.Count; for (int l = 0; l < L; l++) { ColOp op = DeferredColOpList[l]; int jlocal = op.jCol - j0Loc; if (op is ColAddition) { double alpha = ((ColAddition)op).alpha; int iSrc = ((ColAddition)op).iSrc; int[] col; if (iSrc >= j0Loc && iSrc < (j0Loc + LenLoc)) { // operation in local column range col = m_Matrix.ColToRowLocal[iSrc - j0Loc].ToArray(); } else { // operation comes from other processor List <int> _col; if (m_Matrix.ColToRowExternal.TryGetValue(iSrc, out _col)) { col = _col.ToArray(); } else { col = new int[0]; } //col = m_Matrix.ColToRowExternal[iSrc].ToArray(); } foreach (int irow in col) { m_Matrix[irow, op.jCol] += m_Matrix[irow, iSrc] * alpha; } } else { int[] col; if (jlocal >= 0 && jlocal < LenLoc) { // operation in local column range col = m_Matrix.ColToRowLocal[jlocal].ToArray(); } else { // operation comes from other processor List <int> _col; if (m_Matrix.ColToRowExternal.TryGetValue(op.jCol, out _col)) { col = _col.ToArray(); } else { col = new int[0]; } //col = m_Matrix.ColToRowExternal[op.jCol].ToArray(); } if (op is ColMul) { double alpha = ((ColMul)op).alpha; foreach (int irow in col) { m_Matrix[irow, op.jCol] *= alpha; } } else if (op is ColClear) { foreach (int irow in col) { m_Matrix[irow, op.jCol] = 0; } } else { throw new NotImplementedException(); } } } // finish & return // =============== DeferredColOpList.Clear(); }
/// <summary> /// Resorts a vector according to this permutation, i.e. the /// <em>j</em>-th item of the input vector is copied to the /// <see cref="Values"/>[j]-th entry of the output vector. /// </summary> /// <param name="input"> /// Input vector, length must be equal to the length of this permutation, unchanged on exit. /// </param> /// <param name="output"> /// On exit, <paramref name="output"/>[<see cref="Values"/>[j]] = <paramref name="input"/>[j] /// </param> /// <param name="outputPartitioning"> /// Partitioning of the output vector among the MPI processes. /// </param> public void ApplyToVector <I>(IList <I> input, IList <I> output, IPartitioning outputPartitioning) { using (new FuncTrace()) { if (input.Count != this.LocalLength) { throw new ArgumentException("wrong size of input vector."); } if (output.Count != outputPartitioning.LocalLength) { throw new ArgumentException("wrong size of output vector."); } long[] TargetInd = this.Values; // keys: processors which should receive data from this processor Dictionary <int, ApplyToVector_Helper <I> > sendData = new Dictionary <int, ApplyToVector_Helper <I> >(); int out_myI0 = outputPartitioning.i0; int out_nextI0 = out_myI0 + outputPartitioning.LocalLength; int J = this.Partitioning.LocalLength; for (int j = 0; j < J; j++) { if (out_myI0 <= TargetInd[j] && TargetInd[j] < out_nextI0) { // target index located on this processor output[(int)(TargetInd[j] - out_myI0)] = input[j]; } else { // target index located on other processor int TargProc = outputPartitioning.FindProcess(TargetInd[j]); ApplyToVector_Helper <I> sendData_TargProc = null; if (!sendData.TryGetValue(TargProc, out sendData_TargProc)) { sendData_TargProc = new ApplyToVector_Helper <I>(); sendData.Add(TargProc, sendData_TargProc); } sendData_TargProc.Items.Add(input[j]); sendData_TargProc.TargetIndices.Add(TargetInd[j]); } } var rcvData = SerialisationMessenger.ExchangeData( sendData, MPI.Wrappers.csMPI.Raw._COMM.WORLD); foreach (var rcvPkt in rcvData.Values) { int K = rcvPkt.Items.Count; Debug.Assert(rcvPkt.Items.Count == rcvPkt.TargetIndices.Count); for (int k = 0; k < K; k++) { int locIdx = (int)(rcvPkt.TargetIndices[k]) - out_myI0; output[locIdx] = rcvPkt.Items[k]; } } } }
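Usage sketch; `perm`, `input`, and `outputPartitioning` are assumptions of this example. After the call, the j-th local input item sits at global position `perm.Values[j]` of the distributed output vector:

double[] output = new double[outputPartitioning.LocalLength];
perm.ApplyToVector(input, output, outputPartitioning); // scatters across ranks as needed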