/// <summary>
/// Detects how the MPI processes are distributed over the compute nodes (SMP nodes).
/// </summary>
private void SMPEvaluation() {
    //int ht = m_Context.IOMaster.tracer.EnterFunction("BoSSS.Foundation.Comm.DatabaseDriver.SMPEvaluation");
    using (new FuncTrace()) {
        ilPSP.MPICollectiveWatchDog.Watch(MPI.Wrappers.csMPI.Raw._COMM.WORLD);

        // define SMP rank:
        // for each MPI process, the index of the SMP node it runs on;
        // index: MPI rank; content: SMP rank
        int[] SMPRank = null;
        int NoOfSMPs = -1;
        {
            // we use the computer name to determine which MPI processes
            // run on the same physical machine

            // send host name to process 0
            SerialisationMessenger sms = new SerialisationMessenger(csMPI.Raw._COMM.WORLD);
            if (MyRank > 0)
                sms.SetCommPath(0);
            sms.CommitCommPaths();
            if (MyRank > 0)
                sms.Transmitt(0, m_hostname);

            int recvRnk;
            string nmn;
            sms.GetNext(out recvRnk, out nmn);
            if (MyRank == 0) {
                // receiving names from all processes
                List<string> hosts_unique = new List<string>();
                hosts_unique.Add(m_hostname);

                string[] hosts = new string[Size];
                SMPRank = new int[Size];
                ArrayTools.SetAll(SMPRank, int.MinValue);
                hosts[0] = m_hostname;
                SMPRank[0] = hosts_unique.IndexOf(m_hostname);

                while (nmn != null) {
                    if (hosts[recvRnk] != null)
                        throw new ApplicationException("should not happen.");
                    hosts[recvRnk] = nmn;

                    int smpRnk = hosts_unique.IndexOf(nmn);
                    if (smpRnk < 0) {
                        // host name not seen before: new SMP node
                        hosts_unique.Add(nmn);
                        smpRnk = hosts_unique.Count - 1;
                    }
                    SMPRank[recvRnk] = smpRnk;

                    sms.GetNext(out recvRnk, out nmn);
                }
                NoOfSMPs = hosts_unique.Count;

                for (int i = 0; i < Size; i++) {
                    if (hosts[i] == null || SMPRank[i] < 0)
                        throw new ApplicationException("fatal error in algorithm.");
                }
            } else {
                // ranks > 0 don't receive anything
                if (nmn != null) {
                    // fatal error in algorithm
                    throw new ApplicationException("fatal error in algorithm.");
                }
            }

            sms.Dispose();
        }

        m_SMPSize = NoOfSMPs.MPIBroadcast(0, csMPI.Raw._COMM.WORLD);
        m_SMPRanks = SMPRank.MPIBroadcast(0, csMPI.Raw._COMM.WORLD);

        {
            // number of MPI processes per SMP rank; index: SMP rank
            m_MPIProcessesPerSMP = new int[m_SMPSize];
            int[] _MPIProcessesPerSMP = new int[m_SMPSize];
            _MPIProcessesPerSMP[m_SMPRanks[m_MyRank]]++;

            unsafe {
                fixed (int* pSnd = &_MPIProcessesPerSMP[0], pRcv = &m_MPIProcessesPerSMP[0]) {
                    csMPI.Raw.Allreduce((IntPtr)pSnd, (IntPtr)pRcv, m_SMPSize,
                        csMPI.Raw._DATATYPE.INT, csMPI.Raw._OP.SUM, csMPI.Raw._COMM.WORLD);
                }
            }
        }

        //m_Context.IOMaster.tracer.LeaveFunction(ht);
    }
}
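
// Illustrative sketch (not part of the driver): the rank-0 logic above reduces to
// mapping each host name to the index of its first occurrence in the list of
// gathered names. 'ComputeSMPRanks' and its signature are hypothetical and only
// demonstrate the grouping step; 'hosts' stands for the host names gathered on
// rank 0, indexed by MPI rank.
//
// E.g. hosts = { "nodeA", "nodeB", "nodeA" } yields SMPRank = { 0, 1, 0 }
// and NoOfSMPs = 2.
private static int[] ComputeSMPRanks(string[] hosts, out int NoOfSMPs) {
    var hosts_unique = new List<string>();
    int[] SMPRank = new int[hosts.Length];
    for (int rank = 0; rank < hosts.Length; rank++) {
        int smpRnk = hosts_unique.IndexOf(hosts[rank]);
        if (smpRnk < 0) {
            // first process seen on this machine: assign a new SMP node index
            hosts_unique.Add(hosts[rank]);
            smpRnk = hosts_unique.Count - 1;
        }
        SMPRank[rank] = smpRnk;
    }
    NoOfSMPs = hosts_unique.Count;
    return SMPRank;
}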
/// <summary>
/// An MPI-collective call which executes all deferred column operations.
/// </summary>
public void CompleteColOperation() {
    int j0Loc = (int)m_Matrix.ColPart.i0;
    int LenLoc = m_Matrix.ColPart.LocalLength;

    // sort operations according to processor
    // ======================================

    // keys: MPI processor rank p
    // values: list of operations to execute on p
    SortedDictionary<int, List<ColOp>> OperationsPerProcessor = new SortedDictionary<int, List<ColOp>>();

    List<int> InvokedProc;
    for (int i = 0; i < DeferredColOpList.Count; i++) {
        ColOp op = DeferredColOpList[i];

        bool skip = false;

        ColAddition ca = op as ColAddition;
        List<int> InvokesProcSrc = null;
        if (ca != null) {
            // we have a column addition - this requires some special treatment
            // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

            // problem 1: if
            //  * the destination column (a.k.a. the accumulator column), i.e.
            //    column no. 'op.jCol', is zero,
            //  * and the source column is nonzero,
            // then we need to send the command not only to processors which contain
            // nonzeros in the destination column, but also to those which contain
            // nonzeros in the source column.
            //
            // problem 2: for subsequent operations, we may expect that some column
            // which originally was zero now contains nonzero elements.
            // Therefore, we have to add the processor set of the source column
            // (i.e. 'm_Matrix.ColProcessors[ca.iSrc]') to the processor set of the
            // destination column.

            if (m_Matrix.ColPart.IsInLocalRange(ca.iSrc)) {
                InvokesProcSrc = m_Matrix.ColProcessors[ca.iSrc - j0Loc];
            } else {
                if (!this.m_Matrix.ColProcessorsExternal.TryGetValue(ca.iSrc, out InvokesProcSrc))
                    throw new IndexOutOfRangeException("manipulation operation not available on current processor");
            }

            if (InvokesProcSrc != null) {
                if (m_Matrix.ColProcessors[op.jCol - j0Loc] == null) {
                    m_Matrix.ColProcessors[op.jCol - j0Loc] = InvokesProcSrc;
                } else {
                    InvokedProc = m_Matrix.ColProcessors[op.jCol - j0Loc];
                    foreach (int ps in InvokesProcSrc) {
                        if (!InvokedProc.Contains(ps))
                            InvokedProc.Add(ps);
                    }
                }
            } else {
                // optimization: source column is zero (on other processors) -> nothing to do
                skip = true;
            }
        }

        if (m_Matrix.ColPart.IsInLocalRange(op.jCol)) {
            InvokedProc = m_Matrix.ColProcessors[op.jCol - j0Loc];
        } else {
            if (!this.m_Matrix.ColProcessorsExternal.TryGetValue(op.jCol, out InvokedProc))
                throw new IndexOutOfRangeException("manipulation operation not available on current processor");
        }

        if (InvokedProc != null && !skip) {
            foreach (int proc in InvokedProc) {
                bool skip2 = false;
                if (ca != null) {
                    // optimization: no need to send the column addition if
                    // the source column is zero on processor 'proc'
                    if (!InvokesProcSrc.Contains(proc))
                        skip2 = true;
                }

                if (!skip2) {
                    List<ColOp> DeferredOp_proc;
                    if (OperationsPerProcessor.ContainsKey(proc)) {
                        DeferredOp_proc = OperationsPerProcessor[proc];
                    } else {
                        DeferredOp_proc = new List<ColOp>();
                        OperationsPerProcessor.Add(proc, DeferredOp_proc);
                    }
                    DeferredOp_proc.Add(op);
                }
            }
        }
    }

    // transmit to other processors
    // ============================

    SerialisationMessenger sms = new SerialisationMessenger(csMPI.Raw._COMM.WORLD);
    foreach (int proc in OperationsPerProcessor.Keys)
        sms.SetCommPath(proc);
    sms.CommitCommPaths();

    foreach (int proc in OperationsPerProcessor.Keys)
        sms.Transmit(proc, OperationsPerProcessor[proc].ToArray());

    int rcvp;
    ColOp[] rcv;
    while (sms.GetNext(out rcvp, out rcv)) {
        // operations from different processors commute (because they are
        // bound to the column partition), therefore it doesn't matter in
        // which order they are added
        DeferredColOpList.AddRange(rcv);
//#if DEBUG
        //{
        //    int Rank;
        //    csMPI.Raw.Comm_Rank(csMPI.Raw._COMM.WORLD, out Rank);
        //    //Console.WriteLine("P# " + Rank + ": " + rcv.Length + " operation(s) received from P# " + rcvp);
        //    foreach (var op in rcv) {
        //        if ((op.jCol >= m_Matrix.ColPart.i0 && op.jCol < (m_Matrix.ColPart.i0 + m_Matrix.ColPart.LocalLength)))
        //            throw new ApplicationException("internal error");
        //    }
        //}
//#endif
    }

    // execute operations
    // ==================

    int L = DeferredColOpList.Count;
    for (int l = 0; l < L; l++) {
        ColOp op = DeferredColOpList[l];
        int jlocal = op.jCol - j0Loc;

        if (op is ColAddition) {
            double alpha = ((ColAddition)op).alpha;
            int iSrc = ((ColAddition)op).iSrc;

            int[] col;
            if (iSrc >= j0Loc && iSrc < (j0Loc + LenLoc)) {
                // source column in local column range
                col = m_Matrix.ColToRowLocal[iSrc - j0Loc].ToArray();
            } else {
                // operation comes from another processor
                List<int> _col;
                if (m_Matrix.ColToRowExternal.TryGetValue(iSrc, out _col)) {
                    col = _col.ToArray();
                } else {
                    col = new int[0];
                }
                //col = m_Matrix.ColToRowExternal[iSrc].ToArray();
            }

            foreach (int irow in col) {
                m_Matrix[irow, op.jCol] += m_Matrix[irow, iSrc] * alpha;
            }
        } else {
            int[] col;
            if (jlocal >= 0 && jlocal < LenLoc) {
                // operation in local column range
                col = m_Matrix.ColToRowLocal[jlocal].ToArray();
            } else {
                // operation comes from another processor
                List<int> _col;
                if (m_Matrix.ColToRowExternal.TryGetValue(op.jCol, out _col)) {
                    col = _col.ToArray();
                } else {
                    col = new int[0];
                }
                //col = m_Matrix.ColToRowExternal[op.jCol].ToArray();
            }

            if (op is ColMul) {
                double alpha = ((ColMul)op).alpha;
                foreach (int irow in col) {
                    m_Matrix[irow, op.jCol] *= alpha;
                }
            } else if (op is ColClear) {
                foreach (int irow in col) {
                    m_Matrix[irow, op.jCol] = 0;
                }
            } else {
                throw new NotImplementedException();
            }
        }
    }

    // finish & return
    // ===============
    DeferredColOpList.Clear();
}
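
// Illustrative sketch (not part of this class): the serial semantics of the three
// deferred operations executed above, written out for a dense double[,] matrix for
// clarity. The 'ApplyCol*' names are hypothetical; the production code only visits
// the rows recorded in ColToRowLocal/ColToRowExternal instead of looping over all
// rows, and distributes the work over the column partition.
private static void ApplyColAddition(double[,] M, int jDst, int iSrc, double alpha) {
    // ColAddition: column jDst += alpha * column iSrc
    for (int irow = 0; irow < M.GetLength(0); irow++)
        M[irow, jDst] += M[irow, iSrc] * alpha;
}

private static void ApplyColMul(double[,] M, int jDst, double alpha) {
    // ColMul: column jDst *= alpha
    for (int irow = 0; irow < M.GetLength(0); irow++)
        M[irow, jDst] *= alpha;
}

private static void ApplyColClear(double[,] M, int jDst) {
    // ColClear: column jDst = 0
    for (int irow = 0; irow < M.GetLength(0); irow++)
        M[irow, jDst] = 0.0;
}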