/// <summary>
/// Schedules a column addition: once <see cref="CompleteColOperation"/> runs,
/// column <paramref name="iDst"/> is incremented by
/// <paramref name="alpha"/> times column <paramref name="iSrc"/>.
/// Nothing is modified in the matrix until then.
/// </summary>
/// <param name="iSrc">
/// index of the source column; in DEBUG builds it must either lie in the local
/// column range or be a key of the external column-processor map.
/// </param>
/// <param name="iDst">
/// index of the destination (accumulator) column; checked by <c>TestIndex</c>
/// against the column partition of the matrix.
/// </param>
/// <param name="alpha">scaling factor applied to the source column</param>
public void ColAdditionDeferred(int iSrc, int iDst, double alpha) {
    TestIndex(iDst, m_Matrix.Mtx.ColPartition);

#if DEBUG
    // the source column must be known on this processor, either locally or as external column
    if (!m_Matrix.ColPart.IsInLocalRange(iSrc) && !m_Matrix.ColProcessorsExternal.ContainsKey(iSrc)) {
        throw new IndexOutOfRangeException();
    }
#endif

    // only record the operation; it is carried out in 'CompleteColOperation'
    ColAddition addition = new ColAddition {
        alpha = alpha,
        iSrc = iSrc,
        jCol = iDst
    };
    DeferredColOpList.Add(addition);
}
/// <summary>
/// An MPI-collective call, which executes all column operations.
/// </summary>
/// <remarks>
/// Works in three phases:
/// <list type="number">
/// <item>every deferred operation is assigned to the processors listed for its column;</item>
/// <item>the per-processor operation lists are exchanged via a <c>SerialisationMessenger</c>
/// and the received operations are appended to the local list;</item>
/// <item>all operations (local and received) are applied to the matrix and the
/// deferred list is cleared.</item>
/// </list>
/// </remarks>
/// <exception cref="IndexOutOfRangeException">
/// an operation refers to a column which is neither in the local column range
/// nor registered as external column on this processor.
/// </exception>
/// <exception cref="NotImplementedException">
/// an operation is neither a column addition, a column multiplication nor a column clear.
/// </exception>
public void CompleteColOperation() {
    int j0Loc = (int)m_Matrix.ColPart.i0;
    int LenLoc = m_Matrix.ColPart.LocalLength;

    // sort operations according to processor
    // ======================================

    // keys: MPI processor rank p
    // values: list of operations to execute on p
    SortedDictionary<int, List<ColOp>> OperationsPerProcessor = new SortedDictionary<int, List<ColOp>>();

    for (int i = 0; i < DeferredColOpList.Count; i++) {
        ColOp op = DeferredColOpList[i];
        ColAddition ca = op as ColAddition;
        List<int> InvokesProcSrc = null;
        bool skip = false;

        if (ca != null) {
            // we have a column addition - this requires some special treatments
            // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
            // problem 1: if
            //   * the destination column (aka. accumulator column), i.e. column no. 'op.jCol',
            //     is zero,
            //   * and the source column is nonzero,
            // then the command must be sent not only to the processors which contain
            // nonzeros in the destination column, but also to those of the source column.
            //
            // problem 2: for subsequent operations, a column which has originally been
            // zero may now contain nonzero elements. Therefore, the processor set of the
            // source column is merged into the processor set of the destination column.

            if (m_Matrix.ColPart.IsInLocalRange(ca.iSrc)) {
                InvokesProcSrc = m_Matrix.ColProcessors[ca.iSrc - j0Loc];
            } else if (!m_Matrix.ColProcessorsExternal.TryGetValue(ca.iSrc, out InvokesProcSrc)) {
                throw new IndexOutOfRangeException("manipulation operation not available on current processor");
            }

            if (InvokesProcSrc != null) {
                List<int> dstProcs = m_Matrix.ColProcessors[op.jCol - j0Loc];
                if (dstProcs == null) {
                    // store a copy, not the source list itself: otherwise both columns would
                    // share one list instance, and later additions to the destination's
                    // processor set would silently alter the source's set as well.
                    m_Matrix.ColProcessors[op.jCol - j0Loc] = new List<int>(InvokesProcSrc);
                } else {
                    foreach (int ps in InvokesProcSrc) {
                        if (!dstProcs.Contains(ps)) {
                            dstProcs.Add(ps);
                        }
                    }
                }
            } else {
                // optimization: source column is zero (on other processors) -> nothing to do
                skip = true;
            }
        }

        // processors which have to execute 'op'
        // (looked up even if 'skip' is set, so that an unavailable column is still reported)
        List<int> InvokedProc;
        if (m_Matrix.ColPart.IsInLocalRange(op.jCol)) {
            InvokedProc = m_Matrix.ColProcessors[op.jCol - j0Loc];
        } else if (!m_Matrix.ColProcessorsExternal.TryGetValue(op.jCol, out InvokedProc)) {
            throw new IndexOutOfRangeException("manipulation operation not available on current processor");
        }

        if (InvokedProc == null || skip) {
            continue;
        }

        foreach (int proc in InvokedProc) {
            // optimization: don't need to send a column addition to 'proc'
            // if the source column is zero there
            if (ca != null && !InvokesProcSrc.Contains(proc)) {
                continue;
            }

            List<ColOp> DeferredOp_proc;
            if (!OperationsPerProcessor.TryGetValue(proc, out DeferredOp_proc)) {
                DeferredOp_proc = new List<ColOp>();
                OperationsPerProcessor.Add(proc, DeferredOp_proc);
            }
            DeferredOp_proc.Add(op);
        }
    }

    // transmit to other processors
    // ============================

    SerialisationMessenger sms = new SerialisationMessenger(csMPI.Raw._COMM.WORLD);
    foreach (int proc in OperationsPerProcessor.Keys) {
        sms.SetCommPath(proc);
    }
    sms.CommitCommPaths();

    foreach (int proc in OperationsPerProcessor.Keys) {
        sms.Transmit(proc, OperationsPerProcessor[proc].ToArray());
    }

    int rcvp;
    ColOp[] rcv;
    while (sms.GetNext(out rcvp, out rcv)) {
        // operations from different processors commute (because they are bound to the
        // column partition); therefore, it doesn't matter in which order they are added
        DeferredColOpList.AddRange(rcv);
    }

    // execute operations
    // ==================

    int opCount = DeferredColOpList.Count;
    for (int k = 0; k < opCount; k++) {
        ColOp op = DeferredColOpList[k];
        ColAddition add = op as ColAddition;

        if (add != null) {
            // rows are taken from the *source* column: only there the addition contributes
            int[] srcRows = GetColRowIndicesSnapshot(add.iSrc, j0Loc, LenLoc);
            foreach (int irow in srcRows) {
                m_Matrix[irow, op.jCol] += m_Matrix[irow, add.iSrc] * add.alpha;
            }
        } else {
            int[] rows = GetColRowIndicesSnapshot(op.jCol, j0Loc, LenLoc);

            ColMul mul = op as ColMul;
            if (mul != null) {
                double alpha = mul.alpha;
                foreach (int irow in rows) {
                    m_Matrix[irow, op.jCol] *= alpha;
                }
            } else if (op is ColClear) {
                foreach (int irow in rows) {
                    m_Matrix[irow, op.jCol] = 0;
                }
            } else {
                throw new NotImplementedException();
            }
        }
    }

    // finish & return
    // ===============
    DeferredColOpList.Clear();
}

/// <summary>
/// Returns a copy of the row indices recorded for column <paramref name="jCol"/>:
/// taken from the local column-to-row table if the column is in the local range,
/// otherwise from the external table; an empty array if the column is unknown there.
/// A copy is returned so that the caller can write to the matrix while iterating.
/// </summary>
private int[] GetColRowIndicesSnapshot(int jCol, int j0Loc, int LenLoc) {
    if (jCol >= j0Loc && jCol < (j0Loc + LenLoc)) {
        // operation in local column range
        return m_Matrix.ColToRowLocal[jCol - j0Loc].ToArray();
    }

    // operation comes from other processor
    List<int> externalRows;
    if (m_Matrix.ColToRowExternal.TryGetValue(jCol, out externalRows)) {
        return externalRows.ToArray();
    }
    return new int[0];
}