        /// <summary>
        /// Builds a matrix with the partitioning of <paramref name="A"/> that holds a 1 wherever the
        /// visited blocks of <paramref name="A"/> have a non-zero entry, and a 0 elsewhere.
        /// </summary>
        /// <param name="A">Matrix whose occupancy pattern is extracted; it is only read.</param>
        /// <returns>The newly created pattern matrix.</returns>
        /// <remarks>
        /// Only blocks pairing a row block from the row block range with a column block from the
        /// column block range are visited; both ranges come from a <see cref="Partitioning"/> built
        /// on the respective <c>LocalNoOfBlocks</c>.
        /// </remarks>
        public static BlockMsrMatrix CreateShapeOfOnes(BlockMsrMatrix A) {
            var rowmap = A._RowPartitioning;
            var colmap = A._ColPartitioning;

            BlockMsrMatrix B = new BlockMsrMatrix(rowmap, colmap);

            // Rows and columns use separate block ranges, so the column loop is also correct
            // when the column blocking differs from the row blocking.
            Partitioning rowpart = new Partitioning(rowmap.LocalNoOfBlocks);
            Partitioning colpart = new Partitioning(colmap.LocalNoOfBlocks);

            for (int iBlock = rowpart.i0; iBlock < rowpart.iE; iBlock++) {
                // row offset and length depend on the row block only
                int i0 = rowmap.GetBlockI0(iBlock);
                int iL = rowmap.GetBlockLen(iBlock);

                for (int jBlock = colpart.i0; jBlock < colpart.iE; jBlock++) {
                    int j0 = colmap.GetBlockI0(jBlock);
                    int jL = colmap.GetBlockLen(jBlock);

                    var subM = MultidimensionalArray.Create(iL, jL);
                    A.ReadBlock(i0, j0, subM);

                    // collapse every non-zero entry to 1, keep zeros
                    subM.ApplyAll(entry => entry != 0.0 ? 1 : 0);
                    B.AccBlock(i0, j0, 1.0, subM);
                }
            }

            // sanity check: the pattern matrix must contain zeros and ones only
            double min, max;
            int minc, minr, maxc, maxr;

            B.GetMinimumAndMaximum_MPILocal(out min, out minr, out minc, out max, out maxr, out maxc);
            Debug.Assert(min == 0);
            Debug.Assert(max == 1);

            return B;
        }
        /// <summary>
        /// Sets every entry of the visited blocks of <paramref name="A"/> to <paramref name="val"/>.
        /// </summary>
        /// <param name="A">Matrix that is modified in place.</param>
        /// <param name="val">Value written to every entry of the visited blocks.</param>
        /// <remarks>
        /// Only blocks pairing a row block from the row block range with a column block from the
        /// column block range are visited; both ranges come from a <see cref="Partitioning"/> built
        /// on the respective <c>LocalNoOfBlocks</c>.
        /// </remarks>
        public static void SetAll(this BlockMsrMatrix A, double val) {
            var rowmap = A._RowPartitioning;
            var colmap = A._ColPartitioning;

            // Rows and columns use separate block ranges, so the column loop is also correct
            // when the column blocking differs from the row blocking.
            Partitioning rowpart = new Partitioning(rowmap.LocalNoOfBlocks);
            Partitioning colpart = new Partitioning(colmap.LocalNoOfBlocks);

            for (int iBlock = rowpart.i0; iBlock < rowpart.iE; iBlock++) {
                // row offset and length depend on the row block only
                int i0 = rowmap.GetBlockI0(iBlock);
                int iL = rowmap.GetBlockLen(iBlock);

                for (int jBlock = colpart.i0; jBlock < colpart.iE; jBlock++) {
                    int j0 = colmap.GetBlockI0(jBlock);
                    int jL = colmap.GetBlockLen(jBlock);

                    // a block in which every entry equals 'val'
                    var subM = MultidimensionalArray.Create(iL, jL);
                    subM.ApplyAll(entry => val);

                    // Last argument 0.0: the block's previous content is meant to be discarded,
                    // so the entries end up equal to 'val' rather than 'old + val'.
                    // NOTE(review): relies on the trailing AccBlock argument pre-scaling the
                    // existing entries - confirm against the BlockMsrMatrix documentation.
                    A.AccBlock(i0, j0, 1.0, subM, 0.0);
                }
            }

            // sanity check: the matrix must now be constant and equal to 'val'
            double min, max;
            int minc, minr, maxc, maxr;

            A.GetMinimumAndMaximum_MPILocal(out min, out minr, out minc, out max, out maxr, out maxc);
            Debug.Assert(min == max);
            Debug.Assert(min == val);
        }
        /// <summary>
        /// Checks that the diagonal blocks extracted through a cell-wise <see cref="BlockMask"/>
        /// coincide with the comparison matrices delivered by <c>Utils.GetCellCompMatrix</c>,
        /// once for a sample cell with one species and once for a sample cell with two species.
        /// </summary>
        /// <param name="SType">Kind of sub-selection exercised by this test case.</param>
        public static void CellwiseSubSelection(
            [Values(SelectionType.all_combined, SelectionType.degrees, SelectionType.species, SelectionType.variables)] SelectionType SType
            ) {
            Console.WriteLine("SubSelection({0})", SType);

            //Arrange --- extracts entries of matrix according to hardcoded selection
            int DGdegree = 2;
            int GridResolution = 4;
            var mgo = Utils.CreateTestMGOperator(XDGusage.all, DGdegree, MatrixShape.full_var_spec, GridResolution);
            int sampleCellA = Utils.GetIdxOfFirstBlockWith(mgo.Mapping, false); //1 species
            int sampleCellB = Utils.GetIdxOfFirstBlockWith(mgo.Mapping, true);  //2 species
            BlockMsrMatrix compA = Utils.GetCellCompMatrix(SType, mgo, sampleCellA);
            BlockMsrMatrix compB = Utils.GetCellCompMatrix(SType, mgo, sampleCellB);

            // read the full diagonal block of the two-species sample cell
            // (the block itself is not compared below)
            int iBlock = sampleCellB + mgo.Mapping.AggGrid.CellPartitioning.i0;
            int i0 = mgo.Mapping.GetBlockI0(iBlock);
            int blockLen = mgo.Mapping.GetBlockLen(iBlock);
            var block = MultidimensionalArray.Create(blockLen, blockLen);
            mgo.OperatorMatrix.ReadBlock(i0, i0, block);

            //Arrange --- setup masking, which correspond to hardcoded
            SubBlockSelector sbsA = new SubBlockSelector(mgo.Mapping);
            sbsA.GetDefaultSelection(SType, sampleCellA); // single spec
            BlockMask maskA = new BlockMask(sbsA, null);

            SubBlockSelector sbsB = new SubBlockSelector(mgo.Mapping);
            sbsB.GetDefaultSelection(SType, sampleCellB); // double spec
            BlockMask maskB = new BlockMask(sbsB, null);

            //Act --- subblock extraction
            var blocksA = maskA.GetDiagonalBlocks(mgo.OperatorMatrix, false, false);
            var blocksB = maskB.GetDiagonalBlocks(mgo.OperatorMatrix, false, false);

            //Assert --- exactly one block per mask, with the size of the comparison matrix
            Assert.IsTrue(blocksA.Length == 1);
            Assert.IsTrue(blocksB.Length == 1);
            Assert.IsTrue(compA.RowPartitioning.LocalLength == blocksA[0].GetLength(0));
            Assert.IsTrue(compB.RowPartitioning.LocalLength == blocksB[0].GetLength(0));

            //Assert --- compare masking of single spec cell
            // The comparison matrix must be non-trivial, otherwise the difference below proves
            // nothing; checked with Assert so that it is also evaluated in Release builds.
            Assert.IsTrue(compA.InfNorm() != 0.0, "comparison matrix of single species cell is zero");
            compA.AccBlock(0, 0, -1.0, blocksA[0]);
            Assert.IsTrue(compA.InfNorm() == 0.0);

            //Assert --- compare masking of double spec cell
            Assert.IsTrue(compB.InfNorm() != 0.0, "comparison matrix of double species cell is zero");
            compB.AccBlock(0, 0, -1.0, blocksB[0]);
            Assert.IsTrue(compB.InfNorm() == 0.0, String.Format("proc{0}: not fulfilled at block {1}", mgo.Mapping.MpiRank, sampleCellB));
        }
        /// <summary>
        /// Prolongation/Injection operator to finer grid level.
        /// </summary>
        /// <param name="finerLevel">
        /// Mapping of the finer level. Its aggregation grid must be the parent grid of this level's
        /// aggregation grid, and it must carry the same number of aggregation bases as this mapping.
        /// </param>
        /// <returns>
        /// NOTE(review): no return statement is visible in this view; presumably the assembled matrix
        /// <c>PrlgMtx</c> (created from <paramref name="finerLevel"/> and this mapping) is returned - confirm.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="finerLevel"/> is not the next finer level, has a different number of
        /// aggregation bases, has a lower DG degree for some variable, or disagrees with this level
        /// on whether a variable uses an <see cref="XdgAggregationBasis"/>.
        /// </exception>
        public BlockMsrMatrix GetProlongationOperator(MultigridMapping finerLevel)
            using (new FuncTrace()) {
                // Argument checking
                // =================

                // only the direct parent grid of this level is accepted
                if (!object.ReferenceEquals(finerLevel.AggGrid, this.AggGrid.ParentGrid))
                    throw new ArgumentException("Only prolongation/injection to next level is supported.");
                // both levels must have the same number of variables
                // NOTE(review): this exception carries an empty message.
                if (finerLevel.AggBasis.Length != this.AggBasis.Length)
                    throw new ArgumentException("");
                int NoOfVar = this.AggBasis.Length;

                // per-variable data: injection operators, bases, XDG flag, DG degrees (this level / finer level)
                MultidimensionalArray[][] InjOp = new MultidimensionalArray[NoOfVar][];
                AggregationGridBasis[]    B     = new AggregationGridBasis[NoOfVar];
                bool[] useX        = new bool[NoOfVar];
                int[]  DegreeS     = new int[NoOfVar];
                int[]  DegreeSfine = new int[NoOfVar];

                for (int iVar = 0; iVar < NoOfVar; iVar++)
                    InjOp[iVar]       = this.AggBasis[iVar].InjectionOperator;
                    B[iVar]           = AggBasis[iVar];
                    DegreeS[iVar]     = this.DgDegree[iVar];
                    DegreeSfine[iVar] = finerLevel.DgDegree[iVar];
                    if (DegreeSfine[iVar] < DegreeS[iVar])
                        throw new ArgumentException("Lower DG degree on finer grid is not supported by this method ");
                    // a variable must be XDG on both levels or on neither
                    useX[iVar] = this.AggBasis[iVar] is XdgAggregationBasis;
                    if (useX[iVar] != (finerLevel.AggBasis[iVar] is XdgAggregationBasis))
                        throw new ArgumentException("XDG / DG mismatch between this and finer level for " + iVar + "-th variable.");

                // XDG data; each XDG variable overwrites these, so after the loop they refer
                // to the last variable with an XDG basis (they stay null if there is none)
                XdgAggregationBasis XB  = null;
                XdgAggregationBasis XBf = null;
                int[][,] spcIdxMap = null;
                SpeciesId[][] spc = null;
                //SpeciesId[][] spcf = null;
                for (int iVar = 0; iVar < NoOfVar; iVar++)
                    if (useX[iVar])
                        XB        = (XdgAggregationBasis)(B[iVar]);
                        XBf       = (XdgAggregationBasis)(finerLevel.AggBasis[iVar]);
                        spcIdxMap = XB.SpeciesIndexMapping;
                        spc       = XB.AggCellsSpecies;
                        //spcf = XBf.AggCellsSpecies;

                // GetNp() results of the first basis on each level; indexed by DG degree below
                int[] Np      = this.AggBasis[0].GetNp();
                int[] Np_fine = finerLevel.AggBasis[0].GetNp();

                // create matrix
                // =============

                // init retval
                var PrlgMtx = new BlockMsrMatrix(finerLevel, this);

                // for each coarse cell: the fine cells it is composed of
                int[][] C2F     = this.AggGrid.jCellCoarse2jCellFine;
                int     JCoarse = this.AggGrid.iLogicalCells.NoOfLocalUpdatedCells;
                //Debug.Assert((JCoarse == C2F.Length) || ());
                for (int jc = 0; jc < JCoarse; jc++)  // loop over coarse cells...
                    int[] AggCell = C2F[jc];
                    int   I       = AggCell.Length; // number of fine cells in coarse cell jc

                    for (int iVar = 0; iVar < NoOfVar; iVar++)
                        int DgDeg  = DegreeS[iVar];
                        int DgDegF = DegreeSfine[iVar];
                        MultidimensionalArray Inj_iVar_jc = InjOp[iVar][jc];
                        Debug.Assert(Inj_iVar_jc.GetLength(0) == I);

                        // the XDG branch is taken only for XDG variables in cells with a species index map
                        bool useX_iVar = false;
                        if (useX[iVar])
                            if (spcIdxMap[jc] != null)
                                useX_iVar = true;

                        if (useX_iVar)
                            //throw new NotImplementedException("todo");

                            int NoOfSpc = XB.GetNoOfSpecies(jc);
                            int Np_col  = Np[DgDeg];
                            Debug.Assert(Np_col * NoOfSpc == B[iVar].GetLength(jc, DgDeg));

                            for (int iSpc = 0; iSpc < NoOfSpc; iSpc++)  // loop over species
                                SpeciesId spc_jc_i = spc[jc][iSpc];

                                // first column index for species iSpc of variable iVar in coarse cell jc
                                int Col0 = this.GlobalUniqueIndex(iVar, jc, Np_col * iSpc);

                                for (int i = 0; i < I; i++)   // loop over finer cells
                                    int jf = AggCell[i];

                                    // index of the same species in fine cell jf; negative values are tested next
                                    int iSpc_Row = XBf.GetSpeciesIndex(jf, spc_jc_i);
                                    if (iSpc_Row < 0)
                                        // nothing to do
                                        // NOTE(review): the statement guarded by this test is not visible in this view;
                                        // presumably the fine cell is skipped - confirm.

                                    int Np_row = Np_fine[DgDegF];
                                    Debug.Assert(Np_row * XBf.GetNoOfSpecies(jf) == finerLevel.AggBasis[iVar].GetLength(jf, DgDegF));

                                    // first row index for species iSpc_Row of variable iVar in fine cell jf
                                    int Row0 = finerLevel.GlobalUniqueIndex(iVar, jf, Np_row * iSpc_Row);

                                    //if(Row0 <= 12 &&  12 < Row0 + Np_row) {
                                    //    if(Col0 <= 3 && 3 < Col0 + Np_col) {
                                    //        Debugger.Break();
                                    //    }
                                    // add the leading Np_row x Np_col part of the injection operator of fine cell i;
                                    // NOTE(review): the end index 'i - 1' lies below the start index 'i' in the first
                                    // dimension - presumably the ExtractSubArrayShallow convention for dropping that
                                    // dimension; confirm.
                                    PrlgMtx.AccBlock(Row0, Col0, 1.0, Inj_iVar_jc.ExtractSubArrayShallow(new[] { i, 0, 0 }, new[] { i - 1, Np_row - 1, Np_col - 1 }));
                            // ++++++++++++++++++
                            // standard DG branch
                            // ++++++++++++++++++
                            // NOTE(review): the keyword separating this branch from the XDG branch above
                            // is not visible in this view.

                            int Np_col = Np[DgDeg];
                            Debug.Assert(Np_col == B[iVar].GetLength(jc, DgDeg));
                            int Col0 = this.GlobalUniqueIndex(iVar, jc, 0);

                            for (int i = 0; i < I; i++)  // loop over finer cells
                                int jf     = AggCell[i];
                                int Np_row = Np_fine[DgDegF];
                                Debug.Assert(Np_row == finerLevel.AggBasis[iVar].GetLength(jf, DgDegF));

                                int Row0 = finerLevel.GlobalUniqueIndex(iVar, jf, 0);

                                // same sub-array extraction as in the XDG branch
                                PrlgMtx.AccBlock(Row0, Col0, 1.0, Inj_iVar_jc.ExtractSubArrayShallow(new[] { i, 0, 0 }, new[] { i - 1, Np_row - 1, Np_col - 1 }));
                                //if(Row0 <= 12 &&  12 < Row0 + Np_row) {
                                //        if(Col0 <= 3 && 3 < Col0 + Np_col) {
                                //            Debugger.Break();
                                //        }
                                //    }

                // return
                // ======

        /// <summary>
        /// Transfers a set of sub-blocks between the sparse matrix <paramref name="MtxSp"/> and the
        /// dense matrix <paramref name="MtxFl"/>. The sub-blocks are given by pairs of entries of
        /// <paramref name="_i0s"/> and <paramref name="_Lns"/>; in the dense matrix they are packed
        /// contiguously, in the order of these arrays.
        /// </summary>
        /// <param name="_i0s">Start indices of the sub-blocks in <paramref name="MtxSp"/> (used for rows and columns).</param>
        /// <param name="_Lns">Lengths of the sub-blocks; must have as many entries as <paramref name="_i0s"/>.</param>
        /// <param name="Sp2Full">
        /// Selects the transfer direction: if true, blocks are read from <paramref name="MtxSp"/> into
        /// <paramref name="MtxFl"/>. NOTE(review): presumably false writes <paramref name="MtxFl"/> into
        /// <paramref name="MtxSp"/>; the branch keywords are not visible in this view - confirm.
        /// </param>
        /// <param name="MtxSp">Sparse matrix; asserted to be non-null when <paramref name="Sp2Full"/> is false.</param>
        /// <param name="MtxFl">
        /// Dense matrix; a new one with the summed length of all sub-blocks per dimension is allocated
        /// if it is null or has a different number of rows.
        /// </param>
        private static void ExtractBlock(
            int[] _i0s,
            int[] _Lns,
            bool Sp2Full,
            BlockMsrMatrix MtxSp, ref MultidimensionalArray MtxFl) //
            Debug.Assert(_i0s.Length == _Lns.Length);
            int E = _i0s.Length; // number of sub-blocks per dimension

            int NN = _Lns.Sum(); // edge length of the dense matrix

            // (re-)allocate the dense matrix if it is missing or does not fit
            if (MtxFl == null || MtxFl.NoOfRows != NN)
                Debug.Assert(Sp2Full == true);
                MtxFl = MultidimensionalArray.Create(NN, NN);
                // NOTE(review): the statement guarded by the following test is not visible in this view.
                if (Sp2Full)

            if (!Sp2Full)
                Debug.Assert(MtxSp != null);

            int i0Rowloc = 0; // row offset of the current sub-block inside MtxFl

            for (int eRow = 0; eRow < E; eRow++)   // loop over variables in configuration
                int i0Row = _i0s[eRow];
                int NRow  = _Lns[eRow];

                int i0Colloc = 0; // column offset of the current sub-block inside MtxFl
                for (int eCol = 0; eCol < E; eCol++)   // loop over variables in configuration
                    int i0Col = _i0s[eCol];
                    int NCol  = _Lns[eCol];

                    // view onto the current sub-block of MtxFl; if the sub-block covers all of
                    // MtxFl, the matrix itself is assigned
                    // NOTE(review): the keyword separating the two assignments is not visible in this view.
                    MultidimensionalArray MtxFl_blk;
                    if (i0Rowloc == 0 && NRow == MtxFl.GetLength(0) && i0Colloc == 0 && NCol == MtxFl.GetLength(1))
                        MtxFl_blk = MtxFl;
                        MtxFl_blk = MtxFl.ExtractSubArrayShallow(new[] { i0Rowloc, i0Colloc }, new[] { i0Rowloc + NRow - 1, i0Colloc + NCol - 1 });

                    // sparse -> dense: read the block from MtxSp;
                    // the AccBlock call below passes 0.0 as last argument
                    // NOTE(review): presumably that call is the dense -> sparse direction and overwrites
                    // the block in MtxSp; branch keywords are not visible in this view - confirm.
                    if (Sp2Full)
                        if (MtxSp != null)
                            MtxSp.ReadBlock(i0Row, i0Col, MtxFl_blk);
                        MtxSp.AccBlock(i0Row, i0Col, 1.0, MtxFl_blk, 0.0);
                    // entry-wise consistency check: dense entry equals the sparse entry, or the
                    // identity-matrix entry if no sparse matrix is given
                    for (int n_row = 0; n_row < NRow; n_row++)     // row loop...
                        for (int n_col = 0; n_col < NCol; n_col++) // column loop...
                            Debug.Assert(MtxFl[n_row + i0Rowloc, n_col + i0Colloc] == ((MtxSp != null) ? (MtxSp[n_row + i0Row, n_col + i0Col]) : (n_col == n_row ? 1.0 : 0.0)));
                    i0Colloc += NCol;
                i0Rowloc += NRow;
        /// <summary>
        /// Transfers the sub-blocks belonging to the variables of <paramref name="conf"/> in cell
        /// <paramref name="jCell"/> between the sparse matrix <paramref name="MtxSp"/> and the dense
        /// matrix <paramref name="MtxFl"/>. Sub-block lengths are obtained from
        /// <paramref name="basis"/> for the cell and the DG degree of the respective variable.
        /// </summary>
        /// <param name="jCell">Cell index passed to <c>basis.GetLength</c>.</param>
        /// <param name="basis">Basis which delivers the sub-block lengths.</param>
        /// <param name="Degrees">DG degrees, indexed by variable index.</param>
        /// <param name="conf">Configuration whose <c>VarIndex</c> lists the variables to process.</param>
        /// <param name="E">Number of variables that are looped over.</param>
        /// <param name="_i0s">Start indices of the sub-blocks in <paramref name="MtxSp"/> (used for rows and columns).</param>
        /// <param name="Sp2Full">
        /// Selects the transfer direction: if true, blocks are read from <paramref name="MtxSp"/> into
        /// <paramref name="MtxFl"/>. NOTE(review): presumably false writes <paramref name="MtxFl"/> into
        /// <paramref name="MtxSp"/>; the branch keywords are not visible in this view - confirm.
        /// </param>
        /// <param name="MtxSp">Sparse matrix; asserted to be non-null when <paramref name="Sp2Full"/> is false.</param>
        /// <param name="MtxFl">
        /// Dense matrix; a new one with the summed length of all sub-blocks per dimension is allocated
        /// if it is null or has a different number of rows.
        /// </param>
        private static void ExtractBlock(int jCell,
                                         AggregationGridBasis basis, int[] Degrees,
                                         ChangeOfBasisConfig conf,
                                         int E, int[] _i0s, bool Sp2Full,
                                         BlockMsrMatrix MtxSp, ref MultidimensionalArray MtxFl)
            // edge length of the dense matrix: summed sub-block lengths of all configured variables
            int NN = conf.VarIndex.Sum(iVar => basis.GetLength(jCell, Degrees[iVar]));

            // (re-)allocate the dense matrix if it is missing or does not fit
            if (MtxFl == null || MtxFl.NoOfRows != NN)
                Debug.Assert(Sp2Full == true);
                MtxFl = MultidimensionalArray.Create(NN, NN);
                // NOTE(review): the statement guarded by the following test is not visible in this view.
                if (Sp2Full)

            if (!Sp2Full)
                Debug.Assert(MtxSp != null);

            int i0Rowloc = 0; // row offset of the current sub-block inside MtxFl

            for (int eRow = 0; eRow < E; eRow++)   // loop over variables in configuration
                int i0Row   = _i0s[eRow];
                int iVarRow = conf.VarIndex[eRow];

                int NRow = basis.GetLength(jCell, Degrees[iVarRow]);

                int i0Colloc = 0; // column offset of the current sub-block inside MtxFl
                for (int eCol = 0; eCol < E; eCol++)   // loop over variables in configuration

                    int i0Col   = _i0s[eCol];
                    int iVarCol = conf.VarIndex[eCol];

                    int NCol = basis.GetLength(jCell, Degrees[iVarCol]);

                    // view onto the current sub-block of MtxFl; if the sub-block covers all of
                    // MtxFl, the matrix itself is assigned
                    // NOTE(review): the keyword separating the two assignments is not visible in this view.
                    MultidimensionalArray MtxFl_blk;
                    if (i0Rowloc == 0 && NRow == MtxFl.GetLength(0) && i0Colloc == 0 && NCol == MtxFl.GetLength(1))
                        MtxFl_blk = MtxFl;
                        MtxFl_blk = MtxFl.ExtractSubArrayShallow(new[] { i0Rowloc, i0Colloc }, new[] { i0Rowloc + NRow - 1, i0Colloc + NCol - 1 });

                    // NOTE(review): the following lines look like the body of a block comment (an
                    // entry-wise variant of the transfer) whose delimiters are not visible in this view.
                     * for(int n_row = 0; n_row < NRow; n_row++) { // row loop...
                     *  for(int n_col = 0; n_col < NCol; n_col++) { // column loop...
                     *      if(Sp2Full) {
                     *          // copy from sparse to full
                     *          MtxFl[n_row + i0Rowloc, n_col + i0Colloc] = (MtxSp != null) ? ( MtxSp[n_row + i0Row, n_col + i0Col]) : (n_col == n_row ? 1.0 : 0.0);
                     *      } else {
                     *          // the other way around.
                     *          MtxSp[n_row + i0Row, n_col + i0Col] = MtxFl[n_row + i0Rowloc, n_col + i0Colloc];
                     *      }
                     *  }
                     * }

                    // sparse -> dense: read the block from MtxSp;
                    // the AccBlock call below passes 0.0 as last argument
                    // NOTE(review): presumably that call is the dense -> sparse direction and overwrites
                    // the block in MtxSp; branch keywords are not visible in this view - confirm.
                    if (Sp2Full)
                        if (MtxSp != null)
                            MtxSp.ReadBlock(i0Row, i0Col, MtxFl_blk);
                        Debug.Assert(MtxSp != null);
                        //for (int n_row = 0; n_row < NRow; n_row++) { // row loop...
                        //    for (int n_col = 0; n_col < NCol; n_col++) { // column loop...
                        //        Debug.Assert(MtxSp[n_row + i0Row, n_col + i0Col] == 0.0);
                        //    }
                        MtxSp.AccBlock(i0Row, i0Col, 1.0, MtxFl_blk, 0.0);
                    // entry-wise consistency check: dense entry equals the sparse entry, or the
                    // identity-matrix entry if no sparse matrix is given
                    for (int n_row = 0; n_row < NRow; n_row++)     // row loop...
                        for (int n_col = 0; n_col < NCol; n_col++) // column loop...
                            Debug.Assert(MtxFl[n_row + i0Rowloc, n_col + i0Colloc] == ((MtxSp != null) ? (MtxSp[n_row + i0Row, n_col + i0Col]) : (n_col == n_row ? 1.0 : 0.0)));

                    i0Colloc += NCol;
                i0Rowloc += NRow;
        /// <summary>
        /// Permutation matrix from an old to a new partitioning.
        /// </summary>
        /// <param name="RowPart">Row partitioning, i.e. new data partitioning.</param>
        /// <param name="ColPart">Column partitioning, i.e. old data partitioning.</param>
        /// <param name="tau">
        /// Permutation from new to old Indices.
        /// </param>
        /// <returns>
        /// A matrix which holds, for every local block row, exactly one block: its block column is
        /// the image of the block row under <paramref name="tau"/>, and its diagonal is 1 on all
        /// sub-blocks of the row block's type.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// Number of blocks, total length or block length of the arguments do not match.
        /// </exception>
        /// <exception cref="NotSupportedException">
        /// One of the partitionings has blocks of varying size.
        /// </exception>
        static BlockMsrMatrix GetRowPermutationMatrix(IBlockPartitioning RowPart, IBlockPartitioning ColPart, Permutation tau) {
            BlockMsrMatrix P = new BlockMsrMatrix(RowPart, ColPart);

            //if (RowPart.LocalNoOfBlocks != tau.LocalLength)
            //    throw new ArgumentException();
            if (RowPart.TotalNoOfBlocks != tau.TotalLength)
                throw new ArgumentException("Mismatch between total number of blocks in row partitioning (" + RowPart.TotalNoOfBlocks + ") and total length of permutation (" + tau.TotalLength + ").");
            if (!RowPart.AllBlockSizesEqual)
                throw new NotSupportedException("unable to perform redistribution for variable size blocking (unable to compute offsets for variable size blocking).");
            if (!ColPart.AllBlockSizesEqual)
                throw new NotSupportedException("unable to perform redistribution for variable size blocking (unable to compute offsets for variable size blocking).");
            if (RowPart.TotalLength != ColPart.TotalLength)
                throw new ArgumentException("Mismatch between total length of row partitioning (" + RowPart.TotalLength + ") and column partitioning (" + ColPart.TotalLength + ").");

            // the (uniform) block length, taken from the first local block
            int IBlock = RowPart.GetBlockLen(RowPart.FirstBlock);

            if (ColPart.GetBlockLen(ColPart.FirstBlock) != IBlock)
                throw new ArgumentException("Mismatch between block length of row partitioning (" + IBlock + ") and column partitioning (" + ColPart.GetBlockLen(ColPart.FirstBlock) + ").");

            int J = RowPart.LocalNoOfBlocks;
            long FB = RowPart.FirstBlock;

            // evaluate the permutation for all local block indices at once
            long[] LocalBlockIdxS = J.ForLoop(i => i + FB);
            long[] TargetBlockIdxS = new long[LocalBlockIdxS.Length];
            tau.EvaluatePermutation(LocalBlockIdxS, TargetBlockIdxS);

            // scratch block, reused for every block row; only its diagonal is ever written
            MultidimensionalArray TempBlock = MultidimensionalArray.Create(IBlock, IBlock);

            for (int jSrc_Loc = 0; jSrc_Loc < J; jSrc_Loc++) { // loop over cells resp. local block-indices
                int jSrcGlob = jSrc_Loc + RowPart.FirstBlock;  // block-row index
                int jDstGlob = (int)TargetBlockIdxS[jSrc_Loc]; // block-column index

                int i0 = RowPart.GetBlockI0(jSrcGlob);
                int BT = RowPart.GetBlockType(jSrcGlob);
                int[] _i0 = RowPart.GetSubblk_i0(BT);
                int[] Len = RowPart.GetSubblkLen(BT);
                Debug.Assert(IBlock == RowPart.GetBlockLen(jSrcGlob));

                int j0 = IBlock * jDstGlob; // this would not work for variable size blocking
                if (ColPart.IsLocalBlock(jDstGlob)) {
                    // column block corresponds to some cell:
                    // row and column block must then agree in length, offset and sub-blocking
                    Debug.Assert(IBlock == ColPart.GetBlockLen(jDstGlob));
                    Debug.Assert(j0 == ColPart.GetBlockI0(jDstGlob));
                    int CBT = ColPart.GetBlockType(jDstGlob);
                    Debug.Assert(ArrayTools.AreEqual(_i0, ColPart.GetSubblk_i0(CBT)));
                    Debug.Assert(ArrayTools.AreEqual(Len, ColPart.GetSubblkLen(CBT)));
                }
                Debug.Assert(_i0.Length == Len.Length);
                int K = _i0.Length;

                // reset the diagonal left over from the previous block row ...
                for (int i = 0; i < IBlock; i++)
                    TempBlock[i, i] = 0.0;

                // ... and set it to 1 within each sub-block of this block type
                for (int k = 0; k < K; k++) {
                    int A = _i0[k];
                    int E = Len[k] + A;
                    for (int i = A; i < E; i++)
                        TempBlock[i, i] = 1;
                }

                P.AccBlock(i0, j0, 1.0, TempBlock);
            }

            return P;
        }

        /// <summary>
        /// Copies blocks of <paramref name="source"/> into <paramref name="target"/>. Rows are taken
        /// from the index structure of <paramref name="mask"/> (<c>m_StructuredNi0</c>), columns from
        /// the index structure of this object (<c>StructuredNi0</c>); both are nested by
        /// cell, variable, species and mode.
        /// </summary>
        /// <param name="target">Matrix that receives the blocks, at the <c>Si0</c> offsets of the index entries.</param>
        /// <param name="source">Matrix the blocks are read from.</param>
        /// <param name="mask">Mask which provides the row indices; its runtime type decides how the source row index is computed.</param>
        /// <param name="ignoreCellCoupling">Tested against block pairs with different cell index.</param>
        /// <param name="ignoreVarCoupling">Tested against block pairs with different variable index.</param>
        /// <param name="ignoreSpecCoupling">Tested against block pairs with different species index.</param>
        /// <remarks>
        /// NOTE(review): the statements guarded by the three <c>ignore*</c> tests are not visible in
        /// this view; presumably the respective block pairs are skipped - confirm.
        /// </remarks>
        private void AuxGetSubBlockMatrix(BlockMsrMatrix target, BlockMsrMatrix source, BlockMaskBase mask, bool ignoreCellCoupling, bool ignoreVarCoupling, bool ignoreSpecCoupling)
            // exact type test: masks of a type derived from BlockMaskLoc do not count as local
            bool IsLocalMask = mask.GetType() == typeof(BlockMaskLoc);

            // row indices from the given mask, column indices from this object
            extNi0[][][][] RowNi0s = mask.m_StructuredNi0;
            extNi0[][][][] ColNi0s = this.StructuredNi0;

            // NOTE(review): 'auxIdx' is not used in the visible part of this method.
            int auxIdx = 0;

            for (int iLoc = 0; iLoc < RowNi0s.Length; iLoc++)
                for (int jLoc = 0; jLoc < ColNi0s.Length; jLoc++)
                    if (ignoreCellCoupling && jLoc != iLoc)
                    for (int iVar = 0; iVar < RowNi0s[iLoc].Length; iVar++)
                        for (int jVar = 0; jVar < ColNi0s[jLoc].Length; jVar++)
                            if (ignoreVarCoupling && jVar != iVar)
                            for (int iSpc = 0; iSpc < RowNi0s[iLoc][iVar].Length; iSpc++)
                                for (int jSpc = 0; jSpc < ColNi0s[jLoc][jVar].Length; jSpc++)
                                    if (ignoreSpecCoupling && jSpc != iSpc)
                                    for (int iMode = 0; iMode < RowNi0s[iLoc][iVar][iSpc].Length; iMode++)
                                        // row offset of this block in the target matrix
                                        int Trgi0 = RowNi0s[iLoc][iVar][iSpc][iMode].Si0;
                                        for (int jMode = 0; jMode < ColNi0s[jLoc][jVar][jSpc].Length; jMode++)
                                            extNi0 RowNi0 = RowNi0s[iLoc][iVar][iSpc][iMode];
                                            extNi0 ColNi0 = ColNi0s[jLoc][jVar][jSpc][jMode];
                                            // source row: the entry's Gi0 for a BlockMaskLoc, otherwise its Li0
                                            // shifted by the row offset of 'source' minus the local length of m_map
                                            int    Srci0  = IsLocalMask? RowNi0.Gi0: RowNi0.Li0 + source._RowPartitioning.i0 - m_map.LocalLength;
                                            int    Srcj0  = ColNi0.Gi0;

                                            // buffer with the size of the current block
                                            var tmpBlock = MultidimensionalArray.Create(RowNi0.N, ColNi0.N);

                                            // column offset of this block in the target matrix
                                            int Trgj0 = ColNi0s[jLoc][jVar][jSpc][jMode].Si0;
                                            // NOTE(review): the end of this conditional section is not visible in this
                                            // view, and 'SubMSR', 'SubRowIdx' and 'SubColIdx' are not declared in the
                                            // visible part of this method.
#if Debug
                                            SubMSR.ReadBlock(SubRowIdx, SubColIdx, tmpBlock);
                                            Debug.Assert(tmpBlock.Sum() == 0);
                                            Debug.Assert(tmpBlock.InfNorm() == 0);

                                            // read the source block; on failure the indices are printed first
                                            // NOTE(review): the ReadBlock call is cut off in this view (presumably its
                                            // last argument is 'tmpBlock'), and the exception is re-created from the
                                            // message only, so type and stack trace of the original are dropped.
                                            try {
                                                source.ReadBlock(Srci0, Srcj0,
                                            } catch (Exception e) {
                                                Console.WriteLine("row: " + Srci0);
                                                Console.WriteLine("col: " + Srcj0);
                                                throw new Exception(e.Message);
                                            Debug.Assert(Trgi0 < target.RowPartitioning.LocalLength);
                                            Debug.Assert(Trgj0 < target.ColPartition.LocalLength);

                                            // add the block to the target matrix
                                            target.AccBlock(Trgi0, Trgj0, 1.0, tmpBlock);
        /// <summary>
        /// Binds this object to the multigrid operator <paramref name="op"/> and extracts the
        /// diagonal blocks of its operator matrix into <c>Diag</c>, and the inverses of these
        /// blocks into <c>invDiag</c>.
        /// </summary>
        /// <param name="op">Operator which provides the matrix and the block mapping.</param>
        /// <exception cref="ArgumentException">
        /// Row or column partitioning of the operator matrix does not equal the partitioning of
        /// the operator's mapping.
        /// </exception>
        public void Init(MultigridOperator op) {
            BlockMsrMatrix M = op.OperatorMatrix;
            var MgMap = op.Mapping;

            this.m_MultigridOp = op;

            if (!M.RowPartitioning.EqualsPartition(MgMap.Partitioning))
                throw new ArgumentException("Row partitioning mismatch.");
            if (!M.ColPartition.EqualsPartition(MgMap.Partitioning))
                throw new ArgumentException("Column partitioning mismatch.");

            Mtx = M;

            //if (op.Mapping.MaximalLength != op.Mapping.MinimalLength)
            //    // 'BlockDiagonalMatrix' should be completely replaced by 'BlockMsrMatrix'
            //    throw new NotImplementedException("todo - Block Jacobi for variable block Sizes");

            Diag = new BlockMsrMatrix(M._RowPartitioning, M._ColPartitioning);
            invDiag = new BlockMsrMatrix(M._RowPartitioning, M._ColPartitioning);
            int Jloc = MgMap.LocalNoOfBlocks;
            int j0 = MgMap.FirstBlock;

            // scratch block; re-allocated only when the block size changes
            MultidimensionalArray temp = null;

            for (int j = 0; j < Jloc; j++) {
                int jBlock = j + j0;
                int Nblk = MgMap.GetBlockLen(jBlock);
                int i0 = MgMap.GetBlockI0(jBlock);

                if (temp == null || temp.NoOfCols != Nblk)
                    temp = MultidimensionalArray.Create(Nblk, Nblk);

                // copy the diagonal block ...
                M.ReadBlock(i0, i0, temp);
                Diag.AccBlock(i0, i0, 1.0, temp, 0.0);

                // ... and store its inverse; without the inversion 'invDiag' would merely
                // duplicate 'Diag'
                temp.InvertInPlace();
                invDiag.AccBlock(i0, i0, 1.0, temp, 0.0);
            }
        }