/// <summary>
/// Accumulates this process's share of a checksum over the elements of <c>u2</c> selected
/// by 1024 sample positions, sums the shares of all processes onto <c>root</c>, prints the
/// total on the root and stores it in <c>sums</c> when <c>iter</c> is non-negative.
/// </summary>
/// <returns>Always 0.</returns>
public override int go()
        {
            int myRank = this.GlobalRank;

            worldcomm = this.WorldComm;

            double partialReal = 0.0;
            double partialImag = 0.0;

            for (int sample = 1; sample <= 1024; sample++)
            {
                // A sample only counts when all three of its coordinates fall inside the
                // ranges [xstart[0], xend[0]], [ystart[0], yend[0]] and [zstart[0], zend[0]].
                int xi = (int)mod(sample, nx) + 1;
                if (xi < xstart[0] || xi > xend[0])
                {
                    continue;
                }

                int yi = (int)mod(3 * sample, ny) + 1;
                if (yi < ystart[0] || yi > yend[0])
                {
                    continue;
                }

                int zi = (int)mod(5 * sample, nz) + 1;
                if (zi < zstart[0] || zi > zend[0])
                {
                    continue;
                }

                // Flat offset of the sample, then split into the three leading indices of u2.
                int flat = (((zi - zstart[0]) * d2 + (yi - ystart[0])) * d1 + (xi - xstart[0])) * 2;
                int rem1 = flat % size1;
                int rem2 = rem1 % size2;
                int i0   = flat / size1;
                int i1   = rem1 / size2;
                int i2   = rem2 / 2;

                partialReal += u2[i0, i1, i2, REAL];
                partialImag += u2[i0, i1, i2, IMAG];
            }

            // Normalise by the product nx * ny * nz.
            partialReal = partialReal / ((double)(nx * ny * nz));
            partialImag = partialImag / ((double)(nx * ny * nz));

            // Combine the per-process partial checksums on the root.
            double totalReal = worldcomm.Reduce<double>(partialReal, MPI.Operation<double>.Add, root);
            double totalImag = worldcomm.Reduce<double>(partialImag, MPI.Operation<double>.Add, root);

            if (myRank == root)
            {
                Console.WriteLine(" T = " + iter + "  Checksum = (" + totalReal + ") (" + totalImag + ")");
            }

            if (iter >= 0)
            {
                int slot = iter * 2;
                sums[slot + REAL] = totalReal;
                sums[slot + IMAG] = totalImag;
            }

            return 0;
        }
        /// <summary>
        /// Grids the local visibilities, sums the per-process grids onto rank 0, and lets rank 0
        /// derive the dirty image, the side-lobe threshold and the gradient map, which are then
        /// broadcast to every process.
        /// </summary>
        /// <param name="comm">Communicator used for the barrier, the reduction and the broadcasts.</param>
        /// <param name="c">Gridding constants passed to <c>IDG.Grid</c>; <c>c.VisibilitiesCount</c> is passed to <c>FFT.BackwardFloat</c>.</param>
        /// <param name="metadata">Subgrid metadata passed to <c>IDG.Grid</c>.</param>
        /// <param name="visibilities">Visibilities passed to <c>IDG.Grid</c>.</param>
        /// <param name="uvw">UVW data passed to <c>IDG.Grid</c>.</param>
        /// <param name="frequencies">Frequencies passed to <c>IDG.Grid</c>.</param>
        /// <param name="PsfCorrelation">Passed to <c>Residuals.CalcGradientMap</c> on rank 0.</param>
        /// <param name="psfCut">Only its two dimensions are used, as the size of the rectangle given to <c>Residuals.CalcGradientMap</c>.</param>
        /// <param name="maxSidelobe">Factor multiplied with the maximum of the dirty image.</param>
        /// <param name="watchIdg">Stopwatch started and stopped on rank 0 only.</param>
        /// <returns>A <c>DirtyImage</c> built from the broadcast gradient map and side-lobe level.</returns>
        private static DirtyImage ForwardCalculateB(Intracommunicator comm, GriddingConstants c, List<List<Subgrid>> metadata, Complex[,,] visibilities, double[,,] uvw, double[] frequencies, Complex[,] PsfCorrelation, float[,] psfCut, float maxSidelobe, Stopwatch watchIdg)
        {
            // Line all processes up before rank 0 starts its timer.
            comm.Barrier();
            if (comm.Rank == 0)
            {
                watchIdg.Start();
            }

            var localGrid = IDG.Grid(c, metadata, visibilities, uvw, frequencies);

            float[,] image = null;
            float maxSideLobeLevel = 0.0f;
            var grid_total = comm.Reduce<Complex[,]>(localGrid, SequentialSum, 0);

            if (comm.Rank == 0)
            {
                var dirtyImage = FFT.BackwardFloat(grid_total, c.VisibilitiesCount);
                FFT.Shift(dirtyImage);
                FitsIO.Write(dirtyImage, "dirtyImage.fits");
                maxSideLobeLevel = maxSidelobe * Residuals.GetMax(dirtyImage);

                // NOTE(review): the original carried a "remove spheroidal" marker here with no
                // corresponding code - confirm whether that step is still missing.
                image = Residuals.CalcGradientMap(dirtyImage, PsfCorrelation, new Rectangle(0, 0, psfCut.GetLength(0), psfCut.GetLength(1)));
                watchIdg.Stop();
            }

            // Only rank 0 computed these two values; share them with everyone.
            comm.Broadcast(ref maxSideLobeLevel, 0);
            comm.Broadcast(ref image, 0);
            return new DirtyImage(image, maxSideLobeLevel);
        }
        /// <summary>
        /// Receives an array length from any process (tag 0), takes part in an additive array
        /// reduction rooted at the first "collect" rank, and stores the sum of the reduced
        /// elements in <c>data.Value</c>.
        /// </summary>
        public override void synchronize()
        {
            Intracommunicator localComm = mpi.localComm(this);

            int[] collectorRanks = mpi.ranksOf(this, "collect");
            int   rootCollect    = collectorRanks[0];

            int[] senderRanks = mpi.ranksOf(this, "send");
            int   senderCount = senderRanks.Length;

            // The length of the reduction buffer arrives from whichever sender gets there first.
            int size = localComm.Receive<int>(Intracommunicator.anySource, 0);

            // This side contributes only zeros to the reduction.
            double[] zeros   = new double[size];
            double[] reduced = localComm.Reduce<double>(zeros, Operation<double>.Add, rootCollect);

            double total = 0.0D;
            for (int k = 0; k < reduced.Length; k++)
            {
                total += reduced[k];
            }

            data.Value = total;
        }
Esempio n. 4
0
        /// <summary>
        /// Counts the primes in this process's slice of the range 2..n and sums the counts of
        /// all processes on rank 0, which prints the total.
        /// </summary>
        static void Main(string[] args)
        {
            int primeNr = 0;
            int totalNr = 0;

            // Initialize the MPI environment for the lifetime of the program.
            using (new MPI.Environment(ref args))
            {
                Intracommunicator world = Communicator.world;

                // Rank of this process and number of processes in the world communicator.
                processId      = world.Rank;
                nrPerProcessor = world.Size;

                // Wait until every process in the communicator has reached this point.
                world.Barrier();

                // Slice of the candidate range handled by this process.
                start  = 2 + processId * (n - 1) / nrPerProcessor;
                finish = 1 + (processId + 1) * (n - 1) / nrPerProcessor;

                for (j = start; j <= finish; j++)
                {
                    if (!isPrime(j))
                    {
                        continue;
                    }

                    ++primeNr;
                    Console.WriteLine("The prime nr {0} has been found", j);
                }

                // Every process takes part in the reduction; the combined value is used on rank 0 only.
                totalNr = world.Reduce<int>(primeNr, Operation<int>.Add, 0);

                if (world.Rank == 0)
                {
                    System.Console.WriteLine("The Number of Prime Numbers found is: {0}", totalNr);
                }
            }
        }
        /// <summary>
        /// Contributes <c>data.Value</c> to an additive reduction on the local communicator,
        /// rooted at the first rank returned for "send".
        /// </summary>
        public override void synchronize()
        {
            Intracommunicator localComm = mpi.localComm(this);

            int[] senderRanks = mpi.ranksOf(this, "send");
            int   reduceRoot  = senderRanks[0];

            localComm.Reduce<double>(data.Value, Operation<double>.Add, reduceRoot);
        }
        /// <summary>
        /// Contributes <c>data.Value</c> to an additive reduction on the local communicator.
        /// The root is the first "sum" rank translated through <c>RanksInv</c>.
        /// </summary>
        public override void synchronize()
        {
            Intracommunicator localComm = mpi.localComm(this);

            int[] sumRanks    = mpi.ranksOf(this, "sum");
            int   rootCollect = this.RanksInv[sumRanks[0]];

            // Original author's note: this is planned to become a double[] later.
            double contribution = data.Value;

            localComm.Reduce<double>(contribution, Operation<double>.Add, rootCollect);
        }
Esempio n. 7
0
        /// <summary>
        /// Fills an array with 0..arraySize-1 on the root, broadcasts it, lets every process sum
        /// its own slice, and reduces the partial sums onto the root, which prints the total.
        /// The array size defaults to 100 and is taken from the command line when exactly one
        /// argument is present.
        /// </summary>
        static void Main(string[] args)
        {
            using (new MPI.Environment(ref args))
            {
                int root      = 0;
                int arraySize = (args.Length == 1) ? Convert.ToInt32(args[0]) : 100;

                Intracommunicator comm = Communicator.world;

                int[] lotsOfNumbers = new int[arraySize];

                // Only the root fills the array; everyone else receives it via the broadcast.
                if (comm.Rank == root)
                {
                    int next = 0;
                    while (next < lotsOfNumbers.Length)
                    {
                        lotsOfNumbers[next] = next;
                        next++;
                    }
                }

                int sum = 0;

                comm.Broadcast(ref lotsOfNumbers, root);

                // Equal-sized slices; the last rank also takes whatever remains.
                int sliceLength   = arraySize / comm.Size;
                int startingIndex = comm.Rank * sliceLength;
                int endingIndex   = (comm.Rank == comm.Size - 1)
                    ? lotsOfNumbers.Length
                    : startingIndex + sliceLength;

                for (int k = startingIndex; k < endingIndex; k++)
                {
                    sum += lotsOfNumbers[k];
                }
                Console.WriteLine("Rank " + comm.Rank + ": " + "summed the numbers from index " + startingIndex + " to index " + (endingIndex - 1) + " and got " + sum + ".");

                int totalSum = comm.Reduce(sum, Operation<int>.Add, root);

                if (comm.Rank == root)
                {
                    Console.WriteLine("The total sum is: " + totalSum);
                }
            }
        }
        /// <summary>
        /// Grids the PSF locally, sums the per-process grids onto rank 0, lets rank 0 run
        /// <c>FFT.BackwardFloat</c> and <c>FFT.Shift</c> on the total, and broadcasts the
        /// result to every process.
        /// </summary>
        /// <returns>The PSF array as broadcast from rank 0.</returns>
        private static float[,] CalculatePSF(Intracommunicator comm, GriddingConstants c, List<List<Subgrid>> metadata, double[,,] uvw, bool[,,] flags, double[] frequencies)
        {
            var ownGrid    = IDG.GridPSF(c, metadata, uvw, flags, frequencies);
            var summedGrid = comm.Reduce(ownGrid, SequentialSum, 0);

            // Stays null on every rank except 0 until the broadcast below fills it in.
            float[,] psf = null;
            if (comm.Rank == 0)
            {
                psf = FFT.BackwardFloat(summedGrid, c.VisibilitiesCount);
                FFT.Shift(psf);
            }

            comm.Broadcast(ref psf, 0);
            return psf;
        }
Esempio n. 9
0
    /// <summary>
    /// Monte Carlo estimate of pi: every process throws darts at the square [-1, 1] x [-1, 1],
    /// counts those landing inside the unit circle, and rank 0 prints the estimate computed
    /// from the summed counts. The number of darts per process defaults to 10000 and can be
    /// given as the first command-line argument.
    /// </summary>
    static void Main(string[] args)
    {
        int dartsPerProcessor = 10000;

        using (new MPI.Environment(ref args))
        {
            if (args.Length > 0)
            {
                dartsPerProcessor = Convert.ToInt32(args[0]);
            }

            Intracommunicator world = Communicator.world;

            // Seed depends on the rank so that processes draw different sequences.
            Random random        = new Random(5 * world.Rank);
            int    dartsInCircle = 0;

            int thrown = 0;
            while (thrown < dartsPerProcessor)
            {
                double x = (random.NextDouble() - 0.5) * 2;
                double y = (random.NextDouble() - 0.5) * 2;
                if (x * x + y * y <= 1.0)
                {
                    ++dartsInCircle;
                }
                ++thrown;
            }

            // Every process takes part in the reduction; only rank 0 uses the result.
            int totalDartsInCircle = world.Reduce<int>(dartsInCircle, Operation<int>.Add, 0);

            if (world.Rank == 0)
            {
                System.Console.WriteLine("Pi is approximately {0:F15}.",
                                         4 * (double)totalDartsInCircle / (world.Size * (double)dartsPerProcessor));
            }
        }
    }
        /// <summary>
        /// Posts a non-blocking send of the size value (currently 0) to the first "collect"
        /// rank, contributes <c>data.Value</c> to an additive reduction rooted at that rank,
        /// and then cancels the send request.
        /// </summary>
        public override void synchronize()
        {
            Intracommunicator localComm = mpi.localComm(this);

            int rootCollect = mpi.ranksOf(this, "collect")[0];

            // Original author's note: this is planned to become a double[] later.
            double contribution = data.Value;

            // Placeholder for the future array length.
            int     size        = 0;
            Request pendingSend = localComm.ImmediateSend<int>(size, rootCollect, 0);

            localComm.Reduce<double>(contribution, Operation<double>.Add, rootCollect);

            // The request is cancelled unconditionally; the original had a disabled
            // "!req.Test()" check in front of this call.
            pendingSend.Cancel();
        }
        /// <summary>Rank of the process that reads the input and prints the overall results.</summary>
        private const int ROOT_PROCESS_ID = 0;

        /// <summary>
        /// Counts the occurrences of a user-supplied character in the text file at <c>PATH</c>.
        /// The root reads the character and the file, both are broadcast, lines are handled
        /// round-robin by rank, and the per-process counts and the longest elapsed time are
        /// reduced onto the root.
        /// </summary>
        /// <param name="args">Command-line arguments; a first argument of "--verbose" (any casing) enables per-line output.</param>
        /// <returns>Always 0.</returns>
        public static int Main(string[] args)
        {
            using (new MPI.Environment(ref args))                        //  Initialize MPI environment
            {
                Intracommunicator comm     = Communicator.world;         //  Shortcut to the world communicator
                string[]          contents = null;                       //  Lines of the text file
                char what = '\0';                                        //  Character entered by the user

                int    totalCharacters  = 0;                             //  Characters processed by this process
                int    totalOccurrences = 0;                             //  Occurrences found by this process
                double totalTime        = 0;                             //  Longest elapsed time over all processes

                //  Ordinal, case-insensitive match: a culture-sensitive ToLower() can fail to
                //  recognise the flag under some cultures (e.g. Turkish casing of 'I').
                bool verbose = args.Length > 0
                               && string.Equals(args[0], "--verbose", StringComparison.OrdinalIgnoreCase);

                int allCharacters;  //  Characters processed by all processes
                int allOccurrences; //  Occurrences found by all processes

                //  Only the root talks to the user and reads the file
                if (comm.Rank == ROOT_PROCESS_ID)
                {
                    Console.Write("Enter the character you want to search for: ");
                    what     = (char)Console.Read();
                    contents = File.ReadAllLines(PATH);        //  Load the file's lines into the array
                }
                comm.Broadcast(ref contents, ROOT_PROCESS_ID); //  Broadcast contents from root
                comm.Broadcast(ref what, ROOT_PROCESS_ID);     //  Broadcast character from root

                //  All processes must reach this point before the timer starts,
                //  otherwise the measurement would include waiting for the root's input.
                comm.Barrier();
                double start = MPI.Environment.Time;    //  Start of time measurement

                //  Round-robin split: process r handles lines r, r + Size, r + 2 * Size, ...
                for (int i = comm.Rank; i < contents.Length; i += comm.Size)
                {
                    if (verbose)
                    {
                        Console.WriteLine("Process ID = {0}, Line number = {1}", comm.Rank, i);
                    }
                    for (int j = 0; j < contents[i].Length; j++)
                    {
                        if (contents[i][j] == what) //  An occurrence has been found
                        {
                            if (verbose)
                            {
                                Console.WriteLine("Process {0} has found the character at {1}:{2}", comm.Rank, i + 1, j + 1);
                            }
                            totalOccurrences += 1;
                        }
                        totalCharacters += 1;
                    }
                }

                //  All processes must reach here before the timer is stopped
                comm.Barrier();
                double end = MPI.Environment.Time;                                                    //  End of time measurement

                allOccurrences = comm.Reduce(totalOccurrences, Operation<int>.Add, ROOT_PROCESS_ID);  //  Accumulate all occurrences
                allCharacters  = comm.Reduce(totalCharacters, Operation<int>.Add, ROOT_PROCESS_ID);   //  Accumulate all characters
                totalTime      = comm.Reduce(end - start, Operation<double>.Max, ROOT_PROCESS_ID);    //  Overall time is the longest time of all processes

                Console.WriteLine("Total occurrences found by process {0} is {1}", comm.Rank, totalOccurrences);
                Console.WriteLine("Total characters processed by {0} is {1}", comm.Rank, totalCharacters);

                //  The reduced values are printed by the root only
                if (comm.Rank == ROOT_PROCESS_ID)
                {
                    Console.WriteLine("Overall occurrences = {0}", allOccurrences);
                    Console.WriteLine("Overall characters = {0}", allCharacters);
                    Console.WriteLine("Overall time = {0}s", totalTime);
                }
            }
            return 0;
        }
Esempio n. 12
0
        /// <summary>
        /// Takes part in an additive reduction on the local communicator, contributing 0.0 and
        /// naming this process's own rank as root, and stores the returned value in
        /// <c>data.Value</c>.
        /// </summary>
        public override void synchronize()
        {
            Intracommunicator localComm = mpi.localComm(this);

            // NOTE(review): the root is this process's own rank; if more than one process of
            // localComm runs this method they would name different roots - confirm that only
            // one process executes it.
            int    ownRank = localComm.Rank;
            double reduced = localComm.Reduce<double>(0.0, Operation<double>.Add, ownRank);

            data.Value = reduced;
        }
Esempio n. 13
0
    /// <summary>
    /// Runs a fixed sequence of Reduce tests on the world communicator with the given root:
    /// integers, Point values, integer arrays, string arrays, booleans and boolean arrays.
    /// In each test the root checks the combined result with MPIDebug.Assert while the other
    /// processes only take part in the collective call.
    /// </summary>
    /// <param name="root">Rank that receives and verifies every reduction result.</param>
    static void RunTests(int root)
    {
        Intracommunicator world = Communicator.world;

        world.Barrier();

        if (world.Rank == root)
        {
            System.Console.WriteLine("Testing from root " + root);
        }

        // Test addition of integers
        // Every process contributes its rank, so the root expects 0 + 1 + ... + (Size - 1).
        int sum      = world.Reduce(world.Rank, addInts, root);
        int expected = world.Size * (world.Size - 1) / 2;

        if (world.Rank == root)
        {
            MPIDebug.Assert(sum == expected);
        }
        else
        {
            // Non-root processes are expected to get the default value back.
            MPIDebug.Assert(sum == default(int));
        }

        if (world.Rank == root)
        {
            System.Console.WriteLine("Sum of ranks = " + sum);
        }

        // Test addition of integer points
        // x sums the ranks (same as "sum" above); y sums (Size - Rank), i.e. 1 + ... + Size.
        if (world.Rank == root)
        {
            Point point_sum = world.Reduce(new Point(world.Rank, world.Size - world.Rank), Point.Plus, root);
            MPIDebug.Assert(point_sum.x == sum && point_sum.y == (world.Size + 1) * world.Size / 2);
            System.Console.WriteLine("Sum of points = (" + point_sum.x + ", " + point_sum.y + ")");
        }
        else
        {
            world.Reduce(new Point(world.Rank, world.Size - world.Rank), Point.Plus, root);
        }

        // Test addition of integer arrays
        // Same two values as the Point test, reduced element-wise; the root uses the overload
        // that writes the result into a ref array.
        if (world.Rank == root)
        {
            System.Console.Write("Testing reduction of integer arrays...");
            int[] arraySum = null;
            world.Reduce(new int[] { world.Rank, world.Size - world.Rank }, Operation <int> .Add, root, ref arraySum);
            MPIDebug.Assert(arraySum[0] == sum && arraySum[1] == (world.Size + 1) * world.Size / 2);
            System.Console.WriteLine(" done.");
        }
        else
        {
            world.Reduce(new int[] { world.Rank, world.Size - world.Rank }, Operation <int> .Add, root);
        }

        // Test concatenation of string arrays
        // The root expects the rank strings, respectively "World", concatenated in rank order.
        if (world.Rank == root)
        {
            System.Console.Write("Testing reduction of string arrays...");
            string[] strArray = null;
            world.Reduce(new string[] { world.Rank.ToString(), "World" }, Operation <string> .Add, root, ref strArray);

            string[] expectedStrs = new string[2] {
                "", ""
            };
            for (int p = 0; p < world.Size; ++p)
            {
                expectedStrs[0] += p.ToString();
                expectedStrs[1] += "World";
            }
            MPIDebug.Assert(expectedStrs[0] == strArray[0]);
            MPIDebug.Assert(expectedStrs[1] == strArray[1]);

            System.Console.WriteLine(" done.");
        }
        else
        {
            world.Reduce(new string[] { world.Rank.ToString(), "World" }, Operation <string> .Add, root);
        }

        // Test reduction on boolean values
        // Every process contributes true, so the logical AND must be true.
        if (world.Rank == root)
        {
            System.Console.Write("Testing reduction of bools...");
            bool result = world.Reduce(true, Operation <bool> .LogicalAnd, root);
            MPIDebug.Assert(result == true);
            System.Console.WriteLine(" done.");
        }
        else
        {
            world.Reduce(true, Operation <bool> .LogicalAnd, root);
        }

        // Test reduction on boolean arrays
        // Element 0 is false everywhere; element 1 is true on odd ranks, so its OR is true
        // exactly when there is more than one process; element 2 is true on the root only,
        // which is enough for the OR to be true.
        if (world.Rank == root)
        {
            System.Console.Write("Testing reduction of bool arrays...");
            bool[] boolArray = null;
            world.Reduce(new bool[] { false, world.Rank % 2 != 0, true }, Operation <bool> .LogicalOr, root, ref boolArray);
            MPIDebug.Assert(boolArray[0] == false);
            MPIDebug.Assert(boolArray[1] == (world.Size > 1));
            MPIDebug.Assert(boolArray[2] == true);
            System.Console.WriteLine(" done.");
        }
        else
        {
            world.Reduce(new bool[] { false, world.Rank % 2 != 0, false }, Operation <bool> .LogicalOr, root);
        }
    }
Esempio n. 14
0
        /// <summary>
        /// Parallel search for <c>nrToSearch</c> in a 50-element array. Rank 0 fills the array
        /// (18 at every index divisible by 10, the index itself elsewhere) and broadcasts it;
        /// each process scans its own block of 50 / size elements, and the highest index at
        /// which the number was found is reduced onto rank 0, which prints it.
        /// </summary>
        static void Main(string[] args)
        {
            using (new MPI.Environment(ref args))
            {
                Intracommunicator world = Communicator.world;
                rank = world.Rank;
                size = world.Size;
                world.Barrier();
                found = false;

                if (rank == 0)
                {
                    for (i = 0; i < 50; ++i)
                    {
                        if (i % 10 == 0)
                        {
                            numbers[i] = 18;
                        }
                        else
                        {
                            numbers[i] = i;
                        }
                    }
                }

                world.Broadcast<int[]>(ref numbers, 0);

                // NOTE(review): the request returned here is discarded, so the notification
                // messages sent below with tag 1 are never inspected - confirm the intent.
                world.ImmediateReceive<int>(rank, 1);

                // Block of indices owned by this process: [rank * nvalues, (rank + 1) * nvalues - 1].
                nvalues = 50 / size;
                i       = rank * nvalues;

                inrange = (i <= ((rank + 1) * nvalues - 1)) && (i >= rank * nvalues);

                List<int> indexes = new List<int>();

                while (inrange)
                {
                    if (numbers[i] == nrToSearch)
                    {
                        temp = 23;
                        indexes.Add(i);

                        // Tell every process (including this one) that a match was found.
                        for (j = 0; j < size; ++j)
                        {
                            world.Send<int>(temp, j, 1);
                        }
                        Console.WriteLine("Process: " + rank + " has found number " + numbers[i] + " at global index " + i + "\n");
                        found = true;
                    }
                    ++i;
                    inrange = (i <= ((rank + 1) * nvalues - 1) && i >= rank * nvalues);
                }
                if (!found)
                {
                    Console.WriteLine("Process: " + rank + " stopped at global index " + (i - 1) + "\n");
                }

                // Highest local match, or -1 when this process found nothing.
                int maximum = -1;
                for (i = 0; i < indexes.Count; i++)
                {
                    if (maximum < indexes[i])
                    {
                        maximum = indexes[i];
                    }
                }

                int high = world.Reduce(maximum, Operation<int>.Max, 0);

                // The reduction is rooted at rank 0, so only rank 0 holds the combined value;
                // printing it on the other ranks would report a value that is not the result.
                if (rank == 0)
                {
                    System.Console.WriteLine("The highest Index where element was found is " + high);
                }
            }
        }
Esempio n. 15
0
        /// <summary>
        /// Iteratively solves A * x = b following the Gauss-Seidel scheme
        /// x(k+1) = T * x(k) + C, with the rows of T and C distributed over the processes of
        /// <paramref name="comm"/>. Returns true if it converges.
        /// </summary>
        /// <param name="A">Coefficient matrix; checked on rank 0 to be square with the same height as <paramref name="b"/>.</param>
        /// <param name="b">Right-hand side; checked on rank 0 to be a column matrix. Broadcast from rank 0.</param>
        /// <param name="x">Output: the solution matrix. Rounded on rank 0 only.</param>
        /// <param name="err">Output: A * x - b (rounded) on rank 0; null on every other rank.</param>
        /// <param name="loops">Output: number of loops it took (at most 100).</param>
        /// <param name="comm">Communicator used for all broadcasts, the scatter and the reductions.</param>
        /// <returns>The convergence flag as broadcast from rank 0 in the last executed loop.</returns>
        /// <exception cref="Exception">Thrown on rank 0 when the shape checks on A and b fail.</exception>
        public static Boolean solve(Matrix A, Matrix b, out Matrix x, out Matrix err, out int loops, Intracommunicator comm)
        {
            // check sanity. rank 0 only
            if (comm.Rank == 0 && (!A.isSquare || !b.isColumn || (A.Height != b.Height)))
            {
                Exception e = new Exception("Matrix A must be square! Matrix b must be a column matrix with the same height as matrix A!");
                throw e;
            }

            // follow samples in Wikipedia step by step https://en.wikipedia.org/wiki/Gauss%E2%80%93Seidel_method

            // bm times the whole call; bm2 and bm3 time the individual phases, whose elapsed
            // seconds are added to the sequential / parallel / communication totals below.
            benchmark bm = new benchmark(), bm2 = new benchmark(), bm3 = new benchmark();
            double    sequential = 0, parallel = 0, communication = 0;

            bm.start();

            bm2.start();
            // decompose A into the sum of a lower triangular component L* and a strict upper triangular component U
            // (done on rank 0 only; L and U stay null elsewhere until U is broadcast)
            int size = 0; Matrix L = null, U = null, L_1;

            if (comm.Rank == 0)
            {
                size = A.Height;
                Matrix.Decompose(A, out L, out U);
            }
            bm2.pause();
            sequential += bm2.getElapsedSeconds();

            bm2.start();
            // size, U and b are shared with every process; L is not broadcast here.
            comm.Broadcast(ref size, 0);
            comm.Broadcast(ref U, 0);
            comm.Broadcast(ref b, 0);
            bm2.pause();
            communication += bm2.getElapsedSeconds();

            // Inverse matrix L*
            comm.Barrier();
            L_1 = MatrixParallel.Inverse(L, comm, ref sequential, ref parallel, ref communication);

            // Main iteration: x (at step k+1) = T * x (at step k) + C
            // where T = - (inverse of L*) * U, and C = (inverse of L*) * b

            // split T & C into groups of rows, each for one slave, according to the nature of this algorithm
            // each slave will have one piece of T & one piece of C stored locally. the rest of T & C is not needed
            // there might be cases where jobs > slaves, so some might get no job at all
            // Changes: only split L_1. Slaves will calculate T & C (pieces) themselves
            bm2.start();
            Matrix jobDistro = Utils.splitJob(size, comm.Size);
            // NOTE(review): endRow and myJobSize are assigned but not read again in this method.
            int    startRow = 0, endRow = 0, myJobSize = (int)jobDistro[0, comm.Rank];

            // startRow = sum of the job sizes of all lower ranks.
            for (int p = 0; p < comm.Size; p++)
            {
                if (p != comm.Rank)
                {
                    startRow += (int)jobDistro[0, p];
                }
                else
                {
                    endRow = startRow + (int)jobDistro[0, p] - 1;
                    break;
                }
            }
            // Rank 0 cuts L_1 into one row block per process; the array stays empty elsewhere.
            Matrix[] L_1Ps = new Matrix[comm.Size];
            if (comm.Rank == 0)
            {
                int slaveStart = 0;
                for (int p = 0; p < comm.Size; p++)
                {
                    L_1Ps[p]    = Matrix.extractRows(L_1, slaveStart, slaveStart + (int)jobDistro[0, p] - 1);
                    slaveStart += (int)jobDistro[0, p];
                }
            }
            bm2.pause();
            sequential += bm2.getElapsedSeconds();

            bm2.start();
            // Each process receives its own row block of L_1 from rank 0.
            Matrix L_1P = comm.Scatter(L_1Ps, 0);

            bm2.pause();
            communication += bm2.getElapsedSeconds();
            bm2.start();
            // Local pieces of T and C, computed from the local row block of L_1.
            Matrix T = -L_1P * U; Matrix C = L_1P * b;

            bm2.pause();
            parallel += bm2.getElapsedSeconds();

            // the actual iteration
            // if it still doesn't converge after this many loops, assume it won't converge and give up
            Boolean converge  = false;
            int     loopLimit = 100;

            x = Matrix.zeroLike(b); // at step k
            for (loops = 0; loops < loopLimit; loops++)
            {
                bm3.start();
                // (re-)distributing x vector. Must be done every single loop
                // this loop needs x from the previous loop
                comm.Broadcast(ref x, 0);
                bm3.pause();
                communication += bm3.getElapsedSeconds();

                // calculation step
                bm3.start();
                comm.Barrier();
                Matrix new_x = T * x + C;

                // check convergence
                converge = Matrix.SomeClose(new_x, x, 1e-15, startRow);

                // collect result x
                // NOTE(review): x is overwritten with the Reduce return value on every rank;
                // what non-root ranks hold afterwards depends on Reduce - confirm.
                comm.Barrier();
                x = comm.Reduce(new_x, Matrix.Concatenate, 0);

                // collect convergence. consider converged if ALL slaves claim so
                converge = comm.Reduce(converge, bothTrue, 0);
                comm.Broadcast(ref converge, 0); // make sure EVERYONE breaks/continues
                bm3.pause();
                parallel += bm3.getElapsedSeconds();
                if (converge)
                {
                    // Count the loop that just completed before leaving.
                    loops++;
                    break;
                }
            }

            bm2.start();
            // round the result slightly
            err = null;
            if (comm.Rank == 0)
            {
                x.Round(1e-14);
                err = A * x - b;
                err.Round(1e-14);
            }
            bm2.pause();
            sequential += bm2.getElapsedSeconds();

            bm.pause();
            if (showBenchmark)
            {
                Console.WriteLine("Sequential part took " + sequential + " secs.");
                Console.WriteLine("Parallel part took " + parallel + " secs.");
                Console.WriteLine("Communication took " + communication + " secs.");
                Console.WriteLine("Total: " + bm.getResult() + " (" + bm.getElapsedSeconds() + " secs). Seq + Parallel: " + (sequential + parallel));
            }

            return(converge);
        }
Esempio n. 16
0
        /// <summary>
        /// Computes the inverse of <paramref name="matrix"/> in parallel. Rank 0 performs the
        /// LUP decomposition and the determinant check, the decomposition is broadcast, every
        /// process solves for its own block of columns, and the column blocks are combined on
        /// rank 0 with <c>ConcatenateColumn</c>.
        /// </summary>
        /// <param name="matrix">Matrix to invert; checked on rank 0 to be square.</param>
        /// <param name="comm">Communicator used for the broadcasts and the final reduction.</param>
        /// <param name="timeS">Accumulator to which the seconds spent in rank-0-only phases are added.</param>
        /// <param name="timeP">Accumulator to which the seconds spent in the job split, the column solves and the final reduction are added.</param>
        /// <param name="timeC">Accumulator to which the seconds spent in the broadcast phases are added.</param>
        /// <returns>
        /// The value returned by the final reduction, or <c>zeroLike(matrix)</c> when the
        /// decomposition is null or the determinant is 0.
        /// </returns>
        /// <exception cref="ArgumentException">Thrown on rank 0 when <paramref name="matrix"/> is not square.</exception>
        public static Matrix Inverse(Matrix matrix, Intracommunicator comm, ref double timeS, ref double timeP, ref double timeC)
        {
            if (comm.Rank == 0 && !matrix.isSquare)
            {
                throw new ArgumentException("Matrix must be square!");
            }

            benchmark bm = new benchmark(), bm2 = new benchmark();

            bm.start();

            int n = 0;

            // perm is produced by LUPDecompose on rank 0 and allocated with n entries on the
            // other ranks right before it is broadcast, so no placeholder array is needed.
            int[]  perm   = null;
            int    toggle = 0;
            Matrix lum    = null;
            if (comm.Rank == 0)
            {
                n   = matrix.dim1;
                lum = LUPDecompose(matrix, out perm, out toggle);
            }
            bm.pause();
            timeS += bm.getElapsedSeconds();

            // Share the decomposition with every process.
            bm.start();
            comm.Broadcast(ref n, 0);
            comm.Broadcast(ref lum, 0);
            if (comm.Rank != 0)
            {
                perm = new int[n];
            }
            comm.Broadcast(ref perm, 0);
            comm.Broadcast(ref toggle, 0);
            comm.Barrier();
            bm.pause();
            timeC += bm.getElapsedSeconds();

            if (lum == null)
            {
                return zeroLike(matrix);
            }

            bm.start();
            Double det = 0;

            if (comm.Rank == 0)
            {
                det = Determinant(lum, perm, toggle);
            }
            bm.pause();
            timeS += bm.getElapsedSeconds();

            bm.start();
            comm.Broadcast(ref det, 0);
            comm.Barrier();
            bm.pause();
            timeC += bm.getElapsedSeconds();
            if (det == 0) // not invertible
            {
                // still return for the sake of simplicity
                // Zero matrix * any matrix = zero matrix
                // so it's never a valid answer
                return zeroLike(matrix);
            }

            // Time the job split. The original called pause() here, so this interval was never
            // started and the previously measured interval was added to timeP instead.
            bm.start();
            int    slaves    = comm.Size;
            int    myRank    = comm.Rank;
            Matrix jobDistro = Utils.splitJob(n, slaves);
            int    size      = (int)jobDistro[0, myRank];

            // First column of this process = total number of columns given to lower ranks.
            int startCol = 0;
            for (int p = 0; p < myRank; p++)
            {
                startCol += (int)jobDistro[0, p];
            }
            bm.pause();
            timeP += bm.getElapsedSeconds();

            bm.start();
            Matrix result = new Matrix(n, size);

            for (int i = startCol; i < startCol + size; ++i)
            {
                // Right-hand side for column i: 1.0 where perm[j] equals i, 0.0 elsewhere.
                double[] b = new double[n];
                for (int j = 0; j < n; ++j)
                {
                    b[j] = (i == perm[j]) ? 1.0 : 0.0;
                }

                double[] x = HelperSolve(lum, b);
                for (int j = 0; j < n; ++j)
                {
                    result[j, i - startCol] = x[j];
                }
            }
            bm.pause();
            timeP += bm.getElapsedSeconds();

            bm.start();
            // collect result
            // NOTE(review): this collective is booked under timeP rather than timeC - confirm
            // that this is the intended accounting.
            result = comm.Reduce(result, ConcatenateColumn, 0);
            bm.pause();
            timeP += bm.getElapsedSeconds();

            return result;
        }