/// <summary>
        /// Solves the linear system held by this instance (matrix <c>a</c>, right-hand side <c>b</c>)
        /// with the conjugate gradient method, distributing the work over the ranks of
        /// <paramref name="mpi"/>. The starting guess is the zero vector. The assembled solution
        /// is stored in <c>X</c> on every rank.
        /// </summary>
        /// <param name="mpi">Communicator whose ranks cooperate on the solve; every rank must call this method.</param>
        /// <returns>
        /// The loop index at which the squared residual dropped to the tolerance (or <c>N</c> if it
        /// never did; <c>0</c> if the initial residual already satisfied it), together with the
        /// elapsed time of the iteration phase in milliseconds.
        /// </returns>
        public ResultOption Solve(MPI.Intracommunicator mpi)
        {
            // Threshold on the squared residual (r . r) below which the solve is treated as converged.
            const double ResidualTolerance = 1e-15;

            // NOTE(review): master is identified as rank 0 while the collectives use Root;
            // presumably Root == 0 - confirm against its declaration.
            IsMaster = mpi.Rank == 0;

            // Every rank needs the problem size before it can allocate anything.
            mpi.Broadcast(ref n, Root);

            Validate(mpi);

            // Number of solution entries owned by each rank.
            // NOTE(review): assumes N is divisible by mpi.Size (presumably enforced by Validate) - confirm.
            int n_block = N / mpi.Size;

            // Only the master holds the full matrix; flatten it so it can be scattered.
            double[] aFlattened = null;
            if (IsMaster)
            {
                aFlattened = new double[N * N];
                ToArray(a, ref aFlattened);
            }

            // Non-master ranks allocate a receive buffer for the broadcast of b.
            if (!IsMaster)
            {
                b = new double[N];
            }

            // send B across all processes
            mpi.Broadcast(ref b, Root);

            // send A: each rank receives N * n_block contiguous entries of the flattened matrix,
            // so the receive buffer is sized to exactly that count.
            double[] a_block = new double[N * n_block];
            mpi.ScatterFromFlattened(aFlattened, N * n_block, Root, ref a_block);

            // x0 = (0, 0, ..., 0)
            double[] x = new double[N];

            // r = b - A*x0;
            double[] residual_block = new double[n_block];
            CalculateResidue(ref residual_block, b, a_block, x, mpi.Rank);

            // The first search direction is the initial residual.
            double[] p_block = new double[n_block];
            Array.Copy(residual_block, p_block, n_block);

            // Global r . r, summed over the per-rank partial dot products.
            double residualOld_block = MatrixUtil.DotProduct(residual_block, residual_block);
            double residualOld = mpi.Allreduce(residualOld_block, MPI.Operation<double>.Add);

            double[] x_block = new double[n_block];
            double[] p = new double[N];

            var stopwatch = Stopwatch.StartNew();

            int iteration = 0;

            // If the starting guess already satisfies the tolerance (e.g. b is all zeros), the
            // search direction is zero and alpha would be 0/0 = NaN, poisoning the solution.
            // residualOld comes from an Allreduce, so every rank takes the same branch and the
            // collective calls below stay matched. The comparison mirrors the in-loop test so
            // that a NaN residual is still treated as "not converged".
            bool alreadyConverged = residualOld <= ResidualTolerance;
            if (!alreadyConverged)
            {
                for (iteration = 0; iteration < N; iteration++)
                {
                    // Assemble the full search direction from every rank's slice.
                    ToArray(mpi.Allgather(p_block), ref p);

                    // Local slice of A*p.
                    double[] Ap = MultiDotProduct(a_block, p);

                    // Global p . (A*p).
                    double pAp_block = MatrixUtil.DotProduct(p_block, Ap);
                    double pAp = mpi.Allreduce(pAp_block, MPI.Operation<double>.Add);

                    double alpha = residualOld / pAp;

                    // x += alpha * p
                    MatrixUtil.Add(ref x_block, p_block, 1, alpha);

                    // r -= alpha * A*p
                    MatrixUtil.Add(ref residual_block, Ap, 1, -alpha);

                    double rsnew_block = MatrixUtil.DotProduct(residual_block, residual_block);
                    double rsnew = mpi.Allreduce(rsnew_block, MPI.Operation<double>.Add);

                    if (rsnew <= ResidualTolerance)
                    {
                        break;
                    }

                    // p = r + (rsnew / residualOld) * p
                    MatrixUtil.Add(ref p_block, residual_block, rsnew / residualOld, 1);

                    residualOld = rsnew;
                }
            }

            // Assemble the full solution on every rank.
            ToArray(mpi.Allgather(x_block), ref x);
            X = x;

            stopwatch.Stop();

            return new ResultOption { ConvergenceIteration = iteration, SolveTime = stopwatch.ElapsedMilliseconds };
        }