/// <summary>
/// Line solve along the j index, done in two passes over the <c>ncells</c>
/// stages: forward elimination (stage 0 upward) followed by
/// back-substitution (last stage downward). Each stage works on the cell
/// <c>slice[stage, 1]</c> and exchanges boundary data with the neighbouring
/// stage through freshly allocated receive/send buffers.
/// </summary>
/// <returns>Always 0.</returns>
public override int go()
        {
            double[] sv = new double[5];   // scratch for the rhs factors read from the receive buffer
            double[] recvBuf;
            double[] sendBuf;

            //---------------------------------------------------------------------
            //                          FORWARD ELIMINATION
            // Cells are visited in increasing stage order; every stage but the
            // first folds in the data received from the previous stage, and every
            // stage but the last packs its two final j-planes for the next one.
            //---------------------------------------------------------------------
            for (int stage = 0; stage < ncells; stage++)
            {
                Lhs.enterStage(stage);
                Forward.enterStage(stage);

                int cell = slice[stage, 1];

                int jFirst = 2;
                int jLast  = 2 + cell_size[cell, 1] - 1;

                int isize = cell_size[cell, 0] + 2;
                int ksize = cell_size[cell, 2] + 2;

                // Number of (i, k) points on the exchanged face.
                int faceCount = (isize - start[cell, 0] - end[cell, 0]) * (ksize - start[cell, 2] - end[cell, 2]);

                // 22 doubles per face point: 10 for the first three factors plus 2 * 6 for the last two.
                Input_buffer_forward.Array  = recvBuf = new double[22 * faceCount];
                Output_buffer_forward.Array = sendBuf = new double[22 * faceCount];

                if (stage == 0)
                {
                    Lhs.go();
                }
                else
                {
                    Shift_forward.initiate_recv();

                    Lhs.go();

                    Shift_forward.go();

                    #region read buffer
                    //---------------------------------------------------------------------
                    //            unpack the receive buffer into the first two j-planes
                    //---------------------------------------------------------------------
                    int jA = jFirst;
                    int jB = jFirst + 1;

                    // Bounds are captured here (after the calls above) and stay fixed
                    // for both loop nests, which make no further calls.
                    int kLo = start[cell, 2];
                    int kHi = ksize - end[cell, 2];
                    int iLo = start[cell, 0];
                    int iHi = isize - end[cell, 0];

                    int pos = 0;    // running read position in recvBuf
                    int off = -1;   // band offset into the last lhs index

                    // First three factors share one lhs band set: 10 values per point.
                    for (int k = kLo; k < kHi; k++)
                    {
                        for (int i = iLo; i < iHi; i++)
                        {
                            // lhs[.., off + 1] is only read in this step, so one load suffices.
                            double carry = lhs[cell, k, jA, i, off + 1];

                            lhs[cell, k, jA, i, off + 2] -= recvBuf[pos] * carry;
                            lhs[cell, k, jA, i, off + 3] -= recvBuf[pos + 1] * carry;
                            for (int m = 0; m < 3; m++)
                            {
                                rhs[cell, k, jA, i, m] -= recvBuf[pos + 2 + m] * carry;
                            }

                            double d = recvBuf[pos + 5];
                            double e = recvBuf[pos + 6];
                            for (int m = 0; m < 3; m++)
                            {
                                sv[m] = recvBuf[pos + 7 + m];
                            }

                            double r1 = lhs[cell, k, jA, i, off + 2];
                            lhs[cell, k, jA, i, off + 3] -= d * r1;
                            lhs[cell, k, jA, i, off + 4] -= e * r1;
                            for (int m = 0; m < 3; m++)
                            {
                                rhs[cell, k, jA, i, m] -= sv[m] * r1;
                            }

                            double r2 = lhs[cell, k, jB, i, off + 1];
                            lhs[cell, k, jB, i, off + 2] -= d * r2;
                            lhs[cell, k, jB, i, off + 3] -= e * r2;
                            for (int m = 0; m < 3; m++)
                            {
                                rhs[cell, k, jB, i, m] -= sv[m] * r2;
                            }

                            pos += 10;
                        }
                    }

                    // Factors 3 and 4 each use their own lhs band set: 6 values per point.
                    for (int m = 3; m < 5; m++)
                    {
                        off = (m - 2) * 5 - 1;
                        for (int k = kLo; k < kHi; k++)
                        {
                            for (int i = iLo; i < iHi; i++)
                            {
                                double carry = lhs[cell, k, jA, i, off + 1];

                                lhs[cell, k, jA, i, off + 2] -= recvBuf[pos] * carry;
                                lhs[cell, k, jA, i, off + 3] -= recvBuf[pos + 1] * carry;
                                rhs[cell, k, jA, i, m]       -= recvBuf[pos + 2] * carry;

                                double d = recvBuf[pos + 3];
                                double e = recvBuf[pos + 4];
                                sv[m]    = recvBuf[pos + 5];

                                double r1 = lhs[cell, k, jA, i, off + 2];
                                lhs[cell, k, jA, i, off + 3] -= d * r1;
                                lhs[cell, k, jA, i, off + 4] -= e * r1;
                                rhs[cell, k, jA, i, m]       -= sv[m] * r1;

                                double r2 = lhs[cell, k, jB, i, off + 1];
                                lhs[cell, k, jB, i, off + 2] -= d * r2;
                                lhs[cell, k, jB, i, off + 3] -= e * r2;
                                rhs[cell, k, jB, i, m]       -= sv[m] * r2;

                                pos += 6;
                            }
                        }
                    }
                    #endregion
                }

                Forward.go();

                //---------------------------------------------------------------------
                //         hand the last two j-planes to the next stage, unless this
                //         is the final grid block
                //---------------------------------------------------------------------
                if (stage != ncells - 1)
                {
                    #region write buffer
                    int kLo = start[cell, 2];
                    int kHi = ksize - end[cell, 2];
                    int iLo = start[cell, 0];
                    int iHi = isize - end[cell, 0];

                    int pos = 0;    // running write position in sendBuf
                    int off = -1;

                    // First three factors: 5 values per (point, plane).
                    for (int k = kLo; k < kHi; k++)
                    {
                        for (int i = iLo; i < iHi; i++)
                        {
                            for (int j = jLast - 1; j < jLast + 1; j++)
                            {
                                sendBuf[pos++] = lhs[cell, k, j, i, off + 4];
                                sendBuf[pos++] = lhs[cell, k, j, i, off + 5];
                                for (int m = 0; m < 3; m++)
                                {
                                    sendBuf[pos++] = rhs[cell, k, j, i, m];
                                }
                            }
                        }
                    }

                    // Factors 3 and 4: 3 values per (point, plane).
                    for (int m = 3; m < 5; m++)
                    {
                        off = (m - 2) * 5 - 1;
                        for (int k = kLo; k < kHi; k++)
                        {
                            for (int i = iLo; i < iHi; i++)
                            {
                                for (int j = jLast - 1; j < jLast + 1; j++)
                                {
                                    sendBuf[pos++] = lhs[cell, k, j, i, off + 4];
                                    sendBuf[pos++] = lhs[cell, k, j, i, off + 5];
                                    sendBuf[pos++] = rhs[cell, k, j, i, m];
                                }
                            }
                        }
                    }
                    #endregion

                    Shift_forward.initiate_send();
                }
            }

            //---------------------------------------------------------------------
            //                         BACKSUBSTITUTION
            // Same stages, visited in decreasing order.
            //---------------------------------------------------------------------
            for (int stage = ncells - 1; stage >= 0; stage--)
            {
                Backward.enterStage(stage);

                int cell = slice[stage, 1];

                int jFirst = 2;
                int jLast  = 2 + cell_size[cell, 1] - 1;

                int isize = cell_size[cell, 0] + 2;
                int ksize = cell_size[cell, 2] + 2;

                int faceCount = (isize - start[cell, 0] - end[cell, 0]) *
                                (ksize - start[cell, 2] - end[cell, 2]);

                // 10 doubles per face point: two rhs values for each of the five factors.
                Input_buffer_backward.Array  = recvBuf = new double[10 * faceCount];
                Output_buffer_backward.Array = sendBuf = new double[10 * faceCount];

                if (stage == ncells - 1)
                {
                    Backward.init();
                }
                else
                {
                    Shift_backward.initiate_recv();

                    Matvecproduct.enterStage(stage + 1);
                    Matvecproduct.go();

                    Shift_backward.go();

                    #region read_buffer
                    //---------------------------------------------------------------------
                    //            unpack the buffer into the last two j-planes; the
                    //            first three factors use band offset -1, the remaining
                    //            two use their own offset
                    //---------------------------------------------------------------------
                    int jA = jLast;
                    int jB = jA - 1;

                    int kLo = start[cell, 2];
                    int kHi = ksize - end[cell, 2];
                    int iLo = start[cell, 0];
                    int iHi = isize - end[cell, 0];

                    int pos = 0;
                    for (int m = 0; m < 5; m++)
                    {
                        int off = (m < 3) ? -1 : (m - 2) * 5 - 1;
                        for (int k = kLo; k < kHi; k++)
                        {
                            for (int i = iLo; i < iHi; i++)
                            {
                                double sm1 = recvBuf[pos];
                                double sm2 = recvBuf[pos + 1];

                                // Two-term updates stay as single left-to-right
                                // expressions so the rounding sequence is unchanged.
                                rhs[cell, k, jA, i, m] = rhs[cell, k, jA, i, m] -
                                                         lhs[cell, k, jA, i, off + 4] * sm1 -
                                                         lhs[cell, k, jA, i, off + 5] * sm2;
                                rhs[cell, k, jB, i, m] = rhs[cell, k, jB, i, m] -
                                                         lhs[cell, k, jB, i, off + 4] * rhs[cell, k, jA, i, m] -
                                                         lhs[cell, k, jB, i, off + 5] * sm1;
                                pos += 2;
                            }
                        }
                    }
                    #endregion
                }

                Backward.go();

                if (stage != 0)
                {
                    //---------------------------------------------------------------------
                    //         pass the first two j-planes of rhs to the previous stage
                    //---------------------------------------------------------------------
                    #region write buffer
                    int jA = jFirst;
                    int jB = jFirst + 1;

                    int kLo = start[cell, 2];
                    int kHi = ksize - end[cell, 2];
                    int iLo = start[cell, 0];
                    int iHi = isize - end[cell, 0];

                    int pos = 0;
                    for (int m = 0; m < 5; m++)
                    {
                        for (int k = kLo; k < kHi; k++)
                        {
                            for (int i = iLo; i < iHi; i++)
                            {
                                sendBuf[pos++] = rhs[cell, k, jA, i, m];
                                sendBuf[pos++] = rhs[cell, k, jB, i, m];
                            }
                        }
                    }
                    #endregion

                    Shift_backward.initiate_send();
                }
                else
                {
                    //---------------------------------------------------------------------
                    //         stage 0 is the last one processed: run the
                    //         block-diagonal inversion for it
                    //---------------------------------------------------------------------
                    Matvecproduct.enterStage(stage);
                    Matvecproduct.go();
                }
            }

            return 0;
        }         // end go method
        // ---- Example 2 ----
        /// <summary>
        /// Line solve along the i index, done in two passes over the
        /// <c>ncells</c> stages: forward elimination (stage 0 upward) followed
        /// by back-substitution (last stage downward). Each stage works on the
        /// cell <c>slice[stage, 0]</c> and exchanges boundary data with the
        /// neighbouring stage through freshly allocated receive/send buffers.
        /// </summary>
        /// <returns>Always 0.</returns>
        public override int go()
        {
            int    c, i, j, k, n, iend, jsize, ksize, i1, m, buffer_size, p, istart, stage;
            double r1, r2, d, e, sm1, sm2;

            double[] s = new double[5];   // scratch for the rhs factors read from the receive buffer
            double[] in_buffer_x;
            double[] out_buffer_x;

            //---------------------------------------------------------------------
            //                          FORWARD ELIMINATION
            //---------------------------------------------------------------------
            for (stage = 0; stage < ncells; stage++)
            {
                Forward.enterStage(stage);
                Lhs.enterStage(stage);

                c = slice[stage, 0];

                istart = 2;
                iend   = 2 + cell_size[c, 0] - 1;

                jsize = cell_size[c, 1] + 2;
                ksize = cell_size[c, 2] + 2;

                // Number of (j, k) points on the exchanged face.
                buffer_size = (jsize - start[c, 1] - end[c, 1]) *
                              (ksize - start[c, 2] - end[c, 2]);

                // 22 doubles per face point: 10 for the first three factors plus 2 * 6 for the last two.
                Input_buffer_forward.Array  = in_buffer_x = new double[22 * buffer_size];
                Output_buffer_forward.Array = out_buffer_x = new double[22 * buffer_size];

                if (stage != 0)
                {
                    Shift_forward.initiate_recv();

                    Lhs.go();

                    Shift_forward.go();

                    #region read buffer
                    //---------------------------------------------------------------------
                    //            unpack the buffer into the first two i-planes
                    //---------------------------------------------------------------------
                    i  = istart;
                    i1 = istart + 1;
                    n  = -1;

                    //---------------------------------------------------------------------
                    //            create a running pointer
                    //---------------------------------------------------------------------
                    p = 0;

                    // First three factors share one lhs band set: 10 values per point.
                    for (k = start[c, 2]; k < ksize - end[c, 2]; k++)
                    {
                        for (j = start[c, 1]; j < jsize - end[c, 1]; j++)
                        {
                            lhs[c, k, j, i, n + 2] = lhs[c, k, j, i, n + 2] -
                                                     in_buffer_x[p] * lhs[c, k, j, i, n + 1];
                            lhs[c, k, j, i, n + 3] = lhs[c, k, j, i, n + 3] -
                                                     in_buffer_x[p + 1] * lhs[c, k, j, i, n + 1];
                            for (m = 0; m <= 2; m++)
                            {
                                rhs[c, k, j, i, m] = rhs[c, k, j, i, m] -
                                                     in_buffer_x[p + 2 + m] * lhs[c, k, j, i, n + 1];
                            }

                            d = in_buffer_x[p + 5];
                            e = in_buffer_x[p + 6];
                            for (m = 0; m <= 2; m++)
                            {
                                s[m] = in_buffer_x[p + 7 + m];
                            }
                            r1 = lhs[c, k, j, i, n + 2];
                            lhs[c, k, j, i, n + 3] = lhs[c, k, j, i, n + 3] - d * r1;
                            lhs[c, k, j, i, n + 4] = lhs[c, k, j, i, n + 4] - e * r1;
                            for (m = 0; m <= 2; m++)
                            {
                                rhs[c, k, j, i, m] = rhs[c, k, j, i, m] - s[m] * r1;
                            }
                            r2 = lhs[c, k, j, i1, n + 1];
                            lhs[c, k, j, i1, n + 2] = lhs[c, k, j, i1, n + 2] - d * r2;
                            lhs[c, k, j, i1, n + 3] = lhs[c, k, j, i1, n + 3] - e * r2;
                            for (m = 0; m <= 2; m++)
                            {
                                rhs[c, k, j, i1, m] = rhs[c, k, j, i1, m] - s[m] * r2;
                            }
                            p = p + 10;
                        }
                    }

                    // Factors 3 and 4 each use their own lhs band set: 6 values per point.
                    for (m = 3; m <= 4; m++)
                    {
                        n = (m - 2) * 5 - 1;
                        for (k = start[c, 2]; k < ksize - end[c, 2]; k++)
                        {
                            for (j = start[c, 1]; j < jsize - end[c, 1]; j++)
                            {
                                lhs[c, k, j, i, n + 2] = lhs[c, k, j, i, n + 2] -
                                                         in_buffer_x[p] * lhs[c, k, j, i, n + 1];
                                // The multiplier is band n + 1, as in the step for the
                                // first three factors above. Band n + 2 has just been
                                // overwritten by the previous statement and must not be
                                // used as the multiplier here.
                                lhs[c, k, j, i, n + 3] = lhs[c, k, j, i, n + 3] -
                                                         in_buffer_x[p + 1] * lhs[c, k, j, i, n + 1];
                                rhs[c, k, j, i, m] = rhs[c, k, j, i, m] -
                                                     in_buffer_x[p + 2] * lhs[c, k, j, i, n + 1];
                                d    = in_buffer_x[p + 3];
                                e    = in_buffer_x[p + 4];
                                s[m] = in_buffer_x[p + 5];
                                r1   = lhs[c, k, j, i, n + 2];
                                lhs[c, k, j, i, n + 3] = lhs[c, k, j, i, n + 3] - d * r1;
                                lhs[c, k, j, i, n + 4] = lhs[c, k, j, i, n + 4] - e * r1;
                                rhs[c, k, j, i, m]     = rhs[c, k, j, i, m] - s[m] * r1;
                                r2 = lhs[c, k, j, i1, n + 1];
                                lhs[c, k, j, i1, n + 2] = lhs[c, k, j, i1, n + 2] - d * r2;
                                lhs[c, k, j, i1, n + 3] = lhs[c, k, j, i1, n + 3] - e * r2;
                                rhs[c, k, j, i1, m]     = rhs[c, k, j, i1, m] - s[m] * r2;
                                p = p + 6;
                            }
                        }
                    }
                    #endregion
                }
                else
                {
                    Lhs.go();
                }

                Forward.go();

                //---------------------------------------------------------------------
                //         send information to the next processor, except when this
                //         is the last grid block
                //---------------------------------------------------------------------
                if (stage != ncells - 1)
                {
                    #region write buffer
                    //---------------------------------------------------------------------
                    //            create a running pointer for the send buffer
                    //---------------------------------------------------------------------
                    p = 0;
                    n = -1;

                    // First three factors: 5 values per (point, plane) for the last two i-planes.
                    for (k = start[c, 2]; k < ksize - end[c, 2]; k++)
                    {
                        for (j = start[c, 1]; j < jsize - end[c, 1]; j++)
                        {
                            for (i = iend - 1; i <= iend; i++)
                            {
                                out_buffer_x[p]     = lhs[c, k, j, i, n + 4];
                                out_buffer_x[p + 1] = lhs[c, k, j, i, n + 5];
                                for (m = 0; m <= 2; m++)
                                {
                                    out_buffer_x[p + 2 + m] = rhs[c, k, j, i, m];
                                }
                                p = p + 5;
                            }
                        }
                    }

                    // Factors 3 and 4: 3 values per (point, plane).
                    for (m = 3; m <= 4; m++)
                    {
                        n = (m - 2) * 5 - 1;
                        for (k = start[c, 2]; k < ksize - end[c, 2]; k++)
                        {
                            for (j = start[c, 1]; j < jsize - end[c, 1]; j++)
                            {
                                for (i = iend - 1; i <= iend; i++)
                                {
                                    out_buffer_x[p]     = lhs[c, k, j, i, n + 4];
                                    out_buffer_x[p + 1] = lhs[c, k, j, i, n + 5];
                                    out_buffer_x[p + 2] = rhs[c, k, j, i, m];
                                    p = p + 3;
                                }
                            }
                        }
                    }
                    #endregion

                    Shift_forward.initiate_send();
                }
            }         // cells loop

            //---------------------------------------------------------------------
            //                         BACKSUBSTITUTION
            //---------------------------------------------------------------------

            for (stage = ncells - 1; stage >= 0; stage--)
            {
                Backward.enterStage(stage);

                c = slice[stage, 0];

                istart = 2;
                iend   = 2 + cell_size[c, 0] - 1;

                jsize = cell_size[c, 1] + 2;
                ksize = cell_size[c, 2] + 2;

                buffer_size = (jsize - start[c, 1] - end[c, 1]) * (ksize - start[c, 2] - end[c, 2]);

                // 10 doubles per face point: two rhs values for each of the five factors.
                in_buffer_x  = Input_buffer_backward.Array = new double[10 * buffer_size];
                out_buffer_x = Output_buffer_backward.Array = new double[10 * buffer_size];

                if (stage != ncells - 1)
                {
                    Shift_backward.initiate_recv();

                    Matvecproduct.enterStage(stage + 1);
                    Matvecproduct.go();

                    Shift_backward.go();

                    #region read_buffer_x_back

                    //---------------------------------------------------------------------
                    //            unpack the buffer for the first three factors
                    //---------------------------------------------------------------------
                    n  = -1;
                    p  = 0;
                    i  = iend;
                    i1 = i - 1;
                    for (m = 0; m <= 2; m++)
                    {
                        for (k = start[c, 2]; k < ksize - end[c, 2]; k++)
                        {
                            for (j = start[c, 1]; j < jsize - end[c, 1]; j++)
                            {
                                sm1 = in_buffer_x[p];
                                sm2 = in_buffer_x[p + 1];
                                rhs[c, k, j, i, m] = rhs[c, k, j, i, m] -
                                                     lhs[c, k, j, i, n + 4] * sm1 -
                                                     lhs[c, k, j, i, n + 5] * sm2;
                                // Uses the value of rhs[.., i, m] just updated above.
                                rhs[c, k, j, i1, m] = rhs[c, k, j, i1, m] -
                                                      lhs[c, k, j, i1, n + 4] * rhs[c, k, j, i, m] -
                                                      lhs[c, k, j, i1, n + 5] * sm1;
                                p = p + 2;
                            }
                        }
                    }

                    //---------------------------------------------------------------------
                    //            now unpack the buffer for the remaining two factors
                    //---------------------------------------------------------------------
                    for (m = 3; m <= 4; m++)
                    {
                        n = (m - 2) * 5 - 1;
                        for (k = start[c, 2]; k < ksize - end[c, 2]; k++)
                        {
                            for (j = start[c, 1]; j < jsize - end[c, 1]; j++)
                            {
                                sm1 = in_buffer_x[p];
                                sm2 = in_buffer_x[p + 1];
                                rhs[c, k, j, i, m] = rhs[c, k, j, i, m] -
                                                     lhs[c, k, j, i, n + 4] * sm1 -
                                                     lhs[c, k, j, i, n + 5] * sm2;
                                rhs[c, k, j, i1, m] = rhs[c, k, j, i1, m] -
                                                      lhs[c, k, j, i1, n + 4] * rhs[c, k, j, i, m] -
                                                      lhs[c, k, j, i1, n + 5] * sm1;
                                p = p + 2;
                            }
                        }
                    }
                    #endregion
                }
                else
                {
                    Backward.init();
                }

                Backward.go();

                //---------------------------------------------------------------------
                //         send on information to the previous processor, if needed
                //---------------------------------------------------------------------
                if (stage != 0)
                {
                    #region write buffer
                    // Pack the first two i-planes of rhs, two values per (factor, point).
                    i  = istart;
                    i1 = istart + 1;
                    p  = 0;
                    for (m = 0; m <= 4; m++)
                    {
                        for (k = start[c, 2]; k < ksize - end[c, 2]; k++)
                        {
                            for (j = start[c, 1]; j < jsize - end[c, 1]; j++)
                            {
                                out_buffer_x[p]     = rhs[c, k, j, i, m];
                                out_buffer_x[p + 1] = rhs[c, k, j, i1, m];
                                p = p + 2;
                            }
                        }
                    }
                    #endregion

                    Shift_backward.initiate_send();
                }

                //---------------------------------------------------------------------
                //         If this was the last stage, do the block-diagonal inversion
                //---------------------------------------------------------------------
                if (stage == 0)
                {
                    Matvecproduct.enterStage(stage);
                    Matvecproduct.go();
                }
            }

            return(0);
        }         // end go method