/// <summary>
/// Runs the two-pass staged solve over the cells selected by <c>slice[stage, 1]</c>:
/// a forward-elimination pass over stages 0 .. ncells-1, followed by a
/// back-substitution pass over stages ncells-1 .. 0.
/// </summary>
/// <remarks>
/// Forward buffers hold 22 doubles per face point: 10 for the first three
/// right-hand-side components (which share lhs entries 0..4) plus 6 for each of the
/// components 3 and 4 (which use lhs entries 5..9 and 10..14). Backward buffers hold
/// 10 doubles per face point (2 rows x 5 components).
/// NOTE(review): what Lhs.go(), Forward.go(), Backward.go() and Matvecproduct.go()
/// compute is not visible from this method; only their call order is preserved here.
/// </remarks>
/// <returns>Always 0.</returns>
public override int go()
{
    double[] carry = new double[5];   // per-component values taken from the incoming buffer
    double[] recv;                    // buffer filled by the shift component
    double[] send;                    // buffer handed to the shift component

    //---------------------------------------------------------------------
    // FORWARD ELIMINATION
    //---------------------------------------------------------------------
    for (int stage = 0; stage < ncells; stage++)
    {
        Lhs.enterStage(stage);
        Forward.enterStage(stage);

        int c = slice[stage, 1];
        int jFirst = 2;
        int jLast = 2 + cell_size[c, 1] - 1;
        int isize = cell_size[c, 0] + 2;
        int ksize = cell_size[c, 2] + 2;

        // Number of (k, i) points on the face exchanged between stages.
        int facePoints = (isize - start[c, 0] - end[c, 0])
                       * (ksize - start[c, 2] - end[c, 2]);

        recv = new double[22 * facePoints];
        Input_buffer_forward.Array = recv;
        send = new double[22 * facePoints];
        Output_buffer_forward.Array = send;

        if (stage == 0)
        {
            // First stage: nothing to receive.
            Lhs.go();
        }
        else
        {
            Shift_forward.initiate_recv();
            Lhs.go();
            Shift_forward.go();

            //-------------------------------------------------------------
            // Unpack the received buffer into rows jFirst and jFirst + 1.
            //-------------------------------------------------------------
            int j = jFirst;
            int jNext = jFirst + 1;
            int kLo = start[c, 2];
            int kHi = ksize - end[c, 2];
            int iLo = start[c, 0];
            int iHi = isize - end[c, 0];
            int p = 0;   // running read position in recv
            int b = 0;   // index of the first of the five lhs entries in use

            // Components 0..2: 10 doubles per point.
            for (int k = kLo; k < kHi; k++)
            {
                for (int i = iLo; i < iHi; i++)
                {
                    lhs[c, k, j, i, b + 1] -= recv[p] * lhs[c, k, j, i, b];
                    lhs[c, k, j, i, b + 2] -= recv[p + 1] * lhs[c, k, j, i, b];
                    for (int m = 0; m < 3; m++)
                    {
                        rhs[c, k, j, i, m] -= recv[p + 2 + m] * lhs[c, k, j, i, b];
                    }

                    double d = recv[p + 5];
                    double e = recv[p + 6];
                    for (int m = 0; m < 3; m++)
                    {
                        carry[m] = recv[p + 7 + m];
                    }

                    double r1 = lhs[c, k, j, i, b + 1];
                    lhs[c, k, j, i, b + 2] -= d * r1;
                    lhs[c, k, j, i, b + 3] -= e * r1;
                    for (int m = 0; m < 3; m++)
                    {
                        rhs[c, k, j, i, m] -= carry[m] * r1;
                    }

                    double r2 = lhs[c, k, jNext, i, b];
                    lhs[c, k, jNext, i, b + 1] -= d * r2;
                    lhs[c, k, jNext, i, b + 2] -= e * r2;
                    for (int m = 0; m < 3; m++)
                    {
                        rhs[c, k, jNext, i, m] -= carry[m] * r2;
                    }

                    p += 10;
                }
            }

            // Components 3 and 4: 6 doubles per point, each with its own lhs group.
            for (int m = 3; m <= 4; m++)
            {
                b = (m - 2) * 5;
                for (int k = kLo; k < kHi; k++)
                {
                    for (int i = iLo; i < iHi; i++)
                    {
                        lhs[c, k, j, i, b + 1] -= recv[p] * lhs[c, k, j, i, b];
                        lhs[c, k, j, i, b + 2] -= recv[p + 1] * lhs[c, k, j, i, b];
                        rhs[c, k, j, i, m] -= recv[p + 2] * lhs[c, k, j, i, b];

                        double d = recv[p + 3];
                        double e = recv[p + 4];
                        carry[m] = recv[p + 5];

                        double r1 = lhs[c, k, j, i, b + 1];
                        lhs[c, k, j, i, b + 2] -= d * r1;
                        lhs[c, k, j, i, b + 3] -= e * r1;
                        rhs[c, k, j, i, m] -= carry[m] * r1;

                        double r2 = lhs[c, k, jNext, i, b];
                        lhs[c, k, jNext, i, b + 1] -= d * r2;
                        lhs[c, k, jNext, i, b + 2] -= e * r2;
                        rhs[c, k, jNext, i, m] -= carry[m] * r2;

                        p += 6;
                    }
                }
            }
        }

        Forward.go();

        //-----------------------------------------------------------------
        // Send information to the next processor, except when this is the
        // last grid block.
        //-----------------------------------------------------------------
        if (stage != ncells - 1)
        {
            int kLo = start[c, 2];
            int kHi = ksize - end[c, 2];
            int iLo = start[c, 0];
            int iHi = isize - end[c, 0];
            int p = 0;   // running write position in send
            int b = 0;

            // Components 0..2: rows jLast - 1 and jLast, 5 doubles per row.
            for (int k = kLo; k < kHi; k++)
            {
                for (int i = iLo; i < iHi; i++)
                {
                    for (int j = jLast - 1; j <= jLast; j++)
                    {
                        send[p] = lhs[c, k, j, i, b + 3];
                        send[p + 1] = lhs[c, k, j, i, b + 4];
                        for (int m = 0; m < 3; m++)
                        {
                            send[p + 2 + m] = rhs[c, k, j, i, m];
                        }
                        p += 5;
                    }
                }
            }

            // Components 3 and 4: 3 doubles per row.
            for (int m = 3; m <= 4; m++)
            {
                b = (m - 2) * 5;
                for (int k = kLo; k < kHi; k++)
                {
                    for (int i = iLo; i < iHi; i++)
                    {
                        for (int j = jLast - 1; j <= jLast; j++)
                        {
                            send[p] = lhs[c, k, j, i, b + 3];
                            send[p + 1] = lhs[c, k, j, i, b + 4];
                            send[p + 2] = rhs[c, k, j, i, m];
                            p += 3;
                        }
                    }
                }
            }

            Shift_forward.initiate_send();
        }
    }

    //---------------------------------------------------------------------
    // BACKSUBSTITUTION: now go in the reverse direction
    //---------------------------------------------------------------------
    for (int stage = ncells - 1; stage >= 0; stage--)
    {
        Backward.enterStage(stage);

        int c = slice[stage, 1];
        int jFirst = 2;
        int jLast = 2 + cell_size[c, 1] - 1;
        int isize = cell_size[c, 0] + 2;
        int ksize = cell_size[c, 2] + 2;
        int facePoints = (isize - start[c, 0] - end[c, 0])
                       * (ksize - start[c, 2] - end[c, 2]);

        recv = new double[10 * facePoints];
        Input_buffer_backward.Array = recv;
        send = new double[10 * facePoints];
        Output_buffer_backward.Array = send;

        if (stage == ncells - 1)
        {
            // Last grid block: nothing to receive.
            Backward.init();
        }
        else
        {
            Shift_backward.initiate_recv();
            Matvecproduct.enterStage(stage + 1);
            Matvecproduct.go();
            Shift_backward.go();

            //-------------------------------------------------------------
            // Unpack the received buffer into rows jLast and jLast - 1.
            // Components 0..2 share lhs entries 3 and 4; components 3 and 4
            // use entries b + 3 and b + 4 of their own group.
            //-------------------------------------------------------------
            int j = jLast;
            int jPrev = j - 1;
            int kLo = start[c, 2];
            int kHi = ksize - end[c, 2];
            int iLo = start[c, 0];
            int iHi = isize - end[c, 0];
            int p = 0;

            for (int m = 0; m <= 4; m++)
            {
                int b = (m < 3) ? 0 : (m - 2) * 5;
                for (int k = kLo; k < kHi; k++)
                {
                    for (int i = iLo; i < iHi; i++)
                    {
                        double sm1 = recv[p];
                        double sm2 = recv[p + 1];

                        rhs[c, k, j, i, m] = rhs[c, k, j, i, m]
                            - lhs[c, k, j, i, b + 3] * sm1
                            - lhs[c, k, j, i, b + 4] * sm2;

                        // Uses the value of row j that was just updated above.
                        rhs[c, k, jPrev, i, m] = rhs[c, k, jPrev, i, m]
                            - lhs[c, k, jPrev, i, b + 3] * rhs[c, k, j, i, m]
                            - lhs[c, k, jPrev, i, b + 4] * sm1;

                        p += 2;
                    }
                }
            }
        }

        Backward.go();

        if (stage != 0)
        {
            //-------------------------------------------------------------
            // Send on information to the previous processor: rows jFirst
            // and jFirst + 1 of every component.
            //-------------------------------------------------------------
            int j = jFirst;
            int jNext = jFirst + 1;
            int kLo = start[c, 2];
            int kHi = ksize - end[c, 2];
            int iLo = start[c, 0];
            int iHi = isize - end[c, 0];
            int p = 0;

            for (int m = 0; m <= 4; m++)
            {
                for (int k = kLo; k < kHi; k++)
                {
                    for (int i = iLo; i < iHi; i++)
                    {
                        send[p] = rhs[c, k, j, i, m];
                        send[p + 1] = rhs[c, k, jNext, i, m];
                        p += 2;
                    }
                }
            }

            Shift_backward.initiate_send();
        }
        else
        {
            //-------------------------------------------------------------
            // This was the last stage: do the block-diagonal inversion.
            //-------------------------------------------------------------
            Matvecproduct.enterStage(stage);
            Matvecproduct.go();
        }
    }

    return 0;
}
/// <summary>
/// Runs the two-pass staged solve over the cells selected by <c>slice[stage, 0]</c>:
/// a forward-elimination pass over stages 0 .. ncells-1, followed by a
/// back-substitution pass over stages ncells-1 .. 0.
/// </summary>
/// <remarks>
/// Forward buffers hold 22 doubles per face point: 10 for the first three
/// right-hand-side components (lhs entries n+1 .. n+5 with n = -1) plus 6 for each of
/// components 3 and 4 (n = (m - 2) * 5 - 1). Backward buffers hold 10 doubles per
/// face point (2 columns x 5 components).
/// NOTE(review): what Lhs.go(), Forward.go(), Backward.go() and Matvecproduct.go()
/// compute is not visible from this method; only their call order matters here.
/// </remarks>
/// <returns>Always 0.</returns>
public override int go()
{
    int c, i, j, k, n, iend, jsize, ksize, i1, m, buffer_size, p, istart, stage;
    double r1, r2, d, e, sm1, sm2;
    double[] s = new double[5];
    double[] in_buffer_x;
    double[] out_buffer_x;

    //---------------------------------------------------------------------
    // FORWARD ELIMINATION
    //---------------------------------------------------------------------
    for (stage = 0; stage < ncells; stage++)
    {
        Forward.enterStage(stage);
        Lhs.enterStage(stage);

        c = slice[stage, 0];
        istart = 2;
        iend = 2 + cell_size[c, 0] - 1;
        jsize = cell_size[c, 1] + 2;
        ksize = cell_size[c, 2] + 2;

        // Number of (k, j) points on the face exchanged between stages.
        buffer_size = (jsize - start[c, 1] - end[c, 1]) * (ksize - start[c, 2] - end[c, 2]);
        Input_buffer_forward.Array = in_buffer_x = new double[22 * buffer_size];
        Output_buffer_forward.Array = out_buffer_x = new double[22 * buffer_size];

        if (stage != 0)
        {
            Shift_forward.initiate_recv();
            Lhs.go();
            Shift_forward.go();

            #region read buffer
            //-------------------------------------------------------------
            // unpack the buffer into columns istart and istart + 1
            //-------------------------------------------------------------
            i = istart;
            i1 = istart + 1;
            n = -1;

            //-------------------------------------------------------------
            // create a running pointer
            //-------------------------------------------------------------
            p = 0;

            // Components 0..2: 10 doubles per point.
            for (k = start[c, 2]; k < ksize - end[c, 2]; k++)
            {
                for (j = start[c, 1]; j < jsize - end[c, 1]; j++)
                {
                    lhs[c, k, j, i, n + 2] = lhs[c, k, j, i, n + 2] - in_buffer_x[p] * lhs[c, k, j, i, n + 1];
                    lhs[c, k, j, i, n + 3] = lhs[c, k, j, i, n + 3] - in_buffer_x[p + 1] * lhs[c, k, j, i, n + 1];
                    for (m = 0; m <= 2; m++)
                    {
                        rhs[c, k, j, i, m] = rhs[c, k, j, i, m] - in_buffer_x[p + 2 + m] * lhs[c, k, j, i, n + 1];
                    }

                    d = in_buffer_x[p + 5];
                    e = in_buffer_x[p + 6];
                    for (m = 0; m <= 2; m++)
                    {
                        s[m] = in_buffer_x[p + 7 + m];
                    }

                    r1 = lhs[c, k, j, i, n + 2];
                    lhs[c, k, j, i, n + 3] = lhs[c, k, j, i, n + 3] - d * r1;
                    lhs[c, k, j, i, n + 4] = lhs[c, k, j, i, n + 4] - e * r1;
                    for (m = 0; m <= 2; m++)
                    {
                        rhs[c, k, j, i, m] = rhs[c, k, j, i, m] - s[m] * r1;
                    }

                    r2 = lhs[c, k, j, i1, n + 1];
                    lhs[c, k, j, i1, n + 2] = lhs[c, k, j, i1, n + 2] - d * r2;
                    lhs[c, k, j, i1, n + 3] = lhs[c, k, j, i1, n + 3] - e * r2;
                    for (m = 0; m <= 2; m++)
                    {
                        rhs[c, k, j, i1, m] = rhs[c, k, j, i1, m] - s[m] * r2;
                    }

                    p = p + 10;
                }
            }

            // Components 3 and 4: 6 doubles per point, each with its own lhs group.
            for (m = 3; m <= 4; m++)
            {
                n = (m - 2) * 5 - 1;
                for (k = start[c, 2]; k < ksize - end[c, 2]; k++)
                {
                    for (j = start[c, 1]; j < jsize - end[c, 1]; j++)
                    {
                        // All three updates are scaled by lhs[..., n + 1], exactly as
                        // in the three-component group. Scaling the second update by
                        // lhs[..., n + 2] would use a value just overwritten by the
                        // first update and break the elimination for components 3, 4.
                        lhs[c, k, j, i, n + 2] = lhs[c, k, j, i, n + 2] - in_buffer_x[p] * lhs[c, k, j, i, n + 1];
                        lhs[c, k, j, i, n + 3] = lhs[c, k, j, i, n + 3] - in_buffer_x[p + 1] * lhs[c, k, j, i, n + 1];
                        rhs[c, k, j, i, m] = rhs[c, k, j, i, m] - in_buffer_x[p + 2] * lhs[c, k, j, i, n + 1];

                        d = in_buffer_x[p + 3];
                        e = in_buffer_x[p + 4];
                        s[m] = in_buffer_x[p + 5];

                        r1 = lhs[c, k, j, i, n + 2];
                        lhs[c, k, j, i, n + 3] = lhs[c, k, j, i, n + 3] - d * r1;
                        lhs[c, k, j, i, n + 4] = lhs[c, k, j, i, n + 4] - e * r1;
                        rhs[c, k, j, i, m] = rhs[c, k, j, i, m] - s[m] * r1;

                        r2 = lhs[c, k, j, i1, n + 1];
                        lhs[c, k, j, i1, n + 2] = lhs[c, k, j, i1, n + 2] - d * r2;
                        lhs[c, k, j, i1, n + 3] = lhs[c, k, j, i1, n + 3] - e * r2;
                        rhs[c, k, j, i1, m] = rhs[c, k, j, i1, m] - s[m] * r2;

                        p = p + 6;
                    }
                }
            }
            #endregion
        }
        else
        {
            // First stage: nothing to receive.
            Lhs.go();
        }

        Forward.go();

        //-----------------------------------------------------------------
        // send information to the next processor, except when this
        // is the last grid block
        //-----------------------------------------------------------------
        if (stage != ncells - 1)
        {
            #region write buffer
            //-------------------------------------------------------------
            // create a running pointer for the send buffer
            //-------------------------------------------------------------
            p = 0;
            n = -1;

            // Components 0..2: columns iend - 1 and iend, 5 doubles per column.
            for (k = start[c, 2]; k < ksize - end[c, 2]; k++)
            {
                for (j = start[c, 1]; j < jsize - end[c, 1]; j++)
                {
                    for (i = iend - 1; i <= iend; i++)
                    {
                        out_buffer_x[p] = lhs[c, k, j, i, n + 4];
                        out_buffer_x[p + 1] = lhs[c, k, j, i, n + 5];
                        for (m = 0; m <= 2; m++)
                        {
                            out_buffer_x[p + 2 + m] = rhs[c, k, j, i, m];
                        }
                        p = p + 5;
                    }
                }
            }

            // Components 3 and 4: 3 doubles per column.
            for (m = 3; m <= 4; m++)
            {
                n = (m - 2) * 5 - 1;
                for (k = start[c, 2]; k < ksize - end[c, 2]; k++)
                {
                    for (j = start[c, 1]; j < jsize - end[c, 1]; j++)
                    {
                        for (i = iend - 1; i <= iend; i++)
                        {
                            out_buffer_x[p] = lhs[c, k, j, i, n + 4];
                            out_buffer_x[p + 1] = lhs[c, k, j, i, n + 5];
                            out_buffer_x[p + 2] = rhs[c, k, j, i, m];
                            p = p + 3;
                        }
                    }
                }
            }
            #endregion

            Shift_forward.initiate_send();
        }
    } // cells loop

    //---------------------------------------------------------------------
    // BACKSUBSTITUTION
    //---------------------------------------------------------------------
    for (stage = ncells - 1; stage >= 0; stage--)
    {
        Backward.enterStage(stage);

        c = slice[stage, 0];
        istart = 2;
        iend = 2 + cell_size[c, 0] - 1;
        jsize = cell_size[c, 1] + 2;
        ksize = cell_size[c, 2] + 2;

        buffer_size = (jsize - start[c, 1] - end[c, 1]) * (ksize - start[c, 2] - end[c, 2]);
        in_buffer_x = Input_buffer_backward.Array = new double[10 * buffer_size];
        out_buffer_x = Output_buffer_backward.Array = new double[10 * buffer_size];

        if (stage != ncells - 1)
        {
            Shift_backward.initiate_recv();
            Matvecproduct.enterStage(stage + 1);
            Matvecproduct.go();
            Shift_backward.go();

            #region read_buffer_x_back
            //-------------------------------------------------------------
            // unpack the buffer for the first three factors
            //-------------------------------------------------------------
            n = -1;
            p = 0;
            i = iend;
            i1 = i - 1;
            for (m = 0; m <= 2; m++)
            {
                for (k = start[c, 2]; k < ksize - end[c, 2]; k++)
                {
                    for (j = start[c, 1]; j < jsize - end[c, 1]; j++)
                    {
                        sm1 = in_buffer_x[p];
                        sm2 = in_buffer_x[p + 1];
                        rhs[c, k, j, i, m] = rhs[c, k, j, i, m]
                            - lhs[c, k, j, i, n + 4] * sm1
                            - lhs[c, k, j, i, n + 5] * sm2;
                        // Uses the value of column i that was just updated above.
                        rhs[c, k, j, i1, m] = rhs[c, k, j, i1, m]
                            - lhs[c, k, j, i1, n + 4] * rhs[c, k, j, i, m]
                            - lhs[c, k, j, i1, n + 5] * sm1;
                        p = p + 2;
                    }
                }
            }

            //-------------------------------------------------------------
            // now unpack the buffer for the remaining two factors
            //-------------------------------------------------------------
            for (m = 3; m <= 4; m++)
            {
                n = (m - 2) * 5 - 1;
                for (k = start[c, 2]; k < ksize - end[c, 2]; k++)
                {
                    for (j = start[c, 1]; j < jsize - end[c, 1]; j++)
                    {
                        sm1 = in_buffer_x[p];
                        sm2 = in_buffer_x[p + 1];
                        rhs[c, k, j, i, m] = rhs[c, k, j, i, m]
                            - lhs[c, k, j, i, n + 4] * sm1
                            - lhs[c, k, j, i, n + 5] * sm2;
                        rhs[c, k, j, i1, m] = rhs[c, k, j, i1, m]
                            - lhs[c, k, j, i1, n + 4] * rhs[c, k, j, i, m]
                            - lhs[c, k, j, i1, n + 5] * sm1;
                        p = p + 2;
                    }
                }
            }
            #endregion
        }
        else
        {
            // Last grid block: nothing to receive.
            Backward.init();
        }

        Backward.go();

        //-----------------------------------------------------------------
        // send on information to the previous processor, if needed
        //-----------------------------------------------------------------
        if (stage != 0)
        {
            #region write buffer
            // Columns istart and istart + 1 of every component, 2 doubles per point.
            i = istart;
            i1 = istart + 1;
            p = 0;
            for (m = 0; m <= 4; m++)
            {
                for (k = start[c, 2]; k < ksize - end[c, 2]; k++)
                {
                    for (j = start[c, 1]; j < jsize - end[c, 1]; j++)
                    {
                        out_buffer_x[p] = rhs[c, k, j, i, m];
                        out_buffer_x[p + 1] = rhs[c, k, j, i1, m];
                        p = p + 2;
                    }
                }
            }
            #endregion

            Shift_backward.initiate_send();
        }

        //-----------------------------------------------------------------
        // If this was the last stage, do the block-diagonal inversion
        //-----------------------------------------------------------------
        if (stage == 0)
        {
            Matvecproduct.enterStage(stage);
            Matvecproduct.go();
        }
    }

    return (0);
} // end activate method