/// <summary>
/// Solves A * x = b with a Gauss-Seidel iteration distributed over the processes of
/// <paramref name="comm"/>. See https://en.wikipedia.org/wiki/Gauss%E2%80%93Seidel_method
/// </summary>
/// <param name="A">Coefficient matrix. Only dereferenced on rank 0.</param>
/// <param name="b">Right-hand side column matrix. Rank 0's value is broadcast to all ranks.</param>
/// <param name="x">
/// Solution estimate. Starts as <c>Matrix.zeroLike(b)</c> and is replaced every iteration by
/// <c>comm.Reduce(new_x, Matrix.Concatenate, 0)</c>; rank 0 rounds it with <c>Round(1e-14)</c>
/// before returning. NOTE(review): the value left on non-root ranks is whatever Reduce hands
/// back to non-root processes - confirm before using it there.
/// </param>
/// <param name="err">On rank 0: <c>A * x - b</c>, rounded with <c>Round(1e-14)</c>. Null on every other rank.</param>
/// <param name="loops">Number of iterations that were executed (at most 100).</param>
/// <param name="comm">Communicator whose ranks share the work; rank 0 acts as root.</param>
/// <returns>True if the convergence test passed on all ranks within the loop limit; otherwise false.</returns>
/// <exception cref="ArgumentException">
/// Thrown on rank 0 when A is not square, b is not a column, or their heights differ.
/// </exception>
public static Boolean solve(Matrix A, Matrix b, out Matrix x, out Matrix err, out int loops, Intracommunicator comm)
{
    // Sanity check, rank 0 only.
    // NOTE(review): only rank 0 throws; the other ranks continue into the collective calls
    // below and may block there - confirm how callers handle this.
    if (comm.Rank == 0 && (!A.isSquare || !b.isColumn || (A.Height != b.Height)))
    {
        throw new ArgumentException("Matrix A must be square! Matrix b must be a column matrix with the same height as matrix A!");
    }

    // bm times the whole call; bm2 times the setup/teardown phases; bm3 times the iteration loop.
    benchmark bm = new benchmark(), bm2 = new benchmark(), bm3 = new benchmark();
    double sequential = 0, parallel = 0, communication = 0;
    bm.start();
    bm2.start();

    // Decompose A into the sum of a lower triangular component L* and a strict upper
    // triangular component U. Done on rank 0 only; L stays null everywhere else.
    int size = 0;
    Matrix L = null, U = null, L_1;
    if (comm.Rank == 0)
    {
        size = A.Height;
        Matrix.Decompose(A, out L, out U);
    }
    bm2.pause();
    sequential += bm2.getElapsedSeconds();

    // Share the problem size, U and b with every rank.
    bm2.start();
    comm.Broadcast(ref size, 0);
    comm.Broadcast(ref U, 0);
    comm.Broadcast(ref b, 0);
    bm2.pause();
    communication += bm2.getElapsedSeconds();

    // Inverse of L*. Non-root ranks pass L == null here.
    comm.Barrier();
    L_1 = MatrixParallel.Inverse(L, comm, ref sequential, ref parallel, ref communication);

    // Main iteration: x (at step k+1) = T * x (at step k) + C
    // where T = - (inverse of L*) * U, and C = (inverse of L*) * b.
    // Only the inverse of L* is split into groups of rows, one group per rank; each rank then
    // computes its own pieces of T and C locally. The rest of T and C is not needed there.
    // There might be cases where some ranks get no rows at all.
    bm2.start();
    Matrix jobDistro = Utils.splitJob(size, comm.Size);

    // First row owned by this rank = total number of rows assigned to all lower ranks.
    int startRow = 0;
    for (int p = 0; p < comm.Rank; p++)
    {
        startRow += (int)jobDistro[0, p];
    }

    // Rank 0 cuts the inverse into one row block per rank, in rank order.
    Matrix[] L_1Ps = new Matrix[comm.Size];
    if (comm.Rank == 0)
    {
        int slaveStart = 0;
        for (int p = 0; p < comm.Size; p++)
        {
            int rowCount = (int)jobDistro[0, p];
            L_1Ps[p] = Matrix.extractRows(L_1, slaveStart, slaveStart + rowCount - 1);
            slaveStart += rowCount;
        }
    }
    bm2.pause();
    sequential += bm2.getElapsedSeconds();

    bm2.start();
    Matrix L_1P = comm.Scatter(L_1Ps, 0);
    bm2.pause();
    communication += bm2.getElapsedSeconds();

    // Local pieces of T and C built from this rank's rows of the inverse.
    bm2.start();
    Matrix T = -L_1P * U;
    Matrix C = L_1P * b;
    bm2.pause();
    parallel += bm2.getElapsedSeconds();

    // The actual iteration.
    // If it still doesn't converge after this many loops, assume it won't converge and give up.
    Boolean converge = false;
    int loopLimit = 100;
    x = Matrix.zeroLike(b); // x at step k
    for (loops = 0; loops < loopLimit; loops++)
    {
        // (Re-)distribute the x vector. Must be done every single loop because this
        // iteration needs the x assembled on rank 0 in the previous one.
        bm3.start();
        comm.Broadcast(ref x, 0);
        bm3.pause();
        communication += bm3.getElapsedSeconds();

        // Calculation step: this rank's rows of the next x.
        bm3.start();
        comm.Barrier();
        Matrix new_x = T * x + C;

        // Check convergence of the local piece against x, passing this rank's first row.
        converge = Matrix.SomeClose(new_x, x, 1e-15, startRow);

        // Collect the pieces of the new x on rank 0.
        comm.Barrier();
        x = comm.Reduce(new_x, Matrix.Concatenate, 0);

        // Collect convergence: consider converged only if ALL ranks claim so, then
        // broadcast the verdict so that EVERYONE breaks/continues together.
        converge = comm.Reduce(converge, bothTrue, 0);
        comm.Broadcast(ref converge, 0);
        bm3.pause();
        parallel += bm3.getElapsedSeconds();

        if (converge)
        {
            // Count the iteration that just finished before leaving the loop.
            loops++;
            break;
        }
    }

    // Round the result slightly and compute the residual; rank 0 only.
    bm2.start();
    err = null;
    if (comm.Rank == 0)
    {
        x.Round(1e-14);
        err = A * x - b;
        err.Round(1e-14);
    }
    bm2.pause();
    sequential += bm2.getElapsedSeconds();
    bm.pause();

    if (showBenchmark)
    {
        Console.WriteLine("Sequential part took " + sequential + " secs.");
        Console.WriteLine("Parallel part took " + parallel + " secs.");
        Console.WriteLine("Communication took " + communication + " secs.");
        Console.WriteLine("Total: " + bm.getResult() + " (" + bm.getElapsedSeconds() + " secs). Seq + Parallel: " + (sequential + parallel));
    }

    return converge;
}
/// <summary>
/// Computes the inverse of <paramref name="matrix"/> from its LUP decomposition, with the
/// columns of the inverse divided among the processes of <paramref name="comm"/>.
/// </summary>
/// <param name="matrix">
/// Matrix to invert. Only dereferenced on rank 0 for the square check and the decomposition,
/// so other ranks may pass null.
/// </param>
/// <param name="comm">Communicator whose ranks share the work; rank 0 acts as root.</param>
/// <param name="timeS">Accumulates the time spent in the rank-0-only sections (decomposition, determinant).</param>
/// <param name="timeP">Accumulates the time spent splitting the job, solving the local columns and reducing the result.</param>
/// <param name="timeC">Accumulates the time spent in the broadcast sections.</param>
/// <returns>
/// The value returned by <c>comm.Reduce(result, ConcatenateColumn, 0)</c>. If the decomposition
/// yields null or the determinant is 0, returns <c>zeroLike(matrix)</c> instead (null on ranks
/// that were given a null <paramref name="matrix"/>).
/// </returns>
/// <exception cref="ArgumentException">Thrown on rank 0 when the matrix is not square.</exception>
public static Matrix Inverse(Matrix matrix, Intracommunicator comm, ref double timeS, ref double timeP, ref double timeC)
{
    if (comm.Rank == 0 && !matrix.isSquare)
    {
        throw new ArgumentException("Matrix must be square!");
    }

    benchmark bm = new benchmark();

    // LUP decomposition on rank 0 only.
    bm.start();
    int n = 0;
    int[] perm = new int[10]; // placeholder; replaced on every rank before it is read
    int toggle = 0;
    Matrix lum = null;
    if (comm.Rank == 0)
    {
        n = matrix.dim1;
        lum = LUPDecompose(matrix, out perm, out toggle);
    }
    bm.pause();
    timeS += bm.getElapsedSeconds();

    // Share the decomposition with every rank.
    bm.start();
    comm.Broadcast(ref n, 0);
    comm.Broadcast(ref lum, 0);
    if (comm.Rank != 0)
    {
        // Receive buffer sized from the broadcast n.
        perm = new int[n];
    }
    comm.Broadcast(ref perm, 0);
    comm.Broadcast(ref toggle, 0);
    comm.Barrier();
    bm.pause();
    timeC += bm.getElapsedSeconds();

    if (lum == null)
    {
        // No decomposition available. The guard avoids handing a null matrix to zeroLike
        // on ranks that were not given one.
        return matrix == null ? null : zeroLike(matrix);
    }

    // Determinant on rank 0, then shared so that every rank takes the same branch below.
    bm.start();
    Double det = 0;
    if (comm.Rank == 0)
    {
        det = Determinant(lum, perm, toggle);
    }
    bm.pause();
    timeS += bm.getElapsedSeconds();

    bm.start();
    comm.Broadcast(ref det, 0);
    comm.Barrier();
    bm.pause();
    timeC += bm.getElapsedSeconds();

    if (det == 0) // not invertible
    {
        // Still return for the sake of simplicity.
        // Zero matrix * any matrix = zero matrix, so it's never a valid answer.
        return matrix == null ? null : zeroLike(matrix);
    }

    // Work out which columns of the inverse this rank is responsible for.
    // (The timer is started here so that the section is actually measured.)
    bm.start();
    int slaves = comm.Size;
    Matrix jobDistro = Utils.splitJob(n, slaves);
    int size = (int)jobDistro[0, comm.Rank];

    // First column owned by this rank = total number of columns assigned to all lower ranks.
    int startCol = 0;
    for (int p = 0; p < comm.Rank; p++)
    {
        startCol += (int)jobDistro[0, p];
    }
    bm.pause();
    timeP += bm.getElapsedSeconds();

    // Solve for each local column: the right-hand side has a single 1.0 at the position j
    // where perm[j] equals the column index, and 0.0 elsewhere.
    bm.start();
    Matrix result = new Matrix(n, size);
    for (int i = startCol; i < startCol + size; ++i)
    {
        double[] b = new double[n];
        for (int j = 0; j < n; ++j)
        {
            b[j] = (i == perm[j]) ? 1.0 : 0.0;
        }

        double[] x = HelperSolve(lum, b);
        for (int j = 0; j < n; ++j)
        {
            result[j, i - startCol] = x[j];
        }
    }
    bm.pause();
    timeP += bm.getElapsedSeconds();

    // Collect the column blocks on rank 0.
    bm.start();
    result = comm.Reduce(result, ConcatenateColumn, 0);
    bm.pause();
    timeP += bm.getElapsedSeconds();

    return result;
}