/// <summary>
/// Grids the local visibilities, sums the grids of all ranks on rank 0, turns the
/// summed grid into a dirty image and gradient map there, and broadcasts the result
/// so every rank returns the same <see cref="DirtyImage"/>.
/// </summary>
/// <param name="comm">Communicator used for the barrier, the reduce and the broadcasts.</param>
/// <param name="c">Gridding constants; <c>VisibilitiesCount</c> is passed to the backward FFT.</param>
/// <param name="metadata">Subgrid partition passed through to <c>IDG.Grid</c>.</param>
/// <param name="visibilities">Local visibilities to grid.</param>
/// <param name="uvw">UVW coordinates passed through to <c>IDG.Grid</c>.</param>
/// <param name="frequencies">Frequencies passed through to <c>IDG.Grid</c>.</param>
/// <param name="PsfCorrelation">Passed to <c>Residuals.CalcGradientMap</c>; only read on rank 0.</param>
/// <param name="psfCut">Only its two dimensions are used, to size the rectangle given to <c>CalcGradientMap</c>.</param>
/// <param name="maxSidelobe">Factor multiplied with the maximum of the dirty image.</param>
/// <param name="watchIdg">Stopwatch that is started and stopped on rank 0 only.</param>
/// <returns>The gradient map and the scaled side-lobe level, identical on every rank after the broadcasts.</returns>
private static DirtyImage ForwardCalculateB(Intracommunicator comm, GriddingConstants c, List<List<Subgrid>> metadata, Complex[,,] visibilities, double[,,] uvw, double[] frequencies, Complex[,] PsfCorrelation, float[,] psfCut, float maxSidelobe, Stopwatch watchIdg)
{
    // All ranks meet here before rank 0 starts its stopwatch.
    comm.Barrier();
    bool isRoot = comm.Rank == 0;
    if (isRoot)
    {
        watchIdg.Start();
    }

    var localGrid = IDG.Grid(c, metadata, visibilities, uvw, frequencies);
    float[,] image = null;
    float maxSideLobeLevel = 0.0f;

    // The summed grid is only delivered to rank 0.
    var grid_total = comm.Reduce<Complex[,]>(localGrid, SequentialSum, 0);
    if (isRoot)
    {
        var dirtyImage = FFT.BackwardFloat(grid_total, c.VisibilitiesCount);
        FFT.Shift(dirtyImage);
        FitsIO.Write(dirtyImage, "dirtyImage.fits");
        maxSideLobeLevel = maxSidelobe * Residuals.GetMax(dirtyImage);

        //remove spheroidal
        image = Residuals.CalcGradientMap(dirtyImage, PsfCorrelation, new Rectangle(0, 0, psfCut.GetLength(0), psfCut.GetLength(1)));
        watchIdg.Stop();
    }

    // Hand the root's results to every other rank.
    comm.Broadcast(ref maxSideLobeLevel, 0);
    comm.Broadcast(ref image, 0);
    return new DirtyImage(image, maxSideLobeLevel);
}
// End StopSubTimer

/// <summary>
/// Synchronizes a boolean across MPI processes by broadcasting the value held by rank 0.
/// Does nothing when only one process is running.
/// </summary>
/// <param name="cosmicboolean">Value sent from rank 0 and overwritten on the other ranks.</param>
public static void SynchronizeMPIvariable(ref bool cosmicboolean)
{
    // A single process has nobody to synchronize with.
    if (MPI_Size <= 1)
    {
        return;
    }

    // The broadcast is bracketed by the sub-timer so its cost is accounted for.
    StartSubTimer(MPIBROADCASTTiming);
    MPI_communicator.Broadcast(ref cosmicboolean, 0);
    StopSubTimer(MPIBROADCASTTiming);
}
/// <summary>
/// Determines a repetition count for the latency test by timing batches of
/// <c>simpleAllreduce</c> calls, doubling the count after every batch.
/// </summary>
/// <returns>The repetition count reached when the timing loop stops (already doubled past the last batch run).</returns>
private static int determineLatencyReps()
{
    const int warmupCalls = 3;
    double elapsed = 0;
    int reps = 1;
    int previousReps = 0;

    // Warm up both the collective and the timer before measuring anything.
    for (int w = 0; w < warmupCalls; w++)
    {
        simpleAllreduce();
    }
    for (int w = 0; w < warmupCalls; w++)
    {
        when();
    }

    // Run for at least 1 time unit; keep going up to 3 while fewer than 1000 reps.
    while (elapsed < 1 || (elapsed < 3 && reps < 1000))
    {
        double began = when();
        int batch = reps - previousReps;
        for (int k = 0; k < batch; k++)
        {
            simpleAllreduce();
        }
        elapsed += when() - began;

        previousReps = reps;
        reps *= 2;

        // Every rank adopts rank 0's elapsed time so all ranks leave the loop together.
        comm.Broadcast(ref elapsed, 0);
    }

    return reps;
}
/// <summary>
/// Parallel Floyd all-pairs update on a row-block distributed matrix. For every
/// <c>global_k</c> the owning process broadcasts row k, and each process relaxes its
/// local rows through that row.
/// </summary>
/// <param name="local_mat">
/// Local rows stored row-major; indexed as <c>local_i * n + j</c> for
/// <c>local_i &lt; n / numproc</c>. Updated in place.
/// </param>
/// <param name="n">Matrix dimension (number of columns and of global rows).</param>
/// <param name="procid">Rank of the calling process.</param>
/// <param name="numproc">Number of processes. NOTE(review): the owner formula assumes n is a multiple of numproc — confirm with callers.</param>
/// <param name="comm">Communicator used to broadcast row k.</param>
public static void Floyd(int[] local_mat, int n, int procid, int numproc, Intracommunicator comm)
{
    // Row buffer sized to the matrix width. The previous fixed size of 40 made
    // row_k[global_j] throw for n > 40 and broadcast a size unrelated to n.
    int[] row_k = new int[n];

    for (int global_k = 0; global_k < n; global_k++)
    {
        // Each process owns n / numproc consecutive rows.
        int root = global_k / (n / numproc);
        //root = Owner(global_k, numproc, n);
        if (procid == root)
        {
            Copy_row(local_mat, n, numproc, row_k, global_k);
        }
        comm.Broadcast<int>(ref row_k, root);

        for (int local_i = 0; local_i < n / numproc; local_i++)
        {
            for (int global_j = 0; global_j < n; global_j++)
            {
                // Path i -> k -> j; keep it when shorter than the current i -> j.
                int temp = local_mat[local_i * n + global_k] + row_k[global_j];
                if (temp < local_mat[local_i * n + global_j])
                {
                    local_mat[local_i * n + global_j] = temp;
                }
            }
        }
    }
}
/// <summary>
/// Broadcasts this <c>Jacoby</c> instance from rank 0 to all processes of
/// <paramref name="communicator"/>, inside a performance-counter scope.
/// </summary>
/// <param name="communicator">Communicator on which the broadcast is performed.</param>
private void SendMatrixToWorkers(Intracommunicator communicator)
{
    // Broadcast takes its argument by ref, so the instance is copied into a local first.
    Jacoby payload = this;
    using (var timing = new PerformanceCounter("Master. Broadcasting Jacoby: "))
    {
        communicator.Broadcast<Jacoby>(ref payload, 0);
    }
}
/// <summary>
/// Demo: the root rank fills an array with 0..arraySize-1 and broadcasts it; every
/// rank sums its own slice and the partial sums are reduced back to the root.
/// </summary>
/// <param name="args">Optional single argument: the array size (default 100).</param>
static void Main(string[] args)
{
    using (new MPI.Environment(ref args))
    {
        // One definition of the root rank, used for fill, broadcast and reduce alike.
        int root = 0;
        int arraySize = 100;
        if (args.Length == 1)
        {
            arraySize = Convert.ToInt32(args[0]);
        }

        Intracommunicator comm = Communicator.world;
        int[] lotsOfNumbers = new int[arraySize];
        if (comm.Rank == root)
        {
            for (int i = 0; i < lotsOfNumbers.Length; i++)
            {
                lotsOfNumbers[i] = i;
            }
        }

        comm.Broadcast(ref lotsOfNumbers, root);

        //divides up the work
        int x = arraySize / comm.Size;
        int startingIndex = comm.Rank * x;
        int endingIndex = startingIndex + x;
        if (comm.Rank == comm.Size - 1)
        {
            // The last rank also takes the remainder left by the integer division.
            endingIndex = lotsOfNumbers.Length;
        }

        int sum = 0;
        for (int i = startingIndex; i < endingIndex; i++)
        {
            sum += lotsOfNumbers[i];
        }

        Console.WriteLine("Rank " + comm.Rank + ": " + "summed the numbers from index " + startingIndex + " to index " + (endingIndex - 1) + " and got " + sum + ".");

        int totalSum = comm.Reduce(sum, Operation<int>.Add, root);
        if (comm.Rank == root)
        {
            Console.WriteLine("The total sum is: " + totalSum);
        }
    }
}
/// <summary>
/// Grids the local PSF contribution, sums the grids of all ranks on rank 0,
/// transforms the sum into the PSF image there and broadcasts it to every rank.
/// </summary>
/// <param name="comm">Communicator used for the reduce and the broadcast.</param>
/// <param name="c">Gridding constants; <c>VisibilitiesCount</c> is passed to the backward FFT.</param>
/// <param name="metadata">Subgrid partition passed through to <c>IDG.GridPSF</c>.</param>
/// <param name="uvw">UVW coordinates passed through to <c>IDG.GridPSF</c>.</param>
/// <param name="flags">Flags passed through to <c>IDG.GridPSF</c>.</param>
/// <param name="frequencies">Frequencies passed through to <c>IDG.GridPSF</c>.</param>
/// <returns>The PSF computed on rank 0, as received by every rank.</returns>
private static float[,] CalculatePSF(Intracommunicator comm, GriddingConstants c, List<List<Subgrid>> metadata, double[,,] uvw, bool[,,] flags, double[] frequencies)
{
    var localPsfGrid = IDG.GridPSF(c, metadata, uvw, flags, frequencies);
    var summedGrid = comm.Reduce(localPsfGrid, SequentialSum, 0);

    // Only rank 0 receives the reduced grid, so only it can produce the image.
    float[,] result = null;
    if (comm.Rank == 0)
    {
        result = FFT.BackwardFloat(summedGrid, c.VisibilitiesCount);
        FFT.Shift(result);
    }

    comm.Broadcast(ref result, 0);
    return result;
}
/// <summary>
/// Worker loop: receives the <c>Jacoby</c> instance broadcast by rank 0, then repeatedly
/// receives a <c>JobToDo</c> from rank 0, computes the requested part and sends a
/// <c>JobDone</c> back, until a job with <c>finish</c> set arrives.
/// </summary>
/// <param name="communicator">Communicator shared with the master (rank 0).</param>
public static void MPIWorker(Intracommunicator communicator)
{
    Jacoby jacoby = null;
    using (var perf = new PerformanceCounter($"Comm {communicator.Rank} waiting for Jacoby duration: "))
    {
        communicator.Broadcast<Jacoby>(ref jacoby, 0);
    }

    using (var totalPerformance = new PerformanceCounter($"Comm {communicator.Rank} total work duration: "))
    {
        // The only way out of this loop is the FINISH job below.
        for (int cycleIndex = 0; ; cycleIndex++)
        {
            using (var perf = new PerformanceCounter($"Comm {communicator.Rank} cycle {cycleIndex} duration: "))
            {
                JobToDo jobToDo;
                using (var perfRecv = new PerformanceCounter($"Comm {communicator.Rank} waiting for job-to-do (cycle {cycleIndex}): "))
                {
                    // Non-blocking receive that is waited on immediately.
                    ReceiveRequest request = communicator.ImmediateReceive<JobToDo>(0, (int)JacobyMessageType.CalculatePart);
                    request.Wait();
                    jobToDo = request.GetValue() as JobToDo;
                    if (jobToDo.finish)
                    {
                        Console.WriteLine($"Comm {communicator.Rank} receive FINISH signal");
                        return;
                    }
                }

                using (var perfWork = new PerformanceCounter($"Comm {communicator.Rank} work duration (cycle {cycleIndex}): "))
                {
                    Console.WriteLine($"Comm {communicator.Rank} start={jobToDo.start} end={jobToDo.end}");
                    var result = jacoby.PartialCalculation(jobToDo.start, jobToDo.end, jobToDo.initial);

                    // NOTE(review): the send request is never waited on or tested — confirm this is intended.
                    var srequest = communicator.ImmediateSend<JobDone>(new JobDone(result), 0, (int)JacobyMessageType.JobDone);
                }
            }
        }
    }
}
/// <summary>
/// Connect-4 entry point. Rank 0 runs the interactive game loop (human = player 1,
/// computer = player 2) and finally broadcasts an EXIT message; all other ranks serve
/// evaluation tasks handed out by rank 0 until EXIT arrives.
/// </summary>
/// <param name="args">Command-line arguments, forwarded to the MPI environment.</param>
static void Main(string[] args)
{
    using (new MPI.Environment(ref args))
    {
        Intracommunicator comm = Communicator.world;
        int Id = comm.Rank;
        Connect4 game = new Connect4(comm, comm.Size, Id);

        if (Id == 0)
        {
            Board board = new Board();
            Stopwatch timer = new Stopwatch();
            Console.WriteLine("Igra krece!");

            // Set when standard input ends; without it a null ReadLine() made the
            // prompt loop spin forever, because Int32.Parse(null) throws and was swallowed.
            bool inputClosed = false;
            while (!inputClosed)
            {
                board.Draw();
                bool kraj = false;
                while (true)
                {
                    Console.WriteLine("Daj potez:");
                    string line = Console.ReadLine();
                    if (line == null)
                    {
                        inputClosed = true;
                        break;
                    }

                    try
                    {
                        int column = Int32.Parse(line);
                        if (board.Put(column, 1))
                        {
                            kraj = true;
                        }
                        break;
                    }
                    catch (Exception)
                    {
                        // Deliberate best effort: unparsable or rejected input just asks again.
                    }
                }

                if (inputClosed)
                {
                    break;
                }
                if (kraj)
                {
                    Console.WriteLine("Pobijedio je igrac!");
                    break;
                }

                timer.Restart();
                int nextMove = game.CalculateNextMove(board.Duplicate());
                Console.WriteLine("Potrebno vrijeme za izračun: " + timer.ElapsedMilliseconds);
                Console.WriteLine("Računalo igra potez " + nextMove);
                if (board.Put(nextMove, 2))
                {
                    Console.WriteLine("Pobijedilo je računalo!");
                    break;
                }
            }

            board.Draw();

            // Always release the workers, however the game loop ended.
            Message exitMsg = new Message(0, EXIT);
            comm.Broadcast<Message>(ref exitMsg, 0);
        }
        else
        {
            while (true)
            {
                // Each round starts with a broadcast from rank 0; EXIT ends the worker.
                Message msg = null;
                comm.Broadcast<Message>(ref msg, 0);
                if (msg.MessageType == EXIT)
                {
                    break;
                }

                // Ask for the first task, then serve tasks until STOP.
                comm.Send<Message>(new Message(Id, NEXT_TASK), 0, 0);
                while (true)
                {
                    msg = comm.Receive<Message>(0, 0);
                    if (msg.MessageType == STOP)
                    {
                        break;
                    }
                    else if (msg.MessageType == TASK)
                    {
                        Task task = msg.task;
                        task.value = game.CalculateStateValue(task.b, game.Reverse(task.nextPlayer), -1, 0);
                        Message answer = new Message(Id, RESULT);
                        answer.task = task;
                        comm.Send<Message>(answer, 0, 0);
                    }
                }
            }
        }
    }
}
/// <summary>
/// Benchmark driver: selects a test variant from the first command-line argument,
/// measures latency and sync time, then runs the selected test over growing buffer
/// lengths (with perturbations around each length) and prints the throughput on rank 0.
/// </summary>
/// <param name="args">Optional mode switch: /direct, /user, /marshaluser, /valuetype or /reftype.</param>
static void Main(string[] args)
{
    // Default: primitive types with the predefined MPI_SUM.
    Test selected = testPrimitiveAndPredefined;
    using (new MPI.Environment(ref args))
    {
        string mode = args.Length > 0 ? args[0] : null;
        switch (mode)
        {
            case "/direct":
                selected = testDirect;
                System.Console.WriteLine("Using direct MPI interface.");
                break;
            case "/user":
                selected = testPrimitiveAndMethod;
                Operation<double>.UseGeneratedUserOps = true;
                System.Console.WriteLine("Using primitive type (double) with user-defined sum and run-time code generation");
                break;
            case "/marshaluser":
                selected = testPrimitiveAndMethod;
                Operation<double>.UseGeneratedUserOps = false;
                System.Console.WriteLine("Using primitive type (double) with user-defined sum and marshalling");
                break;
            case "/valuetype":
                selected = testValueType;
                System.Console.WriteLine("Using value types with user-defined sum");
                break;
            case "/reftype":
                selected = testReferenceType;
                System.Console.WriteLine("Using reference types with user-defined sum");
                break;
            default:
                System.Console.WriteLine("Using MPI.NET interface.");
                break;
        }

        comm = MPI.Communicator.world;
        self = comm.Rank;
        System.Console.WriteLine(comm.Rank + ": " + MPI.Environment.ProcessorName);

        bwstats = new Stats[nSamp];
        testLatency();
        testSyncTime();
        // Every rank continues with rank 0's latency figure.
        comm.Broadcast(ref latency, 0);
        if (self == 0)
        {
            System.Console.WriteLine("Latency: {0:F9}", latency);
            System.Console.WriteLine("Sync Time: {0:F9}", synctime);
            System.Console.WriteLine("Now starting main loop");
        }

        int sample = 0;      // running index into bwstats
        int step = 0;        // number of base lengths visited so far
        int increment = 1;   // distance between consecutive base lengths
        int firstLength = 0, lastLength = 1024 * 1024 * 1024;
        int bufferLength = firstLength;
        double lastTime = latency;

        for (int length = firstLength; lastTime < stopTime && length <= lastLength; length += increment, step++)
        {
            // After the first few steps the increment doubles on every odd step.
            if (step > 2 && step % 2 != 0)
            {
                increment *= 2;
            }

            // Perturb below and above the base length only when the increment leaves room for it.
            bool wide = increment > PERT + 1;
            int pertIndex = 0;
            for (int pert = wide ? -PERT : 0; pert <= PERT; pertIndex++, sample++, pert += wide ? PERT : PERT + 1)
            {
                // The repeat count is derived from the previous buffer length and its time.
                int nRepeat = bufferLength == 0
                    ? latencyReps
                    : (int)Math.Max(RUNTM / ((double)bufferLength / (bufferLength - increment + 1.0) * lastTime), TRIALS);
                comm.Broadcast(ref nRepeat, 0);

                bufferLength = length + pert;
                if (self == 0)
                {
                    System.Console.Write("{0,3:D}: {1,9:D} doubles {2,7:D} times ---> ", sample, bufferLength, nRepeat);
                }

                GC.Collect();
                selected(bufferLength, sample, nRepeat, ref lastTime);
                if (self == 0)
                {
                    System.Console.WriteLine("{0,9:F2} Mbps in {1:F9} sec", bwstats[sample].bps, lastTime);
                }
            }
        }
    }
}
/// <summary>
/// Solves A * x = b with a parallel Gauss-Seidel iteration. Returns true if it converges.
/// </summary>
/// <param name="A">Coefficient matrix; only read on rank 0. Must be square.</param>
/// <param name="b">Right-hand side column matrix; rank 0's value is broadcast to all ranks.</param>
/// <param name="x">Solution matrix. Rank 0 holds the collected result; NOTE(review): other ranks appear to end with whatever <c>Reduce</c> returns off-root — confirm.</param>
/// <param name="err">On rank 0, <c>A * x - b</c> after rounding; null on the other ranks.</param>
/// <param name="loops">Number of iterations performed.</param>
/// <param name="comm">Communicator shared by all participating ranks.</param>
/// <returns>True if every rank reported convergence within the loop limit.</returns>
/// <exception cref="ArgumentException">On rank 0, when A is not square, b is not a column, or their heights differ.</exception>
public static Boolean solve(Matrix A, Matrix b, out Matrix x, out Matrix err, out int loops, Intracommunicator comm)
{
    // check sanity. rank 0 only
    // NOTE(review): only rank 0 throws; the other ranks proceed to the broadcasts below — confirm callers handle this.
    if (comm.Rank == 0 && (!A.isSquare || !b.isColumn || (A.Height != b.Height)))
    {
        throw new ArgumentException("Matrix A must be square! Matrix b must be a column matrix with the same height as matrix A!");
    }

    // follow samples in Wikipedia step by step https://en.wikipedia.org/wiki/Gauss%E2%80%93Seidel_method
    benchmark bm = new benchmark(), bm2 = new benchmark(), bm3 = new benchmark();
    double sequential = 0, parallel = 0, communication = 0;
    bm.start();
    bm2.start();

    // decompose A into the sum of a lower triangular component L* and a strict upper triangular component U
    int size = 0;
    Matrix L = null, U = null, L_1;
    if (comm.Rank == 0)
    {
        size = A.Height;
        Matrix.Decompose(A, out L, out U);
    }
    bm2.pause();
    sequential += bm2.getElapsedSeconds();

    bm2.start();
    comm.Broadcast(ref size, 0);
    comm.Broadcast(ref U, 0);
    comm.Broadcast(ref b, 0);
    bm2.pause();
    communication += bm2.getElapsedSeconds();

    // Inverse matrix L*
    comm.Barrier();
    L_1 = MatrixParallel.Inverse(L, comm, ref sequential, ref parallel, ref communication);

    // Main iteration: x (at step k+1) = T * x (at step k) + C
    // where T = - (inverse of L*) * U, and C = (inverse of L*) * b
    // split T & C into groups of rows, each for one slave, according to the nature of this algorithm
    // each slave will have one piece of T & one piece of C stored locally. the rest of T & C is not needed
    // there might be cases where jobs > slaves, so some might get no job at all
    // Changes: only split L_1. Slaves will calculate T & C (pieces) themselves
    bm2.start();
    Matrix jobDistro = Utils.splitJob(size, comm.Size);

    // First row owned by this rank = total rows assigned to all lower ranks.
    int startRow = 0;
    for (int p = 0; p < comm.Rank; p++)
    {
        startRow += (int)jobDistro[0, p];
    }

    Matrix[] L_1Ps = new Matrix[comm.Size];
    if (comm.Rank == 0)
    {
        int slaveStart = 0;
        for (int p = 0; p < comm.Size; p++)
        {
            L_1Ps[p] = Matrix.extractRows(L_1, slaveStart, slaveStart + (int)jobDistro[0, p] - 1);
            slaveStart += (int)jobDistro[0, p];
        }
    }
    bm2.pause();
    sequential += bm2.getElapsedSeconds();

    bm2.start();
    Matrix L_1P = comm.Scatter(L_1Ps, 0);
    bm2.pause();
    communication += bm2.getElapsedSeconds();

    bm2.start();
    Matrix T = -L_1P * U;
    Matrix C = L_1P * b;
    bm2.pause();
    parallel += bm2.getElapsedSeconds();

    // the actual iteration
    // if it still doesn't converge after this many loops, assume it won't converge and give up
    Boolean converge = false;
    int loopLimit = 100;
    x = Matrix.zeroLike(b); // at step k
    for (loops = 0; loops < loopLimit; loops++)
    {
        bm3.start();
        // (re-)distributing x vector. Must be done every single loop
        // this loop needs x from the previous loop
        comm.Broadcast(ref x, 0);
        bm3.pause();
        communication += bm3.getElapsedSeconds();

        // calculation step
        bm3.start();
        comm.Barrier();
        Matrix new_x = T * x + C;

        // check convergence
        converge = Matrix.SomeClose(new_x, x, 1e-15, startRow);

        // collect result x
        comm.Barrier();
        x = comm.Reduce(new_x, Matrix.Concatenate, 0);

        // collect convergence. consider converged if ALL slaves claim so
        converge = comm.Reduce(converge, bothTrue, 0);
        comm.Broadcast(ref converge, 0); // make sure EVERYONE breaks/continues
        bm3.pause();
        parallel += bm3.getElapsedSeconds();

        if (converge)
        {
            // Count the iteration that just converged before leaving the loop.
            loops++;
            break;
        }
    }

    bm2.start();
    // round the result slightly
    err = null;
    if (comm.Rank == 0)
    {
        x.Round(1e-14);
        err = A * x - b;
        err.Round(1e-14);
    }
    bm2.pause();
    sequential += bm2.getElapsedSeconds();
    bm.pause();

    if (showBenchmark)
    {
        Console.WriteLine("Sequential part took " + sequential + " secs.");
        Console.WriteLine("Parallel part took " + parallel + " secs.");
        Console.WriteLine("Communication took " + communication + " secs.");
        Console.WriteLine("Total: " + bm.getResult() + " (" + bm.getElapsedSeconds() + " secs). Seq + Parallel: " + (sequential + parallel));
    }

    return converge;
}
/// <summary>
/// Distributed reconstruction loop: computes the PSF, then for each major cycle builds
/// the dirty image, deconvolves the local image patch, gathers the patches on rank 0,
/// transforms the stitched model back to a grid and updates the residual visibilities.
/// </summary>
/// <param name="comm">Communicator shared by all ranks.</param>
/// <param name="local">Local dataset (UVW, frequencies, flags, visibilities) of this rank.</param>
/// <param name="c">Gridding constants; <c>GridSize</c> defines the image size.</param>
/// <param name="maxCycle">Number of major cycles to run.</param>
/// <param name="lambda">Regularization parameter; scaled by <c>PSF.CalcMaxLipschitz(psf)</c> before use.</param>
/// <param name="alpha">Passed to the deconvolver; also divides the side-lobe level in path mode.</param>
/// <param name="iterPerCycle">Iteration budget handed to the deconvolver per cycle.</param>
/// <param name="usePathDeconvolution">Selects <c>DeconvolvePath</c> instead of <c>Deconvolve</c>.</param>
/// <returns>The stitched image on rank 0; null on every other rank.</returns>
public static float[,] Reconstruct(Intracommunicator comm, DistributedData.LocalDataset local, GriddingConstants c, int maxCycle, float lambda, float alpha, int iterPerCycle = 1000, bool usePathDeconvolution = false)
{
    // NOTE(review): only watchForward is handed to other code; the remaining
    // stopwatches are never read in this method.
    var watchTotal = new Stopwatch();
    var watchForward = new Stopwatch();
    var watchBackward = new Stopwatch();
    var watchDeconv = new Stopwatch();
    watchTotal.Start();

    var metadata = Partitioner.CreatePartition(c, local.UVW, local.Frequencies);
    var patchSize = CalculateLocalImageSection(comm.Rank, comm.Size, c.GridSize, c.GridSize);
    var totalSize = new Rectangle(0, 0, c.GridSize, c.GridSize);

    //calculate psf and prepare for correlation in the Fourier space
    var psf = CalculatePSF(comm, c, metadata, local.UVW, local.Flags, local.Frequencies);
    Complex[,] PsfCorrelation = null;
    var maxSidelobe = PSF.CalcMaxSidelobe(psf);
    lambda = (float)(lambda * PSF.CalcMaxLipschitz(psf));

    // The statistics file exists on rank 0 only; writer stays null everywhere else.
    StreamWriter writer = null;
    if (comm.Rank == 0)
    {
        FitsIO.Write(psf, "psf.fits");
        Console.WriteLine("done PSF gridding ");
        PsfCorrelation = PSF.CalcPaddedFourierCorrelation(psf, totalSize);
        writer = new StreamWriter(comm.Size + "runtimestats.txt");
    }

    var deconvovler = new MPIGreedyCD(comm, totalSize, patchSize, psf);
    var residualVis = local.Visibilities;
    var xLocal = new float[patchSize.YEnd - patchSize.Y, patchSize.XEnd - patchSize.X];

    try
    {
        for (int cycle = 0; cycle < maxCycle; cycle++)
        {
            if (comm.Rank == 0)
            {
                Console.WriteLine("cycle " + cycle);
            }

            var dirtyImage = ForwardCalculateB(comm, c, metadata, residualVis, local.UVW, local.Frequencies, PsfCorrelation, psf, maxSidelobe, watchForward);
            var bLocal = GetImgSection(dirtyImage.Image, patchSize);

            MPIGreedyCD.Statistics lastRun;
            if (usePathDeconvolution)
            {
                var currentLambda = Math.Max(1.0f / alpha * dirtyImage.MaxSidelobeLevel, lambda);
                lastRun = deconvovler.DeconvolvePath(xLocal, bLocal, currentLambda, 4.0f, alpha, 5, iterPerCycle, 2e-5f);
            }
            else
            {
                lastRun = deconvovler.Deconvolve(xLocal, bLocal, lambda, alpha, iterPerCycle, 1e-5f);
            }

            if (comm.Rank == 0)
            {
                WriteToFile(cycle, lastRun, writer);
                if (lastRun.Converged)
                {
                    Console.WriteLine("-----------------------------CONVERGED!!!!------------------------");
                }
                else
                {
                    Console.WriteLine("-------------------------------not converged----------------------");
                }
            }

            comm.Barrier();
            if (comm.Rank == 0)
            {
                watchDeconv.Stop();
            }

            // Collect every rank's patch on rank 0 and build the full model image there.
            float[][,] totalX = null;
            comm.Gather(xLocal, 0, ref totalX);
            Complex[,] modelGrid = null;
            if (comm.Rank == 0)
            {
                watchBackward.Start();
                var x = new float[c.GridSize, c.GridSize];
                StitchImage(totalX, x, comm.Size);
                FitsIO.Write(x, "xImage_" + cycle + ".fits");
                FFT.Shift(x);
                modelGrid = FFT.Forward(x);
            }
            comm.Broadcast(ref modelGrid, 0);

            var modelVis = IDG.DeGrid(c, metadata, modelGrid, local.UVW, local.Frequencies);
            residualVis = Visibilities.Substract(local.Visibilities, modelVis, local.Flags);
        }
    }
    finally
    {
        // Previously writer.Close() ran unconditionally and threw a
        // NullReferenceException on every rank other than 0.
        if (writer != null)
        {
            writer.Close();
        }
    }

    float[][,] gatherX = null;
    comm.Gather(xLocal, 0, ref gatherX);
    float[,] reconstructed = null;
    if (comm.Rank == 0)
    {
        reconstructed = new float[c.GridSize, c.GridSize];
        StitchImage(gatherX, reconstructed, comm.Size);
    }

    return reconstructed;
}
/// <summary>
/// Benchmark driver: selects a test variant from the first command-line argument,
/// measures latency and sync time, then runs the selected test over growing buffer
/// lengths (with perturbations around each length) and prints the throughput on rank 0.
/// </summary>
/// <param name="args">Optional mode switch: /direct, /user, /marshaluser, /valuetype or /reftype.</param>
static void Main(string[] args)
{
    // Default: primitive types with the predefined MPI_SUM.
    Test selected = testPrimitiveAndPredefined;
    using (new MPI.Environment(ref args))
    {
        string mode = args.Length > 0 ? args[0] : null;
        switch (mode)
        {
            case "/direct":
                selected = testDirect;
                System.Console.WriteLine("Using direct MPI interface.");
                break;
            case "/user":
                selected = testPrimitiveAndMethod;
                Operation<double>.UseGeneratedUserOps = true;
                System.Console.WriteLine("Using primitive type (double) with user-defined sum and run-time code generation");
                break;
            case "/marshaluser":
                selected = testPrimitiveAndMethod;
                Operation<double>.UseGeneratedUserOps = false;
                System.Console.WriteLine("Using primitive type (double) with user-defined sum and marshalling");
                break;
            case "/valuetype":
                selected = testValueType;
                System.Console.WriteLine("Using value types with user-defined sum");
                break;
            case "/reftype":
                selected = testReferenceType;
                System.Console.WriteLine("Using reference types with user-defined sum");
                break;
            default:
                System.Console.WriteLine("Using MPI.NET interface.");
                break;
        }

        comm = MPI.Communicator.world;
        self = comm.Rank;
        System.Console.WriteLine(comm.Rank + ": " + MPI.Environment.ProcessorName);

        bwstats = new Stats[nSamp];
        testLatency();
        testSyncTime();
        // Every rank continues with rank 0's latency figure.
        comm.Broadcast(ref latency, 0);
        if (self == 0)
        {
            System.Console.WriteLine("Latency: {0:F9}", latency);
            System.Console.WriteLine("Sync Time: {0:F9}", synctime);
            System.Console.WriteLine("Now starting main loop");
        }

        int sample = 0;      // running index into bwstats
        int step = 0;        // number of base lengths visited so far
        int increment = 1;   // distance between consecutive base lengths
        int firstLength = 0, lastLength = 1024 * 1024 * 1024;
        int bufferLength = firstLength;
        double lastTime = latency;

        for (int length = firstLength; lastTime < stopTime && length <= lastLength; length += increment, step++)
        {
            // After the first few steps the increment doubles on every odd step.
            if (step > 2 && step % 2 != 0)
            {
                increment *= 2;
            }

            // Perturb below and above the base length only when the increment leaves room for it.
            bool wide = increment > PERT + 1;
            int pertIndex = 0;
            for (int pert = wide ? -PERT : 0; pert <= PERT; pertIndex++, sample++, pert += wide ? PERT : PERT + 1)
            {
                // The repeat count is derived from the previous buffer length and its time.
                int nRepeat = bufferLength == 0
                    ? latencyReps
                    : (int)Math.Max(RUNTM / ((double)bufferLength / (bufferLength - increment + 1.0) * lastTime), TRIALS);
                comm.Broadcast(ref nRepeat, 0);

                bufferLength = length + pert;
                if (self == 0)
                {
                    System.Console.Write("{0,3:D}: {1,9:D} doubles {2,7:D} times ---> ", sample, bufferLength, nRepeat);
                }

                GC.Collect();
                selected(bufferLength, sample, nRepeat, ref lastTime);
                if (self == 0)
                {
                    System.Console.WriteLine("{0,9:F2} Mbps in {1:F9} sec", bwstats[sample].bps, lastTime);
                }
            }
        }
    }
}
/// <summary>
/// Evaluates the generated tasks (locally when <c>N == 1</c>, otherwise by handing them
/// to the worker ranks) and returns the column with the highest computed move value.
/// </summary>
/// <param name="b">Board for which the next move is chosen.</param>
/// <returns>The best column index, or -1 when no column is playable.</returns>
public int CalculateNextMove(Board b)
{
    // NOTE(review): tasks are generated from a fresh Board rather than from `b` — confirm this is intended.
    List<Task> pending = GenerateTasks(new Board(), "", PLAYER, -1, 0, new List<Task>());
    var values = new Dictionary<string, double>();

    if (N == 1)
    {
        // Single process: evaluate every task right here.
        foreach (Task t in pending)
        {
            values[t.key] = CalculateStateValue(t.b, Reverse(t.nextPlayer), -1, 0);
        }
    }
    else
    {
        // Wake the workers, then answer each incoming message with a task or a STOP.
        Message startMsg = new Message(0, START);
        comm.Broadcast<Message>(ref startMsg, 0);

        int next = 0;      // index of the next task to hand out
        int stopped = 0;   // workers that have been told to stop
        while (next < pending.Count || stopped < N - 1)
        {
            Message incoming = comm.Receive<Message>(MPI.Unsafe.MPI_ANY_SOURCE, 0);
            if (incoming.MessageType == RESULT)
            {
                values[incoming.task.key] = incoming.task.value;
            }

            if (next < pending.Count)
            {
                Message newTask = new Message(0, TASK);
                newTask.task = pending[next];
                next++;
                comm.Send<Message>(newTask, incoming.SourceId, 0);
            }
            else
            {
                stopped++;
                comm.Send<Message>(new Message(0, STOP), incoming.SourceId, 0);
            }
        }
    }

    double bestSol = Double.MinValue;
    int bestMove = -1;
    for (int move = 0; move < C; move++)
    {
        // Skip columns that fail the playable check.
        if (!(b.columns[move].LastPosition() < R - 1))
        {
            continue;
        }

        double currSol = CalculateMoveValue(b, "", COMPUTER, move, 1, values);
        Console.WriteLine(move + " " + currSol);
        if (currSol > bestSol)
        {
            bestSol = currSol;
            bestMove = move;
        }
    }

    return bestMove;
}
/// <summary>
/// Reads the FT benchmark configuration on node 0 (from <c>inputft.data</c>, or compiled
/// defaults when reading fails), prints it, broadcasts <c>np1</c>, <c>niter</c> and
/// <c>np2</c> to all processes and derives <c>layout_type</c> from the processor array.
/// </summary>
/// <returns>The number of iterations to run.</returns>
public int initialConfig()
{
    int fstatus = 0, niter = 0;
    if (node == 0)
    {
        Console.WriteLine(" NAS Parallel Benchmarks " + "3.3" + " -- FT Benchmark ");
        try
        {
            Console.Write("Trying Read from input file inputft.data: ");
            int[] conf = { 1, 1, 2 }; //Line 1: 1 var; Line 2: 1 var; Line 3: 2 vars;
            string[] vetTemp = readFileData("inputft.data", conf);
            niter = int.Parse(vetTemp[0]);
            layout_type = int.Parse(vetTemp[1]);
            np1 = int.Parse(vetTemp[2]);
            np2 = int.Parse(vetTemp[3]);
        }
        catch
        {
            // Any failure while reading or parsing falls back to the compiled defaults.
            Console.WriteLine("inputft.data not found");
            fstatus = 1;
        }

        if (fstatus == 0)
        {
            Console.WriteLine("inputft.data found");
            if (np1 * np2 != np)
            {
                Console.WriteLine(" np1 and np2 given in input file are not valid.");
                Console.WriteLine("Product is " + np1 * np2 + " and should be " + np);
                System.Environment.Exit(0);
            }
            if (layout_type != layout_0D && layout_type != layout_1D && layout_type != layout_2D)
            {
                Console.WriteLine(" Layout type specified in inputft.data is invalid ");
                System.Environment.Exit(0);
            }
            if (layout_type == layout_0D && (np1 != 1 || np2 != 1))
            {
                Console.WriteLine(" For 0D layout, both np1 and np2 must be 1 ");
                System.Environment.Exit(0);
            }
            if (layout_type == layout_1D && np1 != 1)
            {
                Console.WriteLine(" For 1D layout, np1 must be 1 ");
                System.Environment.Exit(0);
            }
        }
        else
        {
            Console.WriteLine(" No input file inputft.data. Using compiled defaults");
            niter = niter_default;
            if (np == 1)
            {
                np1 = 1;
                np2 = 1;
                layout_type = layout_0D;
            }
            else if (np <= nz)
            {
                np1 = 1;
                np2 = np;
                layout_type = layout_1D;
            }
            else
            {
                np1 = nz;
                np2 = np / nz;
                layout_type = layout_2D;
            }
        }

        Console.WriteLine(" Size: " + nx + "x" + ny + "x" + nz);
        Console.WriteLine(" Iterations: " + niter);
        Console.WriteLine(" Number of processes : " + np);
        Console.WriteLine(" Processor array : " + np1 + "x" + np2);
        if (layout_type == layout_0D)
        {
            // Digit zero: the message used to print the letter 'O' ("OD").
            Console.WriteLine(" Layout type: 0D");
        }
        else if (layout_type == layout_1D)
        {
            Console.WriteLine(" Layout type: 1D");
        }
        else
        {
            Console.WriteLine(" Layout type: 2D");
        }
    }

    worldcomm.Broadcast<int>(ref np1, root);
    worldcomm.Broadcast<int>(ref niter, root);
    worldcomm.Broadcast<int>(ref np2, root);

    // layout_type is not broadcast; every process derives it from the processor array.
    if (np1 == 1 && np2 == 1)
    {
        layout_type = layout_0D;
    }
    else if (np1 == 1)
    {
        layout_type = layout_1D;
    }
    else
    {
        layout_type = layout_2D;
    }

    return niter;
}
/// <summary>
/// Forwards a broadcast to the wrapped communicator.
/// </summary>
/// <typeparam name="T">Type of the value being broadcast.</typeparam>
/// <param name="message">Value passed by reference to the underlying <c>Broadcast</c> call.</param>
/// <param name="rank">Rank argument forwarded unchanged to the underlying <c>Broadcast</c> call.</param>
public void Broadcast<T>(ref T message, int rank) => communicator.Broadcast(ref message, rank);
/// <summary>
/// Modular face recognition over MPI. Rank 0 loads the training data and the test
/// image, broadcasts them, and collects the closest match found by each worker; every
/// worker compares the test image against its own contiguous range of persons.
/// </summary>
/// <param name="args">Command-line arguments, forwarded to the MPI environment.</param>
static void Main(String[] args)
{
    using (new MPI.Environment(ref args))
    {
        Intracommunicator comm = MPI.Communicator.world;
        int p = comm.Size;
        int rank = comm.Rank;
        int range = 0, rem = 0, tag = 0;
        int N = 0;              // the number of subparts of an image
        int noOfComponent = 0;
        int noOfPersons = 0;    // the number of persons in the training data
        string minLabel = "";
        double minDistance = 0;

        // used to get the distance between the training data and the test image
        EuclideanDistance ed = new EuclideanDistance();
        // the parts of the test image after dividing it into N parts
        List<double[][]> testSubImgs = new List<double[][]>();
        // the weights of each person (N parts for each person)
        List<double[][]> weights = new List<double[][]>();
        // the label associated with each weight
        List<string> labels = new List<string>();

        if (rank == 0) // It's the root
        {
            // create the algorithm object and load the training data from file
            ModularFaceRecognitionAlgorithms mpca = new ModularFaceRecognitionAlgorithms();
            mpca.loadTrainingData("C:/train.txt");

            // prepare the image for testing
            // s4 can be changed to s1, s2, s3 or s5 to try other test persons
            String filePath = "C:/test/s4/10.bmp";
            Matrix test = FileManager.GetBitMapColorMatrix(filePath);

            // divide the image into N parts
            testSubImgs = mpca.devideImageToN(test, mpca.N);

            // prepare local variables
            noOfPersons = mpca.weights.Count / mpca.N;
            N = mpca.N;
            noOfComponent = mpca.numOfComponents;
            weights = mpca.weights;
            labels = mpca.labels;

            if (p > 1) // guards the division below when there is only a master process
            {
                // Each worker is responsible for `range` persons; the first `rem`
                // workers take one extra person.
                range = noOfPersons / (p - 1);
                rem = noOfPersons % (p - 1);
                if (range == 0) // e.g. 5 persons and 6 workers: one person per worker
                {
                    range = 1;
                    rem = 0;
                }
            }
            else
            {
                Console.WriteLine("There's only a master process");
            }
        }

        // Broadcast the needed variables. Every rank issues the same sequence.
        comm.Broadcast(ref N, 0);
        comm.Broadcast(ref noOfComponent, 0);
        comm.Broadcast(ref range, 0);
        comm.Broadcast(ref rem, 0);
        comm.Broadcast(ref testSubImgs, 0);
        comm.Broadcast(ref weights, 0);
        comm.Broadcast(ref labels, 0);
        comm.Broadcast(ref noOfPersons, 0);

        if (rank == 0)
        {
            string resLabel = "";   // the final resulting label
            double resDistance = 0; // the final resulting distance
            minLabel = "";          // receives the min label of each worker
            minDistance = 0;        // receives the min distance of each worker

            // Workers beyond the number of persons send nothing, so do not wait for them.
            int endLoop = p - 1;
            if (noOfPersons < (p - 1))
            {
                endLoop = noOfPersons;
            }

            // Receive each worker's best match and keep the overall minimum.
            for (int src = 1; src <= endLoop; src++)
            {
                comm.Receive(src, tag, out minDistance);
                comm.Receive(src, tag, out minLabel);
                if (src == 1 || minDistance < resDistance)
                {
                    resLabel = minLabel;
                    resDistance = minDistance;
                }
            }

            Console.WriteLine("resLabel = " + resLabel);
            Console.WriteLine("resDistance = " + resDistance);
        }
        else if (rank <= noOfPersons) // otherwise there is nothing to do
        {
            // First weight index of this worker: each lower-ranked worker covers `range`
            // persons, plus one extra for each of them that is among the first `rem`.
            // The previous formula was only correct when range == 1.
            int start = ((rank - 1) * range + Math.Min(rank - 1, rem)) * N;
            if (rank <= rem)
            {
                range++;
            }

            // Compute the distance for every person in this worker's range and
            // send the minimum distance and its label to the master process.
            for (int i = 0; i < range; i++)
            {
                double dpj = 0;
                int begin = i * N + start;
                for (int j = begin, m = 0; m < N; j++, m++)
                {
                    double dist = ed.getDistance(weights[j], testSubImgs[m]);
                    dpj += ((double)1 / noOfComponent) * dist;
                }

                double dp = ((double)1 / N) * dpj;
                if (i == 0 || dp < minDistance)
                {
                    minLabel = labels[begin];
                    minDistance = dp;
                }
            }

            comm.Send(minDistance, 0, tag);
            comm.Send(minLabel, 0, tag);
        }
    }
}
/// <summary>
/// Two-process ping-pong benchmark. After measuring latency and sync time, it sends
/// byte buffers of growing length back and forth between rank 0 and rank 1, keeps the
/// best time per buffer length in <c>bwstats</c> and prints the throughput on rank 0.
/// </summary>
/// <param name="args">Optional "/direct" as first argument selects the raw <c>Unsafe.MPI_*</c> calls.</param>
static void Main(string[] args)
{
    // Whether we should use the unsafe, Direct interface to MPI.
    // When false, use the normal MPI.NET interface.
    bool useDirectInterface = false;
    using (MPI.Environment env = new MPI.Environment(ref args))
    {
        if (args.Length > 0 && args[0] == "/direct")
        {
            useDirectInterface = true;
            System.Console.WriteLine("Using direct MPI interface.");
        }
        else
        {
            System.Console.WriteLine("Using MPI.NET interface.");
        }

        comm = MPI.Communicator.world;
        // The exchange below pairs exactly two ranks; anything else bails out.
        if (comm.Size != 2)
        {
            if (comm.Rank == 0)
            {
                System.Console.WriteLine("Only two processes allowed. Rerun with -np 2");
            }
            return;
        }
        else
        {
            self = comm.Rank;
            other = (comm.Rank + 1) % 2;
        }

        System.Console.WriteLine(comm.Rank + ": " + MPI.Environment.ProcessorName);
        bwstats = new Stats[nSamp];
        testLatency();
        testSyncTime();
        // Both ranks continue with rank 0's latency value.
        comm.Broadcast(ref latency, 0);
        if (self == 0)
        {
            System.Console.WriteLine("Latency: {0:F9}", latency);
            System.Console.WriteLine("Sync Time: {0:F9}", synctime);
            System.Console.WriteLine("Now starting main loop");
        }

        int i, j, n, nq;
        int inc = 1, len;
        int start = 0, end = 1024 * 1024 * 1024;
        int bufflen = start;
        double tlast = latency;
        // Outer loop: base buffer length `len` grows by `inc` until the last measured
        // time reaches stopTime or the length exceeds `end`.
        for (n = nq = 0, len = start; tlast < stopTime && len <= end; len += inc, nq++)
        {
            // After the first few steps the increment doubles on every odd step.
            if (nq > 2 && (nq % 2 != 0))
            {
                inc *= 2;
            }
            int ipert, pert;
            // Inner loop: perturb the length around `len`; `n` indexes the bwstats sample.
            for (ipert = 0, pert = (inc > PERT + 1) ? -PERT : 0; pert <= PERT; ipert++, n++, pert += (inc > PERT + 1) ? PERT : PERT + 1)
            {
                // Repeat count is computed from the PREVIOUS bufflen and its time,
                // then rank 0's value is imposed on both ranks.
                int nRepeat = bufflen == 0 ? latencyReps : (int)Math.Max((RUNTM / ((double)bufflen / (bufflen - inc + 1.0) * tlast)), TRIALS);
                comm.Broadcast(ref nRepeat, 0);
                bufflen = len + pert;
                byte[] sendBuffer = new byte[bufflen]; // Align the data? Some day. Maybe.
                byte[] recvBuffer = new byte[bufflen];
                if (self == 0)
                {
                    System.Console.Write("{0,3:D}: {1,9:D} bytes {2,7:D} times ---> ", n, bufflen, nRepeat);
                }
                // Start from a huge time so the first trial always becomes the minimum.
                bwstats[n].t = 1e99;
                double t1 = 0, t2 = 0;
                for (i = 0; i < TRIALS; i++)
                {
                    sync();
                    double t0 = when();
                    if (useDirectInterface)
                    {
                        // Use the unsafe, direct interface to MPI via P/Invoke
                        unsafe
                        {
                            fixed(byte *sendPtr = sendBuffer, recvPtr = recvBuffer)
                            {
                                for (j = 0; j < nRepeat; j++)
                                {
                                    // Rank 0 sends first (tag 142) and waits for the echo (tag 242);
                                    // the other rank does the mirror image.
                                    if (self == 0)
                                    {
                                        Unsafe.MPI_Send(new IntPtr(sendPtr), bufflen, Unsafe.MPI_BYTE, other, 142, Unsafe.MPI_COMM_WORLD);
                                        Unsafe.MPI_Recv(new IntPtr(recvPtr), bufflen, Unsafe.MPI_BYTE, other, 242, Unsafe.MPI_COMM_WORLD, out *Unsafe.MPI_STATUS_IGNORE);
                                    }
                                    else
                                    {
                                        Unsafe.MPI_Recv(new IntPtr(recvPtr), bufflen, Unsafe.MPI_BYTE, other, 142, Unsafe.MPI_COMM_WORLD, out *Unsafe.MPI_STATUS_IGNORE);
                                        Unsafe.MPI_Send(new IntPtr(sendPtr), bufflen, Unsafe.MPI_BYTE, other, 242, Unsafe.MPI_COMM_WORLD);
                                    }
                                }
                            }
                        }
                    }
                    else
                    {
                        // Same ping-pong exchange through the MPI.NET communicator.
                        for (j = 0; j < nRepeat; j++)
                        {
                            if (self == 0)
                            {
                                comm.Send(sendBuffer, other, 142);
                                comm.Receive(other, 242, ref recvBuffer);
                            }
                            else
                            {
                                comm.Receive(other, 142, ref recvBuffer);
                                comm.Send(sendBuffer, other, 242);
                            }
                        }
                    }
                    // Elapsed time per message: each repetition is one send plus one receive.
                    double t = (when() - t0) / (2.0 * nRepeat);
                    t2 += t * t;
                    t1 += t;
                    // Keep the best (smallest) time seen over the trials.
                    bwstats[n].t = Math.Min(bwstats[n].t, t);
                    // Running sums divided by TRIALS; overwritten on every trial.
                    bwstats[n].variance = t2 / TRIALS - t1 / TRIALS * t1 / TRIALS;
                    tlast = bwstats[n].t;
                    bwstats[n].bits = bufflen * sizeof(byte) * 8;
                    bwstats[n].bps = bwstats[n].bits / (bwstats[n].t * 1024 * 1024);
                    bwstats[n].repeat = nRepeat;
                }
                if (self == 0)
                {
                    System.Console.WriteLine("{0,9:F2} Mbps in {1:F9} sec", bwstats[n].bps, tlast);
                }
            }
        }
    }
}
/// <summary>
/// Two-process ping-pong benchmark. After measuring latency and sync time, it sends
/// byte buffers of growing length back and forth between rank 0 and rank 1, keeps the
/// best time per buffer length in <c>bwstats</c> and prints the throughput on rank 0.
/// </summary>
/// <param name="args">Optional "/direct" as first argument selects the raw <c>Unsafe.MPI_*</c> calls.</param>
static void Main(string[] args)
{
    // Whether we should use the unsafe, Direct interface to MPI.
    // When false, use the normal MPI.NET interface.
    bool useDirectInterface = false;
    using (MPI.Environment env = new MPI.Environment(ref args))
    {
        if (args.Length > 0 && args[0] == "/direct")
        {
            useDirectInterface = true;
            System.Console.WriteLine("Using direct MPI interface.");
        }
        else
            System.Console.WriteLine("Using MPI.NET interface.");

        comm = MPI.Communicator.world;
        // The exchange below pairs exactly two ranks; anything else bails out.
        if (comm.Size != 2)
        {
            if (comm.Rank == 0)
                System.Console.WriteLine("Only two processes allowed. Rerun with -np 2");
            return;
        }
        else
        {
            self = comm.Rank;
            other = (comm.Rank + 1) % 2;
        }

        System.Console.WriteLine(comm.Rank + ": " + MPI.Environment.ProcessorName);
        bwstats = new Stats[nSamp];
        testLatency();
        testSyncTime();
        // Both ranks continue with rank 0's latency value.
        comm.Broadcast(ref latency, 0);
        if (self == 0)
        {
            System.Console.WriteLine("Latency: {0:F9}", latency);
            System.Console.WriteLine("Sync Time: {0:F9}", synctime);
            System.Console.WriteLine("Now starting main loop");
        }

        int i, j, n, nq;
        int inc = 1, len;
        int start = 0, end = 1024 * 1024 * 1024;
        int bufflen = start;
        double tlast = latency;
        // Outer loop: base buffer length `len` grows by `inc` until the last measured
        // time reaches stopTime or the length exceeds `end`.
        for (n = nq = 0, len = start; tlast < stopTime && len <= end; len += inc, nq++)
        {
            // After the first few steps the increment doubles on every odd step.
            if (nq > 2 && (nq % 2 != 0)) inc *= 2;
            int ipert, pert;
            // Inner loop: perturb the length around `len`; `n` indexes the bwstats sample.
            for (ipert = 0, pert = (inc > PERT + 1) ? -PERT : 0; pert <= PERT; ipert++, n++, pert += (inc > PERT + 1) ? PERT : PERT + 1)
            {
                // Repeat count is computed from the PREVIOUS bufflen and its time,
                // then rank 0's value is imposed on both ranks.
                int nRepeat = bufflen == 0 ? latencyReps : (int)Math.Max((RUNTM / ((double)bufflen / (bufflen - inc + 1.0) * tlast)), TRIALS);
                comm.Broadcast(ref nRepeat, 0);
                bufflen = len + pert;
                byte[] sendBuffer = new byte[bufflen]; // Align the data? Some day. Maybe.
                byte[] recvBuffer = new byte[bufflen];
                if (self == 0)
                    System.Console.Write("{0,3:D}: {1,9:D} bytes {2,7:D} times ---> ", n, bufflen, nRepeat);
                // Start from a huge time so the first trial always becomes the minimum.
                bwstats[n].t = 1e99;
                double t1 = 0, t2 = 0;
                for (i = 0; i < TRIALS; i++)
                {
                    sync();
                    double t0 = when();
                    if (useDirectInterface)
                    {
                        // Use the unsafe, direct interface to MPI via P/Invoke
                        unsafe
                        {
                            fixed (byte* sendPtr = sendBuffer, recvPtr = recvBuffer)
                            {
                                for (j = 0; j < nRepeat; j++)
                                {
                                    // Rank 0 sends first (tag 142) and waits for the echo (tag 242);
                                    // the other rank does the mirror image.
                                    if (self == 0)
                                    {
                                        Unsafe.MPI_Send(new IntPtr(sendPtr), bufflen, Unsafe.MPI_BYTE, other, 142, Unsafe.MPI_COMM_WORLD);
                                        Unsafe.MPI_Recv(new IntPtr(recvPtr), bufflen, Unsafe.MPI_BYTE, other, 242, Unsafe.MPI_COMM_WORLD, out *Unsafe.MPI_STATUS_IGNORE);
                                    }
                                    else
                                    {
                                        Unsafe.MPI_Recv(new IntPtr(recvPtr), bufflen, Unsafe.MPI_BYTE, other, 142, Unsafe.MPI_COMM_WORLD, out *Unsafe.MPI_STATUS_IGNORE);
                                        Unsafe.MPI_Send(new IntPtr(sendPtr), bufflen, Unsafe.MPI_BYTE, other, 242, Unsafe.MPI_COMM_WORLD);
                                    }
                                }
                            }
                        }
                    }
                    else
                    {
                        // Same ping-pong exchange through the MPI.NET communicator.
                        for (j = 0; j < nRepeat; j++)
                        {
                            if (self == 0)
                            {
                                comm.Send(sendBuffer, other, 142);
                                comm.Receive(other, 242, ref recvBuffer);
                            }
                            else
                            {
                                comm.Receive(other, 142, ref recvBuffer);
                                comm.Send(sendBuffer, other, 242);
                            }
                        }
                    }
                    // Elapsed time per message: each repetition is one send plus one receive.
                    double t = (when() - t0) / (2.0 * nRepeat);
                    t2 += t*t;
                    t1 += t;
                    // Keep the best (smallest) time seen over the trials.
                    bwstats[n].t = Math.Min(bwstats[n].t, t);
                    // Running sums divided by TRIALS; overwritten on every trial.
                    bwstats[n].variance = t2 / TRIALS - t1 / TRIALS * t1 / TRIALS;
                    tlast = bwstats[n].t;
                    bwstats[n].bits = bufflen * sizeof(byte)*8;
                    bwstats[n].bps = bwstats[n].bits / (bwstats[n].t * 1024 * 1024);
                    bwstats[n].repeat = nRepeat;
                }
                if (self == 0)
                    System.Console.WriteLine("{0,9:F2} Mbps in {1:F9} sec", bwstats[n].bps, tlast);
            }
        }
    }
}
/// <summary>
/// Broadcasts four kinds of payload from <paramref name="root"/> over the
/// world communicator (an int, a string, an int array and a string array)
/// and asserts on every rank that the expected value arrived. The root rank
/// prints a progress line for each step.
/// </summary>
/// <param name="root">Rank that supplies the values being broadcast.</param>
private static void Test(int root)
{
    Intracommunicator world = Communicator.world;
    bool isRoot = world.Rank == root;

    // Single integer.
    int number = 0;
    if (isRoot)
    {
        number = 17;
        System.Console.Write("Broadcasting integer from root " + root + "...");
    }
    world.Broadcast(ref number, root);
    MPIDebug.Assert(number == 17);
    if (isRoot)
    {
        System.Console.WriteLine(" done.");
    }

    // Single string.
    string text = "";
    if (isRoot)
    {
        text = "Hello, World!";
        System.Console.Write("Broadcasting string from root " + root + "...");
    }
    world.Broadcast(ref text, root);
    MPIDebug.Assert(text == "Hello, World!");
    if (isRoot)
    {
        System.Console.WriteLine(" done.");
    }

    // Integer array: every rank starts with a 7-element buffer, the root
    // replaces its own with the actual data before broadcasting.
    int[] numbers = new int[7];
    if (isRoot)
    {
        numbers = new int[] { 1, 1, 2, 3, 5, 8, 13 };
        System.Console.Write("Broadcasting integer array from root " + root + "...");
    }
    world.Broadcast(ref numbers, root);
    MPIDebug.Assert(numbers[3] == 3);
    if (isRoot)
    {
        System.Console.WriteLine(" done.");
    }

    // String array.
    string[] words = new string[2];
    if (isRoot)
    {
        words = new string[] { "Hello", "World" };
        System.Console.Write("Broadcasting string array from root " + root + "...");
    }
    world.Broadcast(ref words, root);
    MPIDebug.Assert(words[0] == "Hello" && words[1] == "World");
    if (isRoot)
    {
        System.Console.WriteLine(" done.");
    }
}
private const int ROOT_PROCESS_ID = 0; // Rank that does the console/file input and receives the reductions

/// <summary>
/// Counts occurrences of a user-supplied character in the text file at PATH,
/// splitting the lines round-robin across all MPI processes.
/// The root reads the character and the file, broadcasts both, every process
/// scans its share of lines, and the per-process counts and the longest
/// elapsed time are reduced back to the root, which prints the totals.
/// </summary>
/// <param name="args">
/// Command-line arguments; a first argument of "--verbose" (any letter case)
/// enables per-line and per-match output.
/// </param>
/// <returns>Always 0.</returns>
public static int Main(string[] args)
{
    using (new MPI.Environment(ref args)) // Initialize MPI Environment
    {
        Intracommunicator comm = Communicator.world;

        string[] contents = null;   // Lines of the text file
        char what = '\0';           // The character to search for
        int totalCharacters = 0;    // Characters scanned by this process
        int totalOccurrences = 0;   // Matches found by this process

        // Ordinal, case-insensitive comparison: the flag is a machine token,
        // so it must not depend on the current culture's lower-casing rules.
        bool verbose = args.Length > 0
            && string.Equals(args[0], "--verbose", StringComparison.OrdinalIgnoreCase);

        // Only the root talks to the user and touches the file system.
        if (comm.Rank == ROOT_PROCESS_ID)
        {
            Console.Write("Enter the character you want to search for: ");
            what = (char)Console.Read();
            contents = File.ReadAllLines(PATH);
        }

        comm.Broadcast(ref contents, ROOT_PROCESS_ID);
        comm.Broadcast(ref what, ROOT_PROCESS_ID);

        // All processes must reach this point before the clock starts,
        // otherwise the measurement would include broadcast skew.
        comm.Barrier();
        double start = MPI.Environment.Time;

        // Round-robin split: process r handles lines r, r + Size, r + 2*Size, ...
        for (int i = comm.Rank; i < contents.Length; i += comm.Size)
        {
            if (verbose)
            {
                Console.WriteLine("Process ID = {0}, Line number = {1}", comm.Rank, i);
            }

            string line = contents[i];
            for (int j = 0; j < line.Length; j++)
            {
                if (line[j] == what)
                {
                    if (verbose)
                    {
                        Console.WriteLine("Process {0} has found the character at {1}:{2}", comm.Rank, i + 1, j + 1);
                    }
                    totalOccurrences += 1;
                }
            }

            // Every character of the line was visited exactly once.
            totalCharacters += line.Length;
        }

        // All processes must reach here before we stop the timer.
        comm.Barrier();
        double end = MPI.Environment.Time;

        // Accumulate the per-process results on the root; the overall time is
        // the longest time of all processes.
        int allOccurrences = comm.Reduce(totalOccurrences, Operation<int>.Add, ROOT_PROCESS_ID);
        int allCharacters = comm.Reduce(totalCharacters, Operation<int>.Add, ROOT_PROCESS_ID);
        double totalTime = comm.Reduce(end - start, Operation<double>.Max, ROOT_PROCESS_ID);

        Console.WriteLine("Total occurrences found by process {0} is {1}", comm.Rank, totalOccurrences);
        Console.WriteLine("Total characters processed by {0} is {1}", comm.Rank, totalCharacters);

        // Only the root prints the reduced totals.
        if (comm.Rank == ROOT_PROCESS_ID)
        {
            Console.WriteLine("Overall occurrences = {0}", allOccurrences);
            Console.WriteLine("Overall characters = {0}", allCharacters);
            Console.WriteLine("Overall time = {0}s", totalTime);
        }
    }

    return 0;
}
/// <summary>
/// Computes the inverse of <paramref name="matrix"/> across the processes of
/// <paramref name="comm"/>. Rank 0 performs the LUP decomposition and the
/// determinant; the factors are broadcast, each rank solves for its own
/// slice of columns, and the slices are combined on rank 0 with
/// <c>ConcatenateColumn</c>.
/// </summary>
/// <param name="matrix">Matrix to invert; rank 0 requires it to be square.</param>
/// <param name="comm">Communicator whose ranks share the work.</param>
/// <param name="timeS">Accumulates seconds spent in the rank-0-only sections (decomposition, determinant).</param>
/// <param name="timeP">Accumulates seconds spent in job distribution, the per-rank column solve and the final reduce.</param>
/// <param name="timeC">Accumulates seconds spent in the broadcast/barrier sections.</param>
/// <returns>
/// The value returned by the final reduce to rank 0, or <c>zeroLike(matrix)</c>
/// when no decomposition is available or the determinant is exactly zero.
/// </returns>
/// <exception cref="ArgumentException">Thrown on rank 0 when the matrix is not square.</exception>
public static Matrix Inverse(Matrix matrix, Intracommunicator comm, ref double timeS, ref double timeP, ref double timeC)
{
    // NOTE(review): only rank 0 validates and throws; the other ranks carry on
    // into the broadcasts below. Confirm that callers abort the whole job.
    if (comm.Rank == 0 && !matrix.isSquare)
    {
        throw new ArgumentException("Matrix must be square!");
    }

    benchmark bm = new benchmark();

    // Sequential part: LUP decomposition on rank 0 only.
    bm.start();
    int n = 0;
    int[] perm = null;
    int toggle = 0;
    Matrix lum = null;
    if (comm.Rank == 0)
    {
        n = matrix.dim1;
        lum = LUPDecompose(matrix, out perm, out toggle);
    }
    bm.pause();
    timeS += bm.getElapsedSeconds();

    // Communication: ship the dimension and the factors to every rank.
    bm.start();
    comm.Broadcast(ref n, 0);
    comm.Broadcast(ref lum, 0);
    if (comm.Rank != 0)
    {
        // Receivers allocate the permutation buffer at its final size before
        // the array broadcast fills it.
        perm = new int[n];
    }
    comm.Broadcast(ref perm, 0);
    comm.Broadcast(ref toggle, 0);
    comm.Barrier();
    bm.pause();
    timeC += bm.getElapsedSeconds();

    if (lum == null)
    {
        return zeroLike(matrix);
    }

    // Sequential part: determinant on rank 0 only.
    bm.start();
    Double det = 0;
    if (comm.Rank == 0)
    {
        det = Determinant(lum, perm, toggle);
    }
    bm.pause();
    timeS += bm.getElapsedSeconds();

    // Communication: share the determinant so all ranks take the same branch.
    bm.start();
    comm.Broadcast(ref det, 0);
    comm.Barrier();
    bm.pause();
    timeC += bm.getElapsedSeconds();

    if (det == 0) // not invertible
    {
        // still return for the sake of simplicity
        // Zero matrix * any matrix = zero matrix
        // so it's never a valid answer
        return zeroLike(matrix);
    }

    // Job distribution. This used to begin with bm.pause(), so the interval
    // added to timeP below did not cover this section; start the timer instead.
    bm.start();
    Matrix jobDistro = Utils.splitJob(n, comm.Size);
    int size = (int)jobDistro[0, comm.Rank];
    int startCol = 0;
    for (int p = 0; p < comm.Rank; p++)
    {
        // This rank's first column comes after all lower ranks' shares.
        startCol += (int)jobDistro[0, p];
    }
    bm.pause();
    timeP += bm.getElapsedSeconds();

    // Parallel part: solve for this rank's columns of the inverse.
    bm.start();
    Matrix result = new Matrix(n, size);
    for (int i = startCol; i < startCol + size; ++i)
    {
        // Right-hand side: column i of the identity with rows permuted by perm.
        double[] b = new double[n];
        for (int j = 0; j < n; ++j)
        {
            b[j] = (i == perm[j]) ? 1.0 : 0.0;
        }

        double[] x = HelperSolve(lum, b);
        for (int j = 0; j < n; ++j)
        {
            result[j, i - startCol] = x[j];
        }
    }
    bm.pause();
    timeP += bm.getElapsedSeconds();

    // Collect the per-rank column slices on rank 0.
    bm.start();
    result = comm.Reduce(result, ConcatenateColumn, 0);
    bm.pause();
    timeP += bm.getElapsedSeconds();

    return result;
}