/// <summary>
/// Distributes pending experimental units among the free herd agents, building one job per agent
/// that receives work.
/// </summary>
/// <param name="pendingExperiments">Experimental units waiting to be run; every unit placed in a job is removed from this list.</param>
/// <param name="freeHerdAgents">Agents available for work; every agent that is given a job is removed from this list.</param>
/// <param name="assignedJobs">Output list. It is cleared first and then filled with the newly created jobs.</param>
/// <param name="options">Optional dispatcher options. When <c>LeaveOneFreeCore</c> is set, one core is kept unused on agents with more than one processor.</param>
public static void AssignExperiments(ref List<ExperimentalUnit> pendingExperiments
    , ref List<HerdAgentInfo> freeHerdAgents, ref List<Job> assignedJobs, JobDispatcherOptions options = null)
{
    // Start from scratch: the list only holds the jobs created by this call
    assignedJobs.Clear();

    // Agents that receive a job. They cannot be removed from freeHerdAgents while it is being
    // enumerated, so they are collected here and removed once the loop is over
    List<HerdAgentInfo> agentsGivenWork = new List<HerdAgentInfo>();

    // Walk the free agents until every agent has been considered or nothing is left to assign
    foreach (HerdAgentInfo candidate in freeHerdAgents)
    {
        int coresLeft = candidate.NumProcessors;

        // Optionally keep one core idle, but never on a single-core agent
        if (options != null && options.LeaveOneFreeCore && coresLeft > 1)
        {
            coresLeft -= 1;
        }

        // Experimental units that will form this agent's job
        List<ExperimentalUnit> batch = new List<ExperimentalUnit>();

        // Keep packing units into the agent while it has cores left and a fitting unit exists
        while (coresLeft > 0)
        {
            ExperimentalUnit match = FirstFittingExperiment(pendingExperiments, coresLeft, batch.Count > 0, candidate);
            if (match == null)
            {
                break;
            }

            // Move the unit from the pending list to this agent's batch
            batch.Add(match);
            pendingExperiments.Remove(match);

            // A non-positive core requirement takes every remaining core of the agent
            if (match.RunTimeReqs.NumCPUCores > 0)
            {
                coresLeft -= match.RunTimeReqs.NumCPUCores;
            }
            else
            {
                coresLeft = 0;
            }
        }

        // Only agents that actually got something to run produce a job
        if (batch.Count > 0)
        {
            assignedJobs.Add(new Job(batch, candidate));
            agentsGivenWork.Add(candidate);
            ++jobId;
        }

        if (pendingExperiments.Count == 0)
        {
            break;
        }
    }

    // Enumeration is over: the busy agents can now leave the free list
    foreach (HerdAgentInfo busyAgent in agentsGivenWork)
    {
        freeHerdAgents.Remove(busyAgent);
    }
}
/// <summary>
/// Dispatches the given experimental units to the free herd agents and monitors the resulting jobs,
/// re-assigning work to each agent as soon as it finishes its job, until there is nothing left to
/// run or cancellation is requested.
/// </summary>
/// <param name="experiments">Pending experimental units. Units are removed as they are assigned; failed units are added back, and so are the units of the last assigned jobs when the run is cancelled.</param>
/// <param name="freeHerdAgents">Agents available for work. Agents are removed while they run a job and added back when it finishes.</param>
/// <param name="dispatcher">Receives the JobAssigned / JobFinished / Log notifications.</param>
/// <param name="cancellationTokenSource">Checked between steps to stop dispatching new work.</param>
/// <param name="jobDispatcherOptions">Optional options forwarded to <c>AssignExperiments</c>.</param>
/// <returns>The tally of experimental units run, or 0 if no job could be assigned at all.</returns>
public static async Task<int> RunExperimentsAsync(List<ExperimentalUnit> experiments, List<HerdAgentInfo> freeHerdAgents
    , Monitoring.MsgDispatcher dispatcher
    , CancellationTokenSource cancellationTokenSource
    , JobDispatcherOptions jobDispatcherOptions = null)
{
    List<Job> assignedJobs = new List<Job>();
    List<Task<Job>> monitoredJobTasks = new List<Task<Job>>();
    int numExperimentalUnitsRun = 0;

    // Calculate run-time requirements
    foreach (ExperimentalUnit experimentalUnit in experiments)
    {
        experimentalUnit.RequestRuntimeRequirements();
    }

    // Assign experiments to free agents
    AssignExperiments(ref experiments, ref freeHerdAgents, ref assignedJobs, jobDispatcherOptions);

    if (assignedJobs.Count == 0)
    {
        return 0;
    }

    try
    {
        while ((assignedJobs.Count > 0 || monitoredJobTasks.Count > 0 || experiments.Count > 0)
            && !cancellationTokenSource.IsCancellationRequested)
        {
            // Notify the assignment of each new job and start sending/monitoring it
            foreach (Job job in assignedJobs)
            {
                dispatcher.JobAssigned?.Invoke(job);
                monitoredJobTasks.Add(job.SendJobAndMonitor(dispatcher));
            }

            // All pending experiments sent? Then wait for every running job to complete.
            // NOTE(review): units that fail in this final batch are not re-enqueued here - confirm intended.
            if (experiments.Count == 0)
            {
                // Awaited instead of blocking with Wait(): blocking inside an async method can
                // deadlock on a synchronization context and wraps errors in AggregateException
                Job[] completedJobs = await Task.WhenAll(monitoredJobTasks);
                foreach (Job completedJob in completedJobs)
                {
                    dispatcher.JobFinished?.Invoke(completedJob);
                }
                dispatcher.Log?.Invoke("All the experiments have finished");
                break;
            }

            // Experiments are still pending but nothing is running: no agent can take them.
            // Task.WhenAny throws on an empty collection, so stop here with an explicit message
            if (monitoredJobTasks.Count == 0)
            {
                dispatcher.Log?.Invoke(experiments.Count + " pending experiments could not be assigned to any agent");
                break;
            }

            // Wait for the first agent to finish and give it something to do
            Task<Job> finishedTask = await Task.WhenAny(monitoredJobTasks);
            Job finishedJob = await finishedTask;
            dispatcher.Log?.Invoke("Job finished: " + finishedJob.ToString());

            // A job finished
            monitoredJobTasks.Remove(finishedTask);

            if (!cancellationTokenSource.IsCancellationRequested)
            {
                // NOTE(review): this counts every unit of the job, failed ones included, whereas the
                // tally in the finally block subtracts the failed ones - confirm which is intended
                numExperimentalUnitsRun += finishedJob.ExperimentalUnits.Count;
                dispatcher.JobFinished?.Invoke(finishedJob);
            }

            // Failed units go back to the pending list so that they are tried again
            if (finishedJob.FailedExperimentalUnits.Count > 0)
            {
                experiments.AddRange(finishedJob.FailedExperimentalUnits);
                dispatcher.Log?.Invoke(finishedJob.FailedExperimentalUnits.Count + " failed experiments enqueued again for further trials");
            }

            // Add the herd agent to the free agent list
            if (!freeHerdAgents.Contains(finishedJob.HerdAgent))
            {
                freeHerdAgents.Add(finishedJob.HerdAgent);
            }

            // Assign experiments to free agents
            if (!cancellationTokenSource.IsCancellationRequested)
            {
                AssignExperiments(ref experiments, ref freeHerdAgents, ref assignedJobs, jobDispatcherOptions);
            }
        }
    }
    catch (Exception ex)
    {
        dispatcher.Log?.Invoke("Exception in runExperimentQueueRemotely()");
        dispatcher.Log?.Invoke(ex.StackTrace);
    }
    finally
    {
        if (cancellationTokenSource.IsCancellationRequested)
        {
            // The user cancelled: the units of the last assigned jobs go back to the pending list
            foreach (Job job in assignedJobs)
            {
                experiments.AddRange(job.ExperimentalUnits);
            }
        }
        else
        {
            // Tally the jobs still being monitored. Each task is awaited on its own so that a
            // faulted or cancelled job is logged instead of throwing out of the finally block
            foreach (Task<Job> monitoredTask in monitoredJobTasks)
            {
                try
                {
                    Job monitoredJob = await monitoredTask;
                    numExperimentalUnitsRun += monitoredJob.ExperimentalUnits.Count - monitoredJob.FailedExperimentalUnits.Count;
                }
                catch (Exception taskException)
                {
                    dispatcher.Log?.Invoke("Exception while waiting for a monitored job to finish");
                    dispatcher.Log?.Invoke(taskException.StackTrace);
                }
            }
        }
    }

    return numExperimentalUnitsRun;
}