Beispiel #1
0
        /// <summary>
        /// Assigns the pending experimental units to the free herd agents, sends the resulting jobs and
        /// monitors them. Each time a job finishes, its failed experimental units are enqueued again,
        /// its herd agent is returned to the free list and a new assignment round is run.
        /// </summary>
        /// <param name="experiments">Pending experimental units. Failed units are appended to this list, and
        /// so are the units of the jobs still in the assigned list if the run is cancelled</param>
        /// <param name="freeHerdAgents">Herd agents available for new jobs. Agents are added back when
        /// their job finishes</param>
        /// <param name="dispatcher">Callbacks (JobAssigned, JobFinished, Log, ...) used to notify the client</param>
        /// <param name="cancellationTokenSource">Checked between steps to stop dispatching new jobs</param>
        /// <param name="jobDispatcherOptions">Forwarded untouched to AssignExperiments. May be null</param>
        /// <returns>The number of experimental units run, or 0 if no job could be assigned initially</returns>
        public static async Task<int> RunExperimentsAsync(List<ExperimentalUnit> experiments, List<HerdAgentInfo> freeHerdAgents
                                                          , Monitoring.MsgDispatcher dispatcher
                                                          , CancellationTokenSource cancellationTokenSource
                                                          , JobDispatcherOptions jobDispatcherOptions = null)
        {
            List<Job> assignedJobs = new List<Job>();
            List<Task<Job>> monitoredJobTasks = new List<Task<Job>>();
            int numExperimentalUnitsRun = 0;

            // Calculate run-time requirements
            foreach (ExperimentalUnit experimentalUnit in experiments)
            {
                experimentalUnit.RequestRuntimeRequirements();
            }

            // Assign experiments to free agents
            AssignExperiments(ref experiments, ref freeHerdAgents, ref assignedJobs, jobDispatcherOptions);

            if (assignedJobs.Count == 0)
            {
                return 0;
            }

            try
            {
                while ((assignedJobs.Count > 0 || monitoredJobTasks.Count > 0 || experiments.Count > 0)
                       && !cancellationTokenSource.IsCancellationRequested)
                {
                    // Notify the client of each newly assigned job and start running it remotely.
                    // NOTE(review): this presumes AssignExperiments() resets assignedJobs on every call;
                    // otherwise jobs already sent would be sent again on the next iteration - confirm
                    foreach (Job job in assignedJobs)
                    {
                        dispatcher.JobAssigned?.Invoke(job);

                        monitoredJobTasks.Add(job.SendJobAndMonitor(dispatcher));
                    }

                    // All pending experiments sent? Then we just wait for every running job to finish
                    if (experiments.Count == 0)
                    {
                        // Awaited instead of blocking with Wait(): blocking inside an async method can deadlock
                        // under a synchronization context and wraps failures in an AggregateException
                        Job[] finishedJobs = await Task.WhenAll(monitoredJobTasks);
                        foreach (Job job in finishedJobs)
                        {
                            dispatcher.JobFinished?.Invoke(job);
                        }
                        dispatcher.Log?.Invoke("All the experiments have finished");

                        // The units run by these last jobs are counted in the finally block below
                        break;
                    }

                    // Wait for the first agent to finish and give it something to do
                    Task<Job> finishedTask = await Task.WhenAny(monitoredJobTasks);

                    Job finishedJob = await finishedTask;
                    dispatcher.Log?.Invoke("Job finished: " + finishedJob.ToString());

                    // A job finished
                    monitoredJobTasks.Remove(finishedTask);
                    if (!cancellationTokenSource.IsCancellationRequested)
                    {
                        // NOTE(review): failed units are included in this count, whereas the finally block
                        // subtracts them for the last batch of jobs - confirm which one is intended
                        numExperimentalUnitsRun += finishedJob.ExperimentalUnits.Count;
                        dispatcher.JobFinished?.Invoke(finishedJob);
                    }

                    // Failed experimental units go back to the pending list to be tried again
                    if (finishedJob.FailedExperimentalUnits.Count > 0)
                    {
                        experiments.AddRange(finishedJob.FailedExperimentalUnits);
                        dispatcher.Log?.Invoke(finishedJob.FailedExperimentalUnits.Count + " failed experiments enqueued again for further trials");
                    }

                    // Add the herd agent to the free agent list
                    if (!freeHerdAgents.Contains(finishedJob.HerdAgent))
                    {
                        freeHerdAgents.Add(finishedJob.HerdAgent);
                    }

                    // Assign experiments to free agents
                    if (!cancellationTokenSource.IsCancellationRequested)
                    {
                        AssignExperiments(ref experiments, ref freeHerdAgents, ref assignedJobs, jobDispatcherOptions);
                    }
                }
            }
            catch (Exception ex)
            {
                dispatcher.Log?.Invoke("Exception in runExperimentQueueRemotely()");
                // Log type, message and stack trace: the stack trace alone does not say what failed
                dispatcher.Log?.Invoke(ex.ToString());
            }
            finally
            {
                if (cancellationTokenSource.IsCancellationRequested)
                {
                    // The user cancelled, need to add unfinished experimental units to the pending list
                    foreach (Job job in assignedJobs)
                    {
                        experiments.AddRange(job.ExperimentalUnits);
                    }
                }
                else
                {
                    foreach (Task<Job> task in monitoredJobTasks)
                    {
                        // A faulted or cancelled task has no result: reading Result would throw from this
                        // finally block and replace the value returned by the method
                        if (task.IsFaulted || task.IsCanceled)
                        {
                            continue;
                        }

                        // NOTE(review): if an exception interrupted the loop above, the task may still be
                        // running and Result blocks until it completes
                        Job job = task.Result;
                        numExperimentalUnitsRun += job.ExperimentalUnits.Count - job.FailedExperimentalUnits.Count;
                    }
                }
            }
            return numExperimentalUnitsRun;
        }
Beispiel #2
0
        /// <summary>
        /// Async method that sends the job to the herd agent and monitors its evolution, using the
        /// callback functions in dispatcher to notify the client of any event.
        /// If the connection fails or an unexpected exception is thrown, all the experimental units of
        /// the job are added to the failed list. The herd agent is always disconnected before returning.
        /// </summary>
        /// <param name="dispatcher">The event dispatching data: callbacks and the cancellation token</param>
        /// <returns>An awaitable task that returns the finished job (this same instance)</returns>
        public async Task<Job> SendJobAndMonitor(Monitoring.MsgDispatcher dispatcher)
        {
            Shepherd shepherd = new Shepherd();

            shepherd.SetLogMessageHandler(dispatcher.Log);

            try
            {
                PrepareForExecution(); // compute inputs/outputs...

                // Let the dispatcher know which experimental units were created
                foreach (ExperimentalUnit expUnit in ExperimentalUnits)
                {
                    dispatcher.ExperimentalUnitLaunched?.Invoke(this, expUnit);
                }

                if (!shepherd.ConnectToHerdAgent(HerdAgent.ipAddress))
                {
                    // SOLVE THIS: failed experimental units should be dealt by the caller
                    FailedExperimentalUnits.AddRange(ExperimentalUnits);

                    dispatcher.AllStatesChanged?.Invoke(this, Monitoring.State.ERROR);
                    dispatcher.Log?.Invoke("Failed to connect to herd agent " + HerdAgent.ipAddress);

                    return this;
                }

                dispatcher.Log?.Invoke("Sending job to herd agent " + HerdAgent.ipAddress);
                dispatcher.AllStatesChanged?.Invoke(this, Monitoring.State.SENDING);

                shepherd.SendJobQuery(this, dispatcher.CancelToken);
                dispatcher.Log?.Invoke("Job sent to herd agent " + HerdAgent.ipAddress);

                // Null-conditional invocation, like every other callback: with no subscriber a direct call
                // throws NullReferenceException and the whole job would be reported as failed
                dispatcher.AllStatesChanged?.Invoke(this, Monitoring.State.RUNNING);

                dispatcher.Log?.Invoke("Monitoring remote job run by herd agent " + HerdAgent.ipAddress);

                // Monitor the remote job
                while (true)
                {
                    // NOTE(review): the number of bytes read is ignored - confirm how a closed connection
                    // is reported by ReadAsync(), since this loop has no other exit than results/exceptions
                    await shepherd.ReadAsync(dispatcher.CancelToken);

                    dispatcher.CancelToken.ThrowIfCancellationRequested();

                    string xmlItem = shepherd.m_xmlStream.processNextXMLItem();

                    while (xmlItem != "")
                    {
                        string experimentId = shepherd.m_xmlStream.getLastXMLItemTag();

                        // The returned content is not needed, but the call must stay: according to the original
                        // author's note, getLastXMLItemContent() resets lastXMLItem, so that the next two calls
                        // return the tag and the content of the message itself
                        shepherd.m_xmlStream.getLastXMLItemContent();

                        string messageId      = shepherd.m_xmlStream.getLastXMLItemTag();
                        string messageContent = shepherd.m_xmlStream.getLastXMLItemContent();

                        if (experimentId == XMLStream.m_defaultMessageType)
                        {
                            // The message comes from the herd agent, must be sending results
                            dispatcher.Log?.Invoke("Receiving job results");
                            dispatcher.AllStatesChanged?.Invoke(this, Monitoring.State.RECEIVING);

                            // NOTE(review): the boolean returned by ReceiveJobResult() is ignored, so the job
                            // is flagged as finished whatever its value - confirm this is intended
                            await shepherd.ReceiveJobResult(dispatcher.CancelToken);

                            dispatcher.AllStatesChanged?.Invoke(this, Monitoring.State.FINISHED);

                            m_bFinished = true;

                            dispatcher.Log?.Invoke("Job results received");
                            return this;
                        }

                        // The message comes from an experimental unit, the dispatcher will deal with it
                        dispatcher.MessageReceived?.Invoke(this, experimentId, messageId, messageContent);

                        xmlItem = shepherd.m_xmlStream.processNextXMLItem();
                    }
                }
            }
            catch (OperationCanceledException)
            {
                // Quit remote jobs
                dispatcher.Log?.Invoke("Cancellation requested by user");
                try
                {
                    shepherd.WriteMessage(Shepherd.m_quitMessage, true);

                    // Wait for the ack from the herd agent. The user's token is already cancelled, so a
                    // token that cannot be cancelled is used for this read
                    await shepherd.ReadAsync(CancellationToken.None);
                }
                catch (Exception ex)
                {
                    // A failure while notifying the agent must not escape from this handler: the job is
                    // cancelled anyway and the finally block still disconnects
                    dispatcher.Log?.Invoke("Failed to send the quit message to herd agent " + HerdAgent.ipAddress);
                    dispatcher.Log?.Invoke(ex.ToString());
                }

                m_bCancelled = true;
            }
            catch (Exception ex)
            {
                dispatcher.Log?.Invoke("Unhandled exception in Badger.sendJobAndMonitor(). Agent " + HerdAgent.ipAddress);
                dispatcher.Log?.Invoke(ex.ToString());

                // Every experimental unit of the job is considered failed
                FailedExperimentalUnits.Clear();
                FailedExperimentalUnits.AddRange(ExperimentalUnits);
            }
            finally
            {
                dispatcher.Log?.Invoke("Disconnected from herd agent " + HerdAgent.ipAddress);
                shepherd.Disconnect();
            }
            return this;
        }