/// <summary>
/// Builds the queue monitor: stores the collaborators it was given and wraps every
/// experiment in a <c>MonitoredExperimentViewModel</c> that is registered both in the
/// monitored batch list and in the pending list.
/// </summary>
/// <param name="freeHerdAgents">Herd agents stored as this monitor's agent list.</param>
/// <param name="experiments">Experiments to wrap, one view model per experiment.</param>
/// <param name="evaluationMonitor">Plot view model stored by the monitor and handed to each experiment view model.</param>
/// <param name="logFunctionDelegate">Delegate stored as this monitor's log function.</param>
public ExperimentQueueMonitorViewModel(
    List<HerdAgentViewModel> freeHerdAgents,
    List<Experiment> experiments,
    PlotViewModel evaluationMonitor,
    Logger.LogFunction logFunctionDelegate)
{
    m_evaluationMonitor = evaluationMonitor;
    m_herdAgentList = freeHerdAgents;
    logFunction = logFunctionDelegate;

    foreach (Experiment experiment in experiments)
    {
        var wrapped = new MonitoredExperimentViewModel(experiment, evaluationMonitor);

        // Every experiment is listed as monitored and starts out pending.
        m_monitoredExperimentBatchList.Add(wrapped);
        m_pendingExperiments.Add(wrapped);
    }

    // Raise the change notification once, after all experiments have been added.
    NotifyOfPropertyChange(() => monitoredExperimentBatchList);
}
/// <summary>
/// Initializes the monitor with its herd agents, evaluation plot and log function, and
/// creates one <c>MonitoredExperimentViewModel</c> per experiment. Each view model is
/// added to the monitored batch list and to the pending list.
/// </summary>
/// <param name="freeHerdAgents">List kept as the monitor's herd agent list.</param>
/// <param name="experiments">Experiments for which view models are created.</param>
/// <param name="evaluationMonitor">Plot view model kept by the monitor and passed to every created view model.</param>
/// <param name="logFunctionDelegate">Delegate kept as the monitor's log function.</param>
public ExperimentQueueMonitorViewModel(List<HerdAgentViewModel> freeHerdAgents,
    List<Experiment> experiments, PlotViewModel evaluationMonitor,
    Logger.LogFunction logFunctionDelegate)
{
    m_evaluationMonitor = evaluationMonitor;
    m_herdAgentList = freeHerdAgents;
    logFunction = logFunctionDelegate;

    foreach (Experiment current in experiments)
    {
        MonitoredExperimentViewModel viewModel =
            new MonitoredExperimentViewModel(current, evaluationMonitor);

        m_monitoredExperimentBatchList.Add(viewModel);
        m_pendingExperiments.Add(viewModel);
    }

    // Notify listeners after the batch list has been fully populated.
    NotifyOfPropertyChange(() => monitoredExperimentBatchList);
}
/// <summary>
/// Sends this batch's job to its herd agent and then processes the XML items read back
/// from the agent until the job results have been received, the run is cancelled, or an
/// exception occurs.
/// </summary>
/// <remarks>
/// <para>
/// If the connection to the herd agent fails, every experiment of the batch is added to
/// <c>m_failedExperiments</c> and set to <c>ERROR</c>.
/// </para>
/// <para>
/// Items whose outer tag matches the name of a monitored experiment are handled according
/// to their inner tag: <c>Progress</c>, <c>Evaluation</c>, <c>Message</c> or <c>End</c>.
/// An item that matches no experiment and whose tag equals
/// <c>XMLStream.m_defaultMessageType</c> triggers the reception of the job results.
/// </para>
/// <para>
/// Cancellation and any other exception are handled inside the method and are not
/// rethrown from their handlers; the herd agent is disconnected on every path.
/// </para>
/// </remarks>
/// <returns>This batch instance.</returns>
public async Task<ExperimentBatch> sendJobAndMonitor()
{
    m_failedExperiments.Clear();

    try
    {
        // ---- Send the job data ----
        m_monitoredExperiments.ForEach(
            (e) => e.state = MonitoredExperimentViewModel.ExperimentState.WAITING_EXECUTION);

        CJob job = getJob();
        bool connected = m_shepherd.connectToHerdAgent(m_herdAgent.ipAddress);

        if (!connected)
        {
            // Without a connection nothing can run: record the whole batch as failed.
            foreach (MonitoredExperimentViewModel unsent in m_monitoredExperiments)
            {
                m_failedExperiments.Add(unsent);
            }
            m_monitoredExperiments.ForEach(
                (e) => e.state = MonitoredExperimentViewModel.ExperimentState.ERROR);
            logMessage("Failed to connect to herd agent " + m_herdAgent.ipAddress);
            return this;
        }

        logMessage("Sending job to herd agent " + m_herdAgent.ipAddress);
        m_monitoredExperiments.ForEach(
            (e) => e.state = MonitoredExperimentViewModel.ExperimentState.SENDING);
        m_herdAgent.status = "Sending job query";

        m_shepherd.SendJobQuery(job, m_cancelToken);
        logMessage("Job sent to herd agent " + m_herdAgent.ipAddress);

        m_monitoredExperiments.ForEach(
            (e) => e.state = MonitoredExperimentViewModel.ExperimentState.RUNNING);
        m_herdAgent.status = "Executing job query";

        logMessage("Monitoring remote job run by herd agent " + m_herdAgent.ipAddress);

        // ---- Monitor the remote job ----
        while (true)
        {
            // NOTE(review): the number of bytes read is not inspected, so a read that
            // yields no data is not treated specially here -- confirm this is intended.
            await m_shepherd.readAsync(m_cancelToken);
            m_cancelToken.ThrowIfCancellationRequested();

            // Drain every XML item currently available; an empty string ends the batch.
            for (string item = m_shepherd.m_xmlStream.processNextXMLItem();
                 item != "";
                 item = m_shepherd.m_xmlStream.processNextXMLItem())
            {
                // The outer tag is compared against the experiment names.
                string outerTag = m_shepherd.m_xmlStream.getLastXMLItemTag();

                // The returned content is not needed, but the call must stay: according
                // to the original author's note it resets the last XML item, which is
                // why the tag and content are queried a second time below.
                m_shepherd.m_xmlStream.getLastXMLItemContent();

                MonitoredExperimentViewModel monitored =
                    m_monitoredExperiments.Find((e) => e.name == outerTag);

                string innerTag = m_shepherd.m_xmlStream.getLastXMLItemTag();
                string innerContent = m_shepherd.m_xmlStream.getLastXMLItemContent();

                if (monitored == null)
                {
                    if (outerTag == XMLStream.m_defaultMessageType)
                    {
                        // Job results can be expected to be sent back even if some of
                        // the tasks failed.
                        logMessage("Receiving job results");
                        m_monitoredExperiments.ForEach(
                            (e) => e.state = MonitoredExperimentViewModel.ExperimentState.RECEIVING);
                        m_herdAgent.status = "Receiving output files";

                        // NOTE(review): the boolean result is ignored, so the batch is
                        // marked FINISHED regardless of it -- confirm this is intended.
                        await m_shepherd.ReceiveJobResult(m_cancelToken);

                        m_monitoredExperiments.ForEach(
                            (e) => e.state = MonitoredExperimentViewModel.ExperimentState.FINISHED);
                        m_herdAgent.status = "Finished";
                        logMessage("Job results received");
                        return this;
                    }

                    // Items that match neither an experiment nor the default type are skipped.
                    continue;
                }

                switch (innerTag)
                {
                    case "Progress":
                    {
                        double parsedProgress =
                            double.Parse(innerContent, CultureInfo.InvariantCulture);
                        monitored.progress = Convert.ToInt32(parsedProgress);
                        break;
                    }

                    case "Evaluation":
                    {
                        // Expected shape: <Evaluation>0.0,-1.23</Evaluation>
                        string[] parts = innerContent.Split(',');
                        string seriesName = monitored.name;

                        if (parts.Length == 2)
                        {
                            int seriesId;
                            if (m_experimentSeriesId.Keys.Contains(monitored.name))
                            {
                                seriesId = m_experimentSeriesId[seriesName];
                            }
                            else
                            {
                                // First evaluation of this experiment: create its line series.
                                seriesId = m_evaluationPlot.addLineSeries(seriesName);
                                m_experimentSeriesId.Add(seriesName, seriesId);
                            }

                            double firstValue =
                                double.Parse(parts[0], CultureInfo.InvariantCulture);
                            double secondValue =
                                double.Parse(parts[1], CultureInfo.InvariantCulture);
                            m_evaluationPlot.addLineSeriesValue(seriesId, firstValue, secondValue);
                        }
                        break;
                    }

                    case "Message":
                        monitored.addStatusInfoLine(innerContent);
                        break;

                    case "End":
                        if (innerContent == "Ok")
                        {
                            logMessage("Job finished sucessfully");
                            monitored.state =
                                MonitoredExperimentViewModel.ExperimentState.WAITING_RESULT;
                        }
                        else
                        {
                            logMessage("Remote job execution wasn't successful");
                            // The experiment is deliberately not added to
                            // m_failedExperiments: per the original author's note, a
                            // failure reported here is much more likely caused by
                            // invalid parameters than by a network-related or other
                            // user-related problem.
                            monitored.state =
                                MonitoredExperimentViewModel.ExperimentState.ERROR;
                        }
                        break;
                }
            }
        }
    }
    catch (OperationCanceledException)
    {
        // Ask the herd agent to quit the remote jobs.
        logMessage("Cancellation requested by user");
        m_shepherd.writeMessage(Shepherd.m_quitMessage, true);

        // Per the original note, this read waits for the acknowledgement from the
        // client; it uses a token that cannot be cancelled instead of m_cancelToken.
        await m_shepherd.readAsync(CancellationToken.None);

        m_monitoredExperiments.ForEach((e) => e.resetState());
        m_herdAgent.status = "";
    }
    catch (Exception ex)
    {
        logMessage("Unhandled exception in Badger.sendJobAndMonitor(). Agent " + m_herdAgent.ipAddress);
        logMessage(ex.ToString());

        // Record every experiment of the batch as failed.
        m_failedExperiments.Clear();
        foreach (MonitoredExperimentViewModel failed in m_monitoredExperiments)
        {
            m_failedExperiments.Add(failed);
        }
        Console.WriteLine(ex.StackTrace);
    }
    finally
    {
        // Runs on every path, including the early returns inside the try block.
        logMessage("Disconnected from herd agent " + m_herdAgent.ipAddress);
        m_shepherd.disconnect();
    }

    return this;
}