/// <summary>
/// Callback invoked when the Azure Batch job monitor reports the job state.
/// Forwards job state changes and task state changes to the scheduler
/// notification callback and raises the Exit event when asked to.
/// </summary>
/// <param name="state">Job state reported by the monitor.</param>
/// <param name="stateChangedTaskList">
/// Tasks whose state changed; when null no task notification is sent.
/// </param>
/// <param name="shouldExit">
/// When true, the Exit event is raised after the notifications were attempted.
/// </param>
private async void JobMonitor_OnReportJobState(Telepathy.Session.Data.JobState state, List<TaskInfo> stateChangedTaskList, bool shouldExit)
{
    if (state != this.currentState)
    {
        lock (this.changeJobStateLock)
        {
            // Re-check under the lock: another report may have recorded the same state
            // between the unlocked check above and acquiring the lock.
            if (state != this.currentState)
            {
                this.currentState = state;

                // Read the callback once. The field is reset to null by the catch blocks in
                // this method, so checking the field and then re-reading it could observe null.
                ISchedulerNotify jobStateProxy = this.context;
                if (jobStateProxy != null)
                {
                    try
                    {
                        TraceHelper.TraceEvent(this.sessionid, TraceEventType.Information, "[AzureBatchJobMonitorEntry] Job state change event triggered, new state received from AzureBatchJobMonitor: {0}", state);

                        // NOTE(review): the result of JobStateChanged is discarded here (await is not
                        // allowed inside a lock). If it returns a Task, failures that surface
                        // asynchronously will not reach the catch blocks below - confirm.
                        jobStateProxy.JobStateChanged(state);
                    }
                    catch (System.ObjectDisposedException e)
                    {
                        TraceHelper.TraceEvent(this.sessionid, TraceEventType.Error, "[AzureBatchJobMonitorEntry] Callback channel is disposed: {0}, lose connection to broker.", e);
                        this.context = null;
                    }
                    catch (CommunicationException e)
                    {
                        // Channel is aborted, set the context to null
                        TraceHelper.TraceEvent(this.sessionid, TraceEventType.Error, "[AzureBatchJobMonitorEntry] Callback channel is aborted: {0}", e);
                        this.context = null;
                    }
                    catch (Exception e)
                    {
                        TraceHelper.TraceEvent(this.sessionid, TraceEventType.Error, "[AzureBatchJobMonitorEntry] Failed to trigger job state change event: {0}", e);
                    }
                }
            }
        }
    }

    if (stateChangedTaskList != null)
    {
        // Same single-read rule as above: the job state branch may just have cleared the field.
        ISchedulerNotify taskStateProxy = this.context;
        if (taskStateProxy != null)
        {
            try
            {
                await taskStateProxy.TaskStateChanged(stateChangedTaskList);
                TraceHelper.TraceEvent(this.sessionid, TraceEventType.Information, "[AzureBatchJobMonitorEntry] Task state change event triggered.");
            }
            catch (System.ObjectDisposedException e)
            {
                TraceHelper.TraceEvent(this.sessionid, TraceEventType.Error, "[AzureBatchJobMonitorEntry] Callback channel is disposed: {0}, lose connection to broker", e);
                this.context = null;
            }
            catch (CommunicationException e)
            {
                // Channel is aborted, set the context to null
                TraceHelper.TraceEvent(this.sessionid, TraceEventType.Error, "[AzureBatchJobMonitorEntry] Callback channel is aborted: {0}", e);
                this.context = null;
            }
            catch (Exception e)
            {
                TraceHelper.TraceEvent(this.sessionid, TraceEventType.Error, "[AzureBatchJobMonitorEntry] Failed to trigger task state change event: {0}", e);
            }
        }
    }

    if (shouldExit)
    {
        // Copy the delegate before invoking so that a handler being removed between the
        // null check and the call cannot cause a NullReferenceException.
        var exitHandler = this.Exit;
        if (exitHandler != null)
        {
            TraceHelper.TraceEvent(this.sessionid, TraceEventType.Information, "[AzureBatchJobMonitorEntry] Exit AzureBatchJobMonitor Entry");
            exitHandler(this, EventArgs.Empty);
        }
    }
}
/// <summary>
/// Queries the scheduler job for its state and counters and pushes job state
/// and task state changes to the scheduler notification callback.
/// </summary>
/// <remarks>
/// Only the caller that moves the pull-task register from 0 to 1 runs the query
/// loop; every other caller just leaves its request registered and returns.
/// The loop repeats, sleeping with a doubling gap capped at PullTaskMaxGap,
/// while requests remain registered after a round. The Exit event is raised
/// after the loop when a terminal job state was observed outside a requeue.
/// </remarks>
private void QueryTaskInfo()
{
    TraceHelper.TraceEvent(this.sessionid, TraceEventType.Verbose, "[JobMonitorEntry] Enters QueryTaskInfo method.");

    if (Interlocked.Increment(ref this.registerdPullTask) != 1)
    {
        // register count doesn't change from 0 to 1 means somebody is pulling task, quit
        return;
    }

    bool shouldExit = false;
    List<TaskInfo> taskInfoList = null;
    this.pullTaskGap = PullTaskMinGap;

    while (true)
    {
        try
        {
            TraceHelper.TraceEvent(this.sessionid, TraceEventType.Verbose, "[JobMonitorEntry] Starting get job counters.");
            ISchedulerJobCounters counters = this.schedulerJob.GetCounters();
            JobInfo jobInfo = new JobInfo(this.sessionid, counters);
            JobState state = this.schedulerJob.State;
            TraceHelper.TraceEvent(this.sessionid, TraceEventType.Information, "[JobMonitorEntry] Starting query task info: JobState = {0}\nJobInfo: {1}", state, jobInfo);

            if (state != this.previousState)
            {
                this.previousState = state;

                // Read the callback once. The field is reset to null by the catch blocks in
                // this method, so checking the field and then re-reading it could observe null.
                ISchedulerNotify jobStateProxy = this.context;
                if (jobStateProxy != null)
                {
                    // Bug 7144: dispose JobMonitorEntry instance and unsubscribe events if job state changed to Canceled/Finished/Failed
                    shouldExit = (0 == this.isRequeuingJob) && (state == JobState.Canceled || state == JobState.Finished || state == JobState.Failed);

                    try
                    {
                        // ignore JobState change that happened during job requeue operation.
                        // Note: requeue job takes 3 steps: cancel job, configure job, submit job. Job state transitions during job requeue, i.e.,
                        // (running) -> cancelling -> cancelled -> configuring -> submitted -> validating ->(queued), will all be ignored.
                        if (0 == this.isRequeuingJob)
                        {
                            jobStateProxy.JobStateChanged(JobStateConverter.FromHpcJobState(state)).ContinueWith(this.OnEndJobStateChanged);
                            TraceHelper.TraceEvent(this.sessionid, TraceEventType.Information, "[JobMonitorEntry] Job state change event triggered, new state: {0}", state);
                        }
                    }
                    catch (CommunicationException e)
                    {
                        // Channel is aborted, set the context to null
                        TraceHelper.TraceEvent(this.sessionid, TraceEventType.Error, "[JobMonitorEntry] Callback channel is aborted: {0}", e);
                        this.context = null;
                    }
                    catch (Exception e)
                    {
                        TraceHelper.TraceEvent(this.sessionid, TraceEventType.Error, "[JobMonitorEntry] Failed to trigger job state change event: {0}", e);
                    }
                }
            }

            // Task info is fetched on the first round (no list yet) or when the job info differs
            // from previousJobInfo.
            if (this.context != null && (taskInfoList == null || !jobInfo.Equals(this.previousJobInfo)))
            {
                try
                {
                    taskInfoList = this.GetTaskInfo();
                    if (taskInfoList != null)
                    {
                        // Re-read the callback after GetTaskInfo returned and null-check the
                        // local copy: the field may have been cleared since the check above.
                        ISchedulerNotify taskStateProxy = this.context;
                        if (taskStateProxy != null)
                        {
                            taskStateProxy.TaskStateChanged(taskInfoList).ContinueWith(this.OnEndTaskStateChanged, jobInfo);
                            TraceHelper.TraceEvent(this.sessionid, TraceEventType.Information, "[JobMonitorEntry] Task state change event triggered.");
                        }
                    }
                }
                catch (CommunicationException e)
                {
                    // Channel is aborted, set the context to null
                    TraceHelper.TraceEvent(this.sessionid, TraceEventType.Error, "[JobMonitorEntry] Callback channel is aborted: {0}", e);
                    this.context = null;
                }
                catch (Exception e)
                {
                    TraceHelper.TraceEvent(this.sessionid, TraceEventType.Error, "[JobMonitorEntry] Failed to trigger task state change event: {0}", e);
                }
            }
        }
        catch (Exception e)
        {
            TraceHelper.TraceEvent(this.sessionid, TraceEventType.Warning, "[JobMonitorEntry] Exception thrown when querying task info: {0}", e);
        }

        // pull task is not registered, quit
        if (Interlocked.Decrement(ref this.registerdPullTask) == 0)
        {
            break;
        }

        TraceHelper.TraceEvent(this.sessionid, TraceEventType.Information, "[JobMonitorEntry] Waiting {0} miliseconds and start another round of getting task info.", this.pullTaskGap);

        // Sleep and pull task again, clear the register pull task flag
        Thread.Sleep(this.pullTaskGap);

        // Double the gap for the next round, capped at PullTaskMaxGap.
        if (this.pullTaskGap < PullTaskMaxGap)
        {
            this.pullTaskGap *= 2;
            if (this.pullTaskGap > PullTaskMaxGap)
            {
                this.pullTaskGap = PullTaskMaxGap;
            }
        }

        // Collapse all requests registered so far into the single round about to run.
        this.registerdPullTask = 1;
    }

    if (shouldExit)
    {
        // Copy the delegate before invoking so that a handler being removed between the
        // null check and the call cannot cause a NullReferenceException.
        var exitHandler = this.Exit;
        if (exitHandler != null)
        {
            exitHandler(this, EventArgs.Empty);
        }
    }
}