Exemplo n.º 1
0
        /// <summary>
        /// Callback when Azure Batch Monitor report job state.
        /// Forwards a job state change and any task state changes to the callback
        /// channel held in this.context, then raises the Exit event when requested.
        /// </summary>
        /// <param name="state">job state reported by the monitor</param>
        /// <param name="stateChangedTaskList">tasks whose state changed; null when there is nothing to report</param>
        /// <param name="shouldExit">true to raise the Exit event after the notifications were handled</param>
        private async void JobMonitor_OnReportJobState(Telepathy.Session.Data.JobState state, List <TaskInfo> stateChangedTaskList, bool shouldExit)
        {
            // The JobStateChanged call is started under the lock (so notifications are issued in the
            // order the state changes were accepted) but awaited after the lock is released, because
            // await is not allowed inside a lock statement. Awaiting it is what makes asynchronous
            // failures of the callback reach the exception handling below.
            System.Threading.Tasks.Task pendingJobStateNotification = null;

            if (state != this.currentState)
            {
                lock (this.changeJobStateLock)
                {
                    // Check again under the lock: another caller may have recorded the same state meanwhile.
                    if (state != this.currentState)
                    {
                        this.currentState = state;

                        // Read the callback channel once so it cannot become null between check and use.
                        ISchedulerNotify jobStateProxy = this.context;
                        if (jobStateProxy != null)
                        {
                            try
                            {
                                TraceHelper.TraceEvent(this.sessionid, TraceEventType.Information, "[AzureBatchJobMonitorEntry] Job state change event triggered, new state received from AzureBatchJobMonitor: {0}", state);
                                pendingJobStateNotification = jobStateProxy.JobStateChanged(state);
                            }
                            catch (Exception e)
                            {
                                // Failure raised synchronously while starting the call.
                                this.HandleJobStateCallbackFailure(e);
                            }
                        }
                    }
                }
            }

            if (pendingJobStateNotification != null)
            {
                try
                {
                    await pendingJobStateNotification;
                }
                catch (Exception e)
                {
                    // Failure raised asynchronously by the callback.
                    this.HandleJobStateCallbackFailure(e);
                }
            }

            if (stateChangedTaskList != null)
            {
                ISchedulerNotify taskStateProxy = this.context;
                if (taskStateProxy != null)
                {
                    try
                    {
                        await taskStateProxy.TaskStateChanged(stateChangedTaskList);

                        TraceHelper.TraceEvent(this.sessionid, TraceEventType.Information, "[AzureBatchJobMonitorEntry] Task state change event triggered.");
                    }
                    catch (System.ObjectDisposedException e)
                    {
                        TraceHelper.TraceEvent(this.sessionid, TraceEventType.Error, "[AzureBatchJobMonitorEntry] Callback channel is disposed: {0}, lose connection to broker", e);
                        this.context = null;
                    }
                    catch (CommunicationException e)
                    {
                        // Channel is aborted, set the context to null
                        TraceHelper.TraceEvent(this.sessionid, TraceEventType.Error, "[AzureBatchJobMonitorEntry] Callback channel is aborted: {0}", e);
                        this.context = null;
                    }
                    catch (Exception e)
                    {
                        TraceHelper.TraceEvent(this.sessionid, TraceEventType.Error, "[AzureBatchJobMonitorEntry] Failed to trigger task state change event: {0}", e);
                    }
                }
            }

            if (shouldExit)
            {
                // Copy the delegate first: a handler unsubscribing between the null check and the
                // invocation would otherwise cause a NullReferenceException.
                var exitHandler = this.Exit;
                if (exitHandler != null)
                {
                    TraceHelper.TraceEvent(this.sessionid, TraceEventType.Information, "[AzureBatchJobMonitorEntry] Exit AzureBatchJobMonitor Entry");
                    exitHandler(this, EventArgs.Empty);
                }
            }
        }

        /// <summary>
        /// Trace a failure of the JobStateChanged callback and clear this.context when the
        /// callback channel was disposed or aborted.
        /// </summary>
        /// <param name="e">exception raised by the callback</param>
        private void HandleJobStateCallbackFailure(Exception e)
        {
            if (e is System.ObjectDisposedException)
            {
                TraceHelper.TraceEvent(this.sessionid, TraceEventType.Error, "[AzureBatchJobMonitorEntry] Callback channel is disposed: {0}, lose connection to broker.", e);
                this.context = null;
            }
            else if (e is CommunicationException)
            {
                // Channel is aborted, set the context to null
                TraceHelper.TraceEvent(this.sessionid, TraceEventType.Error, "[AzureBatchJobMonitorEntry] Callback channel is aborted: {0}", e);
                this.context = null;
            }
            else
            {
                TraceHelper.TraceEvent(this.sessionid, TraceEventType.Error, "[AzureBatchJobMonitorEntry] Failed to trigger job state change event: {0}", e);
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Query task info.
        /// Polls the scheduler job for its counters and state, forwards job state changes and
        /// task info to the callback channel held in this.context, and keeps polling (with a
        /// growing wait between rounds) while further pull requests are registered.
        /// Raises the Exit event at the end when the job reached Canceled/Finished/Failed
        /// outside of a requeue operation.
        /// </summary>
        private void QueryTaskInfo()
        {
            TraceHelper.TraceEvent(this.sessionid, TraceEventType.Verbose, "[JobMonitorEntry] Enters QueryTaskInfo method.");
            if (Interlocked.Increment(ref this.registerdPullTask) != 1)
            {
                // register count doesn't change from 0 to 1 means somebody is pulling task, quit
                return;
            }

            bool            shouldExit   = false;
            List <TaskInfo> taskInfoList = null;

            // Every run starts with the minimum wait; it is doubled after each round below.
            this.pullTaskGap = PullTaskMinGap;

            while (true)
            {
                try
                {
                    TraceHelper.TraceEvent(this.sessionid, TraceEventType.Verbose, "[JobMonitorEntry] Starting get job counters.");
                    ISchedulerJobCounters counters = this.schedulerJob.GetCounters();

                    JobInfo  jobInfo = new JobInfo(this.sessionid, counters);
                    JobState state   = this.schedulerJob.State;

                    TraceHelper.TraceEvent(this.sessionid, TraceEventType.Information, "[JobMonitorEntry] Starting query task info: JobState = {0}\nJobInfo: {1}", state, jobInfo);
                    if (state != this.previousState)
                    {
                        // The new state is recorded even when no callback channel is available.
                        this.previousState = state;
                        if (this.context != null)
                        {
                            // Bug 7144: dispose JobMonitorEntry instance and unsubscribe events if job state changed to Canceled/Finished/Failed
                            shouldExit = (0 == this.isRequeuingJob) && (state == JobState.Canceled || state == JobState.Finished || state == JobState.Failed);

                            try
                            {
                                // ignore JobState change that happened during job requeue operation.
                                // Note: requeue job takes 3 steps: cancel job, configure job, submit job.  Job state transitions during job requeue, i.e.,
                                // (running) -> cancelling -> cancelled -> configuring -> submitted -> validating ->(queued), will all be ignored.
                                if (0 == this.isRequeuingJob)
                                {
                                    ISchedulerNotify proxy = this.context;

                                    // Not awaited here: completion is handled by OnEndJobStateChanged.
                                    proxy.JobStateChanged(JobStateConverter.FromHpcJobState(state)).ContinueWith(this.OnEndJobStateChanged);
                                    TraceHelper.TraceEvent(this.sessionid, TraceEventType.Information, "[JobMonitorEntry] Job state change event triggered, new state: {0}", state);
                                }
                            }
                            catch (CommunicationException e)
                            {
                                // Channel is aborted, set the context to null
                                TraceHelper.TraceEvent(this.sessionid, TraceEventType.Error, "[JobMonitorEntry] Callback channel is aborted: {0}", e);
                                this.context = null;
                            }
                            catch (Exception e)
                            {
                                TraceHelper.TraceEvent(this.sessionid, TraceEventType.Error, "[JobMonitorEntry] Failed to trigger job state change event: {0}", e);
                            }
                        }
                    }

                    // Task info is fetched on the first round of this run (or until a list was
                    // obtained) and afterwards only when jobInfo differs from this.previousJobInfo.
                    if (this.context != null && (taskInfoList == null || !jobInfo.Equals(this.previousJobInfo)))
                    {
                        try
                        {
                            taskInfoList = this.GetTaskInfo();

                            if (taskInfoList != null)
                            {
                                ISchedulerNotify proxy = this.context;

                                // jobInfo is passed as the continuation state to OnEndTaskStateChanged.
                                proxy.TaskStateChanged(taskInfoList).ContinueWith(this.OnEndTaskStateChanged, jobInfo);
                                TraceHelper.TraceEvent(this.sessionid, TraceEventType.Information, "[JobMonitorEntry] Task state change event triggered.");
                            }
                        }
                        catch (CommunicationException e)
                        {
                            // Channel is aborted, set the context to null
                            TraceHelper.TraceEvent(this.sessionid, TraceEventType.Error, "[JobMonitorEntry] Callback channel is aborted: {0}", e);
                            this.context = null;
                        }
                        catch (Exception e)
                        {
                            TraceHelper.TraceEvent(this.sessionid, TraceEventType.Error, "[JobMonitorEntry] Failed to trigger task state change event: {0}", e);
                        }
                    }
                }
                catch (Exception e)
                {
                    // A failed round is only traced; the loop decides below whether to try again.
                    TraceHelper.TraceEvent(this.sessionid, TraceEventType.Warning, "[JobMonitorEntry] Exception thrown when querying task info: {0}", e);
                }

                // pull task is not registered, quit
                if (Interlocked.Decrement(ref this.registerdPullTask) == 0)
                {
                    break;
                }

                TraceHelper.TraceEvent(this.sessionid, TraceEventType.Information, "[JobMonitorEntry] Waiting {0} miliseconds and start another round of getting task info.", this.pullTaskGap);

                // Sleep and pull task again, clear the register pull task flag
                Thread.Sleep(this.pullTaskGap);

                // Double the wait for the next round, capped at PullTaskMaxGap.
                if (this.pullTaskGap < PullTaskMaxGap)
                {
                    this.pullTaskGap *= 2;
                    if (this.pullTaskGap > PullTaskMaxGap)
                    {
                        this.pullTaskGap = PullTaskMaxGap;
                    }
                }

                // Collapse all requests registered so far into the single round that follows.
                this.registerdPullTask = 1;
            }

            if (shouldExit)
            {
                // Copy the delegate first: a handler unsubscribing between the null check and the
                // invocation would otherwise cause a NullReferenceException.
                var exitHandler = this.Exit;
                if (exitHandler != null)
                {
                    exitHandler(this, EventArgs.Empty);
                }
            }
        }