Ejemplo n.º 1
0
        /// <summary>
        /// Starts the job monitor for this session.
        /// </summary>
        /// <param name="context">Operation context that supplies the <see cref="ISchedulerNotify"/> callback channel.</param>
        /// <returns>The scheduler job state, read just before the method returns.</returns>
        public JobState Start(OperationContext context)
        {
            TraceHelper.TraceEvent(this.sessionid, TraceEventType.Information, "[JobMonitorEntry] Start monitor.");

            // Fresh timing bookkeeping for this run.
            this.lastStartTime = DateTime.UtcNow;
            this.lastResponseTime = DateTime.MinValue;
            this.ResetPreviousValues();

            // Keep the callback channel so that later state changes can be pushed to the caller.
            ISchedulerNotify callbackChannel = context.GetCallbackChannel<ISchedulerNotify>();
            this.context = callbackChannel;

            this.ResetSchedulerJob();

            // A job that is already canceled is reported as a session fault.
            bool alreadyCanceled = JobState.Canceled == this.schedulerJob.State;
            if (alreadyCanceled)
            {
                ThrowHelper.ThrowSessionFault(
                    SOAFaultCode.Session_ValidateJobFailed_JobCanceled,
                    SR.SessionLauncher_ValidateJobFailed_JobCanceled,
                    this.sessionid.ToString());
            }

            this.CalculateMinAndMax();

            // Hand the task-info query to the thread pool; it is not waited for here.
            ThreadPool.QueueUserWorkItem(this.CallbackToQueryTaskInfo, false);

            return this.schedulerJob.State;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Starts the monitor: fetches the Azure Batch job that belongs to this session, reads its
        /// optional "MaxUnits" metadata and launches the background job monitor.
        /// </summary>
        /// <param name="context">Operation context that supplies the <see cref="ISchedulerNotify"/> callback channel.</param>
        /// <returns>The cloud job retrieved for this session.</returns>
        public async Task <CloudJob> StartAsync(System.ServiceModel.OperationContext context)
        {
            TraceHelper.TraceEvent(this.sessionid, TraceEventType.Information, "[AzureBatchJobMonitorEntry] Start monitor Entry.");
            this.currentState = Telepathy.Session.Data.JobState.Queued;
            this.context      = context.GetCallbackChannel <ISchedulerNotify>();
            this.cloudJob     = await this.batchClient.JobOperations.GetJobAsync(AzureBatchSessionJobIdConverter.ConvertToAzureBatchJobId(this.sessionid));

            if (this.cloudJob.State == JobState.Disabled)
            {
                // A disabled batch job is reported with the "job canceled" session fault.
                ThrowHelper.ThrowSessionFault(SOAFaultCode.Session_ValidateJobFailed_JobCanceled, SR.SessionLauncher_ValidateJobFailed_JobCanceled, this.sessionid.ToString());
            }

            // An optional "MaxUnits" metadata entry overrides maxUnits when it holds a valid integer.
            MetadataItem maxUnitsItem = this.cloudJob.Metadata?.FirstOrDefault(item => item.Name == "MaxUnits");
            if (maxUnitsItem != null && Int32.TryParse(maxUnitsItem.Value, out int result))
            {
                this.maxUnits = result;
            }

            // monitor batch job state
            this.batchJobMonitor = new AzureBatchJobMonitor(this.sessionid, this.JobMonitor_OnReportJobState);
            try
            {
                // The monitor runs in the background and is intentionally not awaited. The surrounding
                // try/catch can only see a failure to queue the work, so a continuation is attached to
                // trace any exception raised while the monitor itself is running.
                Task.Run(() => this.StartMonitorAsync()).ContinueWith(
                    faulted =>
                    {
                        TraceHelper.TraceEvent(this.sessionid, TraceEventType.Warning, "[AzureBatchJobMonitorEntry] Exception thrown when start Azure Batch Job Monitor: {0}", faulted.Exception);
                    },
                    TaskContinuationOptions.OnlyOnFaulted);
            }
            catch (Exception e)
            {
                TraceHelper.TraceEvent(this.sessionid, TraceEventType.Warning, "[AzureBatchJobMonitorEntry] Exception thrown when start Azure Batch Job Monitor: {0}", e);
            }

            return(this.cloudJob);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Callback invoked when the Azure Batch job monitor reports the job state.
        /// </summary>
        /// <param name="state">Job state reported by the monitor.</param>
        /// <param name="stateChangedTaskList">Tasks whose state changed; no task notification is sent when this is null.</param>
        /// <param name="shouldExit">When true, the Exit event is raised after the notifications have been handled.</param>
        private async void JobMonitor_OnReportJobState(Telepathy.Session.Data.JobState state, List <TaskInfo> stateChangedTaskList, bool shouldExit)
        {
            // Set under the lock when a job state notification was started; awaited after the lock
            // is released because await is not allowed inside a lock statement.
            Task pendingJobNotification = null;

            // Unlocked check first, repeated under the lock so that a given state is recorded only once.
            if (state != this.currentState)
            {
                lock (this.changeJobStateLock)
                {
                    if (state != this.currentState)
                    {
                        this.currentState = state;

                        // Read the channel once: the field can be set to null by a concurrent failure handler.
                        ISchedulerNotify proxy = this.context;
                        if (proxy != null)
                        {
                            try
                            {
                                TraceHelper.TraceEvent(this.sessionid, TraceEventType.Information, "[AzureBatchJobMonitorEntry] Job state change event triggered, new state received from AzureBatchJobMonitor: {0}", state);
                                pendingJobNotification = proxy.JobStateChanged(state);
                            }
                            catch (Exception e)
                            {
                                this.HandleNotifyFailure(
                                    e,
                                    "[AzureBatchJobMonitorEntry] Callback channel is disposed: {0}, lose connection to broker.",
                                    "[AzureBatchJobMonitorEntry] Failed to trigger job state change event: {0}");
                            }
                        }
                    }
                }
            }

            if (pendingJobNotification != null)
            {
                try
                {
                    // Observe the outcome of the call: a failure that surfaces asynchronously would
                    // otherwise be lost and the broken channel would be kept.
                    await pendingJobNotification;
                }
                catch (Exception e)
                {
                    this.HandleNotifyFailure(
                        e,
                        "[AzureBatchJobMonitorEntry] Callback channel is disposed: {0}, lose connection to broker.",
                        "[AzureBatchJobMonitorEntry] Failed to trigger job state change event: {0}");
                }
            }

            if (stateChangedTaskList != null)
            {
                ISchedulerNotify proxy = this.context;
                if (proxy != null)
                {
                    try
                    {
                        await proxy.TaskStateChanged(stateChangedTaskList);

                        TraceHelper.TraceEvent(this.sessionid, TraceEventType.Information, "[AzureBatchJobMonitorEntry] Task state change event triggered.");
                    }
                    catch (Exception e)
                    {
                        this.HandleNotifyFailure(
                            e,
                            "[AzureBatchJobMonitorEntry] Callback channel is disposed: {0}, lose connection to broker",
                            "[AzureBatchJobMonitorEntry] Failed to trigger task state change event: {0}");
                    }
                }
            }

            if (shouldExit)
            {
                // Copy the delegate before testing it so that a handler removed on another thread
                // between the null check and the call cannot cause a NullReferenceException.
                var exitHandler = this.Exit;
                if (exitHandler != null)
                {
                    TraceHelper.TraceEvent(this.sessionid, TraceEventType.Information, "[AzureBatchJobMonitorEntry] Exit AzureBatchJobMonitor Entry");
                    exitHandler(this, EventArgs.Empty);
                }
            }
        }

        /// <summary>
        /// Traces a failed callback to the broker and drops the callback channel when the failure
        /// shows that the channel is disposed or aborted.
        /// </summary>
        /// <param name="error">Exception raised by the callback.</param>
        /// <param name="disposedFormat">Trace format used when the channel is disposed.</param>
        /// <param name="failedFormat">Trace format used for any other, non-communication failure.</param>
        private void HandleNotifyFailure(Exception error, string disposedFormat, string failedFormat)
        {
            if (error is System.ObjectDisposedException)
            {
                TraceHelper.TraceEvent(this.sessionid, TraceEventType.Error, disposedFormat, error);
                this.context = null;
            }
            else if (error is CommunicationException)
            {
                // Channel is aborted, set the context to null
                TraceHelper.TraceEvent(this.sessionid, TraceEventType.Error, "[AzureBatchJobMonitorEntry] Callback channel is aborted: {0}", error);
                this.context = null;
            }
            else
            {
                TraceHelper.TraceEvent(this.sessionid, TraceEventType.Error, failedFormat, error);
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Polls the scheduler job and pushes job state and task state changes to the callback channel.
        /// </summary>
        /// <remarks>
        /// Only the caller that moves <c>registerdPullTask</c> from 0 to 1 runs the polling loop. Any other
        /// caller returns right after incrementing the counter, and that extra count makes the running loop
        /// wait and poll again instead of stopping. The wait between rounds starts at <c>PullTaskMinGap</c>
        /// and doubles after each round, capped at <c>PullTaskMaxGap</c>. When a terminal job state was seen
        /// outside of a requeue operation, the Exit event is raised once the loop has ended.
        /// </remarks>
        private void QueryTaskInfo()
        {
            TraceHelper.TraceEvent(this.sessionid, TraceEventType.Verbose, "[JobMonitorEntry] Enters QueryTaskInfo method.");
            if (Interlocked.Increment(ref this.registerdPullTask) != 1)
            {
                // register count doesn't change from 0 to 1 means somebody is pulling task, quit
                return;
            }

            bool            shouldExit   = false;
            List <TaskInfo> taskInfoList = null;

            this.pullTaskGap = PullTaskMinGap;

            while (true)
            {
                try
                {
                    TraceHelper.TraceEvent(this.sessionid, TraceEventType.Verbose, "[JobMonitorEntry] Starting get job counters.");
                    ISchedulerJobCounters counters = this.schedulerJob.GetCounters();

                    JobInfo  jobInfo = new JobInfo(this.sessionid, counters);
                    JobState state   = this.schedulerJob.State;

                    TraceHelper.TraceEvent(this.sessionid, TraceEventType.Information, "[JobMonitorEntry] Starting query task info: JobState = {0}\nJobInfo: {1}", state, jobInfo);
                    if (state != this.previousState)
                    {
                        this.previousState = state;
                        if (this.context != null)
                        {
                            // Bug 7144: dispose JobMonitorEntry instance and unsubscribe events if job state changed to Canceled/Finished/Failed
                            shouldExit = (0 == this.isRequeuingJob) && (state == JobState.Canceled || state == JobState.Finished || state == JobState.Failed);

                            try
                            {
                                // ignore JobState change that happened during job requeue operation.
                                // Note: requeue job takes 3 steps: cancel job, configure job, submit job.  Job state transitions during job requeue, i.e.,
                                // (running) -> cancelling -> cancelled -> configuring -> submitted -> validating ->(queued), will all be ignored.
                                if (0 == this.isRequeuingJob)
                                {
                                    ISchedulerNotify proxy = this.context;
                                    proxy.JobStateChanged(JobStateConverter.FromHpcJobState(state)).ContinueWith(this.OnEndJobStateChanged);
                                    TraceHelper.TraceEvent(this.sessionid, TraceEventType.Information, "[JobMonitorEntry] Job state change event triggered, new state: {0}", state);
                                }
                            }
                            catch (CommunicationException e)
                            {
                                // Channel is aborted, set the context to null
                                TraceHelper.TraceEvent(this.sessionid, TraceEventType.Error, "[JobMonitorEntry] Callback channel is aborted: {0}", e);
                                this.context = null;
                            }
                            catch (Exception e)
                            {
                                TraceHelper.TraceEvent(this.sessionid, TraceEventType.Error, "[JobMonitorEntry] Failed to trigger job state change event: {0}", e);
                            }
                        }
                    }

                    // Task info is sent on the first round (no list fetched yet) and afterwards only
                    // when the job info differs from previousJobInfo.
                    if (this.context != null && (taskInfoList == null || !jobInfo.Equals(this.previousJobInfo)))
                    {
                        try
                        {
                            taskInfoList = this.GetTaskInfo();

                            if (taskInfoList != null)
                            {
                                ISchedulerNotify proxy = this.context;
                                proxy.TaskStateChanged(taskInfoList).ContinueWith(this.OnEndTaskStateChanged, jobInfo);
                                TraceHelper.TraceEvent(this.sessionid, TraceEventType.Information, "[JobMonitorEntry] Task state change event triggered.");
                            }
                        }
                        catch (CommunicationException e)
                        {
                            // Channel is aborted, set the context to null
                            TraceHelper.TraceEvent(this.sessionid, TraceEventType.Error, "[JobMonitorEntry] Callback channel is aborted: {0}", e);
                            this.context = null;
                        }
                        catch (Exception e)
                        {
                            TraceHelper.TraceEvent(this.sessionid, TraceEventType.Error, "[JobMonitorEntry] Failed to trigger task state change event: {0}", e);
                        }
                    }
                }
                catch (Exception e)
                {
                    // A failed round is only traced; the loop bookkeeping below still runs.
                    TraceHelper.TraceEvent(this.sessionid, TraceEventType.Warning, "[JobMonitorEntry] Exception thrown when querying task info: {0}", e);
                }

                // pull task is not registered, quit
                if (Interlocked.Decrement(ref this.registerdPullTask) == 0)
                {
                    break;
                }

                TraceHelper.TraceEvent(this.sessionid, TraceEventType.Information, "[JobMonitorEntry] Waiting {0} miliseconds and start another round of getting task info.", this.pullTaskGap);

                // Sleep and pull task again, clear the register pull task flag
                Thread.Sleep(this.pullTaskGap);
                if (this.pullTaskGap < PullTaskMaxGap)
                {
                    // Back off: double the wait, but never beyond the maximum gap.
                    this.pullTaskGap *= 2;
                    if (this.pullTaskGap > PullTaskMaxGap)
                    {
                        this.pullTaskGap = PullTaskMaxGap;
                    }
                }

                // Collapse every request that arrived meanwhile into the single round that follows.
                this.registerdPullTask = 1;
            }

            if (shouldExit)
            {
                // The delegate is read once by the null-conditional call, so a handler that is
                // removed on another thread between the null check and the invocation cannot
                // cause a NullReferenceException on this thread.
                this.Exit?.Invoke(this, EventArgs.Empty);
            }
        }