/// <summary>
        /// Start to subscribe to the job and task events of the given job.
        /// </summary>
        /// <param name="jobid">indicating the job id</param>
        /// <returns>
        /// A tuple holding the job state produced by
        /// <c>AzureBatchJobStateConverter.FromAzureBatchJobAsync</c>, followed by the
        /// <c>MaxUnits</c> (autoMax) and <c>MinUnits</c> (autoMin) values read from the
        /// job monitor entry.
        /// </returns>
        /// <remarks>
        /// Any exception raised while looking up or starting the monitor entry is
        /// traced and then rethrown to the caller.
        /// </remarks>
        public async Task <(JobState jobState, int autoMax, int autoMin)> RegisterJobAsync(string jobid)
        {
            Trace.TraceInformation($"[AzureBatchSchedulerDelegation] Begin: RegisterJob, job id is {jobid}...");
            // NOTE(review): the broker access check below is disabled - confirm this is intentional.
            //CheckBrokerAccess(jobid);

            int      autoMax = 0, autoMin = 0;
            CloudJob batchJob;

            try
            {
                // Reuse the monitor entry already registered for this job id, or
                // create a fresh one and hook its Exit event.
                AzureBatchJobMonitorEntry jobMonitorEntry;
                lock (this.JobMonitors)
                {
                    if (!this.JobMonitors.TryGetValue(jobid, out jobMonitorEntry))
                    {
                        jobMonitorEntry       = new AzureBatchJobMonitorEntry(jobid);
                        jobMonitorEntry.Exit += new EventHandler(this.JobMonitorEntry_Exit);
                    }
                }

                // NOTE(review): StartAsync runs outside the lock, so two concurrent
                // registrations of the same job id can each create and start their
                // own entry; the later one overwrites the earlier below. Confirm
                // callers serialize registration per job.
                batchJob = await jobMonitorEntry.StartAsync(System.ServiceModel.OperationContext.Current);

                // Bug 18050: Only add/update the instance if it succeeded to
                // open the job.
                lock (this.JobMonitors)
                {
                    this.JobMonitors[jobid] = jobMonitorEntry;
                }

                autoMin = jobMonitorEntry.MinUnits;
                autoMax = jobMonitorEntry.MaxUnits;
            }
            catch (Exception e)
            {
                // Embed the exception in the message itself. Passing it as a format
                // argument without a placeholder drops it from the trace, and routes
                // the job id through string formatting where braces would throw.
                Trace.TraceError($"[AzureBatchSchedulerDelegation] Exception thrown while registering job: {jobid}: {e}");
                throw;
            }

            Trace.TraceInformation($"[AzureBatchSchedulerDelegation] End: RegisterJob. Current job state = {batchJob.State}.");
            return(await AzureBatchJobStateConverter.FromAzureBatchJobAsync(batchJob), autoMax, autoMin);
        }
        // Example 2
        /// <summary>
        /// Polls the Azure Batch job until it reaches a terminal state and reports
        /// every observed job state (plus the tasks whose state changed) through
        /// <c>ReportJobStateAction</c>.
        /// </summary>
        /// <remarks>
        /// The loop ends when the Batch job is Completed or Disabled, or when the
        /// service answers with HTTP 404 (the job is then reported as Canceled).
        /// Between rounds the method waits <c>pullJobGap</c> milliseconds, doubling
        /// the gap after each round up to <c>PullJobMaxGap</c>. Exceptions raised
        /// while querying or reporting are traced as warnings and do not stop the loop.
        /// </remarks>
        /// <returns>A task that completes once polling has stopped.</returns>
        private async Task QueryJobChangeAsync()
        {
            TraceHelper.TraceEvent(this.sessionid, TraceEventType.Verbose,
                                   "[AzureBatchJobMonitorEntry] Enters QueryTaskInfo method.");
            bool shouldExit = false;

            this.pullJobGap = PullJobMinGap;
            JobState state = JobState.Active;

            Session.Data.JobState currentJobState = Session.Data.JobState.Configuring;
            var    pool    = this.batchClient.PoolOperations.GetPool(AzureBatchConfiguration.BatchPoolName);
            string skuName = pool.VirtualMachineSize;

            TraceHelper.TraceEvent(this.sessionid, TraceEventType.Information, "[AzureBatchJobMonitor] VMSize in pool is {0}",
                                   skuName);

            // NOTE(review): Array.Find yields the default value when no SKU matches;
            // if SKU is a reference type the VCPUs access below would then throw.
            // Confirm every pool VM size is present in this.skus.
            SKU targetSku = Array.Find(this.skus, sku => sku.Name.Equals(skuName, StringComparison.OrdinalIgnoreCase));

            this.nodeCapacity = targetSku.VCPUs;
            TraceHelper.TraceEvent(this.sessionid, TraceEventType.Information,
                                   "[AzureBatchJobMonitor] Node capacity in pool is {0}", nodeCapacity);

            // The node list is fetched once, restricted to the two selected
            // properties, and reused in every polling round.
            ODATADetailLevel detailLevel = new ODATADetailLevel();

            detailLevel.SelectClause = "affinityId, ipAddress";
            var nodes = await pool.ListComputeNodes(detailLevel).ToListAsync();

            while (!shouldExit)
            {
                List <TaskInfo> stateChangedTaskList = new List <TaskInfo>();

                try
                {
                    TraceHelper.TraceEvent(this.sessionid, TraceEventType.Verbose, "[AzureBatchJobMonitor] Starting get job state.");
                    this.cloudJob = await this.batchClient.JobOperations.GetJobAsync(this.cloudJob.Id);

                    // Keep the last known Batch state when the service returns none.
                    state           = this.cloudJob.State.HasValue ? this.cloudJob.State.Value : state;
                    currentJobState = await AzureBatchJobStateConverter.FromAzureBatchJobAsync(this.cloudJob);

                    TraceHelper.TraceEvent(this.sessionid, TraceEventType.Information, "[AzureBatchJobMonitor] Current job state in AzureBatch: JobState = {0}\n", state);
                    TraceHelper.TraceEvent(this.sessionid, TraceEventType.Information, "[AzureBatchJobMonitor] Current job state in Telepathy: JobState = {0}\n", currentJobState);
                    stateChangedTaskList = await this.GetTaskStateChangeAsync(nodes);

                    TraceHelper.TraceEvent(this.sessionid, TraceEventType.Information, "[AzureBatchJobMonitor] Previous job state report to AzureBatchJobMonitorEntry: JobState = {0}\n", this.previousJobState);
                    if (state == JobState.Completed || state == JobState.Disabled)
                    {
                        // A job that was being canceled and has now stopped is reported as Canceled.
                        if (this.previousJobState == Session.Data.JobState.Canceling)
                        {
                            currentJobState = Session.Data.JobState.Canceled;
                        }
                        shouldExit = true;
                    }
                    else if (this.previousJobState == Session.Data.JobState.Canceling)
                    {
                        //Override current job state as Canceling, because when all tasks turn to be completed, the job state converter will make job state finishing.
                        //If one job is cancelling in previous state and now is not in one terminated state, keep to reporting cancelling state to job monitor entry.
                        currentJobState = Session.Data.JobState.Canceling;
                        TraceHelper.TraceEvent(this.sessionid, TraceEventType.Information, "[AzureBatchJobMonitor] Overwrite current job state as {0} in Telepathy according to previous job state {1}\n", currentJobState, this.previousJobState);
                    }
                }
                catch (BatchException e)
                {
                    TraceHelper.TraceEvent(this.sessionid, TraceEventType.Warning, "[AzureBatchJobMonitor] BatchException thrown when querying job info: {0}", e);

                    // A 404 means the job no longer exists, so stop polling and report it as Canceled.
                    // The short-circuit && keeps a null RequestInformation from throwing
                    // inside this handler, which would otherwise escape the polling loop.
                    if (e.RequestInformation != null
                        && e.RequestInformation.HttpStatusCode == System.Net.HttpStatusCode.NotFound)
                    {
                        if (this.previousJobState == Session.Data.JobState.Canceling)
                        {
                            TraceHelper.TraceEvent(this.sessionid, TraceEventType.Warning, "[AzureBatchJobMonitor] The queried job has been deleted.");
                        }
                        else
                        {
                            TraceHelper.TraceEvent(this.sessionid, TraceEventType.Warning, "[AzureBatchJobMonitor] The queried job previous state is {0}, we make its state as canceled because it's no longer exist.", this.previousJobState);
                        }
                        shouldExit      = true;
                        currentJobState = Session.Data.JobState.Canceled;
                    }
                }
                catch (Exception e)
                {
                    // Best effort: other failures are logged and the next round retries.
                    TraceHelper.TraceEvent(this.sessionid, TraceEventType.Warning, "[AzureBatchJobMonitor] Exception thrown when querying job info: {0}", e);
                }

                try
                {
                    if (this.ReportJobStateAction != null)
                    {
                        TraceHelper.TraceEvent(this.sessionid, TraceEventType.Information,
                                               "[AzureBatchJobMonitor] Current job state report to AzureBatchJobMonitorEntry: JobState = {0}\n",
                                               currentJobState);
                        this.ReportJobStateAction(currentJobState, stateChangedTaskList, shouldExit);
                    }
                }
                catch (Exception e)
                {
                    TraceHelper.TraceEvent(this.sessionid, TraceEventType.Warning, "[AzureBatchJobMonitor] Exception thrown when report job info: {0}", e);
                }

                this.previousJobState = currentJobState;

                if (!shouldExit)
                {
                    TraceHelper.TraceEvent(this.sessionid, TraceEventType.Information, "[AzureBatchJobMonitor] Waiting {0} milliseconds and start another round of getting job state info.", this.pullJobGap);

                    // Wait before the next polling round.
                    await Task.Delay(this.pullJobGap);

                    // Back off: double the gap for the next round, capped at PullJobMaxGap.
                    if (this.pullJobGap < PullJobMaxGap)
                    {
                        this.pullJobGap *= 2;
                        if (this.pullJobGap > PullJobMaxGap)
                        {
                            this.pullJobGap = PullJobMaxGap;
                        }
                    }
                }
            }
        }