Beispiel #1
0
        /// <summary>
        /// Retrieves the current policy agent document and acknowledges, in a single batch response,
        /// every job that <c>IsJobWaitingForApproval</c> reports as waiting for approval.
        /// Jobs that are not waiting for approval are only traced.
        /// </summary>
        /// <param name="activityId">Activity ID passed to the policy agent client calls.</param>
        /// <param name="cancellationToken">Cancellation token passed to the policy agent client calls.</param>
        /// <exception cref="InvalidOperationException">
        /// The policy agent client returned a null document.
        /// </exception>
        private async Task ApproveAllJobsAsync(Guid activityId, CancellationToken cancellationToken)
        {
            // Create the PA client; this will fail when MR is off, which it is expected to be
            IPolicyAgentClient client = await this.policyAgentClientAsyncFactory();

            IPolicyAgentDocumentForTenant document = await client.GetDocumentAsync(activityId, cancellationToken);
            if (document == null)
            {
                // Parallel job mode is not enabled
                throw new InvalidOperationException("Failed to get job info. Verify that job parallelism is enabled for this tenant.");
            }

            traceType.WriteInfo("Received document: {0}", CoordinatorHelper.FormatDocumentForTracing(document));

            // Job ID -> response to send back; filled only for jobs that are waiting for approval.
            var approvals = new Dictionary<Guid, JobStepResponseEnum>();

            foreach (var job in document.Jobs)
            {
                bool needsApproval = IsJobWaitingForApproval(job);

                string message = needsApproval
                    ? $"Auto-approving job {job.Id}"
                    : $"Ignoring job {job.Id} in state that does not require tenant approval";

                if (needsApproval)
                {
                    approvals.Add(job.Id, JobStepResponseEnum.Acknowledged);
                }

                // A job may have no current step; trace "-" placeholders in that case.
                var step = job.JobStep;

                traceType.WriteInfo(
                    "{0}: {1} {2} ({3}/{4})",
                    message,
                    job.GetImpactAction(),
                    job.JobStatus,
                    step == null ? "-" : step.ImpactStep.ToString(),
                    step == null ? "-" : step.AcknowledgementStatus.ToString());
            }

            if (approvals.Count == 0)
            {
                traceType.WriteInfo("No jobs to approve");
                return;
            }

            // Send all collected job responses as a single batch response
            await client.SendJobResponseAsync(
                activityId,
                document.JobDocumentIncarnation,
                approvals,
                "Auto-approved by Service Fabric because coordination is disabled",
                cancellationToken);
        }
        /// <summary>
        /// Polling loop that runs until cancellation is requested. On each iteration it fetches the
        /// policy agent document, checks channel health and, when notifications are available,
        /// processes health and job information from the document concurrently. After an iteration
        /// that completes without an exception, the coordinator health status is set to OK.
        /// </summary>
        /// <param name="cancellationToken">
        /// Token that ends the polling loop; it is also passed to the per-iteration calls and to the
        /// delay between iterations.
        /// </param>
        /// <remarks>
        /// Any exception raised inside an iteration is passed to <c>HandleCoordinatorException</c>
        /// together with the time of the last successful run; it is rethrown when that method returns
        /// false. If cancellation is requested while waiting between iterations, the cancellation
        /// exception thrown by <see cref="Task.Delay(TimeSpan, CancellationToken)"/> propagates to the caller.
        /// </remarks>
        private async Task ProcessPolicyAgentDocumentAsync(CancellationToken cancellationToken)
        {
            traceType.WriteInfo("Starting to process policy agent documents.");

            var lastSuccessfulRunAt = DateTimeOffset.UtcNow;

            while (!cancellationToken.IsCancellationRequested)
            {
                traceType.WriteInfo(
                    "Azure coordinator: service name = {0}, partition ID = {1}, tenant ID = {2}, code version = {3}, start time = {4:O}",
                    this.environment.ServiceName,
                    this.partitionId,
                    this.tenantId,
                    this.assemblyFileVersion,
                    this.coordinatorRunAsyncStartTime);

                // Re-read the polling interval on every iteration.
                TimeSpan timerPeriod = this.GetPollingInterval(Constants.ConfigKeys.JobPollingIntervalInSeconds,
                                                               DefaultJobPollingIntervalInSeconds,
                                                               MaxJobPollingInterval);

                try
                {
                    var activityId = Guid.NewGuid();

                    IPolicyAgentDocumentForTenant doc = await policyAgentClient.GetDocumentAsync(activityId, cancellationToken).ConfigureAwait(false);

                    if (doc == null)
                    {
                        // Parallel job mode is not enabled
                        throw new InvalidOperationException("Failed to get job info. Verify that job parallelism is enabled for this tenant.");
                    }

                    // If the incarnation stops changing for a long time, the MR channel may be unhealthy,
                    // due to the physical node being in HI/OFR state. Exit the process to cause a failover,
                    // hopefully to a healthier node.
                    this.CheckChannelHealth(activityId, doc);

                    if (!this.isNotificationAvailable)
                    {
                        traceType.WriteInfo("MR channel notification is not available; waiting for channel health check to pass.");
                    }
                    else
                    {
                        string docString = CoordinatorHelper.FormatDocumentForTracing(doc);
                        traceType.WriteInfo("Received document: {0}", docString);

                        // Start both operations before awaiting so that they run concurrently.
                        Task healthProcessingTask = ProcessHealthAsync(activityId, doc, timerPeriod, cancellationToken);
                        Task jobProcessingTask    = ProcessJobInfoAsync(activityId, doc, cancellationToken);

                        // Await both together: if health processing fails, job processing is still
                        // observed and allowed to finish, so its exception is not left unobserved and
                        // it cannot overlap with the next iteration. When both fail, the health
                        // exception is the one rethrown here because it is listed first.
                        await Task.WhenAll(healthProcessingTask, jobProcessingTask).ConfigureAwait(false);
                    }

                    lastSuccessfulRunAt = DateTimeOffset.UtcNow;
                    UpdateCoordinatorHealthStatus(HealthState.Ok, "Azure coordinator is operating normally.");
                }
                catch (Exception ex)
                {
                    // The handler decides whether the failure is tolerated; false means rethrow.
                    if (!HandleCoordinatorException(ex, lastSuccessfulRunAt))
                    {
                        throw;
                    }
                }

                traceType.WriteInfo("Next run will occur in {0} at {1:O}", timerPeriod, DateTimeOffset.UtcNow + timerPeriod);
                await Task.Delay(timerPeriod, cancellationToken).ConfigureAwait(false);
            }

            traceType.WriteInfo("Cancellation requested. Returning from processing policy agent documents.");
        }