Example #1
0
        /// <summary>
        /// Allows the user-specified actions on each mapped tenant job that still has pending actions.
        /// </summary>
        /// <param name="activityId">Activity identifier of the current run; not used by this policy.</param>
        /// <param name="coordinatorContext">Context whose mapped tenant jobs are inspected.</param>
        /// <returns>An already-completed task; all work happens synchronously.</returns>
        public override Task ApplyAsync(Guid activityId, CoordinatorContext coordinatorContext)
        {
            coordinatorContext.Validate("coordinatorContext");

            foreach (var mappedJob in coordinatorContext.MappedTenantJobs.Values)
            {
                // Jobs with nothing pending (e.g. completed jobs) are skipped without any logging or processing
                if (mappedJob.PendingActions != ActionType.None)
                {
                    IAllowActionRecord record = allowActionMap.Get(mappedJob.Id, mappedJob.TenantJob.GetJobUD());

                    if (record != null)
                    {
                        // A matching record exists for this job id/UD pair, so allow its action
                        mappedJob.AllowActions(this.traceType, record.Action);
                    }
                }
            }

            return Task.FromResult(0);
        }
        /// <summary>
        /// Runs <c>ApplyActionPolicy</c> for the Execute and Restore action types on every mapped tenant job.
        /// </summary>
        /// <param name="activityId">Activity identifier of the current run; not used by this policy.</param>
        /// <param name="coordinatorContext">Context whose mapped tenant jobs are processed.</param>
        /// <returns>An already-completed task; all work happens synchronously.</returns>
        public override Task ApplyAsync(Guid activityId, CoordinatorContext coordinatorContext)
        {
            coordinatorContext.Validate("coordinatorContext");

            // Action types handled by this policy, in the order they are applied to each job
            var policedActions = new[] { ActionType.Execute, ActionType.Restore };

            foreach (var mappedJob in coordinatorContext.MappedTenantJobs.Values)
            {
                foreach (var actionType in policedActions)
                {
                    ApplyActionPolicy(mappedJob, actionType);
                }
            }

            return Task.FromResult(0);
        }
Example #3
0
        /// <summary>
        /// Applies each action policy, one after another in list order, to the coordinator context.
        /// </summary>
        /// <param name="activityId">Activity identifier forwarded to every policy.</param>
        /// <param name="actionPolicies">Policies to apply.</param>
        /// <param name="coordinatorContext">Context handed to every policy.</param>
        /// <returns>A task that completes once the last policy has been applied.</returns>
        public static async Task ApplyPoliciesAsync(
            Guid activityId,
            IList<IActionPolicy> actionPolicies,
            CoordinatorContext coordinatorContext)
        {
            actionPolicies.Validate("actionPolicies");
            coordinatorContext.Validate("coordinatorContext");

            foreach (var actionPolicy in actionPolicies)
            {
                // An exception from a policy propagates to the caller; the remaining policies are not applied
                var pending = actionPolicy.ApplyAsync(activityId, coordinatorContext);
                await pending.ConfigureAwait(false);
            }
        }
Example #4
0
        /// <summary>
        /// Wraps every tenant job and repair task in its mapped counterpart and registers it in the
        /// coordinator context, keyed by job id and repair task id respectively.
        /// </summary>
        /// <param name="tenantJobs">Tenant jobs to wrap in <c>MappedTenantJob</c> instances.</param>
        /// <param name="repairTasks">Repair tasks to wrap in <c>MappedRepairTask</c> instances.</param>
        /// <param name="coordinatorContext">Context whose mapped collections receive the new entries.</param>
        public static void CreateMappedWorkItems(
            IList<ITenantJob> tenantJobs,
            IList<IRepairTask> repairTasks,
            CoordinatorContext coordinatorContext)
        {
            tenantJobs.Validate("tenantJobs");
            repairTasks.Validate("repairTasks");
            coordinatorContext.Validate("coordinatorContext");

            // Tenant jobs are registered first, then repair tasks
            var jobMap = coordinatorContext.MappedTenantJobs;
            foreach (var tenantJob in tenantJobs)
            {
                jobMap.Add(tenantJob.Id, new MappedTenantJob(tenantJob));
            }

            var repairTaskMap = coordinatorContext.MappedRepairTasks;
            foreach (var task in repairTasks)
            {
                repairTaskMap.Add(task.TaskId, new MappedRepairTask(task));
            }
        }
Example #5
0
        /// <summary>
        /// Collects actions from all mapped tenant jobs, followed by all mapped repair tasks, into one list.
        /// </summary>
        /// <param name="activityId">Activity identifier of the current run; not used here.</param>
        /// <param name="coordinatorContext">Context whose mapped work items are passed to <c>AddActions</c>.</param>
        /// <returns>A new list containing whatever <c>AddActions</c> added for each work item.</returns>
        public static IList<IAction> GetActions(
            Guid activityId,
            CoordinatorContext coordinatorContext)
        {
            coordinatorContext.Validate("coordinatorContext");

            IList<IAction> collected = new List<IAction>();

            // Tenant jobs contribute first ...
            foreach (var tenantJob in coordinatorContext.MappedTenantJobs.Values)
            {
                AddActions(tenantJob, collected);
            }

            // ... then repair tasks
            foreach (var repairTask in coordinatorContext.MappedRepairTasks.Values)
            {
                AddActions(repairTask, collected);
            }

            return collected;
        }
Example #6
0
        /// <summary>
        /// Returns the repair tasks of the current coordinator context as JSON.
        /// Undocumented helper that exists because the PowerShell Get-ServiceFabricRepairTask returns
        /// timestamps without second/millisecond info.
        /// Not intended for automation use; use Get-ServiceFabricRepairTask for that.
        /// </summary>
        /// <param name="args">Command arguments; not used.</param>
        /// <returns>A completed task holding the JSON form of the repair task list.</returns>
        private Task<string> HandleCommandGetRepairTasksAsync(string args)
        {
            CoordinatorContext snapshot = CoordinatorContext;

            // No context available yet, so there is nothing to report
            if (snapshot == null)
            {
                throw Trace.CreateException(
                    traceType,
                    NativeTypes.FABRIC_ERROR_CODE.FABRIC_E_NOT_READY,
                    "Job information is not available");
            }

            List<IRepairTask> repairTasks = snapshot.MappedRepairTasks.Values
                .Select(mapped => mapped.RepairTask)
                .ToList();

            return Task.FromResult(repairTasks.ToJson());
        }
Example #7
0
        /// <summary>
        /// Denies the Prepare action on every mapped tenant job for which <c>IsBlocked</c> returns true
        /// under the current job blocking policy.
        /// </summary>
        /// <param name="activityId">Activity identifier of the current run; not used by this policy.</param>
        /// <param name="coordinatorContext">Context whose mapped tenant jobs are evaluated.</param>
        /// <returns>A task that completes once all jobs have been evaluated.</returns>
        public override async Task ApplyAsync(Guid activityId, CoordinatorContext coordinatorContext)
        {
            coordinatorContext.Validate("coordinatorContext");

            // Nothing to evaluate, so skip fetching (and logging) the policy
            if (coordinatorContext.MappedTenantJobs.Count == 0)
            {
                return;
            }

            // ConfigureAwait(false) matches the other awaits in this code base; nothing after this
            // point depends on resuming on the captured context
            var policy = await jobBlockingPolicyManager.GetPolicyAsync().ConfigureAwait(false);

            traceType.WriteInfo("Current job blocking policy is {0}", policy);

            foreach (var mappedTenantJob in coordinatorContext.MappedTenantJobs.Values)
            {
                if (IsBlocked(policy, mappedTenantJob))
                {
                    mappedTenantJob.DenyActions(this.traceType, ActionType.Prepare);
                }
            }
        }
Example #8
0
        /// <summary>
        /// Builds a JSON description of the coordinator's current state: the job blocking policy and
        /// diagnostic info always, plus job summaries and run details when a coordinator context is available.
        /// </summary>
        /// <param name="args">Command arguments; not used.</param>
        /// <returns>The JSON form of the assembled <c>CoordinatorStateData</c>.</returns>
        private async Task<string> HandleCommandGetCurrentStateAsync(string args)
        {
            traceType.WriteInfo("GetCurrentState requested");

            // Read the context once, before the await; a null value is handled further down
            CoordinatorContext snapshot = CoordinatorContext;

            var blockingPolicy = await jobBlockingPolicyManager.GetPolicyAsync().ConfigureAwait(false);

            //var actionsToAllow = allowActionMap.Get().Cast<AllowActionRecord>().ToList();

            var state = new CoordinatorStateData();
            state.JobBlockingPolicy = blockingPolicy.ToString();
            //state.JobSetsRequiringApproval = cc.Doc.JobSetsRequiringApproval;
            state.CoordinatorStartTime = CoordinatorDiagnosticInfo.CoordinatorStartTime;
            state.AzureTenantId = CoordinatorDiagnosticInfo.AzureTenantId;
            state.AssemblyFileVersion = CoordinatorDiagnosticInfo.AssemblyFileVersion;
            //state.OperatorAllowedActions = actionsToAllow;

            // Without a context only the policy and diagnostic fields can be reported
            if (snapshot == null)
            {
                return state.ToJson();
            }

            var summaries = new List<JobSummary>();

            foreach (var mappedJob in snapshot.MappedTenantJobs.Values)
            {
                summaries.Add(mappedJob.GetSummary());
            }

            state.Jobs = summaries;
            state.JobDocumentIncarnation = snapshot.Doc.JobDocumentIncarnation.ToString();
            state.LastRunStartTime = snapshot.RunStartTime.ToString("O");
            state.LastRunFinishTime = snapshot.RunFinishTime.ToString("O");

            return state.ToJson();
        }
Example #9
0
        /// <summary>
        /// Returns a JSON list with the summary of every mapped tenant job in the current coordinator context.
        /// </summary>
        /// <param name="args">Command arguments; not used.</param>
        /// <returns>A completed task holding the JSON form of the job summaries.</returns>
        private Task<string> HandleCommandGetJobsAsync(string args)
        {
            traceType.WriteInfo("GetJobs requested");

            CoordinatorContext snapshot = CoordinatorContext;

            if (snapshot != null)
            {
                // Summaries only; for a full view, GetDoc could be used
                var summaries = new List<JobSummary>();

                foreach (var mappedJob in snapshot.MappedTenantJobs.Values)
                {
                    summaries.Add(mappedJob.GetSummary());
                }

                return Task.FromResult(summaries.ToJson());
            }

            // No context available yet, so there is nothing to report
            throw Trace.CreateException(
                traceType,
                NativeTypes.FABRIC_ERROR_CODE.FABRIC_E_NOT_READY,
                "Job information is not available");
        }
Example #10
0
 /// <summary>
 /// Abstract operation that derived types implement to act on a coordinator context.
 /// NOTE(review): presumably this is the per-policy entry point for action policies - confirm
 /// against the enclosing class, which is not visible here.
 /// </summary>
 /// <param name="activityId">Activity identifier supplied by the caller.</param>
 /// <param name="coordinatorContext">Coordinator context the implementation operates on.</param>
 /// <returns>A task representing the operation.</returns>
 public abstract Task ApplyAsync(Guid activityId, CoordinatorContext coordinatorContext);
        /// <summary>
        /// Runs one processing pass for the given policy agent document: gathers repair tasks, maps them
        /// together with the document's jobs into a new coordinator context, reconciles, applies the action
        /// policies, publishes the context to the command processor, and finally executes the resulting
        /// actions and sends the job responses unless the job blocking policy blocks all jobs.
        /// </summary>
        /// <param name="activityId">Activity identifier forwarded to the repair manager, reconciler, policies and actions.</param>
        /// <param name="doc">Policy agent document providing the jobs and the job document incarnation.</param>
        /// <param name="cancellationToken">Only forwarded to <c>SendJobResponseAsync</c>.</param>
        /// <returns>A task that completes when the pass has finished.</returns>
        private async Task ProcessJobInfoAsync(Guid activityId, IPolicyAgentDocumentForTenant doc, CancellationToken cancellationToken)
        {
            DateTime now = DateTime.UtcNow;
            TimeSpan completedTaskAgeThreshold = this.GetCompletedTaskAgeThreshold();

            // get the ones that are claimed by this service (which is the Repair Executor (RE))
            var repairTasks = await repairManager
                              .GetRepairTaskListAsync(activityId, null, RepairTaskStateFilter.Default, this.environment.ServiceName)
                              .ConfigureAwait(false);

            // Count completed tasks that are going to be ignored because they are too old
            int oldCompletedTaskCount = repairTasks.Count(t => IsOldCompletedRepairTask(t, now, completedTaskAgeThreshold));

            // Keep all of the rest (active or recent)
            repairTasks = repairTasks.Where(t => !IsOldCompletedRepairTask(t, now, completedTaskAgeThreshold)).ToList();

            // get all the unclaimed repair tasks too... some may not match, we'll evaluate and ignore them later
            var unclaimedRepairTasks = await repairManager
                                       .GetRepairTaskListAsync(activityId, null, RepairTaskStateFilter.Created, null)
                                       .ConfigureAwait(false);

            foreach (var unclaimedRepairTask in unclaimedRepairTasks)
            {
                repairTasks.Add(unclaimedRepairTask);
            }

            traceType.WriteInfo(
                "Processing repair tasks (count: {0}; ignored completed tasks older than {1}: {2})",
                repairTasks.Count,
                completedTaskAgeThreshold,
                oldCompletedTaskCount);

            // Dump each task separately to avoid exceeding the ETW event size limit
            for (int i = 0; i < repairTasks.Count; ++i)
            {
                traceType.WriteInfo("Repair task {0} of {1}:{2}{3}", i + 1, repairTasks.Count, Environment.NewLine, repairTasks[i].ToJson());
            }

            var coordinatorContext = new CoordinatorContext {
                Doc = doc
            };

            ActionHelper.CreateMappedWorkItems(doc.Jobs, repairTasks, coordinatorContext);

            var reconciler = new Reconciler(
                this.environment,
                this.policyAgentClient,
                this.repairManager,
                this.repairActionProvider);

            await reconciler.ReconcileAsync(activityId, coordinatorContext).ConfigureAwait(false);

            await ActionHelper.ApplyPoliciesAsync(activityId, actionPolicies, coordinatorContext).ConfigureAwait(false);

            // Publish state after reconcile + policy phases have completed
            coordinatorContext.MarkFinished();
            coordinatorCommandProcessor.CoordinatorContext = coordinatorContext;

            // ConfigureAwait(false) for consistency with every other await in this method
            var blockingPolicy = await this.jobBlockingPolicyManager.GetPolicyAsync().ConfigureAwait(false);

            if (blockingPolicy == JobBlockingPolicy.BlockAllJobs)
            {
                // Neither actions nor job responses are issued in this case
                traceType.WriteInfo("All actions are blocked by job blocking policy");
            }
            else
            {
                var actionsToExecute = ActionHelper.GetActions(activityId, coordinatorContext);
                await ActionHelper.ExecuteActionsAsync(activityId, this.actionTraceType, actionsToExecute).ConfigureAwait(false);

                // Send all job responses (if any) as a single batch response
                await policyAgentClient.SendJobResponseAsync(
                    activityId,
                    doc.JobDocumentIncarnation,
                    reconciler.GetJobStepResponses(),
                    "Automated response by Service Fabric Infrastructure Service",
                    cancellationToken).ConfigureAwait(false);
            }
        }
Example #12
0
        // TODO, ensure stability
        // i.e. on re-execution, the same things should re-execute (if the state hasn't changed)
        /// <summary>
        /// Throttles job starts: denies the Prepare action on waiting jobs that would push the number of
        /// active jobs past the configured total, per-type or update-job limits.
        /// </summary>
        /// <param name="activityId">Activity identifier of the current run; not used by this policy.</param>
        /// <param name="coordinatorContext">Context whose mapped tenant jobs are counted and throttled.</param>
        /// <returns>An already-completed task; all work happens synchronously.</returns>
        public override Task ApplyAsync(Guid activityId, CoordinatorContext coordinatorContext)
        {
            coordinatorContext.Validate("coordinatorContext");

            if (coordinatorContext.MappedTenantJobs.Count == 0)
            {
                return Task.FromResult(0);
            }

            var everyJob = coordinatorContext.MappedTenantJobs.Values;
            var runningJobs = everyJob.Where(j => j.IsActive).ToList();

            // Running tallies: overall, update-type only, and per job type
            int runningTotal = runningJobs.Count;
            int runningUpdates = 0;
            var perTypeCounter = new JobTypeCounter(this.configSection);

            foreach (var running in runningJobs)
            {
                traceType.WriteInfo("Active job {0} ({1})", running.Id, running.TenantJob.GetImpactAction());

                perTypeCounter.AddActiveJob(running.TenantJob);

                if (running.TenantJob.IsUpdateJobType())
                {
                    runningUpdates++;
                }
            }

            int totalLimit = configSection.ReadConfigValue(
                Constants.ConfigKeys.MaxParallelJobCountTotal,
                DefaultMaxParallelJobCountTotal);

            int updateLimit = configSection.ReadConfigValue(
                Constants.ConfigKeys.MaxParallelJobCountUpdate,
                DefaultMaxParallelJobCountUpdate);

            traceType.WriteInfo(
                "Active/max job counts: Total: {0}/{1}, Updates: {2}/{3}, {4}",
                runningTotal,
                totalLimit,
                runningUpdates,
                updateLimit,
                perTypeCounter);

            // Jobs that are not active yet but are currently allowed to prepare,
            // ordered by the default static priority of their job type
            var waitingJobs = everyJob
                .Where(j => !j.IsActive && ((j.AllowedActions & ActionType.Prepare) == ActionType.Prepare))
                .OrderBy(j => j.TenantJob.GetImpactAction())
                .ToList();

            // TODO, ensure that we don't ack too many in the 2nd pass just after acking once
            // choose the simplest logic for now. In future, we will pick based on oldest document incarnation number etc.

            foreach (var candidate in waitingJobs)
            {
                // Every check runs even after an earlier one failed, so that all blocking reasons are logged
                bool blocked = false;

                if (runningTotal >= totalLimit)
                {
                    traceType.WriteInfo(
                        "Not starting job {0} because it would exceed max total parallel job count ({1}/{2})",
                        candidate.Id,
                        runningTotal,
                        totalLimit);

                    blocked = true;
                }

                JobCount typeCount;
                bool typeHasRoom = perTypeCounter.CanAddActiveJob(candidate.TenantJob, out typeCount);

                if (!typeHasRoom)
                {
                    traceType.WriteInfo(
                        "Not starting job {0} because it would exceed max parallel job count for type {1} ({2})",
                        candidate.Id,
                        candidate.TenantJob.GetImpactAction(),
                        typeCount);

                    blocked = true;
                }

                if (candidate.TenantJob.IsUpdateJobType() && (runningUpdates >= updateLimit))
                {
                    traceType.WriteInfo(
                        "Not starting job {0} because it would exceed max parallel update job count ({1}/{2})",
                        candidate.Id,
                        runningUpdates,
                        updateLimit);

                    blocked = true;
                }

                if (blocked)
                {
                    candidate.DenyActions(traceType, ActionType.Prepare);
                    continue;
                }

                // The job is admitted: count it as active so later candidates are checked against the new totals
                ++runningTotal;
                perTypeCounter.AddActiveJob(candidate.TenantJob);

                if (candidate.TenantJob.IsUpdateJobType())
                {
                    ++runningUpdates;
                }

                traceType.WriteInfo("Allowing job {0} to start", candidate.Id);
            }

            return Task.FromResult(0);
        }