/// <summary>
/// Creates the scheduler and stores each supplied argument in the matching
/// instance field. No validation or other work is performed here.
/// </summary>
/// <param name="schedulerId">Value stored in the <c>schedulerId</c> field.</param>
/// <param name="schedulerMetadata">Value stored in the <c>metadata</c> field.</param>
/// <param name="schedulerMetadataStore">Value stored in the <c>schedulerMetadataStore</c> field.</param>
/// <param name="clusterStateMonitor">Value stored in the <c>clusterStateMonitor</c> field.</param>
/// <param name="jobProcessor">Value stored in the <c>jobProcessor</c> field.</param>
/// <param name="logger">Value stored in the <c>logger</c> field.</param>
public DefaultScheduler(
    TStorageKey schedulerId,
    ISchedulerMetadata schedulerMetadata,
    ISchedulerMetadataStore<TStorageKey> schedulerMetadataStore,
    IClusterStateMonitor<TStorageKey> clusterStateMonitor,
    IJobProcessor<TStorageKey> jobProcessor,
    ILogger<DefaultScheduler<TStorageKey>> logger)
{
    // Diagnostics.
    this.logger = logger;

    // Identity and metadata of this scheduler instance.
    this.schedulerId = schedulerId;
    this.metadata = schedulerMetadata;

    // Collaborating services.
    this.schedulerMetadataStore = schedulerMetadataStore;
    this.clusterStateMonitor = clusterStateMonitor;
    this.jobProcessor = jobProcessor;
}
/// <summary>
/// Handles a scheduler that is considered stalled: logs a warning, asks the
/// metadata store to remove the scheduler, then tries to recover every job
/// the job store reports as executing on it.
/// </summary>
/// <remarks>
/// A <c>ConcurrencyException</c> thrown by the removal is logged at information
/// level and processing continues. A failure while recovering a single job is
/// logged as an error and the remaining jobs are still attempted. Exceptions
/// from <c>GetExecutingJobs</c>, and any other exception from the removal,
/// are not caught here.
/// </remarks>
/// <param name="schedulerId">Identifier of the scheduler being processed.</param>
/// <param name="schedulerMetadata">
/// Metadata of that scheduler; only <c>LastCheckin</c> and
/// <c>HeartbeatTimeout</c> are read, and only for the warning message.
/// </param>
/// <returns>A task that completes once all recovery attempts have finished.</returns>
private async Task ProcessStalledScheduler(TSchedulerKey schedulerId, ISchedulerMetadata schedulerMetadata)
{
    this.logger.LogWarning(
        "Scheduler {0} seems to be stalled. Last heartbeat at {1}, Timeout {2}",
        schedulerId,
        schedulerMetadata.LastCheckin,
        schedulerMetadata.HeartbeatTimeout);

    // Step 1: drop the scheduler record.
    try
    {
        await this.schedulerMetadataStore.RemoveScheduler(schedulerId).ConfigureAwait(false);
        this.logger.LogInformation("Scheduler {0} removed", schedulerId);
    }
    catch (ConcurrencyException concurrencyError)
    {
        // NOTE(review): recovery below still runs after this exception; confirm that
        // whoever removed the scheduler does not also recover the same jobs.
        this.logger.LogInformation(
            concurrencyError,
            "Scheduler {0} removed by someone else: {1}",
            schedulerId,
            concurrencyError.Message);
    }

    // Step 2: find the jobs that were still executing on that scheduler.
    var orphanedJobs = await this.jobStore.GetExecutingJobs(schedulerId).ConfigureAwait(false);
    var orphanedCount = orphanedJobs.Count;

    if (orphanedCount == 0)
    {
        this.logger.LogInformation("There's no jobs executing at stalled scheduler {0}", schedulerId);
        return;
    }

    this.logger.LogWarning(
        "{0} jobs found to be executing at stalled scheduler {1}",
        orphanedCount,
        schedulerId);

    // Step 3: recover the jobs one at a time; a failure on one job must not
    // prevent the attempt on the next.
    foreach (var job in orphanedJobs)
    {
        try
        {
            await this.RecoverJob(job.id).ConfigureAwait(false);
        }
        catch (Exception recoveryError)
        {
            this.logger.LogError(
                recoveryError,
                "Processing job {0} at stalled scheduler {1} failed: {2}",
                job.id,
                schedulerId,
                recoveryError.Message);
        }
    }
}