/// <summary>
/// Polls the Hangfire monitoring API until the job with <paramref name="jobId"/> reaches the
/// "Succeeded" state, a failure appears in storage, or the timeout elapses. If any failed job
/// is found afterwards, throws with that job's exception details; otherwise deletes the job.
/// </summary>
/// <param name="jobId">Identifier of the background job to wait for.</param>
/// <param name="maxWaitInMilliseconds">Upper bound on polling time; ignored while a debugger is attached.</param>
/// <exception cref="InvalidOperationException">Thrown when a failed job is present in storage.</exception>
private async Task WaitUntilJobCompletedAsync(string jobId, int maxWaitInMilliseconds = 5000)
{
    IMonitoringApi monitoringApi = JobStorage.Current.GetMonitoringApi();
    var sw = Stopwatch.StartNew();
    JobDetailsDto jobDetails = null;

    // Poll until the job history shows "Succeeded", the timeout expires,
    // or any failure shows up in storage (which aborts the wait early).
    while ((jobDetails == null || jobDetails.History.All(s => s.StateName != "Succeeded"))
           && (sw.Elapsed.TotalMilliseconds < maxWaitInMilliseconds || Debugger.IsAttached))
    {
        await Task.Delay(25);
        jobDetails = monitoringApi.JobDetails(jobId);
        if (monitoringApi.FailedCount() > 0)
        {
            break;
        }
    }

    var failedJobs = monitoringApi.FailedJobs(0, int.MaxValue);

    // BUGFIX: prefer the failure that belongs to the awaited job. The original
    // FirstOrDefault() over all failed jobs could report an unrelated (e.g. stale)
    // failure's details as if it were this job's. Fall back to any failure because
    // the loop above deliberately aborts on any failed job.
    FailedJobDto failedJob = failedJobs
        .Where(j => j.Key == jobId)
        .Select(j => j.Value)
        .FirstOrDefault()
        ?? failedJobs.Select(j => j.Value).FirstOrDefault();

    if (failedJob != null)
    {
        throw new InvalidOperationException($"Job failed: {failedJob.ExceptionDetails}.");
    }

    _client.Delete(jobId);
}
/// <summary>
/// Reloads the cached snapshots of scheduled, processing, failed, succeeded and
/// recurring jobs from the Hangfire monitoring API and current storage connection.
/// </summary>
public void Refresh()
{
    _scheduled = _hangfire.ScheduledJobs(0, (int)_hangfire.ScheduledCount());
    _processing = _hangfire.ProcessingJobs(0, (int)_hangfire.ProcessingCount());
    _failed = _hangfire.FailedJobs(0, (int)_hangfire.FailedCount());
    _succeded = _hangfire.SucceededJobs(0, (int)_hangfire.SucceededListCount());

    // BUGFIX: IStorageConnection is IDisposable; the original leaked the
    // connection on every refresh by never disposing it.
    using (var connection = JobStorage.Current.GetConnection())
    {
        _recurring = connection.GetRecurringJobs();
    }
}
/// <summary>
/// Searches the failed-job list in fixed-size pages from the end of the list for
/// <paramref name="jobId"/> and throws a <see cref="JobFailedException"/> describing its
/// failure. Throws <see cref="InvalidOperationException"/> if the job cannot be found.
/// The <typeparamref name="TResult"/> parameter only lets call sites use this as an
/// expression of the expected result type; the method never returns normally.
/// </summary>
/// <param name="jobId">Identifier of the failed background job to look up.</param>
private TResult ThrowError<TResult>(string jobId)
{
    var total = (int)monitoringApi.FailedCount();
    const int pageSize = 10;

    // Walk the failed-job list backwards in pages of pageSize, clamping the
    // first page's start index at 0 so partial pages are handled correctly.
    for (var i = 0; i < total; i += pageSize)
    {
        var start = Math.Max(total - i - pageSize, 0);
        var end = total - i;
        var count = end - start;

        var job = monitoringApi.FailedJobs(start, count).SingleOrDefault(x => x.Key == jobId).Value;
        if (job != null)
        {
            // BUGFIX: corrected "a exception" -> "an exception" in the error message.
            throw new JobFailedException($"The job threw an exception of type '{job.ExceptionType}'\nMessage: {job.ExceptionMessage}\nDetails: {job.ExceptionDetails}");
        }
    }

    throw new InvalidOperationException("Failed to find job");
}
/// <summary>
/// Checks various HangFire settings, i.e., failing jobs and a positive number of servers
/// </summary>
/// <param name="context">Health check context supplied by the health-check framework (not read here).</param>
/// <param name="cancellationToken">Not observed; the check runs synchronously.</param>
/// <returns>A completed task carrying the aggregated <see cref="HealthCheckResult"/> with diagnostic data.</returns>
public Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, CancellationToken cancellationToken = default)
{
    _logger.LogInformation("Health check HangFire");

    var status = HealthStatus.Healthy;
    var data = new Dictionary<string, object>();

    // Check servers
    var servers = _hangFireMonitoringApi.Servers();
    if (servers.Count < 1)
    {
        var message = $"Number of servers is less than expected at time {DateTime.Now}";
        _logger.LogWarning(message);
        status = HealthStatus.Unhealthy;
        data.Add(message, servers.Count);
    }

    // Check status of recurring jobs: a job with a NextExecution is considered enabled.
    var recurringJobs = _healthCheckHangFireService.GetRecurringJobs();
    var noOfActuallyEnabledJobs = recurringJobs.Count(j => j.NextExecution != null);
    var noOfExpectedEnabledJobs = _appSettingsConfig.HealthCheckSettings.NoOfEnabledJobs;
    if (noOfActuallyEnabledJobs < noOfExpectedEnabledJobs)
    {
        var message = $"Number of enabled jobs is less than expected at time {DateTime.Now}. Expected:{noOfExpectedEnabledJobs} - Actual:{noOfActuallyEnabledJobs}";
        _logger.LogWarning(message);
        status = HealthStatus.Unhealthy;
        data.Add(message, noOfActuallyEnabledJobs);

        var enabledJobIds = _appSettingsConfig.HealthCheckSettings.EnabledJobIds;
        foreach (var jobId in enabledJobIds)
        {
            // BUGFIX: the original used First(), which throws InvalidOperationException
            // when a configured job id is missing from storage - crashing the health
            // check instead of reporting it. A missing job is now reported as Unhealthy.
            var job = recurringJobs.FirstOrDefault(j => j.Id == jobId);
            if (job == null)
            {
                var missingMessage = $"Job '{jobId}' is not enabled as expected";
                _logger.LogWarning(missingMessage);
                status = HealthStatus.Unhealthy;
                data.Add(missingMessage, $"No recurring job with id '{jobId}' was found");
                continue;
            }

            if (job.NextExecution != null)
            {
                continue;
            }

            var jobMessage = $"Job '{jobId}' is not enabled as expected";
            _logger.LogWarning(jobMessage);
            status = HealthStatus.Unhealthy;

            var info = job.LastExecution != null
                ? $"Last execution time: {job.LastExecution}"
                : $"No last execution time for {jobId}";
            data.Add(jobMessage, info);
        }
    }

    // Check failing jobs: report the count and details of up to the first 10 failures.
    var failingJobsCount = _hangFireMonitoringApi.FailedCount();
    if (failingJobsCount > 0)
    {
        status = HealthStatus.Unhealthy;
        data.Add("failed jobs count", failingJobsCount);
        _logger.LogWarning("HangFire has one or more failed jobs");

        var jobs = _hangFireMonitoringApi.FailedJobs(0, 10);
        foreach (var job in jobs)
        {
            var key = job.Key;
            var val = job.Value;
            _logger.LogWarning($"Failed job ID '{key}' - {val.ExceptionDetails} - {val.ExceptionMessage}");

            try
            {
                var serializedVal = JsonSerializer.Serialize(val, val.GetType());
                data.Add($"Job ID {key}", serializedVal);
            }
            catch (Exception e)
            {
                var errorMessage = $"Error in health check HangFire. {e.Message} - {e.StackTrace}";
                _logger.LogError(errorMessage);

                // Adding extra details from the failed job to data
                var failedJobDetails = $"JOB FAILED: EXCEPTION DETAILS: {val.ExceptionDetails} - EXCEPTION MESSAGE: {val.ExceptionMessage} - EXCEPTION TYPE: {val.ExceptionType} - FAILED AT: {val.FailedAt}";
                data.Add($"Job ID {key}. HangFire health check could not serialize class {val.GetType()}", failedJobDetails);

                // Serialization failure aborts the check early, carrying the exception.
                return Task.FromResult(new HealthCheckResult(
                    status, Description, e, data));
            }
        }
    }

    if (data.Count == 0)
    {
        data.Add("State", "HangFire jobs and servers are in expected state");
    }

    return Task.FromResult(new HealthCheckResult(
        status, Description, data: data));
}