/// <summary>
/// Records the outcome of a job submission attempt in the job store.
/// </summary>
/// <remarks>
/// A successful job is deleted from the store. A failed job has its
/// <c>TryCount</c> incremented; once that count is greater than
/// <c>MaxRetryLimit</c> the job is deleted, otherwise it is set back to
/// <c>InferenceJobState.Queued</c> and persisted via <c>UpdateInferenceJob</c>.
/// </remarks>
/// <param name="request">The job whose outcome is being recorded.</param>
/// <param name="status">Result of the most recent attempt.</param>
public async Task Update(InferenceJob request, InferenceJobStatus status)
{
    // Completed jobs have nothing left to do: drop them from the store.
    if (status == InferenceJobStatus.Success)
    {
        _logger.Log(LogLevel.Information, $"Removing job {request.JobId} from job store as completed.");
        await Delete(request);
        return;
    }

    // The attempt counter is bumped on every failure, before the limit check.
    var attempts = ++request.TryCount;
    if (attempts > MaxRetryLimit)
    {
        _logger.Log(LogLevel.Information, $"Exceeded maximum job submission retries; removing job {request.JobId} from job store.");
        await Delete(request);
        return;
    }

    // Still within the retry budget: re-queue the job.
    _logger.Log(LogLevel.Debug, $"Adding job {request.JobId} back to job store for retry.");
    request.State = InferenceJobState.Queued;
    _logger.Log(LogLevel.Debug, $"Updating request {request.JobId} to Queued.");
    await UpdateInferenceJob(request);
    _logger.Log(LogLevel.Information, $"Job {request.JobId} added back to job store for retry.");
}
/// <summary>
/// Moves <paramref name="job"/> to its next state according to the outcome of the
/// step it was performing, then persists the job via <c>UpdateInferenceJob</c>.
/// </summary>
/// <remarks>
/// On success the job advances (Creating → Created, MetadataUploading → MetadataUploaded,
/// PayloadUploading → PayloadUploaded, Starting → Completed), <c>TryCount</c> is reset to 0
/// and <c>LastUpdate</c> is set to <see cref="DateTime.MinValue"/>.
/// On failure <c>TryCount</c> is incremented; if it is then greater than the configured
/// <c>Services.Platform.MaxRetries</c> the job becomes Faulted, otherwise it steps back
/// (Creating → Queued, MetadataUploading → Created, PayloadUploading → MetadataUploaded,
/// Starting → PayloadUploaded). In both failure cases <c>LastUpdate</c> is set to
/// <see cref="DateTime.UtcNow"/>.
/// </remarks>
/// <param name="job">The job to transition; validated with <c>Guard.Against.Null</c>.</param>
/// <param name="status">Outcome of the step the job was performing.</param>
/// <param name="cancellationToken">Forwarded to <c>UpdateInferenceJob</c>.</param>
/// <returns>The same <paramref name="job"/> instance after it has been updated.</returns>
/// <exception cref="ApplicationException">
/// The job's current state has no mapping for the requested transition. On the failure
/// path this is only reached when the retry limit has not been exceeded, and
/// <c>TryCount</c> has already been incremented by then.
/// </exception>
public async Task<InferenceJob> TransitionState(InferenceJob job, InferenceJobStatus status, CancellationToken cancellationToken = default)
{
    Guard.Against.Null(job, nameof(job));

    if (status != InferenceJobStatus.Success)
    {
        // Count this failed attempt first; the limit check uses the incremented value.
        var attempts = ++job.TryCount;
        if (attempts > _configuration.Value.Services.Platform.MaxRetries)
        {
            _logger.Log(LogLevel.Warning, $"Job {job.JobId} exceeded maximum number of retries.");
            job.State = InferenceJobState.Faulted;
        }
        else
        {
            job.State = StepBack(job.State);
            _logger.Log(LogLevel.Information, $"Putting inference job {job.JobId} back to {job.State} state for retry.");
        }

        job.LastUpdate = DateTime.UtcNow;
        await UpdateInferenceJob(job, cancellationToken);
        return job;
    }

    var previousState = job.State;

    // The lookup throws for unsupported states before any other field is touched.
    job.State = StepForward(previousState);
    job.TryCount = 0;
    job.LastUpdate = DateTime.MinValue;
    _logger.Log(LogLevel.Information, $"Updating inference job state {job.JobId} from {previousState} to {job.State}.");
    await UpdateInferenceJob(job, cancellationToken);
    return job;

    // Maps an in-progress state to the state reached when its step succeeded.
    InferenceJobState StepForward(InferenceJobState current)
    {
        switch (current)
        {
            case InferenceJobState.Creating:
                return InferenceJobState.Created;
            case InferenceJobState.MetadataUploading:
                return InferenceJobState.MetadataUploaded;
            case InferenceJobState.PayloadUploading:
                return InferenceJobState.PayloadUploaded;
            case InferenceJobState.Starting:
                return InferenceJobState.Completed;
            default:
                throw new ApplicationException($"unsupported job state {current}");
        }
    }

    // Maps an in-progress state to the state it falls back to so the step can be retried.
    InferenceJobState StepBack(InferenceJobState current)
    {
        switch (current)
        {
            case InferenceJobState.Creating:
                return InferenceJobState.Queued;
            case InferenceJobState.MetadataUploading:
                return InferenceJobState.Created;
            case InferenceJobState.PayloadUploading:
                return InferenceJobState.MetadataUploaded;
            case InferenceJobState.Starting:
                return InferenceJobState.PayloadUploaded;
            default:
                throw new ApplicationException($"unsupported job state {current}");
        }
    }
}
/// <summary>
/// Takes the next job from <paramref name="repository"/>, performs the action that
/// matches the job's current state, and reports the outcome back to the repository.
/// </summary>
/// <remarks>
/// Exceptions raised while processing are logged and not rethrown; the job is then
/// transitioned with <c>InferenceJobStatus.Fail</c>. When the transitioned job ends up
/// Completed or Faulted, <c>CleanupJobFiles</c> is called for it.
/// </remarks>
/// <param name="repository">Source of jobs and target of the state transition.</param>
/// <param name="jobsApi">API used to start a job that is in the Starting state.</param>
/// <param name="cancellationToken">Passed to <c>Take</c> and <c>TransitionState</c>.</param>
private async Task ProcessNextJob(IJobRepository repository, IJobs jobsApi, CancellationToken cancellationToken)
{
    InferenceJob job = null;

    // Assume failure until the state-specific action has completed without throwing.
    InferenceJobStatus status = InferenceJobStatus.Fail;

    try
    {
        _logger.Log(LogLevel.Debug, "Waiting for new job...");
        job = await repository.Take(cancellationToken);

        using (_logger.BeginScope(new LogginDataDictionary<string, object> { { "JobId", job.JobId }, { "PayloadId", job.PayloadId } }))
        {
            switch (job.State)
            {
                case InferenceJobState.Creating:
                    await CreateJob(job);
                    break;

                case InferenceJobState.MetadataUploading:
                    await UploadMetadata(job);
                    break;

                case InferenceJobState.PayloadUploading:
                    await UploadFiles(job, job.JobPayloadsStoragePath);
                    break;

                case InferenceJobState.Starting:
                    await jobsApi.Start(job);
                    break;

                default:
                    throw new InvalidOperationException($"Unsupported job state {job.State}.");
            }

            status = InferenceJobStatus.Success;
        }
    }
    catch (OperationCanceledException ex)
    {
        // The template has a {0} placeholder, so the exception message is supplied for it;
        // without an argument the literal "{0}" would be written to the log.
        _logger.Log(LogLevel.Warning, ex, "Job Store Service canceled: {0}", ex.Message);
    }
    catch (InvalidOperationException ex)
    {
        // Also reached for the "Unsupported job state" exception thrown above.
        _logger.Log(LogLevel.Warning, ex, "Job Store Service may be disposed or Jobs API returned an error: {0}", ex.Message);
    }
    catch (PayloadUploadException ex)
    {
        _logger.Log(LogLevel.Error, ex, ex.Message);
    }
    catch (Exception ex)
    {
        _logger.Log(LogLevel.Error, ex, "Error communicating with Clara Platform.");
    }
    finally
    {
        // job stays null when Take itself failed or was cancelled; nothing to transition then.
        if (job != null)
        {
            try
            {
                // NOTE(review): the same token is reused here; if it is already cancelled the
                // transition may not be persisted - confirm against the repository implementation.
                var updatedJob = await repository.TransitionState(job, status, cancellationToken);
                if (updatedJob.State == InferenceJobState.Completed ||
                    updatedJob.State == InferenceJobState.Faulted)
                {
                    CleanupJobFiles(updatedJob);
                }
            }
            catch (Exception ex)
            {
                _logger.Log(LogLevel.Error, ex, "Error while transitioning job state.");
            }
        }
    }
}