/// <summary>
/// Runs the job submission loop until cancellation is requested on
/// <paramref name="cancellationToken"/>.
/// </summary>
/// <remarks>
/// Each iteration takes one job from the job store, uploads every file found under the job's
/// payload storage path, starts the job through the Jobs API, records the job as
/// <c>InferenceJobStatus.Success</c> and finally removes the uploaded files.
/// Exceptions do not leave the loop: cancellation and invalid-operation errors are logged as
/// warnings; any other error is logged and the job (if one was taken) is recorded as
/// <c>InferenceJobStatus.Fail</c>.
/// </remarks>
/// <param name="cancellationToken">
/// Token checked before every iteration and passed to the job store while waiting for a job.
/// </param>
/// <returns>A task that completes after the loop observes the cancellation request.</returns>
private async Task BackgroundProcessing(CancellationToken cancellationToken)
{
    _logger.Log(LogLevel.Information, "Job Submitter Hosted Service is running.");

    while (!cancellationToken.IsCancellationRequested)
    {
        // Declared outside the try block so the generic catch handler can mark it as failed.
        InferenceJob job = null;
        try
        {
            job = await _jobStore.Take(cancellationToken);
            using (_logger.BeginScope(new Dictionary<string, object> { { "JobId", job.JobId }, { "PayloadId", job.PayloadId } }))
            {
                var files = _fileSystem.Directory.GetFiles(job.JobPayloadsStoragePath, "*", System.IO.SearchOption.AllDirectories);
                await UploadFiles(job, job.JobPayloadsStoragePath, files);
                await _jobsApi.Start(job);
                await _jobStore.Update(job, InferenceJobStatus.Success);

                // Files are only removed after the job has been recorded as successful.
                RemoveFiles(files);
            }
        }
        catch (OperationCanceledException ex)
        {
            // The template has a {0} placeholder, so an argument must be supplied for it.
            _logger.Log(LogLevel.Warning, ex, "Job Store Service canceled: {0}", ex.Message);
        }
        catch (InvalidOperationException ex)
        {
            _logger.Log(LogLevel.Warning, ex, "Job Store Service may be disposed or Jobs API returned an error: {0}", ex.Message);
        }
        catch (Exception ex)
        {
            _logger.Log(LogLevel.Error, ex, "Error uploading payloads/starting job.");
            if (job != null)
            {
                try
                {
                    await _jobStore.Update(job, InferenceJobStatus.Fail);
                }
                catch (Exception updateException)
                {
                    // A failing status update must not escape the handler, otherwise it would
                    // terminate the whole processing loop.
                    _logger.Log(LogLevel.Error, updateException, "Error while updating job status.");
                }
            }
        }
    }

    _logger.Log(LogLevel.Information, "Cancellation requested.");
}
/// <summary>
/// Creates a job for the given pipeline through the Jobs API, uploads the supplied instances
/// for it and then starts the job.
/// </summary>
/// <param name="jobName">
/// Requested job name. It is passed through <c>FixJobName</c> and the result must not be null
/// or white space.
/// </param>
/// <param name="pipelineId">Pipeline identifier; must not be null or white space.</param>
/// <param name="jobPriority">Priority handed to the Jobs API when the job is created.</param>
/// <param name="basePath">Base path handed to <c>UploadFiles</c> along with the instances.</param>
/// <param name="instances">Instances to upload; must contain at least one entry.</param>
/// <returns>A task that completes once the job has been started.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="instances"/> is null or empty.
/// </exception>
protected async Task SubmitPipelineJob(string jobName, string pipelineId, JobPriority jobPriority, string basePath, IList<InstanceStorageInfo> instances)
{
    Guard.Against.NullOrWhiteSpace(pipelineId, nameof(pipelineId));

    var nothingToUpload = instances.IsNullOrEmpty();
    if (nothingToUpload)
    {
        throw new ArgumentNullException(nameof(instances));
    }

    // Validation happens on the fixed-up name, not on the raw input.
    jobName = jobName.FixJobName();
    Guard.Against.NullOrWhiteSpace(jobName, nameof(jobName));

    _logger.Log(
        LogLevel.Information,
        "Submitting a new job '{0}' with pipeline '{1}', priority={2}, instance count={3}",
        jobName,
        pipelineId,
        jobPriority,
        instances.Count);

    var createdJob = await _jobsApi.Create(pipelineId, jobName, jobPriority);

    // Identifiers attached to every log entry written while uploading and starting the job.
    var scopeState = new Dictionary<string, object>
    {
        ["JobId"] = createdJob.JobId,
        ["PayloadId"] = createdJob.PayloadId,
    };

    using (_logger.BeginScope(scopeState))
    {
        await UploadFiles(createdJob, basePath, instances);
        await _jobsApi.Start(createdJob);
    }
}
/// <summary>
/// Takes the next job from <paramref name="repository"/>, performs the single step that matches
/// the job's current state and then asks the repository to transition the job.
/// </summary>
/// <remarks>
/// The step is chosen from <c>job.State</c>: creating the job, uploading metadata, uploading
/// payload files or starting the job through <paramref name="jobsApi"/>. Any other state raises
/// an <see cref="InvalidOperationException"/>, which is caught and logged like every other
/// exception raised while processing. The status handed to <c>TransitionState</c> is
/// <c>InferenceJobStatus.Success</c> only when the step completed without throwing; otherwise
/// it stays <c>InferenceJobStatus.Fail</c>.
/// </remarks>
/// <param name="repository">Source of jobs and owner of job state transitions.</param>
/// <param name="jobsApi">Jobs API used to start a job that is in the starting state.</param>
/// <param name="cancellationToken">
/// Token passed to the repository when taking a job and when transitioning its state.
/// </param>
/// <returns>A task that completes when the job has been processed and transitioned.</returns>
private async Task ProcessNextJob(IJobRepository repository, IJobs jobsApi, CancellationToken cancellationToken)
{
    InferenceJob job = null;

    // Pessimistic default: only flipped to Success after the state handler finishes.
    InferenceJobStatus status = InferenceJobStatus.Fail;
    try
    {
        _logger.Log(LogLevel.Debug, "Waiting for new job...");
        job = await repository.Take(cancellationToken);
        using (_logger.BeginScope(new LogginDataDictionary<string, object> { { "JobId", job.JobId }, { "PayloadId", job.PayloadId } }))
        {
            switch (job.State)
            {
                case InferenceJobState.Creating:
                    await CreateJob(job);
                    break;

                case InferenceJobState.MetadataUploading:
                    await UploadMetadata(job);
                    break;

                case InferenceJobState.PayloadUploading:
                    await UploadFiles(job, job.JobPayloadsStoragePath);
                    break;

                case InferenceJobState.Starting:
                    await jobsApi.Start(job);
                    break;

                default:
                    throw new InvalidOperationException($"Unsupported job state {job.State}.");
            }

            status = InferenceJobStatus.Success;
        }
    }
    catch (OperationCanceledException ex)
    {
        // The template has a {0} placeholder, so an argument must be supplied for it.
        _logger.Log(LogLevel.Warning, ex, "Job Store Service canceled: {0}", ex.Message);
    }
    catch (InvalidOperationException ex)
    {
        _logger.Log(LogLevel.Warning, ex, "Job Store Service may be disposed or Jobs API returned an error: {0}", ex.Message);
    }
    catch (PayloadUploadException ex)
    {
        _logger.Log(LogLevel.Error, ex, ex.Message);
    }
    catch (Exception ex)
    {
        _logger.Log(LogLevel.Error, ex, "Error communicating with Clara Platform.");
    }
    finally
    {
        // job stays null when Take itself failed; there is nothing to transition in that case.
        if (job != null)
        {
            try
            {
                var updatedJob = await repository.TransitionState(job, status, cancellationToken);

                // Files are cleaned up only once the job has reached one of these two states.
                if (updatedJob.State == InferenceJobState.Completed ||
                    updatedJob.State == InferenceJobState.Faulted)
                {
                    CleanupJobFiles(updatedJob);
                }
            }
            catch (Exception ex)
            {
                _logger.Log(LogLevel.Error, ex, "Error while transitioning job state.");
            }
        }
    }
}