コード例 #1
0
        /// <summary>
        /// Applies the outcome of a processing attempt to the stored entry for <paramref name="request"/>.
        /// </summary>
        /// <remarks>
        /// When <paramref name="status"/> is <c>InferenceJobStatus.Success</c> the job is passed to
        /// <c>Delete</c>. For any other status <c>request.TryCount</c> is incremented; if the new
        /// count exceeds <c>MaxRetryLimit</c> the job is passed to <c>Delete</c>, otherwise its state
        /// is set to <c>InferenceJobState.Queued</c> and it is saved via <c>UpdateInferenceJob</c>.
        /// </remarks>
        /// <param name="request">The job to update; its <c>TryCount</c> and <c>State</c> may be modified.</param>
        /// <param name="status">Result of the most recent attempt to process the job.</param>
        public async Task Update(InferenceJob request, InferenceJobStatus status)
        {
            if (status == InferenceJobStatus.Success)
            {
                _logger.Log(LogLevel.Information, $"Removing job {request.JobId} from job store as completed.");
                await Delete(request);
                return;
            }

            // Count this failed attempt before comparing against the retry limit.
            var attempts = ++request.TryCount;
            if (attempts > MaxRetryLimit)
            {
                _logger.Log(LogLevel.Information, $"Exceeded maximum job submission retries; removing job {request.JobId} from job store.");
                await Delete(request);
                return;
            }

            // Still within the retry limit: mark the job as queued again and save it.
            _logger.Log(LogLevel.Debug, $"Adding job {request.JobId} back to job store for retry.");
            request.State = InferenceJobState.Queued;
            _logger.Log(LogLevel.Debug, $"Updating request {request.JobId} to Queued.");
            await UpdateInferenceJob(request);

            _logger.Log(LogLevel.Information, $"Job {request.JobId} added back to job store for retry.");
        }
コード例 #2
0
        /// <summary>
        /// Moves <paramref name="job"/> to its next state based on <paramref name="status"/> and saves it
        /// via <c>UpdateInferenceJob</c>.
        /// </summary>
        /// <remarks>
        /// On success the in-progress state is advanced (Creating to Created, MetadataUploading to
        /// MetadataUploaded, PayloadUploading to PayloadUploaded, Starting to Completed), <c>TryCount</c>
        /// is reset to 0 and <c>LastUpdate</c> is set to <see cref="DateTime.MinValue"/>.
        /// On any other status <c>TryCount</c> is incremented; if it exceeds the configured
        /// <c>MaxRetries</c> the job becomes Faulted, otherwise the state is rolled back one step
        /// (Creating to Queued, MetadataUploading to Created, PayloadUploading to MetadataUploaded,
        /// Starting to PayloadUploaded). <c>LastUpdate</c> is then set to <see cref="DateTime.UtcNow"/>.
        /// </remarks>
        /// <param name="job">The job to transition; it is modified in place.</param>
        /// <param name="status">Result of the action performed for the job's current state.</param>
        /// <param name="cancellationToken">Token forwarded to <c>UpdateInferenceJob</c>.</param>
        /// <returns>The same <paramref name="job"/> instance after it has been updated.</returns>
        /// <exception cref="ApplicationException">
        /// The job's current state is not one of the in-progress states listed above (not checked
        /// when the retry limit has been exceeded).
        /// </exception>
        public async Task<InferenceJob> TransitionState(InferenceJob job, InferenceJobStatus status, CancellationToken cancellationToken = default)
        {
            Guard.Against.Null(job, nameof(job));

            if (status != InferenceJobStatus.Success)
            {
                // Failure path: count the attempt, then either fault the job or roll it back for retry.
                var attempts = ++job.TryCount;
                if (attempts > _configuration.Value.Services.Platform.MaxRetries)
                {
                    _logger.Log(LogLevel.Warning, $"Job {job.JobId} exceeded maximum number of retries.");
                    job.State = InferenceJobState.Faulted;
                }
                else
                {
                    var retryState = job.State switch
                    {
                        InferenceJobState.Creating => InferenceJobState.Queued,
                        InferenceJobState.MetadataUploading => InferenceJobState.Created,
                        InferenceJobState.PayloadUploading => InferenceJobState.MetadataUploaded,
                        InferenceJobState.Starting => InferenceJobState.PayloadUploaded,
                        _ => throw new ApplicationException($"unsupported job state {job.State}")
                    };
                    job.State = retryState;
                    _logger.Log(LogLevel.Information, $"Putting inference job {job.JobId} back to {job.State} state for retry.");
                }

                job.LastUpdate = DateTime.UtcNow;
                await UpdateInferenceJob(job, cancellationToken);
                return job;
            }

            // Success path: advance to the next state and clear the retry bookkeeping.
            var originalState = job.State;
            var nextState = job.State switch
            {
                InferenceJobState.Creating => InferenceJobState.Created,
                InferenceJobState.MetadataUploading => InferenceJobState.MetadataUploaded,
                InferenceJobState.PayloadUploading => InferenceJobState.PayloadUploaded,
                InferenceJobState.Starting => InferenceJobState.Completed,
                _ => throw new ApplicationException($"unsupported job state {job.State}")
            };
            job.State = nextState;
            job.TryCount = 0;
            job.LastUpdate = DateTime.MinValue;

            _logger.Log(LogLevel.Information, $"Updating inference job state {job.JobId} from {originalState } to {job.State}.");
            await UpdateInferenceJob(job, cancellationToken);

            return job;
        }
コード例 #3
0
        /// <summary>
        /// Takes the next job from <paramref name="repository"/>, performs the action that matches its
        /// current state, and then asks the repository to transition the job based on the outcome.
        /// </summary>
        /// <remarks>
        /// Exceptions raised while taking or processing the job are logged and not rethrown; the job
        /// (if one was obtained) is then transitioned with <c>InferenceJobStatus.Fail</c>. When the
        /// transitioned job ends up Completed or Faulted, <c>CleanupJobFiles</c> is called for it.
        /// Errors thrown by the transition or cleanup are logged as well.
        /// </remarks>
        /// <param name="repository">Source of jobs and owner of job state transitions.</param>
        /// <param name="jobsApi">API used to start a job that is in the Starting state.</param>
        /// <param name="cancellationToken">Token passed to <c>Take</c> and <c>TransitionState</c>.</param>
        private async Task ProcessNextJob(IJobRepository repository, IJobs jobsApi, CancellationToken cancellationToken)
        {
            InferenceJob       job    = null;
            // Assume failure until the state-specific action finishes without throwing.
            InferenceJobStatus status = InferenceJobStatus.Fail;

            try
            {
                _logger.Log(LogLevel.Debug, $"Waiting for new job...");
                job = await repository.Take(cancellationToken);

                using (_logger.BeginScope(new LogginDataDictionary <string, object> {
                    { "JobId", job.JobId }, { "PayloadId", job.PayloadId }
                }))
                {
                    switch (job.State)
                    {
                    case InferenceJobState.Creating:
                        await CreateJob(job);

                        break;

                    case InferenceJobState.MetadataUploading:
                        await UploadMetadata(job);

                        break;

                    case InferenceJobState.PayloadUploading:
                        await UploadFiles(job, job.JobPayloadsStoragePath);

                        break;

                    case InferenceJobState.Starting:
                        await jobsApi.Start(job);

                        break;

                    default:
                        throw new InvalidOperationException($"Unsupported job state {job.State}.");
                    }
                    status = InferenceJobStatus.Success;
                }
            }
            catch (OperationCanceledException ex)
            {
                // Supply the argument for the {0} placeholder; without it the literal "{0}" is logged.
                _logger.Log(LogLevel.Warning, ex, "Job Store Service canceled: {0}", ex.Message);
            }
            catch (InvalidOperationException ex)
            {
                // Also reached for the unsupported-state exception thrown in the switch above.
                _logger.Log(LogLevel.Warning, ex, "Job Store Service may be disposed or Jobs API returned an error: {0}", ex.Message);
            }
            catch (PayloadUploadException ex)
            {
                _logger.Log(LogLevel.Error, ex, ex.Message);
            }
            catch (Exception ex)
            {
                _logger.Log(LogLevel.Error, ex, "Error communicating with Clara Platform.");
            }
            finally
            {
                // job stays null when Take itself failed; there is nothing to transition in that case.
                if (job != null)
                {
                    try
                    {
                        var updatedJob = await repository.TransitionState(job, status, cancellationToken);

                        // Completed and Faulted are the two states that trigger file cleanup.
                        if (updatedJob.State == InferenceJobState.Completed ||
                            updatedJob.State == InferenceJobState.Faulted)
                        {
                            CleanupJobFiles(updatedJob);
                        }
                    }
                    catch (Exception ex)
                    {
                        _logger.Log(LogLevel.Error, ex, "Error while transitioning job state.");
                    }
                }
            }
        }