/// <summary>
/// Runs the job submission loop until <paramref name="cancellationToken"/> is signaled.
/// Each iteration takes one job from the job store, uploads every file found under the
/// job's payload storage path, starts the job through the Jobs API, marks the job as
/// <see cref="InferenceJobStatus.Success"/> and finally calls <c>RemoveFiles</c> on the
/// uploaded files.
/// </summary>
/// <remarks>
/// <see cref="OperationCanceledException"/> and <see cref="InvalidOperationException"/> are
/// logged as warnings and leave the job status untouched. Any other exception is logged as
/// an error and, when a job was already taken, the job is marked as
/// <see cref="InferenceJobStatus.Fail"/>.
/// </remarks>
/// <param name="cancellationToken">Token that stops the loop and is passed to the job store when taking a job.</param>
private async Task BackgroundProcessing(CancellationToken cancellationToken)
        {
            _logger.Log(LogLevel.Information, "Job Submitter Hosted Service is running.");
            while (!cancellationToken.IsCancellationRequested)
            {
                // Declared outside the try block so the catch handler can tell whether a job was taken.
                InferenceJob job = null;
                try
                {
                    job = await _jobStore.Take(cancellationToken);

                    // Attach the job identifiers to every log entry written while this job is processed.
                    using (_logger.BeginScope(new Dictionary<string, object> {
                        { "JobId", job.JobId }, { "PayloadId", job.PayloadId }
                    }))
                    {
                        var files = _fileSystem.Directory.GetFiles(job.JobPayloadsStoragePath, "*", System.IO.SearchOption.AllDirectories);
                        await UploadFiles(job, job.JobPayloadsStoragePath, files);

                        await _jobsApi.Start(job);

                        await _jobStore.Update(job, InferenceJobStatus.Success);

                        RemoveFiles(files);
                    }
                }
                catch (OperationCanceledException ex)
                {
                    // The template has a placeholder, so the value must be supplied;
                    // otherwise the literal "{0}" ends up in the log output.
                    _logger.Log(LogLevel.Warning, ex, "Job Store Service canceled: {0}", ex.Message);
                }
                catch (InvalidOperationException ex)
                {
                    _logger.Log(LogLevel.Warning, ex, "Job Store Service may be disposed or Jobs API returned an error: {0}", ex.Message);
                }
                catch (Exception ex)
                {
                    _logger.Log(LogLevel.Error, ex, "Error uploading payloads/starting job.");
                    if (job != null)
                    {
                        try
                        {
                            await _jobStore.Update(job, InferenceJobStatus.Fail);
                        }
                        catch (Exception updateException)
                        {
                            // A failing status update must not escape the handler and end the processing loop.
                            _logger.Log(LogLevel.Error, updateException, "Error while updating job status.");
                        }
                    }
                }
            }
            _logger.Log(LogLevel.Information, "Cancellation requested.");
        }
        /// <summary>
        /// Creates a job for the given pipeline through the Jobs API, uploads the supplied
        /// instances for it and then starts the job.
        /// </summary>
        /// <param name="jobName">Requested job name; it is passed through <c>FixJobName</c> before use.</param>
        /// <param name="pipelineId">Pipeline identifier; must not be null or white space.</param>
        /// <param name="jobPriority">Priority handed to the Jobs API when creating the job.</param>
        /// <param name="basePath">Base path forwarded to <c>UploadFiles</c>.</param>
        /// <param name="instances">Instances to upload; must not be null or empty.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="instances"/> is null or empty.</exception>
        protected async Task SubmitPipelineJob(string jobName, string pipelineId, JobPriority jobPriority, string basePath, IList<InstanceStorageInfo> instances)
        {
            // Validation order is significant: pipeline id first, then instances, then the fixed-up job name.
            Guard.Against.NullOrWhiteSpace(pipelineId, nameof(pipelineId));
            if (instances.IsNullOrEmpty())
            {
                throw new ArgumentNullException(nameof(instances));
            }

            var fixedJobName = jobName.FixJobName();
            Guard.Against.NullOrWhiteSpace(fixedJobName, nameof(jobName));

            _logger.Log(
                LogLevel.Information,
                "Submitting a new job '{0}' with pipeline '{1}', priority={2}, instance count={3}",
                fixedJobName,
                pipelineId,
                jobPriority,
                instances.Count);

            var createdJob = await _jobsApi.Create(pipelineId, fixedJobName, jobPriority);

            // Identifiers attached to every log entry written while uploading and starting the job.
            var scopeState = new Dictionary<string, object>
            {
                ["JobId"] = createdJob.JobId,
                ["PayloadId"] = createdJob.PayloadId,
            };

            using (_logger.BeginScope(scopeState))
            {
                await UploadFiles(createdJob, basePath, instances);
                await _jobsApi.Start(createdJob);
            }
        }
Example #3
0
        /// <summary>
        /// Takes the next job from <paramref name="repository"/>, performs the single step that
        /// matches the job's current state (create, upload metadata, upload payload files or
        /// start) and then asks the repository to transition the job with the resulting status.
        /// </summary>
        /// <remarks>
        /// The status starts as <see cref="InferenceJobStatus.Fail"/> and only becomes
        /// <see cref="InferenceJobStatus.Success"/> when the step finishes without throwing.
        /// Exceptions raised by the step are logged and not rethrown. When the transitioned job
        /// ends up <see cref="InferenceJobState.Completed"/> or <see cref="InferenceJobState.Faulted"/>,
        /// <c>CleanupJobFiles</c> is called for it.
        /// </remarks>
        /// <param name="repository">Job repository used to take the job and to transition its state.</param>
        /// <param name="jobsApi">Jobs API used to start a job that is in the starting state.</param>
        /// <param name="cancellationToken">Token passed to the repository calls.</param>
        private async Task ProcessNextJob(IJobRepository repository, IJobs jobsApi, CancellationToken cancellationToken)
        {
            InferenceJob job = null;
            // Assume failure; flipped to Success only after the step below completes.
            InferenceJobStatus status = InferenceJobStatus.Fail;

            try
            {
                _logger.Log(LogLevel.Debug, "Waiting for new job...");
                job = await repository.Take(cancellationToken);

                // Attach the job identifiers to every log entry written while this job is processed.
                using (_logger.BeginScope(new LogginDataDictionary<string, object> {
                    { "JobId", job.JobId }, { "PayloadId", job.PayloadId }
                }))
                {
                    switch (job.State)
                    {
                        case InferenceJobState.Creating:
                            await CreateJob(job);
                            break;

                        case InferenceJobState.MetadataUploading:
                            await UploadMetadata(job);
                            break;

                        case InferenceJobState.PayloadUploading:
                            await UploadFiles(job, job.JobPayloadsStoragePath);
                            break;

                        case InferenceJobState.Starting:
                            await jobsApi.Start(job);
                            break;

                        default:
                            throw new InvalidOperationException($"Unsupported job state {job.State}.");
                    }

                    status = InferenceJobStatus.Success;
                }
            }
            catch (OperationCanceledException ex)
            {
                // The template has a placeholder, so the value must be supplied;
                // otherwise the literal "{0}" ends up in the log output.
                _logger.Log(LogLevel.Warning, ex, "Job Store Service canceled: {0}", ex.Message);
            }
            catch (InvalidOperationException ex)
            {
                _logger.Log(LogLevel.Warning, ex, "Job Store Service may be disposed or Jobs API returned an error: {0}", ex.Message);
            }
            catch (PayloadUploadException ex)
            {
                _logger.Log(LogLevel.Error, ex, ex.Message);
            }
            catch (Exception ex)
            {
                _logger.Log(LogLevel.Error, ex, "Error communicating with Clara Platform.");
            }
            finally
            {
                // Only a job that was actually taken can be transitioned.
                if (job != null)
                {
                    try
                    {
                        // NOTE(review): the caller's token is reused here; if it is already canceled
                        // the transition may be canceled as well - confirm this is intended.
                        var updatedJob = await repository.TransitionState(job, status, cancellationToken);

                        if (updatedJob.State == InferenceJobState.Completed ||
                            updatedJob.State == InferenceJobState.Faulted)
                        {
                            CleanupJobFiles(updatedJob);
                        }
                    }
                    catch (Exception ex)
                    {
                        // Failures in the transition or cleanup are logged and never propagate to the caller.
                        _logger.Log(LogLevel.Error, ex, "Error while transitioning job state.");
                    }
                }
            }
        }