Esempio n. 1
0
        /// <summary>
        /// Verifies that when the job store's <c>TransitionState</c> throws, the
        /// <see cref="InferenceJobStatus.Fail"/> transition is attempted exactly once and
        /// "Error while transitioning job state." is logged once at error level.
        /// </summary>
        public async Task ShallLogErrorOnJobTransitionError()
        {
            // Arrange: one job in the Created state with its storage path configured.
            var job = new InferenceJob();
            job.JobId = "1";
            job.PayloadId = "1";
            job.State = InferenceJobState.Created;
            job.Source = "Source";
            job.SetStoragePath("/job");

            // The first Take() hands out the job; the second cancels the shared token and throws.
            _jobStore
                .SetupSequence(store => store.Take(It.IsAny<CancellationToken>()))
                .Returns(Task.FromResult(job))
                .Returns(() =>
                {
                    _cancellationTokenSource.Cancel();
                    throw new OperationCanceledException();
                });

            // Every state transition attempt fails.
            _jobStore
                .Setup(store => store.TransitionState(
                    It.IsAny<InferenceJob>(),
                    It.IsAny<InferenceJobStatus>(),
                    It.IsAny<CancellationToken>()))
                .Throws(new Exception("error"));

            var submitter = new JobSubmissionService(
                _instanceCleanupQueue.Object,
                _logger.Object,
                _serviceScopeFactory.Object,
                _fileSystem.Object,
                _configuration);

            // Act
            await submitter.StartAsync(_cancellationTokenSource.Token);
            BlockUntilCanceled(_cancellationTokenSource.Token);

            // Assert
            _jobStore.Verify(
                store => store.TransitionState(job, InferenceJobStatus.Fail, It.IsAny<CancellationToken>()),
                Times.Once());
            _logger.VerifyLogging("Error while transitioning job state.", LogLevel.Error, Times.Once());
        }
Esempio n. 2
0
        /// <summary>
        /// Verifies that a failed transition of a job with a try count of 1 moves the job from
        /// <paramref name="initalState"/> to <paramref name="endingState"/>, raises the try count to 2,
        /// logs the retry message and saves the change exactly once.
        /// </summary>
        /// <param name="initalState">State the job is in before the failed transition.</param>
        /// <param name="endingState">State the job is expected to be put back into for retry.</param>
        public async Task TransitionState_Fail_ShallTransitionJob(InferenceJobState initalState, InferenceJobState endingState)
        {
            var job = new InferenceJob();

            job.JobId     = Guid.NewGuid().ToString();
            job.PayloadId = Guid.NewGuid().ToString();
            job.SetStoragePath("/path/to/job");
            job.State    = initalState;
            job.TryCount = 1;

            using var cancellationSource = new CancellationTokenSource();

            _inferenceJobRepository.SetupSequence(p => p.AsQueryable())
            .Returns((new List<InferenceJob>()
            {
                job
            }).AsQueryable());
            _inferenceJobRepository.Setup(p => p.SaveChangesAsync(It.IsAny<CancellationToken>()));
            var jobStore = new ClaraJobRepository(
                _logger.Object,
                _configuration,
                _fileSystem,
                _inferenceJobRepository.Object);

            var result = await jobStore.TransitionState(job, InferenceJobStatus.Fail, cancellationSource.Token);

            Assert.Equal(job, result);
            // Check the state produced by the transition; comparing endingState with itself
            // (as the test previously did) can never fail.
            Assert.Equal(endingState, result.State);
            Assert.Equal(2, result.TryCount);
            _logger.VerifyLoggingMessageBeginsWith($"Putting inference job {job.JobId} back to {endingState} state for retry.", LogLevel.Information, Times.Once());
            _inferenceJobRepository.Verify(p => p.SaveChangesAsync(cancellationSource.Token), Times.Once());
        }
        /// <summary>
        /// Saves pending repository changes for an inference job, retrying up to three times with an
        /// exponentially growing delay (2^attempt seconds) on any exception. Once saved, a job in the
        /// Completed or Faulted state is detached from the repository.
        /// </summary>
        /// <param name="job">Job being saved; must not be null.</param>
        /// <param name="cancellationToken">Token passed to the retry policy and to the save operation.</param>
        private async Task UpdateInferenceJob(InferenceJob job, CancellationToken cancellationToken = default)
        {
            Guard.Against.Null(job, nameof(job));

            var retryPolicy = Policy
                .Handle<Exception>()
                .WaitAndRetryAsync(
                    3,
                    attempt => TimeSpan.FromSeconds(Math.Pow(2, attempt)),
                    (error, delay, attempt, context) =>
                    {
                        _logger.Log(LogLevel.Warning, error, $"Failed to update job. Waiting {delay} before next retry. Retry attempt {attempt}.");
                    });

            await retryPolicy
                .ExecuteAsync(async (token) =>
                {
                    _logger.Log(LogLevel.Debug, "Updating inference job.");
                    await _inferenceJobRepository.SaveChangesAsync(token);

                    // Jobs in a final state are detached from the repository after saving.
                    switch (job.State)
                    {
                        case InferenceJobState.Completed:
                        case InferenceJobState.Faulted:
                            _inferenceJobRepository.Detach(job);
                            break;
                    }

                    _logger.Log(LogLevel.Debug, "Inference job updated.");
                }, cancellationToken)
                .ConfigureAwait(false);
        }
        /// <summary>
        /// Verifies that updating a job with <see cref="InferenceJobStatus.Success"/> deletes its
        /// custom resource exactly once and logs both removal messages.
        /// </summary>
        public async Task UpdateSuccess_ShallDeleteJobCrd()
        {
            // Arrange: the Kubernetes client answers delete requests with a default HTTP response.
            var deleteResponse = new HttpOperationResponse<object>
            {
                Response = new HttpResponseMessage()
            };
            _kubernetesClient
                .Setup(client => client.DeleteNamespacedCustomObjectWithHttpMessagesAsync(
                    It.IsAny<CustomResourceDefinition>(),
                    It.IsAny<string>()))
                .Returns(Task.FromResult(deleteResponse));

            var claraJob = new Job
            {
                JobId = Guid.NewGuid().ToString(),
                PayloadId = Guid.NewGuid().ToString()
            };
            var inferenceJob = new InferenceJob("/path/to/job", claraJob);

            var store = new JobStore(
                _loggerFactory.Object,
                _configuration,
                _kubernetesClient.Object,
                _fileSystem);

            // Act
            await store.Update(inferenceJob, InferenceJobStatus.Success);

            // Assert
            _logger.VerifyLogging($"Removing job {inferenceJob.JobId} from job store as completed.", LogLevel.Information, Times.Once());
            _logger.VerifyLogging($"Job {inferenceJob.JobId} removed from job store.", LogLevel.Information, Times.Once());
            _kubernetesClient.Verify(
                client => client.DeleteNamespacedCustomObjectWithHttpMessagesAsync(It.IsAny<CustomResourceDefinition>(), inferenceJob.JobId),
                Times.Once());
        }
        /// <summary>
        /// Verifies that when processing a taken job fails, the error
        /// "Error uploading payloads/starting job." is logged once and the job is updated with
        /// <see cref="InferenceJobStatus.Fail"/> exactly once.
        /// </summary>
        public async Task ShallFailJobOnException()
        {
            // Arrange
            var claraJob = new Job { JobId = "1", PayloadId = "1" };
            var inferenceJob = new InferenceJob("/job", claraJob);

            // The first Take() hands out the job; the second cancels the shared token and throws.
            _jobStore
                .SetupSequence(store => store.Take(It.IsAny<CancellationToken>()))
                .Returns(Task.FromResult(inferenceJob))
                .Returns(() =>
                {
                    _cancellationTokenSource.Cancel();
                    throw new OperationCanceledException();
                });
            _jobStore.Setup(store => store.Update(It.IsAny<InferenceJob>(), It.IsAny<InferenceJobStatus>()));

            var submitter = new JobSubmissionService(
                _instanceCleanupQueue.Object,
                _logger.Object,
                _jobsApi.Object,
                _payloadsApi.Object,
                _jobStore.Object,
                _fileSystem.Object);

            // Act
            await submitter.StartAsync(_cancellationTokenSource.Token);
            BlockUntilCanceled(_cancellationTokenSource.Token);

            // Assert
            _logger.VerifyLogging("Error uploading payloads/starting job.", LogLevel.Error, Times.Once());
            _jobStore.Verify(store => store.Update(inferenceJob, InferenceJobStatus.Fail), Times.Once());
        }
        /// <summary>
        /// Verifies that updating a job (try count 2) with <see cref="InferenceJobStatus.Fail"/>
        /// patches its custom resource once, never deletes it, and logs the retry messages.
        /// </summary>
        public async Task UpdateFail_ShallUpdateCountAndUpdateCrd()
        {
            // Arrange: both delete and patch calls answer with a default HTTP response.
            var deleteResponse = new HttpOperationResponse<object> { Response = new HttpResponseMessage() };
            _kubernetesClient
                .Setup(client => client.DeleteNamespacedCustomObjectWithHttpMessagesAsync(
                    It.IsAny<CustomResourceDefinition>(),
                    It.IsAny<string>()))
                .Returns(Task.FromResult(deleteResponse));

            var patchResponse = new HttpOperationResponse<object> { Response = new HttpResponseMessage() };
            _kubernetesClient
                .Setup(client => client.PatchNamespacedCustomObjectWithHttpMessagesAsync(
                    It.IsAny<CustomResourceDefinition>(),
                    It.IsAny<object>(),
                    It.IsAny<string>()))
                .Returns(Task.FromResult(patchResponse));

            var claraJob = new Job
            {
                JobId = Guid.NewGuid().ToString(),
                PayloadId = Guid.NewGuid().ToString()
            };
            var inferenceJob = new InferenceJob("/path/to/job", claraJob);
            inferenceJob.TryCount = 2;

            var store = new JobStore(
                _loggerFactory.Object,
                _configuration,
                _kubernetesClient.Object,
                _fileSystem);

            // Act
            await store.Update(inferenceJob, InferenceJobStatus.Fail);

            // Assert
            _logger.VerifyLogging($"Adding job {inferenceJob.JobId} back to job store for retry.", LogLevel.Debug, Times.Once());
            _logger.VerifyLogging($"Job {inferenceJob.JobId} added back to job store for retry.", LogLevel.Information, Times.Once());
            _kubernetesClient.Verify(
                client => client.DeleteNamespacedCustomObjectWithHttpMessagesAsync(It.IsAny<CustomResourceDefinition>(), inferenceJob.JobId),
                Times.Never());
            _kubernetesClient.Verify(
                client => client.PatchNamespacedCustomObjectWithHttpMessagesAsync(It.IsAny<CustomResourceDefinition>(), It.IsAny<object>(), inferenceJob.JobId),
                Times.Once());
        }
Esempio n. 7
0
        /// <summary>
        /// Updates a job in the job store according to the outcome of its submission.
        /// A successful job is deleted. Otherwise the try count is incremented: past
        /// <c>MaxRetryLimit</c> the job is deleted, else it is set back to Queued and saved for retry.
        /// </summary>
        /// <param name="request">Job to update.</param>
        /// <param name="status">Outcome of the job submission.</param>
        public async Task Update(InferenceJob request, InferenceJobStatus status)
        {
            if (status == InferenceJobStatus.Success)
            {
                _logger.Log(LogLevel.Information, $"Removing job {request.JobId} from job store as completed.");
                await Delete(request);
                return;
            }

            // Any non-success outcome counts as one more attempt.
            var attempts = ++request.TryCount;
            if (attempts > MaxRetryLimit)
            {
                _logger.Log(LogLevel.Information, $"Exceeded maximum job submission retries; removing job {request.JobId} from job store.");
                await Delete(request);
                return;
            }

            _logger.Log(LogLevel.Debug, $"Adding job {request.JobId} back to job store for retry.");
            request.State = InferenceJobState.Queued;
            _logger.Log(LogLevel.Debug, $"Updating request {request.JobId} to Queued.");
            await UpdateInferenceJob(request);

            _logger.Log(LogLevel.Information, $"Job {request.JobId} added back to job store for retry.");
        }
Esempio n. 8
0
        /// <summary>
        /// Verifies that a failed transition of a job with a try count of 3 puts the job in the
        /// Faulted state with a try count of 4, logs the "exceeded maximum number of retries" warning
        /// and saves the change exactly once.
        /// </summary>
        public async Task TransitionState_Fail_ShallPutJobInFaultedState()
        {
            // Arrange
            var job = new InferenceJob
            {
                JobId = Guid.NewGuid().ToString(),
                PayloadId = Guid.NewGuid().ToString()
            };
            job.SetStoragePath("/path/to/job");
            job.State = InferenceJobState.Creating;
            job.TryCount = 3;

            var tokenSource = new CancellationTokenSource();

            var storedJobs = new List<InferenceJob> { job };
            _inferenceJobRepository
                .SetupSequence(repo => repo.AsQueryable())
                .Returns(storedJobs.AsQueryable());
            _inferenceJobRepository.Setup(repo => repo.SaveChangesAsync(It.IsAny<CancellationToken>()));

            var repository = new ClaraJobRepository(
                _logger.Object,
                _configuration,
                _fileSystem,
                _inferenceJobRepository.Object);

            // Act
            var transitioned = await repository.TransitionState(job, InferenceJobStatus.Fail, tokenSource.Token);

            // Assert
            Assert.Equal(job, transitioned);
            Assert.Equal(InferenceJobState.Faulted, transitioned.State);
            Assert.Equal(4, transitioned.TryCount);
            _logger.VerifyLoggingMessageBeginsWith($"Job {job.JobId} exceeded maximum number of retries.", LogLevel.Warning, Times.Once());
            _inferenceJobRepository.Verify(repo => repo.SaveChangesAsync(tokenSource.Token), Times.Once());
        }
Esempio n. 9
0
        /// <summary>
        /// Verifies that a job taken in the Starting state is started through the jobs API once and
        /// then transitioned with <see cref="InferenceJobStatus.Success"/> exactly once.
        /// </summary>
        public async Task StartsJobAndTransitionsState()
        {
            // Arrange
            var job = new InferenceJob();
            job.JobId = "1";
            job.PayloadId = "1";
            job.State = InferenceJobState.Starting;
            job.Source = "Source";
            job.SetStoragePath("/job");

            // The first Take() hands out the job; the second cancels the shared token and throws.
            _jobStore
                .SetupSequence(store => store.Take(It.IsAny<CancellationToken>()))
                .Returns(Task.FromResult(job))
                .Returns(() =>
                {
                    _cancellationTokenSource.Cancel();
                    throw new OperationCanceledException();
                });
            _jobStore.Setup(store => store.TransitionState(
                It.IsAny<InferenceJob>(),
                It.IsAny<InferenceJobStatus>(),
                It.IsAny<CancellationToken>()));
            _jobsApi.Setup(api => api.Start(It.IsAny<Job>()));

            var submitter = new JobSubmissionService(
                _instanceCleanupQueue.Object,
                _logger.Object,
                _serviceScopeFactory.Object,
                _fileSystem.Object,
                _configuration);

            // Act
            await submitter.StartAsync(_cancellationTokenSource.Token);
            BlockUntilCanceled(_cancellationTokenSource.Token);

            // Assert
            _jobStore.Verify(
                store => store.TransitionState(job, InferenceJobStatus.Success, It.IsAny<CancellationToken>()),
                Times.Once());
            _jobsApi.Verify(api => api.Start(It.IsAny<Job>()), Times.Once());
        }
Esempio n. 10
0
        /// <summary>
        /// Verifies that <c>Take</c> returns the stored job, moves it from
        /// <paramref name="initalState"/> to <paramref name="endingState"/> and logs the state change.
        /// </summary>
        /// <param name="initalState">State of the stored job before it is taken.</param>
        /// <param name="endingState">State the job is expected to have after it is taken.</param>
        public async Task Take_ShallReturnAJob(InferenceJobState initalState, InferenceJobState endingState)
        {
            // Arrange
            var job = new InferenceJob
            {
                JobId = Guid.NewGuid().ToString(),
                PayloadId = Guid.NewGuid().ToString()
            };
            job.SetStoragePath("/path/to/job");
            job.State = initalState;

            var tokenSource = new CancellationTokenSource();

            var storedJobs = new List<InferenceJob> { job };
            _inferenceJobRepository
                .SetupSequence(repo => repo.AsQueryable())
                .Returns(storedJobs.AsQueryable());

            var repository = new ClaraJobRepository(
                _logger.Object,
                _configuration,
                _fileSystem,
                _inferenceJobRepository.Object);

            // Act
            var taken = await repository.Take(tokenSource.Token);

            // Assert
            Assert.Equal(job, taken);
            Assert.Equal(endingState, job.State);
            _logger.VerifyLoggingMessageBeginsWith($"Updating inference job {job.JobId} from {initalState} to {endingState}.", LogLevel.Information, Times.Once());
        }
        /// <summary>
        /// Copies the instances and then the resources of a request, after logging how many instances
        /// are copied and the job's payload storage path.
        /// </summary>
        /// <param name="request">Job whose payload is copied; must not be null.</param>
        private void MakeACopyOfPayload(InferenceJob request)
        {
            Guard.Against.Null(request, nameof(request));

            var instanceCount = request.Instances.Count;
            var destination = request.JobPayloadsStoragePath;
            _logger.Log(LogLevel.Information, $"Copying {instanceCount} instances to {destination}.");

            CopyInstances(request);
            CopyResources(request);
        }
Esempio n. 12
0
        /// <summary>
        /// Uploads every file found under the job's payload storage directory to the job's payload.
        /// Uploads run through a dataflow block with the configured degree of parallelism; each file
        /// that uploads successfully is queued for cleanup straight away.
        /// </summary>
        /// <param name="job">Job whose payload files are uploaded; must not be null.</param>
        /// <param name="basePath">
        /// Path prefix removed from each file path to form the uploaded name; may be empty but not null.
        /// </param>
        /// <exception cref="PayloadUploadException">Thrown when one or more files failed to upload.</exception>
        private async Task UploadFiles(InferenceJob job, string basePath)
        {
            Guard.Against.Null(job, nameof(job));
            Guard.Against.Null(basePath, nameof(basePath)); // allow empty

            var pendingFiles = _fileSystem.Directory.GetFiles(job.JobPayloadsStoragePath, "*", System.IO.SearchOption.AllDirectories);

            // The prefix stripped from each file path always ends with a directory separator.
            var separator = _fileSystem.Path.DirectorySeparatorChar;
            var uploadRoot = basePath.EndsWith(separator) ? basePath : basePath + separator;

            using var loggingScope = _logger.BeginScope(new LogginDataDictionary<string, object>
            {
                { "BasePath", uploadRoot },
                { "JobId", job.JobId },
                { "PayloadId", job.PayloadId }
            });

            _logger.Log(LogLevel.Information, "Uploading {0} files.", pendingFiles.LongLength);

            // Incremented from parallel upload workers, hence the interlocked update below.
            var failedUploads = 0;

            var uploader = new ActionBlock<string>(
                async path =>
                {
                    try
                    {
                        using var scope = _serviceScopeFactory.CreateScope();
                        var payloads = scope.ServiceProvider.GetRequiredService<IPayloads>();
                        var relativeName = path.Replace(uploadRoot, "");
                        await payloads.Upload(job.PayloadId, relativeName, path);

                        // remove file immediately upon success upload to avoid another upload on next retry
                        _cleanupQueue.QueueInstance(path);
                    }
                    catch (Exception ex)
                    {
                        _logger.Log(LogLevel.Error, ex, $"Error uploading file: {path}.");
                        Interlocked.Increment(ref failedUploads);
                    }
                },
                new ExecutionDataflowBlockOptions
                {
                    MaxDegreeOfParallelism = _configuration.Value.Services.Platform.ParallelUploads
                });

            foreach (var path in pendingFiles)
            {
                uploader.Post(path);
            }

            // Signal that no more files will be posted, then wait for all uploads to finish.
            uploader.Complete();
            await uploader.Completion;

            if (failedUploads > 0)
            {
                throw new PayloadUploadException($"Failed to upload {failedUploads} files.");
            }

            _logger.Log(LogLevel.Information, "Upload to payload completed.");
        }
Esempio n. 13
0
 /// <summary>
 /// Wraps an inference job in a <see cref="JobCustomResource"/> named after the job ID, using the
 /// kind and API version of the jobs CRD and the default CRD status.
 /// </summary>
 /// <param name="request">Job that becomes the spec of the custom resource.</param>
 /// <returns>The newly created custom resource.</returns>
 private JobCustomResource CreateFromRequest(InferenceJob request)
 {
     var resource = new JobCustomResource();
     resource.Kind = CustomResourceDefinition.JobsCrd.Kind;
     resource.ApiVersion = CustomResourceDefinition.JobsCrd.ApiVersion;
     resource.Metadata = new k8s.Models.V1ObjectMeta { Name = request.JobId };
     resource.Spec = request;
     resource.Status = InferenceJobCrdStatus.Default;
     return resource;
 }
Esempio n. 14
0
        /// <summary>
        /// Creates a job through the jobs API using the inference job's pipeline ID, name, priority and
        /// source-name metadata, then copies the returned job and payload IDs back onto the inference job.
        /// </summary>
        /// <param name="job">Inference job to create; must not be null.</param>
        private async Task CreateJob(InferenceJob job)
        {
            Guard.Against.Null(job, nameof(job));

            var sourceMetadata = new JobMetadataBuilder();
            sourceMetadata.AddSourceName(job.Source);

            // The jobs API is resolved from a scope that lives for the duration of this call.
            using var scope = _serviceScopeFactory.CreateScope();
            var jobs = scope.ServiceProvider.GetRequiredService<IJobs>();

            var created = await jobs.Create(job.PipelineId, job.JobName, job.Priority, sourceMetadata);
            job.JobId = created.JobId;
            job.PayloadId = created.PayloadId;

            _logger.Log(LogLevel.Information, $"New JobId={job.JobId}, PayloadId={job.PayloadId}.");
        }
        /// <summary>
        /// Generates a directory for the job under "&lt;temporary data dir&gt;/jobs/&lt;job ID&gt;" and sets it
        /// as the job's storage path.
        /// </summary>
        /// <param name="job">Job to configure; must not be null.</param>
        /// <exception cref="JobStoreException">Thrown when the directory could not be generated.</exception>
        private void ConfigureStoragePath(InferenceJob job)
        {
            Guard.Against.Null(job, nameof(job));

            var requestedPath = _fileSystem.Path.Combine(
                _configuration.Value.Storage.TemporaryDataDirFullPath,
                "jobs",
                $"{job.JobId}");

            if (!_fileSystem.Directory.TryGenerateDirectory(requestedPath, out string generatedPath))
            {
                throw new JobStoreException("Failed to generate a temporary storage location");
            }

            _logger.Log(LogLevel.Information, $"Job payloads directory set to {generatedPath}");
            job.SetStoragePath(generatedPath);
        }
        /// <summary>
        /// Moves a job to its next state based on the outcome of the step it was executing, then saves it.
        /// On success the job advances (Creating→Created, MetadataUploading→MetadataUploaded,
        /// PayloadUploading→PayloadUploaded, Starting→Completed) and its try count is reset.
        /// On failure the try count is incremented; past the configured maximum the job becomes Faulted,
        /// otherwise it is put back to the state preceding the failed step so it can be retried.
        /// </summary>
        /// <param name="job">Job to transition; must not be null.</param>
        /// <param name="status">Outcome of the step the job was executing.</param>
        /// <param name="cancellationToken">Token passed to the save operation.</param>
        /// <returns>The same <paramref name="job"/> instance, with its state updated.</returns>
        /// <exception cref="ApplicationException">Thrown when the job is in a state that has no mapping.</exception>
        public async Task<InferenceJob> TransitionState(InferenceJob job, InferenceJobStatus status, CancellationToken cancellationToken = default)
        {
            Guard.Against.Null(job, nameof(job));

            if (status != InferenceJobStatus.Success)
            {
                var attempts = ++job.TryCount;
                if (attempts > _configuration.Value.Services.Platform.MaxRetries)
                {
                    _logger.Log(LogLevel.Warning, $"Job {job.JobId} exceeded maximum number of retries.");
                    job.State = InferenceJobState.Faulted;
                }
                else
                {
                    // Step back to the state that precedes the failed step.
                    job.State = job.State switch
                    {
                        InferenceJobState.Creating => InferenceJobState.Queued,
                        InferenceJobState.MetadataUploading => InferenceJobState.Created,
                        InferenceJobState.PayloadUploading => InferenceJobState.MetadataUploaded,
                        InferenceJobState.Starting => InferenceJobState.PayloadUploaded,
                        _ => throw new ApplicationException($"unsupported job state {job.State}")
                    };
                    _logger.Log(LogLevel.Information, $"Putting inference job {job.JobId} back to {job.State} state for retry.");
                }

                job.LastUpdate = DateTime.UtcNow;
                await UpdateInferenceJob(job, cancellationToken);
                return job;
            }

            // Success: advance to the state that follows the completed step.
            var previousState = job.State;
            job.State = previousState switch
            {
                InferenceJobState.Creating => InferenceJobState.Created,
                InferenceJobState.MetadataUploading => InferenceJobState.MetadataUploaded,
                InferenceJobState.PayloadUploading => InferenceJobState.PayloadUploaded,
                InferenceJobState.Starting => InferenceJobState.Completed,
                _ => throw new ApplicationException($"unsupported job state {job.State}")
            };
            job.TryCount = 0;
            job.LastUpdate = DateTime.MinValue;

            _logger.Log(LogLevel.Information, $"Updating inference job state {job.JobId} from {previousState} to {job.State}.");
            await UpdateInferenceJob(job, cancellationToken);

            return job;
        }
Esempio n. 17
0
        /// <summary>
        /// Deletes the job's custom resource through the Kubernetes client, retrying up to three times
        /// with an exponentially growing delay (2^attempt seconds) on <c>HttpOperationException</c>,
        /// and then requires a success status code on the response.
        /// </summary>
        /// <param name="request">Job whose custom resource is deleted.</param>
        private async Task Delete(InferenceJob request)
        {
            var retryPolicy = Policy
                .Handle<HttpOperationException>()
                .WaitAndRetryAsync(
                    3,
                    attempt => TimeSpan.FromSeconds(Math.Pow(2, attempt)),
                    (error, delay, attempt, context) =>
                    {
                        _logger.Log(LogLevel.Warning, error, $"Failed to delete job {request.JobId} in CRD. Waiting {delay} before next retry. Retry attempt {attempt}. {(error as HttpOperationException)?.Response?.Content}");
                    });

            var deleteResult = await retryPolicy
                .ExecuteAsync(async () => await _kubernetesClient.DeleteNamespacedCustomObjectWithHttpMessagesAsync(CustomResourceDefinition.JobsCrd, request.JobId))
                .ConfigureAwait(false);

            // Throws when the delete call returned a non-success status code.
            deleteResult.Response.EnsureSuccessStatusCode();
            _logger.Log(LogLevel.Information, $"Job {request.JobId} removed from job store.");
        }
Esempio n. 18
0
        /// <summary>
        /// Queues every file under the job's payload storage directory on the cleanup queue.
        /// Does nothing when the directory does not exist.
        /// </summary>
        /// <param name="job">Job whose payload files are queued for deletion; must not be null.</param>
        private void CleanupJobFiles(InferenceJob job)
        {
            Guard.Against.Null(job, nameof(job));

            var payloadDirectory = job.JobPayloadsStoragePath;
            if (!_fileSystem.Directory.Exists(payloadDirectory))
            {
                return;
            }

            using var loggingScope = _logger.BeginScope(new LogginDataDictionary<string, object>
            {
                { "JobId", job.JobId },
                { "PayloadId", job.PayloadId }
            });

            var staleFiles = _fileSystem.Directory.GetFiles(job.JobPayloadsStoragePath, "*", System.IO.SearchOption.AllDirectories);
            var staleFileCount = staleFiles.LongLength;

            _logger.Log(LogLevel.Debug, $"Notifying Disk Reclaimer to delete {staleFileCount} files.");
            foreach (var path in staleFiles)
            {
                _cleanupQueue.QueueInstance(path);
            }
            _logger.Log(LogLevel.Information, $"Notified Disk Reclaimer to delete {staleFileCount} files.");
        }
Esempio n. 19
0
        /// <summary>
        /// Verifies that adding a job calls <c>AddAsync</c> and <c>SaveChangesAsync</c> on the
        /// underlying repository exactly once each.
        /// </summary>
        public async Task Add_ShallAddItem()
        {
            // Arrange
            var job = new InferenceJob
            {
                JobId = Guid.NewGuid().ToString(),
                PayloadId = Guid.NewGuid().ToString()
            };
            job.Instances.Add(InstanceGenerator.GenerateInstance("./aet", "aet", fileSystem: _fileSystem));

            var repository = new ClaraJobRepository(
                _logger.Object,
                _configuration,
                _fileSystem,
                _inferenceJobRepository.Object);

            // Act
            await repository.Add(job);

            // Assert
            _inferenceJobRepository.Verify(
                repo => repo.AddAsync(It.IsAny<InferenceJob>(), It.IsAny<CancellationToken>()),
                Times.Once());
            _inferenceJobRepository.Verify(
                repo => repo.SaveChangesAsync(It.IsAny<CancellationToken>()),
                Times.Once());
        }
        /// <summary>
        /// Main loop of the job submitter: until cancellation is requested, takes a job from the job
        /// store, uploads its payload files, starts it through the jobs API, marks it successful and
        /// removes the uploaded files.
        /// </summary>
        /// <remarks>
        /// Cancellation and <see cref="InvalidOperationException"/> are logged as warnings without
        /// touching the job. Any other exception is logged as an error and, when a job was taken, the
        /// job is updated with <see cref="InferenceJobStatus.Fail"/>. A failure of that update is
        /// logged as well instead of being allowed to end the processing loop.
        /// </remarks>
        /// <param name="cancellationToken">Token that stops the loop once cancellation is requested.</param>
        private async Task BackgroundProcessing(CancellationToken cancellationToken)
        {
            _logger.Log(LogLevel.Information, "Job Submitter Hosted Service is running.");
            while (!cancellationToken.IsCancellationRequested)
            {
                InferenceJob job = null;
                try
                {
                    job = await _jobStore.Take(cancellationToken);

                    using (_logger.BeginScope(new Dictionary<string, object> {
                        { "JobId", job.JobId }, { "PayloadId", job.PayloadId }
                    }))
                    {
                        var files = _fileSystem.Directory.GetFiles(job.JobPayloadsStoragePath, "*", System.IO.SearchOption.AllDirectories);
                        await UploadFiles(job, job.JobPayloadsStoragePath, files);

                        await _jobsApi.Start(job);

                        await _jobStore.Update(job, InferenceJobStatus.Success);

                        // Files are removed only after the job store recorded the success.
                        RemoveFiles(files);
                    }
                }
                catch (OperationCanceledException ex)
                {
                    _logger.Log(LogLevel.Warning, ex, "Job Store Service canceled: {0}");
                }
                catch (InvalidOperationException ex)
                {
                    _logger.Log(LogLevel.Warning, ex, "Job Store Service may be disposed or Jobs API returned an error: {0}");
                }
                catch (Exception ex)
                {
                    _logger.Log(LogLevel.Error, ex, "Error uploading payloads/starting job.");
                    if (job != null)
                    {
                        try
                        {
                            await _jobStore.Update(job, InferenceJobStatus.Fail);
                        }
                        catch (Exception updateException)
                        {
                            // An exception escaping this handler would leave the loop and stop all
                            // further job processing, so record it and carry on with the next job.
                            _logger.Log(LogLevel.Error, updateException, "Error updating job state after a failed submission.");
                        }
                    }
                }
            }
            _logger.Log(LogLevel.Information, "Cancellation requested.");
        }
        /// <summary>
        /// Copies every resource file of the request into the FHIR storage directory derived from the
        /// job's payload storage path, overwriting existing files.
        /// </summary>
        /// <remarks>
        /// When a copy fails with a disk-full I/O error the same file is retried after a delay that
        /// grows with each retry (1s, 2s, 3s). The retry counter is shared by all files of the
        /// request; once it exceeds three the exception is rethrown. Any other exception is logged
        /// and rethrown immediately.
        /// </remarks>
        /// <param name="request">Job whose resources are copied; must not be null.</param>
        private void CopyResources(InferenceJob request)
        {
            Guard.Against.Null(request, nameof(request));

            var files        = new Stack<string>(request.Resources);
            var retrySleepMs = 1000;
            var retryCount   = 0;

            while (files.Count > 0)
            {
                try
                {
                    var target = _fileSystem.Path.GetFhirStoragePath(request.JobPayloadsStoragePath);
                    _fileSystem.Directory.CreateDirectoryIfNotExists(target);

                    // Peek first and pop only after a successful copy so a failed file is retried.
                    var file     = files.Peek();
                    var filename = _fileSystem.Path.GetFileName(file);
                    var destPath = _fileSystem.Path.Combine(target, filename);
                    _fileSystem.File.Copy(file, destPath, true);
                    _logger.Log(LogLevel.Debug, $"Resource {filename} moved to {destPath}");
                    files.Pop();
                }
                catch (IOException ex) when ((ex.HResult & 0xFFFF) == ERROR_HANDLE_DISK_FULL || (ex.HResult & 0xFFFF) == ERROR_DISK_FULL)
                {
                    if (++retryCount > 3)
                    {
                        _logger.Log(LogLevel.Error, ex, $"Error copying file to {request.JobPayloadsStoragePath}; destination may be out of disk space.  Exceeded maximum retries.");
                        throw;
                    }

                    // Log the delay that is actually applied (it scales with the retry attempt),
                    // rather than the base interval.
                    var delayMs = retryCount * retrySleepMs;
                    _logger.Log(LogLevel.Error, ex, $"Error copying file to {request.JobPayloadsStoragePath}; destination may be out of disk space, will retry in {delayMs}ms.");
                    Thread.Sleep(delayMs);
                }
                catch (Exception ex)
                {
                    _logger.Log(LogLevel.Error, ex, $"Failed to copy file {request.JobPayloadsStoragePath}.");
                    throw;
                }
            }

            _logger.Log(
                files.Count == 0 ? LogLevel.Information : LogLevel.Warning, $"Copied {request.Resources.Count - files.Count:D} files to '{request.JobPayloadsStoragePath}'.");
        }
Esempio n. 22
0
        /// <summary>
        /// Verifies that when the underlying repository's <c>AddAsync</c> always throws, <c>Add</c>
        /// ends up throwing and "Error saving inference job." is logged at error level three times.
        /// </summary>
        public async Task Add_ShallRetryOnFailure()
        {
            // Arrange
            var job = new InferenceJob
            {
                JobId = Guid.NewGuid().ToString(),
                PayloadId = Guid.NewGuid().ToString()
            };
            job.Instances.Add(InstanceGenerator.GenerateInstance("./aet", "aet", fileSystem: _fileSystem));

            var repository = new ClaraJobRepository(
                _logger.Object,
                _configuration,
                _fileSystem,
                _inferenceJobRepository.Object);

            _inferenceJobRepository
                .Setup(repo => repo.AddAsync(It.IsAny<InferenceJob>(), It.IsAny<CancellationToken>()))
                .Throws(new Exception("error"));

            // Act & Assert
            await Assert.ThrowsAsync<Exception>(async () => await repository.Add(job));

            _logger.VerifyLoggingMessageBeginsWith("Error saving inference job.", LogLevel.Error, Times.Exactly(3));
        }
Esempio n. 23
0
        /// <summary>
        /// Builds job metadata from the files under the job's payload storage directory, using the
        /// platform's metadata configuration, and adds it to the job through the jobs API when the
        /// built metadata is not null or empty.
        /// </summary>
        /// <param name="job">Job that receives the metadata; must not be null.</param>
        private async Task UploadMetadata(InferenceJob job)
        {
            Guard.Against.Null(job, nameof(job));

            using var scope = _serviceScopeFactory.CreateScope();
            var payloadFiles = _fileSystem.Directory.GetFiles(job.JobPayloadsStoragePath, "*", System.IO.SearchOption.AllDirectories);

            var platform = _configuration.Value.Services.Platform;
            var builderFactory = scope.ServiceProvider.GetRequiredService<IJobMetadataBuilderFactory>();
            var metadata = builderFactory.Build(platform.UploadMetadata, platform.MetadataDicomSource, payloadFiles);

            // Nothing to send when no metadata was produced.
            if (metadata.IsNullOrEmpty())
            {
                return;
            }

            var jobs = scope.ServiceProvider.GetRequiredService<IJobs>();
            await jobs.AddMetadata(job, metadata);
        }
Esempio n. 24
0
        /// <summary>
        /// Verifies that for a job taken in the MetadataUploading state the payload files are listed
        /// once, the metadata builder factory is invoked once, and the job is transitioned with
        /// <see cref="InferenceJobStatus.Success"/> exactly once.
        /// </summary>
        public async Task UploadsMetadataAndTransitionsState()
        {
            // Arrange
            var job = new InferenceJob();
            job.JobId = "1";
            job.PayloadId = "1";
            job.State = InferenceJobState.MetadataUploading;
            job.Source = "Source";
            job.SetStoragePath("/job");

            // The first Take() hands out the job; the second cancels the shared token and throws.
            _jobStore
                .SetupSequence(store => store.Take(It.IsAny<CancellationToken>()))
                .Returns(Task.FromResult(job))
                .Returns(() =>
                {
                    _cancellationTokenSource.Cancel();
                    throw new OperationCanceledException();
                });
            _jobStore.Setup(store => store.TransitionState(
                It.IsAny<InferenceJob>(),
                It.IsAny<InferenceJobStatus>(),
                It.IsAny<CancellationToken>()));

            _fileSystem
                .Setup(fs => fs.Directory.GetFiles(It.IsAny<string>(), It.IsAny<string>(), It.IsAny<SearchOption>()))
                .Returns(new[] { "/file1", "/file2" });

            var builtMetadata = new JobMetadataBuilder()
            {
                { "Test", "TestValue" }
            };
            _jobMetadataBuilderFactory
                .Setup(factory => factory.Build(
                    It.IsAny<bool>(),
                    It.IsAny<IReadOnlyList<string>>(),
                    It.IsAny<IReadOnlyList<string>>()))
                .Returns(builtMetadata);

            var submitter = new JobSubmissionService(
                _instanceCleanupQueue.Object,
                _logger.Object,
                _serviceScopeFactory.Object,
                _fileSystem.Object,
                _configuration);

            // Act
            await submitter.StartAsync(_cancellationTokenSource.Token);
            BlockUntilCanceled(_cancellationTokenSource.Token);

            // Assert
            _jobStore.Verify(
                store => store.TransitionState(job, InferenceJobStatus.Success, It.IsAny<CancellationToken>()),
                Times.Once());
            _fileSystem.Verify(
                fs => fs.Directory.GetFiles(It.IsAny<string>(), It.IsAny<string>(), It.IsAny<SearchOption>()),
                Times.Once());
            _jobMetadataBuilderFactory.Verify(
                factory => factory.Build(It.IsAny<bool>(), It.IsAny<IReadOnlyList<string>>(), It.IsAny<IReadOnlyList<string>>()),
                Times.Once());
        }
Esempio n. 25
0
        /// <summary>
        /// Verifies that when every payload upload throws, each of the three files is logged as a
        /// failed upload, the aggregate "Failed to upload 3 files." error is logged once, the job is
        /// transitioned with <see cref="InferenceJobStatus.Fail"/> once and no file is queued for cleanup.
        /// </summary>
        public async Task ShallFailJobOnPayloadUploadException()
        {
            // Arrange
            var job = new InferenceJob();
            job.JobId = "1";
            job.PayloadId = "1";
            job.State = InferenceJobState.PayloadUploading;
            job.Source = "Source";
            job.SetStoragePath("/job");

            // The first Take() hands out the job; the second cancels the shared token and throws.
            _jobStore
                .SetupSequence(store => store.Take(It.IsAny<CancellationToken>()))
                .Returns(Task.FromResult(job))
                .Returns(() =>
                {
                    _cancellationTokenSource.Cancel();
                    throw new OperationCanceledException();
                });
            _jobStore.Setup(store => store.TransitionState(
                It.IsAny<InferenceJob>(),
                It.IsAny<InferenceJobStatus>(),
                It.IsAny<CancellationToken>()));

            _fileSystem
                .Setup(fs => fs.Directory.GetFiles(It.IsAny<string>(), It.IsAny<string>(), System.IO.SearchOption.AllDirectories))
                .Returns(new[] { "/file1", "file2", "file3" });

            // Every upload attempt fails.
            _payloadsApi
                .Setup(api => api.Upload(It.IsAny<string>(), It.IsAny<string>(), It.IsAny<string>()))
                .Throws(new Exception("error"));
            _instanceCleanupQueue.Setup(queue => queue.QueueInstance(It.IsAny<string>()));

            var submitter = new JobSubmissionService(
                _instanceCleanupQueue.Object,
                _logger.Object,
                _serviceScopeFactory.Object,
                _fileSystem.Object,
                _configuration);

            // Act
            await submitter.StartAsync(_cancellationTokenSource.Token);
            BlockUntilCanceled(_cancellationTokenSource.Token);

            // Assert
            _logger.VerifyLoggingMessageBeginsWith("Error uploading file:", LogLevel.Error, Times.Exactly(3));
            _logger.VerifyLogging($"Failed to upload {3} files.", LogLevel.Error, Times.Once());

            _jobStore.Verify(
                store => store.TransitionState(job, InferenceJobStatus.Fail, It.IsAny<CancellationToken>()),
                Times.Once());
            _instanceCleanupQueue.Verify(queue => queue.QueueInstance(It.IsAny<string>()), Times.Never());
        }
Esempio n. 26
0
        /// <summary>
        /// Verifies the happy path for a job in the payload-uploading state: the three files are
        /// uploaded, the job is transitioned with <c>Success</c>, no metadata is added and each
        /// file is queued for cleanup.
        /// </summary>
        public async Task UploadsPayloadAndTransitionsState()
        {
            // Arrange: one job that is already in the payload-uploading state.
            var job = new InferenceJob();
            job.JobId = "1";
            job.PayloadId = "1";
            job.State = InferenceJobState.PayloadUploading;
            job.Source = "Source";
            job.SetStoragePath("/job");

            // The job store hands the job out once; the second Take cancels the token and throws,
            // which is what releases BlockUntilCanceled below.
            _jobStore
                .SetupSequence(p => p.Take(It.IsAny<CancellationToken>()))
                .Returns(Task.FromResult(job))
                .Returns(() =>
                {
                    _cancellationTokenSource.Cancel();
                    throw new OperationCanceledException();
                });
            _jobStore.Setup(p => p.TransitionState(It.IsAny<InferenceJob>(), It.IsAny<InferenceJobStatus>(), It.IsAny<CancellationToken>()));

            // Three files are reported for the job; uploads and cleanup queuing are plain no-op setups.
            _fileSystem
                .Setup(p => p.Directory.GetFiles(It.IsAny<string>(), It.IsAny<string>(), System.IO.SearchOption.AllDirectories))
                .Returns(new[] { "/file1", "/file2", "/file3" });
            _payloadsApi.Setup(p => p.Upload(It.IsAny<string>(), It.IsAny<string>(), It.IsAny<string>()));
            _instanceCleanupQueue.Setup(p => p.QueueInstance(It.IsAny<string>()));

            var sut = new JobSubmissionService(
                _instanceCleanupQueue.Object,
                _logger.Object,
                _serviceScopeFactory.Object,
                _fileSystem.Object,
                _configuration);

            // Act
            await sut.StartAsync(_cancellationTokenSource.Token);
            BlockUntilCanceled(_cancellationTokenSource.Token);

            // Assert
            _logger.VerifyLogging("Uploading 3 files.", LogLevel.Information, Times.Once());
            _logger.VerifyLogging("Upload to payload completed.", LogLevel.Information, Times.Once());

            _jobStore.Verify(
                p => p.TransitionState(job, InferenceJobStatus.Success, It.IsAny<CancellationToken>()),
                Times.Once());
            _jobsApi.Verify(
                p => p.AddMetadata(It.IsAny<Job>(), It.IsAny<Dictionary<string, string>>()),
                Times.Never());
            _instanceCleanupQueue.Verify(p => p.QueueInstance(It.IsAny<string>()), Times.Exactly(3));
        }
Esempio n. 27
0
        /// <summary>
        /// Verifies that <c>ClaraJobRepository.Add</c> retries a failing file copy and finally
        /// rethrows the <see cref="IOException"/> when every copy attempt reports a disk-full error.
        /// </summary>
        public async Task Add_ShallRetryCopyThenThrow()
        {
            // Arrange: a file system that delegates to the shared fixture for everything except
            // File.Copy, which always fails with the disk-full error code.
            var fileSystem = new Mock<IFileSystem>();

            fileSystem.Setup(p => p.Directory).Returns(_fileSystem.Directory);
            fileSystem.Setup(p => p.Path).Returns(_fileSystem.Path);
            fileSystem.Setup(p => p.File.Create(It.IsAny<string>()))
                .Returns((string path) => _fileSystem.File.Create(path));
            fileSystem.Setup(p => p.File.Copy(It.IsAny<string>(), It.IsAny<string>(), It.IsAny<bool>()))
                .Throws(new IOException("error", ClaraJobRepository.ERROR_DISK_FULL));

            var job = new InferenceJob();

            job.JobId     = Guid.NewGuid().ToString();
            job.PayloadId = Guid.NewGuid().ToString();
            job.SetStoragePath("/path/to/job");
            job.Instances.Add(InstanceGenerator.GenerateInstance("./aet", "aet", fileSystem: fileSystem.Object));
            _configuration.Value.Storage.Temporary = "./aet";

            // The previously declared CancellationTokenSource local was never used (nor disposed),
            // so it has been removed.

            _inferenceJobRepository.SetupSequence(p => p.AsQueryable())
                .Returns((new List<InferenceJob>()
                {
                    job
                }).AsQueryable());

            var jobStore = new ClaraJobRepository(
                _logger.Object,
                _configuration,
                fileSystem.Object,
                _inferenceJobRepository.Object);

            // Act + Assert: the disk-full IOException must surface to the caller.
            await Assert.ThrowsAsync<IOException>(async () => await jobStore.Add(job));

            // Three "will retry" errors are expected, followed by a single "exceeded maximum retries" error.
            _logger.VerifyLoggingMessageBeginsWith($"Error copying file to {job.JobPayloadsStoragePath}; destination may be out of disk space, will retry in {1000}ms.", LogLevel.Error, Times.Exactly(3));
            _logger.VerifyLoggingMessageBeginsWith($"Error copying file to {job.JobPayloadsStoragePath}; destination may be out of disk space.  Exceeded maximum retries.", LogLevel.Error, Times.Once());
        }
        /// <summary>
        /// Verifies the happy path: the three files are uploaded, the job is started, the job
        /// store is updated with <c>Success</c> and each file is queued for cleanup.
        /// </summary>
        public async Task ShallCompleteRequest()
        {
            // Arrange
            var claraJob = new Job
            {
                JobId = "JID",
                PayloadId = "PID"
            };
            var job = new InferenceJob("/job", claraJob);

            // The job store hands the job out once; the second Take cancels the token and throws,
            // which is what releases BlockUntilCanceled below.
            _jobStore
                .SetupSequence(p => p.Take(It.IsAny<CancellationToken>()))
                .Returns(Task.FromResult(job))
                .Returns(() =>
                {
                    _cancellationTokenSource.Cancel();
                    throw new OperationCanceledException();
                });
            _jobStore.Setup(p => p.Update(It.IsAny<InferenceJob>(), It.IsAny<InferenceJobStatus>()));

            // Three files are reported for the job; upload, start and cleanup are plain no-op setups.
            _fileSystem
                .Setup(p => p.Directory.GetFiles(It.IsAny<string>(), It.IsAny<string>(), System.IO.SearchOption.AllDirectories))
                .Returns(new[] { "/file1", "file2", "file3" });
            _payloadsApi.Setup(p => p.Upload(It.IsAny<string>(), It.IsAny<string>(), It.IsAny<IEnumerable<string>>()));
            _jobsApi.Setup(p => p.Start(It.IsAny<Job>()));
            _instanceCleanupQueue.Setup(p => p.QueueInstance(It.IsAny<string>()));

            var sut = new JobSubmissionService(
                _instanceCleanupQueue.Object,
                _logger.Object,
                _jobsApi.Object,
                _payloadsApi.Object,
                _jobStore.Object,
                _fileSystem.Object);

            // Act
            await sut.StartAsync(_cancellationTokenSource.Token);
            BlockUntilCanceled(_cancellationTokenSource.Token);

            // Assert
            _logger.VerifyLogging("Uploading 3 files.", LogLevel.Information, Times.Once());
            _logger.VerifyLogging("Upload to payload completed.", LogLevel.Information, Times.Once());

            _jobsApi.Verify(p => p.Start(job), Times.Once());
            _jobStore.Verify(p => p.Update(job, InferenceJobStatus.Success), Times.Once());
            _instanceCleanupQueue.Verify(p => p.QueueInstance(It.IsAny<string>()), Times.Exactly(3));
        }
Esempio n. 29
0
        /// <summary>
        /// Copies every instance of <paramref name="request"/> into the job's payload storage
        /// directory as <c>{SopInstanceUid}.dcm</c>, overwriting existing files.
        /// </summary>
        /// <remarks>
        /// Disk-full I/O errors are retried with a growing delay. The retry counter is shared by
        /// all files of the request and is never reset after a successful copy, so at most three
        /// retries are performed per call.
        /// </remarks>
        /// <param name="request">The job whose instances are copied.</param>
        /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
        /// <exception cref="IOException">A disk-full error persisted after the maximum number of retries.</exception>
        private void MakeACopyOfPayload(InferenceJob request)
        {
            if (request == null)
            {
                throw new ArgumentNullException(nameof(request));
            }

            // Win32 error codes carried in the low 16 bits of IOException.HResult.
            const int handleDiskFullCode = 0x27; // ERROR_HANDLE_DISK_FULL
            const int diskFullCode       = 0x70; // ERROR_DISK_FULL
            const int maxRetries         = 3;
            const int retrySleepMs       = 1000;

            _logger.Log(LogLevel.Information, $"Copying {request.Instances.Count} instances to {request.JobPayloadsStoragePath}.");
            var files      = new Stack<InstanceStorageInfo>(request.Instances);
            var retryCount = 0;

            while (files.Count > 0)
            {
                try
                {
                    // Peek first and pop only after a successful copy, so a failed file stays
                    // on the stack and is attempted again on the next iteration.
                    var file     = files.Peek();
                    var destPath = _fileSystem.Path.Combine(request.JobPayloadsStoragePath, $"{file.SopInstanceUid}.dcm");
                    _fileSystem.File.Copy(file.InstanceStorageFullPath, destPath, true);
                    // NOTE(review): the message says "moved" although the file is copied; text left
                    // unchanged in case tests or log consumers match on it.
                    _logger.Log(LogLevel.Debug, $"Instance {file.SopInstanceUid} moved to {destPath}");
                    files.Pop();
                }
                catch (IOException ex) when ((ex.HResult & 0xFFFF) == handleDiskFullCode || (ex.HResult & 0xFFFF) == diskFullCode)
                {
                    if (++retryCount > maxRetries)
                    {
                        _logger.Log(LogLevel.Error, ex, $"Error copying file to {request.JobPayloadsStoragePath}; destination may be out of disk space.  Exceeded maximum retries.");
                        throw;
                    }
                    _logger.Log(LogLevel.Error, ex, $"Error copying file to {request.JobPayloadsStoragePath}; destination may be out of disk space, will retry in {retrySleepMs}ms");
                    // The actual wait grows with each attempt (1x, 2x, 3x the base interval),
                    // while the message above reports only the base interval.
                    Thread.Sleep(retryCount * retrySleepMs);
                }
                catch (Exception ex)
                {
                    _logger.Log(LogLevel.Error, ex, $"Failed to copy file {request.JobPayloadsStoragePath}");
                    throw;
                }
            }

            // The loop only exits normally once the stack is empty, so every instance was copied;
            // the former Warning-level branch for a partial copy could never be reached.
            _logger.Log(LogLevel.Information, $"Copied {request.Instances.Count} files to {request.JobPayloadsStoragePath}.");
        }
        /// <summary>
        /// Copies every instance of <paramref name="request"/> into the DICOM storage path derived
        /// from the job's payload storage directory.
        /// </summary>
        /// <remarks>
        /// Disk-full I/O errors are retried with a growing delay. The retry counter is shared by
        /// all files of the request and is never reset after a successful copy, so at most three
        /// retries are performed per call.
        /// </remarks>
        /// <param name="request">The job whose instances are copied; must not be null.</param>
        /// <exception cref="IOException">A disk-full error persisted after the maximum number of retries.</exception>
        private void CopyInstances(InferenceJob request)
        {
            Guard.Against.Null(request, nameof(request));

            const int maxRetries   = 3;
            const int retrySleepMs = 1000;

            var files      = new Stack<InstanceStorageInfo>(request.Instances);
            var retryCount = 0;

            while (files.Count > 0)
            {
                try
                {
                    // Peek first and pop only after a successful copy, so a failed file stays
                    // on the stack and is attempted again on the next iteration.
                    var file            = files.Peek();
                    var destinationFile = file.CopyTo(_fileSystem.Path.GetDicomStoragePath(request.JobPayloadsStoragePath));
                    _logger.Log(LogLevel.Debug, $"Instance {file.SopInstanceUid} moved to {destinationFile}");
                    files.Pop();
                }
                catch (IOException ex) when ((ex.HResult & 0xFFFF) == ERROR_HANDLE_DISK_FULL || (ex.HResult & 0xFFFF) == ERROR_DISK_FULL)
                {
                    if (++retryCount > maxRetries)
                    {
                        _logger.Log(LogLevel.Error, ex, $"Error copying file to {request.JobPayloadsStoragePath}; destination may be out of disk space.  Exceeded maximum retries.");
                        throw;
                    }
                    _logger.Log(LogLevel.Error, ex, $"Error copying file to {request.JobPayloadsStoragePath}; destination may be out of disk space, will retry in {retrySleepMs}ms.");
                    // The actual wait grows with each attempt (1x, 2x, 3x the base interval),
                    // while the message above reports only the base interval.
                    Thread.Sleep(retryCount * retrySleepMs);
                }
                catch (Exception ex)
                {
                    _logger.Log(LogLevel.Error, ex, $"Failed to copy file {request.JobPayloadsStoragePath}.");
                    throw;
                }
            }

            // The loop only exits normally once the stack is empty, so every instance was copied;
            // the former Warning-level branch for a partial copy could never be reached.
            _logger.Log(LogLevel.Information, $"Copied {request.Instances.Count:D} files to '{request.JobPayloadsStoragePath}'.");
        }