/// <summary>
/// Verifies that when the job store throws while transitioning a job's state,
/// the service attempts exactly one <c>Fail</c> transition for that job and logs
/// "Error while transitioning job state." once at error level.
/// </summary>
public async Task ShallLogErrorOnJobTransitionError()
{
    // Arrange: a single job in the Created state.
    var job = new InferenceJob();
    job.JobId = "1";
    job.PayloadId = "1";
    job.State = InferenceJobState.Created;
    job.Source = "Source";
    job.SetStoragePath("/job");

    // Every state transition attempt blows up.
    _jobStore
        .Setup(p => p.TransitionState(
            It.IsAny<InferenceJob>(),
            It.IsAny<InferenceJobStatus>(),
            It.IsAny<CancellationToken>()))
        .Throws(new Exception("error"));

    // First Take() hands out the job; the second cancels the token and throws,
    // which lets BlockUntilCanceled return.
    _jobStore
        .SetupSequence(p => p.Take(It.IsAny<CancellationToken>()))
        .Returns(Task.FromResult(job))
        .Returns(() =>
        {
            _cancellationTokenSource.Cancel();
            throw new OperationCanceledException();
        });

    var service = new JobSubmissionService(
        _instanceCleanupQueue.Object,
        _logger.Object,
        _serviceScopeFactory.Object,
        _fileSystem.Object,
        _configuration);

    // Act
    await service.StartAsync(_cancellationTokenSource.Token);
    BlockUntilCanceled(_cancellationTokenSource.Token);

    // Assert
    _jobStore.Verify(
        p => p.TransitionState(job, InferenceJobStatus.Fail, It.IsAny<CancellationToken>()),
        Times.Once());
    _logger.VerifyLogging("Error while transitioning job state.", LogLevel.Error, Times.Once());
}
/// <summary>
/// Verifies that failing a job whose <c>TryCount</c> is already 3 returns the same job
/// in the <c>Faulted</c> state with <c>TryCount</c> 4, logs the "exceeded maximum number
/// of retries" warning once, and saves changes once with the caller's token.
/// </summary>
public async Task TransitionState_Fail_ShallPutJobInFaultedState()
{
    // Arrange
    var faultingJob = new InferenceJob
    {
        JobId = Guid.NewGuid().ToString(),
        PayloadId = Guid.NewGuid().ToString()
    };
    faultingJob.SetStoragePath("/path/to/job");
    faultingJob.State = InferenceJobState.Creating;
    faultingJob.TryCount = 3;

    var tokenSource = new CancellationTokenSource();
    var storedJobs = new List<InferenceJob> { faultingJob };

    _inferenceJobRepository
        .SetupSequence(p => p.AsQueryable())
        .Returns(storedJobs.AsQueryable());
    _inferenceJobRepository.Setup(p => p.SaveChangesAsync(It.IsAny<CancellationToken>()));

    var repository = new ClaraJobRepository(
        _logger.Object,
        _configuration,
        _fileSystem,
        _inferenceJobRepository.Object);

    // Act
    var transitioned = await repository.TransitionState(
        faultingJob,
        InferenceJobStatus.Fail,
        tokenSource.Token);

    // Assert
    Assert.Equal(faultingJob, transitioned);
    Assert.Equal(InferenceJobState.Faulted, transitioned.State);
    Assert.Equal(4, transitioned.TryCount);

    _logger.VerifyLoggingMessageBeginsWith(
        $"Job {faultingJob.JobId} exceeded maximum number of retries.",
        LogLevel.Warning,
        Times.Once());
    _inferenceJobRepository.Verify(p => p.SaveChangesAsync(tokenSource.Token), Times.Once());
}
/// <summary>
/// Verifies that a job taken in the <c>Starting</c> state is started through the jobs API
/// exactly once and then transitioned with a <c>Success</c> status exactly once.
/// </summary>
public async Task StartsJobAndTransitionsState()
{
    // Arrange: a single job in the Starting state.
    var job = new InferenceJob();
    job.JobId = "1";
    job.PayloadId = "1";
    job.State = InferenceJobState.Starting;
    job.Source = "Source";
    job.SetStoragePath("/job");

    _jobsApi.Setup(p => p.Start(It.IsAny<Job>()));
    _jobStore.Setup(p => p.TransitionState(
        It.IsAny<InferenceJob>(),
        It.IsAny<InferenceJobStatus>(),
        It.IsAny<CancellationToken>()));

    // First Take() hands out the job; the second cancels the token and throws,
    // which lets BlockUntilCanceled return.
    _jobStore
        .SetupSequence(p => p.Take(It.IsAny<CancellationToken>()))
        .Returns(Task.FromResult(job))
        .Returns(() =>
        {
            _cancellationTokenSource.Cancel();
            throw new OperationCanceledException();
        });

    var service = new JobSubmissionService(
        _instanceCleanupQueue.Object,
        _logger.Object,
        _serviceScopeFactory.Object,
        _fileSystem.Object,
        _configuration);

    // Act
    await service.StartAsync(_cancellationTokenSource.Token);
    BlockUntilCanceled(_cancellationTokenSource.Token);

    // Assert
    _jobStore.Verify(
        p => p.TransitionState(job, InferenceJobStatus.Success, It.IsAny<CancellationToken>()),
        Times.Once());
    _jobsApi.Verify(p => p.Start(It.IsAny<Job>()), Times.Once());
}
/// <summary>
/// Verifies that failing a job with <c>TryCount</c> 1 returns the same job moved to
/// <paramref name="endingState"/> with <c>TryCount</c> 2, logs the "back to ... state for retry"
/// message once, and saves changes once with the caller's token.
/// </summary>
/// <param name="initalState">State the job is in before the failed transition.</param>
/// <param name="endingState">State the job is expected to be in afterwards.</param>
public async Task TransitionState_Fail_ShallTransitionJob(InferenceJobState initalState, InferenceJobState endingState)
{
    // Arrange
    var job = new InferenceJob();
    job.JobId = Guid.NewGuid().ToString();
    job.PayloadId = Guid.NewGuid().ToString();
    job.SetStoragePath("/path/to/job");
    job.State = initalState;
    job.TryCount = 1;

    var cancellationSource = new CancellationTokenSource();
    _inferenceJobRepository.SetupSequence(p => p.AsQueryable())
        .Returns((new List<InferenceJob>() { job }).AsQueryable());
    _inferenceJobRepository.Setup(p => p.SaveChangesAsync(It.IsAny<CancellationToken>()));

    var jobStore = new ClaraJobRepository(
        _logger.Object,
        _configuration,
        _fileSystem,
        _inferenceJobRepository.Object);

    // Act
    var result = await jobStore.TransitionState(job, InferenceJobStatus.Fail, cancellationSource.Token);

    // Assert
    Assert.Equal(job, result);
    // Compare against the returned job's state; comparing endingState with itself
    // would pass no matter what the repository did.
    Assert.Equal(endingState, result.State);
    Assert.Equal(2, result.TryCount);

    _logger.VerifyLoggingMessageBeginsWith(
        $"Putting inference job {job.JobId} back to {endingState} state for retry.",
        LogLevel.Information,
        Times.Once());
    _inferenceJobRepository.Verify(p => p.SaveChangesAsync(cancellationSource.Token), Times.Once());
}
/// <summary>
/// Verifies that <c>Take</c> returns the single job exposed by the repository, that the
/// job ends up in <paramref name="endingState"/>, and that the state update is logged once
/// at information level.
/// </summary>
/// <param name="initalState">State the job is in before it is taken.</param>
/// <param name="endingState">State the job is expected to be in after it is taken.</param>
public async Task Take_ShallReturnAJob(InferenceJobState initalState, InferenceJobState endingState)
{
    // Arrange
    var pendingJob = new InferenceJob
    {
        JobId = Guid.NewGuid().ToString(),
        PayloadId = Guid.NewGuid().ToString()
    };
    pendingJob.SetStoragePath("/path/to/job");
    pendingJob.State = initalState;

    var tokenSource = new CancellationTokenSource();
    var storedJobs = new List<InferenceJob> { pendingJob };

    _inferenceJobRepository
        .SetupSequence(p => p.AsQueryable())
        .Returns(storedJobs.AsQueryable());

    var repository = new ClaraJobRepository(
        _logger.Object,
        _configuration,
        _fileSystem,
        _inferenceJobRepository.Object);

    // Act
    var taken = await repository.Take(tokenSource.Token);

    // Assert
    Assert.Equal(pendingJob, taken);
    Assert.Equal(endingState, pendingJob.State);

    _logger.VerifyLoggingMessageBeginsWith(
        $"Updating inference job {pendingJob.JobId} from {initalState} to {endingState}.",
        LogLevel.Information,
        Times.Once());
}
/// <summary>
/// Asks the file system to generate a directory based on
/// <c>{TemporaryDataDirFullPath}/jobs/{JobId}</c> and stores the resulting path on the job.
/// </summary>
/// <param name="job">Job whose storage path is being configured; must not be null.</param>
/// <exception cref="JobStoreException">Thrown when the directory cannot be generated.</exception>
private void ConfigureStoragePath(InferenceJob job)
{
    Guard.Against.Null(job, nameof(job));

    var requestedPath = _fileSystem.Path.Combine(
        _configuration.Value.Storage.TemporaryDataDirFullPath,
        "jobs",
        $"{job.JobId}");

    string targetStoragePath;
    if (!_fileSystem.Directory.TryGenerateDirectory(requestedPath, out targetStoragePath))
    {
        throw new JobStoreException("Failed to generate a temporary storage location");
    }

    // The generated path is reported back through the out parameter.
    _logger.Log(LogLevel.Information, $"Job payloads directory set to {targetStoragePath}");
    job.SetStoragePath(targetStoragePath);
}
/// <summary>
/// Verifies that a job taken in the <c>PayloadUploading</c> state has its three files
/// uploaded, each instance queued for cleanup, no metadata added, and is then transitioned
/// with a <c>Success</c> status exactly once.
/// </summary>
public async Task UploadsPayloadAndTransitionsState()
{
    // Arrange: a single job in the PayloadUploading state.
    var job = new InferenceJob();
    job.JobId = "1";
    job.PayloadId = "1";
    job.State = InferenceJobState.PayloadUploading;
    job.Source = "Source";
    job.SetStoragePath("/job");

    // The job directory appears to contain three files.
    var payloadFiles = new[] { "/file1", "/file2", "/file3" };
    _fileSystem
        .Setup(p => p.Directory.GetFiles(
            It.IsAny<string>(),
            It.IsAny<string>(),
            System.IO.SearchOption.AllDirectories))
        .Returns(payloadFiles);

    _payloadsApi.Setup(p => p.Upload(It.IsAny<string>(), It.IsAny<string>(), It.IsAny<string>()));
    _instanceCleanupQueue.Setup(p => p.QueueInstance(It.IsAny<string>()));

    _jobStore.Setup(p => p.TransitionState(
        It.IsAny<InferenceJob>(),
        It.IsAny<InferenceJobStatus>(),
        It.IsAny<CancellationToken>()));

    // First Take() hands out the job; the second cancels the token and throws,
    // which lets BlockUntilCanceled return.
    _jobStore
        .SetupSequence(p => p.Take(It.IsAny<CancellationToken>()))
        .Returns(Task.FromResult(job))
        .Returns(() =>
        {
            _cancellationTokenSource.Cancel();
            throw new OperationCanceledException();
        });

    var service = new JobSubmissionService(
        _instanceCleanupQueue.Object,
        _logger.Object,
        _serviceScopeFactory.Object,
        _fileSystem.Object,
        _configuration);

    // Act
    await service.StartAsync(_cancellationTokenSource.Token);
    BlockUntilCanceled(_cancellationTokenSource.Token);

    // Assert
    _logger.VerifyLogging("Uploading 3 files.", LogLevel.Information, Times.Once());
    _logger.VerifyLogging("Upload to payload completed.", LogLevel.Information, Times.Once());
    _jobStore.Verify(
        p => p.TransitionState(job, InferenceJobStatus.Success, It.IsAny<CancellationToken>()),
        Times.Once());
    _jobsApi.Verify(
        p => p.AddMetadata(It.IsAny<Job>(), It.IsAny<Dictionary<string, string>>()),
        Times.Never());
    _instanceCleanupQueue.Verify(p => p.QueueInstance(It.IsAny<string>()), Times.Exactly(3));
}
/// <summary>
/// Verifies that when every payload upload throws, each of the three failures is logged,
/// a "Failed to upload 3 files." error is logged once, the job is transitioned with a
/// <c>Fail</c> status exactly once, and no instance is queued for cleanup.
/// </summary>
public async Task ShallFailJobOnPayloadUploadException()
{
    // Arrange: a single job in the PayloadUploading state.
    var job = new InferenceJob();
    job.JobId = "1";
    job.PayloadId = "1";
    job.State = InferenceJobState.PayloadUploading;
    job.Source = "Source";
    job.SetStoragePath("/job");

    // The job directory appears to contain three files.
    var payloadFiles = new[] { "/file1", "file2", "file3" };
    _fileSystem
        .Setup(p => p.Directory.GetFiles(
            It.IsAny<string>(),
            It.IsAny<string>(),
            System.IO.SearchOption.AllDirectories))
        .Returns(payloadFiles);

    // Every upload attempt fails.
    _payloadsApi
        .Setup(p => p.Upload(It.IsAny<string>(), It.IsAny<string>(), It.IsAny<string>()))
        .Throws(new Exception("error"));
    _instanceCleanupQueue.Setup(p => p.QueueInstance(It.IsAny<string>()));

    _jobStore.Setup(p => p.TransitionState(
        It.IsAny<InferenceJob>(),
        It.IsAny<InferenceJobStatus>(),
        It.IsAny<CancellationToken>()));

    // First Take() hands out the job; the second cancels the token and throws,
    // which lets BlockUntilCanceled return.
    _jobStore
        .SetupSequence(p => p.Take(It.IsAny<CancellationToken>()))
        .Returns(Task.FromResult(job))
        .Returns(() =>
        {
            _cancellationTokenSource.Cancel();
            throw new OperationCanceledException();
        });

    var service = new JobSubmissionService(
        _instanceCleanupQueue.Object,
        _logger.Object,
        _serviceScopeFactory.Object,
        _fileSystem.Object,
        _configuration);

    // Act
    await service.StartAsync(_cancellationTokenSource.Token);
    BlockUntilCanceled(_cancellationTokenSource.Token);

    // Assert
    _logger.VerifyLoggingMessageBeginsWith("Error uploading file:", LogLevel.Error, Times.Exactly(3));
    _logger.VerifyLogging($"Failed to upload {3} files.", LogLevel.Error, Times.Once());
    _jobStore.Verify(
        p => p.TransitionState(job, InferenceJobStatus.Fail, It.IsAny<CancellationToken>()),
        Times.Once());
    _instanceCleanupQueue.Verify(p => p.QueueInstance(It.IsAny<string>()), Times.Never());
}
/// <summary>
/// Verifies that a job taken in the <c>MetadataUploading</c> state triggers one file
/// listing and one metadata build, and is then transitioned with a <c>Success</c> status
/// exactly once.
/// </summary>
public async Task UploadsMetadataAndTransitionsState()
{
    // Arrange: a single job in the MetadataUploading state.
    var job = new InferenceJob();
    job.JobId = "1";
    job.PayloadId = "1";
    job.State = InferenceJobState.MetadataUploading;
    job.Source = "Source";
    job.SetStoragePath("/job");

    // The job directory appears to contain two files.
    var jobFiles = new[] { "/file1", "/file2" };
    _fileSystem
        .Setup(p => p.Directory.GetFiles(
            It.IsAny<string>(),
            It.IsAny<string>(),
            It.IsAny<SearchOption>()))
        .Returns(jobFiles);

    // The factory hands back a builder pre-populated with one entry.
    var metadata = new JobMetadataBuilder() { { "Test", "TestValue" } };
    _jobMetadataBuilderFactory
        .Setup(p => p.Build(
            It.IsAny<bool>(),
            It.IsAny<IReadOnlyList<string>>(),
            It.IsAny<IReadOnlyList<string>>()))
        .Returns(metadata);

    _jobStore.Setup(p => p.TransitionState(
        It.IsAny<InferenceJob>(),
        It.IsAny<InferenceJobStatus>(),
        It.IsAny<CancellationToken>()));

    // First Take() hands out the job; the second cancels the token and throws,
    // which lets BlockUntilCanceled return.
    _jobStore
        .SetupSequence(p => p.Take(It.IsAny<CancellationToken>()))
        .Returns(Task.FromResult(job))
        .Returns(() =>
        {
            _cancellationTokenSource.Cancel();
            throw new OperationCanceledException();
        });

    var service = new JobSubmissionService(
        _instanceCleanupQueue.Object,
        _logger.Object,
        _serviceScopeFactory.Object,
        _fileSystem.Object,
        _configuration);

    // Act
    await service.StartAsync(_cancellationTokenSource.Token);
    BlockUntilCanceled(_cancellationTokenSource.Token);

    // Assert
    _jobStore.Verify(
        p => p.TransitionState(job, InferenceJobStatus.Success, It.IsAny<CancellationToken>()),
        Times.Once());
    _fileSystem.Verify(
        p => p.Directory.GetFiles(It.IsAny<string>(), It.IsAny<string>(), It.IsAny<SearchOption>()),
        Times.Once());
    _jobMetadataBuilderFactory.Verify(
        p => p.Build(
            It.IsAny<bool>(),
            It.IsAny<IReadOnlyList<string>>(),
            It.IsAny<IReadOnlyList<string>>()),
        Times.Once());
}
/// <summary>
/// Verifies that when copying an instance file fails with an <see cref="IOException"/>
/// whose HResult is <c>ClaraJobRepository.ERROR_DISK_FULL</c>, <c>Add</c> logs the
/// "will retry" error three times, logs "Exceeded maximum retries." once, and finally
/// surfaces the <see cref="IOException"/> to the caller.
/// </summary>
public async Task Add_ShallRetryCopyThenThrow()
{
    // Arrange: delegate to the shared file system except for File.Copy, which always
    // fails with the disk-full HResult.
    var fileSystem = new Mock<IFileSystem>();
    fileSystem.Setup(p => p.Directory).Returns(_fileSystem.Directory);
    fileSystem.Setup(p => p.Path).Returns(_fileSystem.Path);
    fileSystem.Setup(p => p.File.Create(It.IsAny<string>()))
        .Returns((string path) => _fileSystem.File.Create(path));
    fileSystem.Setup(p => p.File.Copy(It.IsAny<string>(), It.IsAny<string>(), It.IsAny<bool>()))
        .Throws(new IOException("error", ClaraJobRepository.ERROR_DISK_FULL));

    var job = new InferenceJob();
    job.JobId = Guid.NewGuid().ToString();
    job.PayloadId = Guid.NewGuid().ToString();
    job.SetStoragePath("/path/to/job");
    job.Instances.Add(InstanceGenerator.GenerateInstance("./aet", "aet", fileSystem: fileSystem.Object));
    _configuration.Value.Storage.Temporary = "./aet";

    _inferenceJobRepository.SetupSequence(p => p.AsQueryable())
        .Returns((new List<InferenceJob>() { job }).AsQueryable());

    var jobStore = new ClaraJobRepository(
        _logger.Object,
        _configuration,
        fileSystem.Object,
        _inferenceJobRepository.Object);

    // Act + Assert
    await Assert.ThrowsAsync<IOException>(async () => await jobStore.Add(job));

    _logger.VerifyLoggingMessageBeginsWith(
        $"Error copying file to {job.JobPayloadsStoragePath}; destination may be out of disk space, will retry in {1000}ms.",
        LogLevel.Error,
        Times.Exactly(3));
    _logger.VerifyLoggingMessageBeginsWith(
        $"Error copying file to {job.JobPayloadsStoragePath}; destination may be out of disk space. Exceeded maximum retries.",
        LogLevel.Error,
        Times.Once());
}
/// <summary>
/// Verifies that when copying an instance file fails with a general <see cref="Exception"/>,
/// <c>Add</c> logs a "Failed to copy file" error once and surfaces the exception to the caller.
/// </summary>
public async Task Add_ThrowsWhenFailToCopy()
{
    // Arrange: delegate to the shared file system except for File.Copy, which always throws.
    var fileSystem = new Mock<IFileSystem>();
    fileSystem.Setup(p => p.Directory).Returns(_fileSystem.Directory);
    fileSystem.Setup(p => p.Path).Returns(_fileSystem.Path);
    fileSystem.Setup(p => p.File.Create(It.IsAny<string>()))
        .Returns((string path) => _fileSystem.File.Create(path));
    fileSystem.Setup(p => p.File.Copy(It.IsAny<string>(), It.IsAny<string>(), It.IsAny<bool>()))
        .Throws(new Exception("error"));

    var job = new InferenceJob();
    job.JobId = Guid.NewGuid().ToString();
    job.PayloadId = Guid.NewGuid().ToString();
    job.SetStoragePath("/path/to/job");
    job.Instances.Add(InstanceGenerator.GenerateInstance("./aet", "aet", fileSystem: fileSystem.Object));
    _configuration.Value.Storage.Temporary = "./aet";

    _inferenceJobRepository.SetupSequence(p => p.AsQueryable())
        .Returns((new List<InferenceJob>() { job }).AsQueryable());

    var jobStore = new ClaraJobRepository(
        _logger.Object,
        _configuration,
        fileSystem.Object,
        _inferenceJobRepository.Object);

    // Act + Assert
    await Assert.ThrowsAsync<Exception>(async () => await jobStore.Add(job));

    _logger.VerifyLoggingMessageBeginsWith(
        $"Failed to copy file {job.JobPayloadsStoragePath}.",
        LogLevel.Error,
        Times.Once());
}