예제 #1
0
        /// <summary>
        /// Verifies that when <c>TransitionState</c> throws, the service still attempts a
        /// <c>Fail</c> transition exactly once and logs the transition error exactly once.
        /// </summary>
        public async Task ShallLogErrorOnJobTransitionError()
        {
            var job = new InferenceJob();
            job.JobId = "1";
            job.PayloadId = "1";
            job.State = InferenceJobState.Created;
            job.Source = "Source";
            job.SetStoragePath("/job");

            // First Take() hands out the job; the second cancels the shared token and aborts.
            _jobStore
                .SetupSequence(store => store.Take(It.IsAny<CancellationToken>()))
                .Returns(Task.FromResult(job))
                .Returns(() =>
                {
                    _cancellationTokenSource.Cancel();
                    throw new OperationCanceledException();
                });

            // Every state transition fails, regardless of arguments.
            _jobStore
                .Setup(store => store.TransitionState(
                    It.IsAny<InferenceJob>(),
                    It.IsAny<InferenceJobStatus>(),
                    It.IsAny<CancellationToken>()))
                .Throws(new Exception("error"));

            var submissionService = new JobSubmissionService(
                _instanceCleanupQueue.Object,
                _logger.Object,
                _serviceScopeFactory.Object,
                _fileSystem.Object,
                _configuration);

            await submissionService.StartAsync(_cancellationTokenSource.Token);
            BlockUntilCanceled(_cancellationTokenSource.Token);

            _jobStore.Verify(
                store => store.TransitionState(job, InferenceJobStatus.Fail, It.IsAny<CancellationToken>()),
                Times.Once());
            _logger.VerifyLogging("Error while transitioning job state.", LogLevel.Error, Times.Once());
        }
예제 #2
0
        /// <summary>
        /// Verifies that a <c>Fail</c> transition on a job that starts with <c>TryCount</c> 3
        /// returns the same job in the <c>Faulted</c> state with <c>TryCount</c> 4, logs the
        /// "exceeded maximum number of retries" warning once, and saves changes once.
        /// </summary>
        public async Task TransitionState_Fail_ShallPutJobInFaultedState()
        {
            var inferenceJob = new InferenceJob
            {
                JobId = Guid.NewGuid().ToString(),
                PayloadId = Guid.NewGuid().ToString()
            };
            inferenceJob.SetStoragePath("/path/to/job");
            inferenceJob.State = InferenceJobState.Creating;
            inferenceJob.TryCount = 3;

            var tokenSource = new CancellationTokenSource();
            var storedJobs = new List<InferenceJob>() { inferenceJob }.AsQueryable();

            _inferenceJobRepository
                .SetupSequence(repo => repo.AsQueryable())
                .Returns(storedJobs);
            _inferenceJobRepository.Setup(repo => repo.SaveChangesAsync(It.IsAny<CancellationToken>()));

            var repository = new ClaraJobRepository(
                _logger.Object,
                _configuration,
                _fileSystem,
                _inferenceJobRepository.Object);

            var transitioned = await repository.TransitionState(inferenceJob, InferenceJobStatus.Fail, tokenSource.Token);

            Assert.Equal(inferenceJob, transitioned);
            Assert.Equal(InferenceJobState.Faulted, transitioned.State);
            Assert.Equal(4, transitioned.TryCount);
            _logger.VerifyLoggingMessageBeginsWith($"Job {inferenceJob.JobId} exceeded maximum number of retries.", LogLevel.Warning, Times.Once());
            _inferenceJobRepository.Verify(repo => repo.SaveChangesAsync(tokenSource.Token), Times.Once());
        }
예제 #3
0
        /// <summary>
        /// Verifies that a job taken in the <c>Starting</c> state results in exactly one call to
        /// the jobs API <c>Start</c> and exactly one <c>Success</c> state transition.
        /// </summary>
        public async Task StartsJobAndTransitionsState()
        {
            var job = new InferenceJob();
            job.JobId = "1";
            job.PayloadId = "1";
            job.State = InferenceJobState.Starting;
            job.Source = "Source";
            job.SetStoragePath("/job");

            // First Take() hands out the job; the second cancels the shared token and aborts.
            _jobStore
                .SetupSequence(store => store.Take(It.IsAny<CancellationToken>()))
                .Returns(Task.FromResult(job))
                .Returns(() =>
                {
                    _cancellationTokenSource.Cancel();
                    throw new OperationCanceledException();
                });
            _jobStore.Setup(store => store.TransitionState(
                It.IsAny<InferenceJob>(),
                It.IsAny<InferenceJobStatus>(),
                It.IsAny<CancellationToken>()));
            _jobsApi.Setup(api => api.Start(It.IsAny<Job>()));

            var submissionService = new JobSubmissionService(
                _instanceCleanupQueue.Object,
                _logger.Object,
                _serviceScopeFactory.Object,
                _fileSystem.Object,
                _configuration);

            await submissionService.StartAsync(_cancellationTokenSource.Token);
            BlockUntilCanceled(_cancellationTokenSource.Token);

            _jobStore.Verify(
                store => store.TransitionState(job, InferenceJobStatus.Success, It.IsAny<CancellationToken>()),
                Times.Once());
            _jobsApi.Verify(api => api.Start(It.IsAny<Job>()), Times.Once());
        }
예제 #4
0
        /// <summary>
        /// Verifies that a <c>Fail</c> transition on a job with <c>TryCount</c> 1 returns the same
        /// job moved to <paramref name="endingState"/> with <c>TryCount</c> 2, logs the retry
        /// message once, and saves changes once.
        /// </summary>
        /// <param name="initalState">State the job is in before the transition.</param>
        /// <param name="endingState">State the job is expected to be in after the failed transition.</param>
        public async Task TransitionState_Fail_ShallTransitionJob(InferenceJobState initalState, InferenceJobState endingState)
        {
            var job = new InferenceJob();

            job.JobId     = Guid.NewGuid().ToString();
            job.PayloadId = Guid.NewGuid().ToString();
            job.SetStoragePath("/path/to/job");
            job.State    = initalState;
            job.TryCount = 1;

            var cancellationSource = new CancellationTokenSource();

            _inferenceJobRepository.SetupSequence(p => p.AsQueryable())
            .Returns((new List <InferenceJob>()
            {
                job
            }).AsQueryable());
            _inferenceJobRepository.Setup(p => p.SaveChangesAsync(It.IsAny <CancellationToken>()));
            var jobStore = new ClaraJobRepository(
                _logger.Object,
                _configuration,
                _fileSystem,
                _inferenceJobRepository.Object);

            var result = await jobStore.TransitionState(job, InferenceJobStatus.Fail, cancellationSource.Token);

            Assert.Equal(job, result);
            // Compare against the actual resulting state; the previous assertion compared
            // endingState with itself and therefore could never fail.
            Assert.Equal(endingState, result.State);
            Assert.Equal(2, result.TryCount);
            _logger.VerifyLoggingMessageBeginsWith($"Putting inference job {job.JobId} back to {endingState} state for retry.", LogLevel.Information, Times.Once());
            _inferenceJobRepository.Verify(p => p.SaveChangesAsync(cancellationSource.Token), Times.Once());
        }
예제 #5
0
        /// <summary>
        /// Verifies that <c>Take</c> returns the single queued job, leaves it in
        /// <paramref name="endingState"/>, and logs the state update once.
        /// </summary>
        /// <param name="initalState">State the job is in before it is taken.</param>
        /// <param name="endingState">State the job is expected to be in after it is taken.</param>
        public async Task Take_ShallReturnAJob(InferenceJobState initalState, InferenceJobState endingState)
        {
            var inferenceJob = new InferenceJob
            {
                JobId = Guid.NewGuid().ToString(),
                PayloadId = Guid.NewGuid().ToString()
            };
            inferenceJob.SetStoragePath("/path/to/job");
            inferenceJob.State = initalState;

            var tokenSource = new CancellationTokenSource();
            var storedJobs = new List<InferenceJob>() { inferenceJob }.AsQueryable();

            _inferenceJobRepository
                .SetupSequence(repo => repo.AsQueryable())
                .Returns(storedJobs);

            var repository = new ClaraJobRepository(
                _logger.Object,
                _configuration,
                _fileSystem,
                _inferenceJobRepository.Object);

            var taken = await repository.Take(tokenSource.Token);

            Assert.Equal(inferenceJob, taken);
            Assert.Equal(endingState, inferenceJob.State);
            _logger.VerifyLoggingMessageBeginsWith($"Updating inference job {inferenceJob.JobId} from {initalState } to {endingState}.", LogLevel.Information, Times.Once());
        }
        /// <summary>
        /// Requests a directory for the job under
        /// <c>{TemporaryDataDirFullPath}/jobs/{JobId}</c> and stores the resulting path on the job.
        /// </summary>
        /// <param name="job">Job whose storage path is to be configured; must not be null.</param>
        /// <exception cref="JobStoreException">
        /// Thrown when <c>TryGenerateDirectory</c> reports failure.
        /// </exception>
        private void ConfigureStoragePath(InferenceJob job)
        {
            Guard.Against.Null(job, nameof(job));

            var requestedPath = _fileSystem.Path.Combine(
                _configuration.Value.Storage.TemporaryDataDirFullPath,
                "jobs",
                $"{job.JobId}");
            var generatedPath = string.Empty;

            // Bail out early when no directory could be generated.
            if (!_fileSystem.Directory.TryGenerateDirectory(requestedPath, out generatedPath))
            {
                throw new JobStoreException($"Failed to generate a temporary storage location");
            }

            _logger.Log(LogLevel.Information, $"Job payloads directory set to {generatedPath}");
            job.SetStoragePath(generatedPath);
        }
예제 #7
0
        /// <summary>
        /// Verifies that a job taken in the <c>PayloadUploading</c> state with three files logs
        /// the upload start and completion messages, transitions with <c>Success</c> once,
        /// never adds metadata, and queues three instances for cleanup.
        /// </summary>
        public async Task UploadsPayloadAndTransitionsState()
        {
            var job = new InferenceJob();
            job.JobId = "1";
            job.PayloadId = "1";
            job.State = InferenceJobState.PayloadUploading;
            job.Source = "Source";
            job.SetStoragePath("/job");

            var payloadFiles = new string[] { "/file1", "/file2", "/file3" };

            // First Take() hands out the job; the second cancels the shared token and aborts.
            _jobStore
                .SetupSequence(store => store.Take(It.IsAny<CancellationToken>()))
                .Returns(Task.FromResult(job))
                .Returns(() =>
                {
                    _cancellationTokenSource.Cancel();
                    throw new OperationCanceledException();
                });
            _jobStore.Setup(store => store.TransitionState(
                It.IsAny<InferenceJob>(),
                It.IsAny<InferenceJobStatus>(),
                It.IsAny<CancellationToken>()));
            _fileSystem
                .Setup(fs => fs.Directory.GetFiles(It.IsAny<string>(), It.IsAny<string>(), System.IO.SearchOption.AllDirectories))
                .Returns(payloadFiles);
            _payloadsApi.Setup(api => api.Upload(It.IsAny<string>(), It.IsAny<string>(), It.IsAny<string>()));
            _instanceCleanupQueue.Setup(queue => queue.QueueInstance(It.IsAny<string>()));

            var submissionService = new JobSubmissionService(
                _instanceCleanupQueue.Object,
                _logger.Object,
                _serviceScopeFactory.Object,
                _fileSystem.Object,
                _configuration);

            await submissionService.StartAsync(_cancellationTokenSource.Token);
            BlockUntilCanceled(_cancellationTokenSource.Token);

            _logger.VerifyLogging("Uploading 3 files.", LogLevel.Information, Times.Once());
            _logger.VerifyLogging("Upload to payload completed.", LogLevel.Information, Times.Once());

            _jobStore.Verify(
                store => store.TransitionState(job, InferenceJobStatus.Success, It.IsAny<CancellationToken>()),
                Times.Once());
            _jobsApi.Verify(
                api => api.AddMetadata(It.IsAny<Job>(), It.IsAny<Dictionary<string, string>>()),
                Times.Never());
            _instanceCleanupQueue.Verify(queue => queue.QueueInstance(It.IsAny<string>()), Times.Exactly(3));
        }
예제 #8
0
        /// <summary>
        /// Verifies that when every payload upload throws, an error is logged per file (three
        /// times), the aggregate failure is logged once, the job transitions with <c>Fail</c>
        /// once, and no instance is queued for cleanup.
        /// </summary>
        public async Task ShallFailJobOnPayloadUploadException()
        {
            var job = new InferenceJob();
            job.JobId = "1";
            job.PayloadId = "1";
            job.State = InferenceJobState.PayloadUploading;
            job.Source = "Source";
            job.SetStoragePath("/job");

            var payloadFiles = new string[] { "/file1", "file2", "file3" };

            // First Take() hands out the job; the second cancels the shared token and aborts.
            _jobStore
                .SetupSequence(store => store.Take(It.IsAny<CancellationToken>()))
                .Returns(Task.FromResult(job))
                .Returns(() =>
                {
                    _cancellationTokenSource.Cancel();
                    throw new OperationCanceledException();
                });
            _jobStore.Setup(store => store.TransitionState(
                It.IsAny<InferenceJob>(),
                It.IsAny<InferenceJobStatus>(),
                It.IsAny<CancellationToken>()));
            _fileSystem
                .Setup(fs => fs.Directory.GetFiles(It.IsAny<string>(), It.IsAny<string>(), System.IO.SearchOption.AllDirectories))
                .Returns(payloadFiles);

            // Every upload attempt fails.
            _payloadsApi
                .Setup(api => api.Upload(It.IsAny<string>(), It.IsAny<string>(), It.IsAny<string>()))
                .Throws(new Exception("error"));
            _instanceCleanupQueue.Setup(queue => queue.QueueInstance(It.IsAny<string>()));

            var submissionService = new JobSubmissionService(
                _instanceCleanupQueue.Object,
                _logger.Object,
                _serviceScopeFactory.Object,
                _fileSystem.Object,
                _configuration);

            await submissionService.StartAsync(_cancellationTokenSource.Token);
            BlockUntilCanceled(_cancellationTokenSource.Token);

            _logger.VerifyLoggingMessageBeginsWith("Error uploading file:", LogLevel.Error, Times.Exactly(3));
            _logger.VerifyLogging($"Failed to upload {3} files.", LogLevel.Error, Times.Once());

            _jobStore.Verify(
                store => store.TransitionState(job, InferenceJobStatus.Fail, It.IsAny<CancellationToken>()),
                Times.Once());
            _instanceCleanupQueue.Verify(queue => queue.QueueInstance(It.IsAny<string>()), Times.Never());
        }
예제 #9
0
        /// <summary>
        /// Verifies that a job taken in the <c>MetadataUploading</c> state lists files once,
        /// builds metadata once, and transitions with <c>Success</c> once.
        /// </summary>
        public async Task UploadsMetadataAndTransitionsState()
        {
            var job = new InferenceJob();
            job.JobId = "1";
            job.PayloadId = "1";
            job.State = InferenceJobState.MetadataUploading;
            job.Source = "Source";
            job.SetStoragePath("/job");

            var listedFiles = new string[] { "/file1", "/file2" };
            var metadata = new JobMetadataBuilder()
            {
                { "Test", "TestValue" }
            };

            // First Take() hands out the job; the second cancels the shared token and aborts.
            _jobStore
                .SetupSequence(store => store.Take(It.IsAny<CancellationToken>()))
                .Returns(Task.FromResult(job))
                .Returns(() =>
                {
                    _cancellationTokenSource.Cancel();
                    throw new OperationCanceledException();
                });
            _jobStore.Setup(store => store.TransitionState(
                It.IsAny<InferenceJob>(),
                It.IsAny<InferenceJobStatus>(),
                It.IsAny<CancellationToken>()));
            _fileSystem
                .Setup(fs => fs.Directory.GetFiles(It.IsAny<string>(), It.IsAny<string>(), It.IsAny<SearchOption>()))
                .Returns(listedFiles);
            _jobMetadataBuilderFactory
                .Setup(factory => factory.Build(
                    It.IsAny<bool>(),
                    It.IsAny<IReadOnlyList<string>>(),
                    It.IsAny<IReadOnlyList<string>>()))
                .Returns(metadata);

            var submissionService = new JobSubmissionService(
                _instanceCleanupQueue.Object,
                _logger.Object,
                _serviceScopeFactory.Object,
                _fileSystem.Object,
                _configuration);

            await submissionService.StartAsync(_cancellationTokenSource.Token);
            BlockUntilCanceled(_cancellationTokenSource.Token);

            _jobStore.Verify(
                store => store.TransitionState(job, InferenceJobStatus.Success, It.IsAny<CancellationToken>()),
                Times.Once());
            _fileSystem.Verify(
                fs => fs.Directory.GetFiles(It.IsAny<string>(), It.IsAny<string>(), It.IsAny<SearchOption>()),
                Times.Once());
            _jobMetadataBuilderFactory.Verify(
                factory => factory.Build(
                    It.IsAny<bool>(),
                    It.IsAny<IReadOnlyList<string>>(),
                    It.IsAny<IReadOnlyList<string>>()),
                Times.Once());
        }
예제 #10
0
        /// <summary>
        /// Verifies that when every file copy throws an <see cref="IOException"/> carrying
        /// <c>ClaraJobRepository.ERROR_DISK_FULL</c>, <c>Add</c> logs the retry message three
        /// times, logs the "Exceeded maximum retries" message once, and throws an
        /// <see cref="IOException"/>.
        /// </summary>
        public async Task Add_ShallRetryCopyThenThrow()
        {
            // Delegate everything to the shared file system except File.Copy, which always fails.
            var fileSystem = new Mock<IFileSystem>();

            fileSystem.Setup(p => p.Directory).Returns(_fileSystem.Directory);
            fileSystem.Setup(p => p.Path).Returns(_fileSystem.Path);
            fileSystem.Setup(p => p.File.Create(It.IsAny<string>()))
                .Returns((string path) => _fileSystem.File.Create(path));
            fileSystem.Setup(p => p.File.Copy(It.IsAny<string>(), It.IsAny<string>(), It.IsAny<bool>()))
                .Throws(new IOException("error", ClaraJobRepository.ERROR_DISK_FULL));

            var job = new InferenceJob();

            job.JobId     = Guid.NewGuid().ToString();
            job.PayloadId = Guid.NewGuid().ToString();
            job.SetStoragePath("/path/to/job");
            job.Instances.Add(InstanceGenerator.GenerateInstance("./aet", "aet", fileSystem: fileSystem.Object));
            _configuration.Value.Storage.Temporary = "./aet";

            // The unused CancellationTokenSource local was removed: Add(job) takes no token
            // here, and the instance was never disposed.
            _inferenceJobRepository.SetupSequence(p => p.AsQueryable())
                .Returns((new List<InferenceJob>()
                {
                    job
                }).AsQueryable());

            var jobStore = new ClaraJobRepository(
                _logger.Object,
                _configuration,
                fileSystem.Object,
                _inferenceJobRepository.Object);

            await Assert.ThrowsAsync<IOException>(async () => await jobStore.Add(job));

            _logger.VerifyLoggingMessageBeginsWith($"Error copying file to {job.JobPayloadsStoragePath}; destination may be out of disk space, will retry in {1000}ms.", LogLevel.Error, Times.Exactly(3));
            _logger.VerifyLoggingMessageBeginsWith($"Error copying file to {job.JobPayloadsStoragePath}; destination may be out of disk space.  Exceeded maximum retries.", LogLevel.Error, Times.Once());
        }
예제 #11
0
        /// <summary>
        /// Verifies that when file copy throws a plain <see cref="Exception"/>, <c>Add</c>
        /// logs the "Failed to copy file" error once and throws an <see cref="Exception"/>.
        /// </summary>
        public async Task Add_ThrowsWhenFailToCopy()
        {
            // Delegate everything to the shared file system except File.Copy, which always fails.
            var fileSystem = new Mock<IFileSystem>();

            fileSystem.Setup(p => p.Directory).Returns(_fileSystem.Directory);
            fileSystem.Setup(p => p.Path).Returns(_fileSystem.Path);
            fileSystem.Setup(p => p.File.Create(It.IsAny<string>()))
                .Returns((string path) => _fileSystem.File.Create(path));
            fileSystem.Setup(p => p.File.Copy(It.IsAny<string>(), It.IsAny<string>(), It.IsAny<bool>()))
                .Throws(new Exception("error"));

            var job = new InferenceJob();

            job.JobId     = Guid.NewGuid().ToString();
            job.PayloadId = Guid.NewGuid().ToString();
            job.SetStoragePath("/path/to/job");
            job.Instances.Add(InstanceGenerator.GenerateInstance("./aet", "aet", fileSystem: fileSystem.Object));
            _configuration.Value.Storage.Temporary = "./aet";

            // The unused CancellationTokenSource local was removed: Add(job) takes no token
            // here, and the instance was never disposed.
            _inferenceJobRepository.SetupSequence(p => p.AsQueryable())
                .Returns((new List<InferenceJob>()
                {
                    job
                }).AsQueryable());

            var jobStore = new ClaraJobRepository(
                _logger.Object,
                _configuration,
                fileSystem.Object,
                _inferenceJobRepository.Object);

            await Assert.ThrowsAsync<Exception>(async () => await jobStore.Add(job));

            _logger.VerifyLoggingMessageBeginsWith($"Failed to copy file {job.JobPayloadsStoragePath}.", LogLevel.Error, Times.Once());
        }