public void verify_id_is_opaque_and_not_contains_blob_id()
{
    var info = new QueueInfo("test", "", "docx");
    QueueHandler sut = new QueueHandler(info, _db);
    StreamReadModel rm = new StreamReadModel()
    {
        Filename = new FileNameWithExtension("test.docx"),
        FormatInfo = new FormatInfo()
        {
            DocumentFormat = new DocumentFormat("thumb.small"),
            BlobId = new BlobId("blob.1"),
            PipelineId = new PipelineId("thumbnail"),
        },
        DocumentDescriptorId = new DocumentDescriptorId(1),
        Handle = new DocumentHandle("Revision_2"),
    };
    sut.Handle(rm, new TenantId("test_tenant"));

    var collection = _db.GetCollection<QueuedJob>("queue.test");
    Assert.That(collection.AsQueryable().Count(), Is.EqualTo(1));
    var job = collection.AsQueryable().Single();
    Assert.That(job.BlobId, Is.EqualTo(new BlobId("blob.1")));
    Assert.That(job.TenantId, Is.EqualTo(new TenantId("test_tenant")));
    Assert.That(job.DocumentDescriptorId, Is.EqualTo(new DocumentDescriptorId(1)));
    Assert.That(job.Handle.ToString(), Is.EqualTo(rm.Handle));
    Assert.That(job.Id.ToString(), Is.Not.Contains("blob.1"), "Id should not contain internal concepts like blob id");
    Assert.That(job.Id.ToString(), Is.Not.Contains("tenant"), "Id should not contain internal concepts like tenant id");
    Assert.That(job.Parameters.Keys, Is.Not.Contains(JobKeys.BlobId));
    Assert.That(job.Parameters.Keys, Is.Not.Contains(JobKeys.DocumentId));
}
public void verify_should_create_job_mime_extension(String mimetypes, String extensions, String fileName, Boolean expected)
{
    QueueInfo sut = new QueueInfo("TEST", extensions: extensions, mimeTypes: mimetypes);
    StreamReadModel sr = new StreamReadModel();
    sr.Filename = new Core.Model.FileNameWithExtension(fileName);
    Assert.That(sut.ShouldCreateJob(sr), Is.EqualTo(expected));
}
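//A hedged sketch: this parameterized test is presumably driven by NUnit [TestCase]
//attributes; the value combinations below are illustrative assumptions only, not the
//project's actual test data.
//
//  [TestCase("application/pdf", "", "document.pdf", true)]    //mime type matches
//  [TestCase("application/pdf", "", "document.docx", false)]  //mime type does not match
//  [TestCase("", "pdf|docx", "document.docx", true)]          //extension matches
//  [TestCase("", "pdf|docx", "document.txt", false)]          //extension does not match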
public QueueHandler(QueueInfo info, IMongoDatabase database)
{
    _collection = database.GetCollection<QueuedJob>("queue." + info.Name);
    _collection.Indexes.CreateOne(
        Builders<QueuedJob>.IndexKeys
            .Ascending(x => x.Status)
            .Ascending(x => x.StreamId)
            .Ascending(x => x.SchedulingTimestamp),
        new CreateIndexOptions() { Name = "ForGetNextJobQuery" });
    _collection.Indexes.CreateOne(
        Builders<QueuedJob>.IndexKeys
            .Ascending(x => x.TenantId)
            .Ascending(x => x.BlobId),
        new CreateIndexOptions() { Name = "TenantAndBlob", Unique = false });
    _collection.Indexes.CreateOne(
        Builders<QueuedJob>.IndexKeys
            .Ascending(x => x.Handle)
            .Ascending(x => x.Status),
        new CreateIndexOptions() { Name = "HandleAndStatus", Unique = false });
    _info = info;
    Name = info.Name;
    _statsAggregationQuery = BsonDocument.Parse(@"
    {
        _id : '$Status',
        c : {$sum:1}
    }");

    //The job lock timeout is the maximum time allowed before a job is considered locked,
    //but give each job 10 seconds to start.
    var millisecondTimeout = info.JobLockTimeout * 60 * 1000; //JobLockTimeout is expressed in minutes
    _healthCheck = MetricHeartBeatHealthCheck.Create(
        "Job queue " + info.Name,
        millisecondTimeout,
        TimeSpan.FromMilliseconds(millisecondTimeout - 10000));
    Logger = NullLogger.Instance;
}
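//Minimal usage sketch, assuming a standard MongoDB C# driver setup; the connection
//string, database name and queue configuration below are placeholder assumptions.
//
//  var client = new MongoClient("mongodb://localhost");
//  IMongoDatabase db = client.GetDatabase("ds-queues");
//  var tikaQueue = new QueueInfo("tika", "tika", "");  //name, pipeline filter, extension filter
//  var handler = new QueueHandler(tikaQueue, db);      //creates the queue.tika collection and its indexes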
public DocumentStoreTestConfigurationForPollQueue(
    QueueInfo[] queueInfo,
    String engineVersion = "v3")
{
    IsQueueManager = true;
    QueueJobsPollInterval = 50; //poll every 50 milliseconds.
    QueueStreamPollInterval = 50;
    EngineVersion = engineVersion;
    this.QueueInfoList = queueInfo;
}
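//Illustrative sketch only: wiring the poll-queue test configuration with a single
//queue; the queue definition is a placeholder assumption.
//
//  var config = new DocumentStoreTestConfigurationForPollQueue(
//      new[] { new QueueInfo("tika", "tika", "") });  //engineVersion defaults to "v3"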
public void verify_file_extension_on_handler_filter_exact_extension()
{
    var info = new QueueInfo("test", "", "pdf|doc");
    QueueHandler sut = new QueueHandler(info, _db);
    StreamReadModel rm = new StreamReadModel()
    {
        Filename = new FileNameWithExtension("test.docx")
    };
    sut.Handle(rm, new TenantId("test"));

    var collection = _db.GetCollection<QueuedJob>("queue.test");
    //"docx" does not exactly match the configured extensions "pdf|doc", so no job is queued.
    Assert.That(collection.AsQueryable().Count(), Is.EqualTo(0));
}
public void queue_with_only_manual_execution(
    String mimetypes,
    String extensions,
    String pipeline,
    String fileName)
{
    QueueInfo sut = new QueueInfo("TEST", extensions: extensions, mimeTypes: mimetypes, pipeline: pipeline);
    StreamReadModel sr = new StreamReadModel();
    sr.Filename = new Core.Model.FileNameWithExtension(fileName);
    Assert.That(sut.ShouldCreateJob(sr), Is.EqualTo(false));
}
private QueueHandler CreateAGenericJob(QueueInfo info, String tenant = "test", Dictionary<String, Object> customData = null)
{
    QueueHandler sut = GetSut(info);
    HandleStreamToCreateJob(sut, tenant, customData);
    return sut;
}
private QueueHandler GetSut(QueueInfo info)
{
    return new QueueHandler(info, _db);
}
public void verify_set_error_status()
{
    var info = new QueueInfo("test", "tika", "");
    info.MaxNumberOfFailure = 2;
    QueueHandler sut = CreateAGenericJob(info);
    var nextJob = sut.GetNextJob("", "handle", null, null);
    sut.SetJobExecuted(nextJob.Id, "Error 42");

    var collection = _db.GetCollection<QueuedJob>("queue.test");
    var job = collection.Find(Builders<QueuedJob>.Filter.Eq(j => j.Id, nextJob.Id)).SingleOrDefault();
    Assert.That(job.ExecutionError, Is.EqualTo("Error 42"));
    Assert.That(job.ErrorCount, Is.EqualTo(1));
    //First failure: still below MaxNumberOfFailure, so the job is re-queued.
    Assert.That(job.Status, Is.EqualTo(QueuedJobExecutionStatus.ReQueued));
}
public void verify_job_is_generated_with_custom_parameters()
{
    var info = new QueueInfo("test", "tika", "");
    info.Parameters = new Dictionary<string, string>() { { "Custom", "CustomValue" } };
    QueueHandler sut = CreateAGenericJob(info);
    var nextJob = sut.GetNextJob("", "handle", null, null);
    Assert.That(nextJob.Parameters["Custom"], Is.EqualTo("CustomValue"));
}
public void verify_max_number_of_failure()
{
    var info = new QueueInfo("test", "tika", "");
    info.MaxNumberOfFailure = 2;
    QueueHandler sut = CreateAGenericJob(info);
    var nextJob = sut.GetNextJob("", "handle", null, null);
    Assert.That(nextJob, Is.Not.Null);
    var jobId = nextJob.Id;
    sut.SetJobExecuted(nextJob.Id, "Error 42");

    nextJob = sut.GetNextJob("", "handle", null, null);
    Assert.That(nextJob, Is.Not.Null);
    sut.SetJobExecuted(nextJob.Id, "Error 42");

    nextJob = sut.GetNextJob("", "handle", null, null);
    Assert.That(nextJob, Is.Null, "After two failures the job should not be returned anymore");

    var collection = _db.GetCollection<QueuedJob>("queue.test");
    var job = collection.Find(Builders<QueuedJob>.Filter.Eq(j => j.Id, jobId)).SingleOrDefault();
    Assert.That(job.ExecutionError, Is.EqualTo("Error 42"));
    Assert.That(job.ErrorCount, Is.EqualTo(2));
    //MaxNumberOfFailure reached: the job is marked as permanently Failed.
    Assert.That(job.Status, Is.EqualTo(QueuedJobExecutionStatus.Failed));
}
public void verify_get_next_job_set_identity()
{
    var info = new QueueInfo("test", "", "pdf|docx");
    QueueHandler sut = new QueueHandler(info, _db);
    StreamReadModel rm = new StreamReadModel()
    {
        Id = 1L,
        Handle = "FirstHandle",
        Filename = new FileNameWithExtension("test.docx"),
        EventType = HandleStreamEventTypes.DocumentHasNewFormat,
        FormatInfo = new FormatInfo()
        {
            PipelineId = new PipelineId("soffice"),
            DocumentFormat = new DocumentFormat("office"),
            BlobId = new BlobId("soffice.1")
        },
        DocumentDescriptorId = new DocumentDescriptorId(1),
    };
    sut.Handle(rm, new TenantId("test"));

    rm.Handle = "SecondHandle";
    rm.Id = 2L;
    //This is the typical situation when a handle is de-duplicated: the handle is
    //assigned to another document, but the underlying blob id is the same.
    sut.Handle(rm, new TenantId("test"));

    var collection = _db.GetCollection<QueuedJob>("queue.test");
    //No need to schedule another job.
    Assert.That(collection.AsQueryable().Count(), Is.EqualTo(1));
}
public void verify_pipeline_id_filter()
{
    var info = new QueueInfo("test", "tika", "");
    QueueHandler sut = new QueueHandler(info, _db);
    StreamReadModel rm = new StreamReadModel()
    {
        Filename = new FileNameWithExtension("test.docx"),
        EventType = HandleStreamEventTypes.DocumentHasNewFormat,
        FormatInfo = new FormatInfo()
        {
            PipelineId = new PipelineId("soffice")
        }
    };
    sut.Handle(rm, new TenantId("test"));

    var collection = _db.GetCollection<QueuedJob>("queue.test");
    Assert.That(collection.AsQueryable().Count(), Is.EqualTo(0), "pipeline filter is not filtering out unwanted pipelines");

    rm = new StreamReadModel()
    {
        Filename = new FileNameWithExtension("test.docx"),
        FormatInfo = new FormatInfo()
        {
            PipelineId = new PipelineId("tika"),
            DocumentFormat = new DocumentFormat("tika"),
            BlobId = new BlobId("tika.1")
        }
    };
    sut.Handle(rm, new TenantId("test"));
    Assert.That(collection.AsQueryable().Count(), Is.EqualTo(1), "pipeline filter is not letting the admitted pipeline through");
}
public void verify_filtering_on_mime_types()
{
    var mimeTypeDocx = MimeTypes.GetMimeTypeByExtension("docx");
    var info = new QueueInfo("test", mimeTypes: mimeTypeDocx);
    QueueHandler sut = new QueueHandler(info, _db);
    StreamReadModel rm = new StreamReadModel()
    {
        Filename = new FileNameWithExtension("test.pdf"),
        FormatInfo = new FormatInfo()
        {
            DocumentFormat = new DocumentFormat("thumb.small"),
            BlobId = new BlobId("blob.1"),
            PipelineId = new PipelineId("thumbnail")
        }
    };
    sut.Handle(rm, new TenantId("test"));

    var collection = _db.GetCollection<QueuedJob>("queue.test");
    Assert.That(collection.AsQueryable().Count(), Is.EqualTo(0));

    rm = new StreamReadModel()
    {
        Filename = new FileNameWithExtension("test.docx"),
        FormatInfo = new FormatInfo()
        {
            DocumentFormat = new DocumentFormat("thumb.small"),
            BlobId = new BlobId("blob.1"),
            PipelineId = new PipelineId("thumbnail")
        }
    };
    sut.Handle(rm, new TenantId("test"));
    Assert.That(collection.AsQueryable().Count(), Is.EqualTo(1));
}
public void verify_job_parameters_contains_mime_type()
{
    var info = new QueueInfo("test", "", "docx");
    QueueHandler sut = new QueueHandler(info, _db);
    StreamReadModel rm = new StreamReadModel()
    {
        Filename = new FileNameWithExtension("test.docx"),
        FormatInfo = new FormatInfo()
        {
            DocumentFormat = new DocumentFormat("thumb.small"),
            BlobId = new BlobId("blob.1"),
            PipelineId = new PipelineId("thumbnail"),
        },
        DocumentDescriptorId = new DocumentDescriptorId(1),
    };
    sut.Handle(rm, new TenantId("test_tenant"));

    var collection = _db.GetCollection<QueuedJob>("queue.test");
    Assert.That(collection.AsQueryable().Count(), Is.EqualTo(1));
    var job = collection.AsQueryable().Single();
    Assert.That(job.BlobId, Is.EqualTo(new BlobId("blob.1")));
    Assert.That(job.Parameters[JobKeys.MimeType], Is.EqualTo(MimeTypes.GetMimeTypeByExtension("docx")));
}
public void verify_job_created_with_handle_metadata()
{
    var info = new QueueInfo("test", "", "pdf|docx");
    QueueHandler sut = new QueueHandler(info, _db);
    var customData = new DocumentCustomData()
    {
        { "test", "value" },
        { "complex", 42 },
    };
    StreamReadModel rm = new StreamReadModel()
    {
        Id = 1L,
        Handle = "FirstHandle",
        Filename = new FileNameWithExtension("test.docx"),
        EventType = HandleStreamEventTypes.DocumentHasNewFormat,
        FormatInfo = new FormatInfo()
        {
            PipelineId = new PipelineId("soffice"),
            DocumentFormat = new DocumentFormat("office"),
            BlobId = new BlobId("soffice.1")
        },
        DocumentDescriptorId = new DocumentDescriptorId(1),
        DocumentCustomData = customData,
    };
    sut.Handle(rm, new TenantId("test"));

    var collection = _db.GetCollection<QueuedJob>("queue.test");
    Assert.That(collection.AsQueryable().Single().HandleCustomData, Is.EquivalentTo(customData));
}