public void verify_id_is_opaque_and_not_contains_blob_id()
 {
     // The persisted job Id must be an opaque identifier: it should not embed
     // internal concepts (blob id, tenant), and the job parameter bag must not
     // expose blob/document ids either.
     var info = new QueueInfo("test", "", "docx");
     QueueHandler sut = new QueueHandler(info, _db);
     StreamReadModel rm = new StreamReadModel()
     {
         Filename = new FileNameWithExtension("test.docx"),
         FormatInfo = new FormatInfo()
         {
             DocumentFormat = new DocumentFormat("thumb.small"),
             BlobId = new BlobId("blob.1"),
             PipelineId = new PipelineId("thumbnail"),
         },
         DocumentDescriptorId = new DocumentDescriptorId(1),
         Handle = new DocumentHandle("Revision_2"),
     };

     sut.Handle(rm, new TenantId("test_tenant"));

     var collection = _db.GetCollection<QueuedJob>("queue.test");
     Assert.That(collection.AsQueryable().Count(), Is.EqualTo(1));
     var job = collection.AsQueryable().Single();
     Assert.That(job.BlobId, Is.EqualTo(new BlobId("blob.1")));
     Assert.That(job.TenantId, Is.EqualTo(new TenantId("test_tenant")));
     Assert.That(job.DocumentDescriptorId, Is.EqualTo(new DocumentDescriptorId(1)));
     // NOTE(review): compares a string (Handle.ToString()) to a DocumentHandle;
     // presumably relies on an implicit conversion or equality overload — confirm.
     Assert.That(job.Handle.ToString(), Is.EqualTo(rm.Handle));
     // Fixed typos in the failure messages ("contains" / "concempts").
     Assert.That(job.Id.ToString(), Is.Not.Contains("blob.1"), "Id should not contain internal concepts like blob id");
     Assert.That(job.Id.ToString(), Is.Not.Contains("tenant"), "Id should not contain internal concepts like tenant id");
     Assert.That(job.Parameters.Keys, Is.Not.Contains(JobKeys.BlobId));
     Assert.That(job.Parameters.Keys, Is.Not.Contains(JobKeys.DocumentId));
 }
 // A queue configured with the given mime-type / extension filters should accept
 // or reject a stream read model based solely on its file name.
 public void verify_should_create_job_mime_extension(String mimetypes, String extensions, String fileName, Boolean expected)
 {
     var queue = new QueueInfo("TEST", extensions: extensions, mimeTypes: mimetypes);
     var stream = new StreamReadModel
     {
         Filename = new Core.Model.FileNameWithExtension(fileName)
     };

     Assert.That(queue.ShouldCreateJob(stream), Is.EqualTo(expected));
 }
        /// <summary>
        /// Sets up the handler for a single queue: binds the "queue.{name}"
        /// collection, declares the indexes used by the queue queries, prepares
        /// the per-status stats aggregation and registers a heart-beat health check.
        /// </summary>
        /// <param name="info">Static configuration of the queue (name, filters, timeouts).</param>
        /// <param name="database">Mongo database hosting the queue collections.</param>
        public QueueHandler(QueueInfo info, IMongoDatabase database)
        {
            _collection = database.GetCollection<QueuedJob>("queue." + info.Name);

            var indexKeys = Builders<QueuedJob>.IndexKeys;

            // Backs the get-next-job query: status filter plus stream/scheduling order.
            _collection.Indexes.CreateOne(
                indexKeys.Ascending(x => x.Status).Ascending(x => x.StreamId).Ascending(x => x.SchedulingTimestamp),
                new CreateIndexOptions() { Name = "ForGetNextJobQuery" });

            // Lookup of jobs by tenant + blob (presumably for de-duplication — see tests).
            _collection.Indexes.CreateOne(
                indexKeys.Ascending(x => x.TenantId).Ascending(x => x.BlobId),
                new CreateIndexOptions() { Name = "TenantAndBlob", Unique = false });

            // Lookup of jobs by handle + status.
            _collection.Indexes.CreateOne(
                indexKeys.Ascending(x => x.Handle).Ascending(x => x.Status),
                new CreateIndexOptions() { Name = "HandleAndStatus", Unique = false });

            _info = info;
            Name = info.Name;

            // Group-stage body used to count queued jobs per status.
            _statsAggregationQuery = BsonDocument.Parse(@" 
       { 
          _id : '$Status',
          c : {$sum:1}
       }");

            // JobLockTimeout is expressed in minutes: past this span a job is
            // considered to be locked. Each job gets a 10 second grace period
            // to start before the heart beat is considered late.
            var lockTimeoutMilliseconds = info.JobLockTimeout * 60 * 1000;
            _healthCheck = MetricHeartBeatHealthCheck.Create(
                "Job queue " + info.Name,
                lockTimeoutMilliseconds,
                TimeSpan.FromMilliseconds(lockTimeoutMilliseconds - 10000));

            Logger = NullLogger.Instance;
        }
 // Configures the document store as a queue manager with aggressive polling
 // (both queue and stream polled every 50 ms) so tests run quickly.
 public DocumentStoreTestConfigurationForPollQueue( QueueInfo[] queueInfo, String engineVersion = "v3")
 {
     IsQueueManager = true;
     EngineVersion = engineVersion;
     QueueInfoList = queueInfo;

     // Poll each 50 milliseconds.
     QueueJobsPollInterval = 50;
     QueueStreamPollInterval = 50;
 }
 // The queue admits only "pdf" and "doc": a "docx" file must NOT match the
 // "doc" entry, so no job is created.
 public void verify_file_extension_on_handler_filter_exact_extension()
 {
     var queueInfo = new QueueInfo("test", "", "pdf|doc");
     var handler = new QueueHandler(queueInfo, _db);

     handler.Handle(
         new StreamReadModel { Filename = new FileNameWithExtension("test.docx") },
         new TenantId("test"));

     var jobs = _db.GetCollection<QueuedJob>("queue.test");
     Assert.That(jobs.AsQueryable().Count(), Is.EqualTo(0));
 }
 // Whatever the mime/extension filters say, a queue bound to the given pipeline
 // setting must not schedule a job automatically for these inputs.
 public void queue_with_only_manual_execution(
     String mimetypes, 
     String extensions,
     String pipeline, 
     String fileName)
 {
     var queue = new QueueInfo("TEST",
         extensions: extensions,
         mimeTypes: mimetypes,
         pipeline: pipeline);

     var stream = new StreamReadModel
     {
         Filename = new Core.Model.FileNameWithExtension(fileName)
     };

     Assert.That(queue.ShouldCreateJob(stream), Is.EqualTo(false));
 }
        /// <summary>
        /// Creates the handler for a single queue: binds the backing Mongo
        /// collection ("queue.{name}"), declares the indexes used by the queue
        /// queries, prepares the per-status stats aggregation and registers a
        /// heart-beat health check.
        /// </summary>
        /// <param name="info">Static configuration of the queue (name, filters, timeouts).</param>
        /// <param name="database">Mongo database hosting the queue collections.</param>
        public QueueHandler(QueueInfo info, IMongoDatabase database)
        {
            _collection = database.GetCollection <QueuedJob>("queue." + info.Name);
            // Backs the get-next-job query: status filter plus stream/scheduling order.
            _collection.Indexes.CreateOne(
                Builders <QueuedJob> .IndexKeys.Ascending(x => x.Status).Ascending(x => x.StreamId).Ascending(x => x.SchedulingTimestamp),
                new CreateIndexOptions()
            {
                Name = "ForGetNextJobQuery"
            });

            // Lookup of jobs by tenant + blob (presumably for de-duplication — see tests).
            _collection.Indexes.CreateOne(
                Builders <QueuedJob> .IndexKeys.Ascending(x => x.TenantId).Ascending(x => x.BlobId),
                new CreateIndexOptions()
            {
                Name = "TenantAndBlob", Unique = false
            });

            // Lookup of jobs by handle + status.
            _collection.Indexes.CreateOne(
                Builders <QueuedJob> .IndexKeys.Ascending(x => x.Handle).Ascending(x => x.Status),
                new CreateIndexOptions()
            {
                Name = "HandleAndStatus", Unique = false
            });

            _info = info;
            Name  = info.Name;

            // Group-stage body used to count queued jobs per status.
            _statsAggregationQuery = BsonDocument.Parse(@" 
       { 
          _id : '$Status',
          c : {$sum:1}
        }");
            //timeout of polling time is the maximum timeout allowed before a job is considered to be locked
            //but give 10 seconds to each job to start
            // (JobLockTimeout is expressed in minutes, hence * 60 * 1000.)
            var millisecondTimeout = info.JobLockTimeout * 60 * 1000;

            _healthCheck = MetricHeartBeatHealthCheck.Create(
                "Job queue " + info.Name,
                millisecondTimeout,
                TimeSpan.FromMilliseconds(millisecondTimeout - 10000));

            Logger = NullLogger.Instance;
        }
 // Convenience helper: builds the handler for the given queue and immediately
 // feeds it a stream event, so a job is already queued when the test starts.
 private QueueHandler CreateAGenericJob(QueueInfo info, String tenant = "test", Dictionary<String, Object> customData = null)
 {
     var handler = GetSut(info);
     HandleStreamToCreateJob(handler, tenant, customData);
     return handler;
 }
 // System under test: a QueueHandler bound to the shared test database.
 private QueueHandler GetSut(QueueInfo info) => new QueueHandler(info, _db);
 // A first failure must record the error message, bump the error count to 1
 // and move the job back to the ReQueued state (MaxNumberOfFailure is 2).
 public void verify_set_error_status()
 {
     var queueInfo = new QueueInfo("test", "tika", "");
     queueInfo.MaxNumberOfFailure = 2;
     var handler = CreateAGenericJob(queueInfo);

     var pulledJob = handler.GetNextJob("", "handle", null, null);
     handler.SetJobExecuted(pulledJob.Id, "Error 42");

     var reloaded = _db.GetCollection<QueuedJob>("queue.test")
         .Find(Builders<QueuedJob>.Filter.Eq(j => j.Id, pulledJob.Id))
         .SingleOrDefault();

     Assert.That(reloaded.ExecutionError, Is.EqualTo("Error 42"));
     Assert.That(reloaded.ErrorCount, Is.EqualTo(1));
     Assert.That(reloaded.Status, Is.EqualTo(QueuedJobExecutionStatus.ReQueued));
 }
        // Parameters configured on the QueueInfo must be copied onto every job
        // the queue generates.
        public void verify_job_is_generated_with_custom_parameters()
        {
            var queueInfo = new QueueInfo("test", "tika", "");
            queueInfo.Parameters = new Dictionary<string, string>() { { "Custom", "CustomValue" } };
            var handler = CreateAGenericJob(queueInfo);

            var job = handler.GetNextJob("", "handle", null, null);

            Assert.That(job.Parameters["Custom"], Is.EqualTo("CustomValue"));
        }
        // Scenario: a job may fail at most MaxNumberOfFailure (2) times; after
        // that it must be marked Failed and never handed out again.
        // NOTE(review): the method name has a typo ("falure") but is kept as-is,
        // since test names are part of the suite's reported surface.
        public void verify_max_number_of_falure()
        {
            var info = new QueueInfo("test", "tika", "");
            info.MaxNumberOfFailure = 2;
            QueueHandler sut = CreateAGenericJob(info);

            var nextJob = sut.GetNextJob("", "handle", null, null);
            Assert.That(nextJob, Is.Not.Null);
            var jobId = nextJob.Id;

            // First failure: the job should be re-queued and returned once more.
            sut.SetJobExecuted(nextJob.Id, "Error 42");
            nextJob = sut.GetNextJob("", "handle", null, null);
            Assert.That(nextJob, Is.Not.Null);

            // Second failure reaches the limit: the queue must stop serving it.
            // (Fixed grammar in the failure message: "two failures".)
            sut.SetJobExecuted(nextJob.Id, "Error 42");
            nextJob = sut.GetNextJob("", "handle", null, null);
            Assert.That(nextJob, Is.Null, "After two failures the job should not be returned anymore");

            var collection = _db.GetCollection<QueuedJob>("queue.test");
            var job = collection.Find(Builders<QueuedJob>.Filter.Eq(j => j.Id, jobId)).SingleOrDefault();
            Assert.That(job.ExecutionError, Is.EqualTo("Error 42"));
            Assert.That(job.ErrorCount, Is.EqualTo(2));
            Assert.That(job.Status, Is.EqualTo(QueuedJobExecutionStatus.Failed));
        }
        // Verifies job de-duplication: two stream events that point to the same
        // underlying blob must result in a single queued job.
        public void verify_get_next_job_set_identity()
        {
            var info = new QueueInfo("test", "", "pdf|docx");
            QueueHandler sut = new QueueHandler(info, _db);
            // First event: a new "office" format for handle "FirstHandle",
            // produced by the "soffice" pipeline with blob soffice.1.
            StreamReadModel rm = new StreamReadModel()
            {
                Id = 1L,
                Handle = "FirstHandle",
                Filename = new FileNameWithExtension("test.docx"),
                EventType = HandleStreamEventTypes.DocumentHasNewFormat,
                FormatInfo = new FormatInfo()
                {
                    PipelineId = new PipelineId("soffice"),
                    DocumentFormat = new DocumentFormat("office"),
                    BlobId = new BlobId("soffice.1")
                },
                DocumentDescriptorId = new DocumentDescriptorId(1),
            };

            sut.Handle(rm, new TenantId("test"));
            // Second event: same read model mutated to a different handle and
            // stream id, but still carrying the same blob id.
            rm.Handle = "SecondHandle";
            rm.Id = 2L;
            // This is the typical situation when a handle is de-duplicated: the
            // handle is assigned to another document, but the underlying blob id
            // is the same.
            sut.Handle(rm, new TenantId("test"));
            var collection = _db.GetCollection<QueuedJob>("queue.test");
            // No need to schedule another job for the same blob.
            Assert.That(collection.AsQueryable().Count(), Is.EqualTo(1));
        }
        // Queue "test" listens only to the "tika" pipeline: events from other
        // pipelines are ignored, events from tika generate a job.
        public void verify_pipeline_id_filter()
        {
            var queueInfo = new QueueInfo("test", "tika", "");
            var handler = new QueueHandler(queueInfo, _db);

            // Event coming from a different pipeline: must be filtered out.
            var fromOtherPipeline = new StreamReadModel
            {
                Filename = new FileNameWithExtension("test.docx"),
                EventType = HandleStreamEventTypes.DocumentHasNewFormat,
                FormatInfo = new FormatInfo
                {
                    PipelineId = new PipelineId("soffice")
                }
            };
            handler.Handle(fromOtherPipeline, new TenantId("test"));

            var jobs = _db.GetCollection<QueuedJob>("queue.test");
            Assert.That(jobs.AsQueryable().Count(), Is.EqualTo(0), "pipeline filter is not filtering out unwanted pipeline");

            // Event from the admitted pipeline: must generate exactly one job.
            var fromAdmittedPipeline = new StreamReadModel
            {
                Filename = new FileNameWithExtension("test.docx"),
                FormatInfo = new FormatInfo
                {
                    PipelineId = new PipelineId("tika"),
                    DocumentFormat = new DocumentFormat("tika"),
                    BlobId = new BlobId("tika.1")
                }
            };
            handler.Handle(fromAdmittedPipeline, new TenantId("test"));

            Assert.That(jobs.AsQueryable().Count(), Is.EqualTo(1), "pipeline filter is not filtering in admitted pipeline");
        }
        // The queue accepts only the docx mime type: a pdf stream is filtered
        // out, a docx stream is queued.
        public void verify_filtering_on_mime_types()
        {
            var docxMimeType = MimeTypes.GetMimeTypeByExtension("docx");
            var handler = new QueueHandler(new QueueInfo("test", mimeTypes: docxMimeType), _db);

            // pdf does not match the docx mime type: no job expected.
            handler.Handle(new StreamReadModel
            {
                Filename = new FileNameWithExtension("test.pdf"),
                FormatInfo = new FormatInfo
                {
                    DocumentFormat = new DocumentFormat("thumb.small"),
                    BlobId = new BlobId("blob.1"),
                    PipelineId = new PipelineId("thumbnail")
                }
            }, new TenantId("test"));

            var jobs = _db.GetCollection<QueuedJob>("queue.test");
            Assert.That(jobs.AsQueryable().Count(), Is.EqualTo(0));

            // docx matches: exactly one job expected.
            handler.Handle(new StreamReadModel
            {
                Filename = new FileNameWithExtension("test.docx"),
                FormatInfo = new FormatInfo
                {
                    DocumentFormat = new DocumentFormat("thumb.small"),
                    BlobId = new BlobId("blob.1"),
                    PipelineId = new PipelineId("thumbnail")
                }
            }, new TenantId("test"));

            Assert.That(jobs.AsQueryable().Count(), Is.EqualTo(1));
        }
 // Every queued job should carry the mime type of the source file in its
 // parameter bag under JobKeys.MimeType.
 public void verify_job_parameters_contains_mime_type()
 {
     var queueInfo = new QueueInfo("test", "", "docx");
     var handler = new QueueHandler(queueInfo, _db);
     var readModel = new StreamReadModel
     {
         Filename = new FileNameWithExtension("test.docx"),
         FormatInfo = new FormatInfo
         {
             DocumentFormat = new DocumentFormat("thumb.small"),
             BlobId = new BlobId("blob.1"),
             PipelineId = new PipelineId("thumbnail"),
         },
         DocumentDescriptorId = new DocumentDescriptorId(1),
     };

     handler.Handle(readModel, new TenantId("test_tenant"));

     var jobs = _db.GetCollection<QueuedJob>("queue.test");
     Assert.That(jobs.AsQueryable().Count(), Is.EqualTo(1));

     var queuedJob = jobs.AsQueryable().Single();
     Assert.That(queuedJob.BlobId, Is.EqualTo(new BlobId("blob.1")));
     Assert.That(queuedJob.Parameters[JobKeys.MimeType], Is.EqualTo(MimeTypes.GetMimeTypeByExtension("docx")));
 }
        // Custom data attached to the document handle must flow through to the
        // queued job's HandleCustomData untouched.
        public void verify_job_created_with_handle_metadata()
        {
            var queueInfo = new QueueInfo("test", "", "pdf|docx");
            var handler = new QueueHandler(queueInfo, _db);
            var customData = new DocumentCustomData
            {
                { "test", "value" },
                { "complex", 42 },
            };
            var readModel = new StreamReadModel
            {
                Id = 1L,
                Handle = "FirstHandle",
                Filename = new FileNameWithExtension("test.docx"),
                EventType = HandleStreamEventTypes.DocumentHasNewFormat,
                FormatInfo = new FormatInfo
                {
                    PipelineId = new PipelineId("soffice"),
                    DocumentFormat = new DocumentFormat("office"),
                    BlobId = new BlobId("soffice.1")
                },
                DocumentDescriptorId = new DocumentDescriptorId(1),
                DocumentCustomData = customData,
            };

            handler.Handle(readModel, new TenantId("test"));

            var jobs = _db.GetCollection<QueuedJob>("queue.test");
            Assert.That(jobs.AsQueryable().Single().HandleCustomData, Is.EquivalentTo(customData));
        }