コード例 #1
0
        /// <summary>
        /// Checks that <c>TextFileContentExtractor.ExtractTextContents</c> returns the
        /// original string when handed that string's UTF-8 encoded bytes.
        /// </summary>
        public void CanExtractUTF8TextContents()
        {
            string expectedText = "This is just some sample text that could be in a file.";

            var contentExtractor = new TextFileContentExtractor();
            byte[] utf8Payload   = Encoding.UTF8.GetBytes(expectedText);

            var actualText = contentExtractor.ExtractTextContents(utf8Payload);

            // The extracted text must round-trip exactly.
            Assert.Equal(expectedText, actualText);
        }
コード例 #2
0
        /// <summary>
        /// Runs <c>ParsingWorkflow.ExecuteAsync</c> end to end against the services named in
        /// <c>Configuration</c>: stores an initial job status, creates a sample blob, executes
        /// the workflow for that blob, then re-reads the job status and expects exactly one
        /// category, "Heavy Hitter".
        /// </summary>
        /// <returns>A task that completes when the workflow has run and the assertions have been evaluated.</returns>
        public async Task ExecutesFullIntegrationPass()
        {
            // Wire up the workflow and its collaborators.
            var statusStore      = new JobStatusStore(Configuration.StorageConnectionString, Configuration.JobStatusContainerName);
            var uploadReader     = new AzureBlobReader(Configuration.StorageConnectionString, Configuration.FileUploadContainerName);
            var contentExtractor = new TextFileContentExtractor();
            var index            = new AzureSearchIndex(Configuration.SearchServiceName, Configuration.SearchAdminKey);
            var scorer           = new TextDocumentScorer(index);
            var parsingWorkflow  = new ParsingWorkflow(statusStore, uploadReader, contentExtractor, index, scorer);

            // Fresh identifiers for every run: a GUID with its dashes stripped.
            Func<string> newCompactId = () => Guid.NewGuid().ToString().Replace("-", String.Empty);

            var documentId = newCompactId();
            var jobKey     = newCompactId();
            var blobPath   = String.Format("{0}/{1}", jobKey, documentId);

            var details = new BlobDetails
            {
                ContainerName = Configuration.FileUploadContainerName,
                FullBlobPath  = blobPath,
                DocumentId    = documentId,
                JobId         = jobKey
            };

            var initialStatus = new JobStatus
            {
                OriginalFileName = "not-real-file.txt",
                IsComplete       = false,
                JobStartTime     = DateTime.UtcNow,
                JobId            = jobKey
            };

            // Store the job status and create the blob before running the workflow.
            await statusStore.UpdateStatusAsync(initialStatus);
            await createSampleBlob(blobPath);

            await parsingWorkflow.ExecuteAsync(details);

            // Re-read the status and check the categories recorded on it.
            var finalStatus        = await statusStore.ReadStatusAsync(jobKey);
            var assignedCategories = finalStatus.Categories.Length;

            Assert.Equal(1, assignedCategories);
            Assert.Equal("Heavy Hitter", finalStatus.Categories[0]);
        }