// Encodes a sample string as UTF-8 bytes, passes the bytes to
// TextFileContentExtractor.ExtractTextContents, and expects the same text back.
public void CanExtractUTF8TextContents()
{
    // Arrange
    const string expectedText = "This is just some sample text that could be in a file.";
    byte[] utf8Payload = Encoding.UTF8.GetBytes(expectedText);

    // Act
    var actualText = new TextFileContentExtractor().ExtractTextContents(utf8Payload);

    // Assert
    Assert.Equal(expectedText, actualText);
}
// Drives ParsingWorkflow end to end with collaborators built from Configuration values:
// stores an in-progress job status, creates a sample blob through createSampleBlob,
// executes the workflow for that blob, then re-reads the job status and expects
// exactly one category, "Heavy Hitter".
public async Task ExecutesFullIntegrationPass()
{
    // Arrange: wire the workflow from its collaborators.
    var statusStore = new JobStatusStore(
        Configuration.StorageConnectionString,
        Configuration.JobStatusContainerName);
    var reader = new AzureBlobReader(
        Configuration.StorageConnectionString,
        Configuration.FileUploadContainerName);
    var contentExtractor = new TextFileContentExtractor();
    var index = new AzureSearchIndex(
        Configuration.SearchServiceName,
        Configuration.SearchAdminKey);
    var scorer = new TextDocumentScorer(index);
    var parsingWorkflow = new ParsingWorkflow(statusStore, reader, contentExtractor, index, scorer);

    // Fresh identifiers for this run; the "N" format yields 32 hex digits with no hyphens.
    var documentId = Guid.NewGuid().ToString("N");
    var jobId = Guid.NewGuid().ToString("N");

    // The blob path has the shape "<jobId>/<documentId>".
    var blobPath = jobId + "/" + documentId;

    var details = new BlobDetails
    {
        ContainerName = Configuration.FileUploadContainerName,
        FullBlobPath = blobPath,
        DocumentId = documentId,
        JobId = jobId
    };

    var pendingJob = new JobStatus
    {
        OriginalFileName = "not-real-file.txt",
        IsComplete = false,
        JobStartTime = DateTime.UtcNow,
        JobId = jobId
    };

    // The job status is stored and the blob created before the workflow runs.
    await statusStore.UpdateStatusAsync(pendingJob);
    await createSampleBlob(blobPath);

    // Act
    await parsingWorkflow.ExecuteAsync(details);

    // Assert: read the status back and inspect the categories on it.
    JobStatus finishedJob = await statusStore.ReadStatusAsync(jobId);
    var categories = finishedJob.Categories;
    var categoryTotal = categories.Length;

    Assert.Equal(1, categoryTotal);
    Assert.Equal("Heavy Hitter", categories[0]);
}