// Creates a document content entry for the resource addressed by the route ID.
//
// id:                Guid bound from the route and passed unchanged to the business layer
//                    (presumably the owning document's ID - confirm against the BLL).
// documentContentVM: View model bound from the request body.
//
// Returns 400 Bad Request carrying the model state when validation fails; otherwise
// 200 OK carrying the document returned by the business layer, mapped to DocumentVM.
public async Task<ActionResult<DocumentVM>> CreateDocumentContents([FromRoute] Guid id, [FromBody] DocumentContentVM documentContentVM)
{
    if (ModelState.IsValid)
    {
        // View model -> domain model
        DocumentContent contentToCreate = this.mapper.Map<DocumentContentVM, DocumentContent>(documentContentVM);

        Document updatedDocument = await this.bll.CreateDocumentContentAsync(id, contentToCreate);

        // Domain model -> view model
        DocumentVM responseBody = this.mapper.Map<Document, DocumentVM>(updatedDocument);

        return Ok(responseBody);
    }

    // Request failed model validation
    return BadRequest(ModelState);
}
/// <summary>
/// Handles one incoming message: deserializes it to a <see cref="Models.Document"/>, extracts
/// text contents according to the document's MIME type and, when any text was found, sends the
/// contents to the API for storage.
/// </summary>
/// <param name="content">JSON payload that is deserialized to a <see cref="Models.Document"/>.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when the payload deserializes to <c>null</c> (for example an empty string or the JSON literal <c>null</c>).
/// </exception>
private async Task HandleMessage(string content)
{
    this.logger.LogInformation($"Received content: {content}");
    this.logger.LogInformation("Parsing content to Document");

    Models.Document document = JsonConvert.DeserializeObject<Models.Document>(content);

    // Fail with a descriptive error instead of a NullReferenceException further down
    if (document == null)
    {
        this.logger.LogError("Content could not be parsed to a Document");
        throw new InvalidOperationException("Message content could not be parsed to a Document.");
    }

    this.logger.LogInformation($"Content parsed to Document model: {document}");
    this.logger.LogInformation($"Document mime type: {document.MimeType}");

    List<DocumentContentVM> documentContentVMs;

    switch (document.MimeType)
    {
        // Text
        case "text/plain":
            documentContentVMs = await this.ExtractPlainTextContentsAsync(document);
            break;

        // PDF, JPG, PNG
        case "application/pdf":
        case "image/jpeg":
        case "image/png":
            documentContentVMs = await this.ExtractTextractContentsAsync(document);
            break;

        // Anything else is not parsed; make that visible in the log
        default:
            this.logger.LogWarning($"Unsupported mime type: {document.MimeType}, no text detection performed");
            documentContentVMs = new List<DocumentContentVM>();
            break;
    }

    if (documentContentVMs.Any())
    {
        this.logger.LogInformation($"Text detected in document with ID: {document.Id}");
        this.logger.LogInformation($"Saving results in database through API");

        using (HttpResponseMessage httpResponseMessage = await this.apiService.DocumentsCreateDocumentContents(document.Id, documentContentVMs))
        {
            if (httpResponseMessage.IsSuccessStatusCode)
            {
                this.logger.LogInformation($"Saved in database");
            }
            else
            {
                // Previously ignored silently; surface the failure so it can be diagnosed
                this.logger.LogWarning($"Saving results in database failed with status code: {(int)httpResponseMessage.StatusCode}");
            }
        }
    }
    else
    {
        this.logger.LogInformation($"No text detected in document with ID: {document.Id}");
    }

    this.logger.LogInformation($"Document with ID: {document.Id} parsed");
}

/// <summary>
/// Downloads a plain-text document from S3 and converts every non-empty line into document contents.
/// A line that splits into more than one part on a single space is added as a LINE followed by each
/// non-empty part as a WORD; any other line is added as a single WORD.
/// </summary>
/// <param name="document">Document whose <c>Path</c> is downloaded and whose <c>Id</c> is stamped on each content.</param>
/// <returns>The contents found in the file; empty when the file has no non-empty lines.</returns>
private async Task<List<DocumentContentVM>> ExtractPlainTextContentsAsync(Models.Document document)
{
    List<DocumentContentVM> documentContentVMs = new List<DocumentContentVM>();

    this.logger.LogInformation($"Start text detection of document with ID: {document.Id}");

    // Retrieve file; the response owns the underlying connection, so dispose it when done
    using (GetObjectResponse response = await this.awsS3Service.DownloadFile(document.Path))
    {
        this.logger.LogInformation("File downloaded from AWS S3 storage");
        this.logger.LogInformation("Reading contents of file");

        using (StreamReader streamReader = new StreamReader(response.ResponseStream))
        {
            // Read until ReadLineAsync returns null. Peek() is not a reliable end-of-stream
            // test on a non-seekable network stream and could end the loop early.
            string line;
            while ((line = await streamReader.ReadLineAsync()) != null)
            {
                if (string.IsNullOrEmpty(line))
                {
                    continue;
                }

                string[] words = line.Split(" ");

                if (words.Length > 1)
                {
                    // Multiple words in the line: add the line itself as content
                    documentContentVMs.Add(new DocumentContentVM()
                    {
                        DocumentId = document.Id,
                        Text = line,
                        TextType = DocumentContentTextType.LINE
                    });

                    // Add the words separately; consecutive spaces yield empty entries, skip those
                    foreach (string word in words)
                    {
                        if (string.IsNullOrEmpty(word))
                        {
                            continue;
                        }

                        documentContentVMs.Add(new DocumentContentVM()
                        {
                            DocumentId = document.Id,
                            Text = word,
                            TextType = DocumentContentTextType.WORD
                        });
                    }
                }
                else
                {
                    // Only one word in the line: add it as a word
                    documentContentVMs.Add(new DocumentContentVM()
                    {
                        DocumentId = document.Id,
                        Text = line,
                        TextType = DocumentContentTextType.WORD
                    });
                }
            }
        }
    }

    return documentContentVMs;
}

/// <summary>
/// Runs an Amazon Textract text-detection job for the document, writes the raw results to a JSON
/// file under <c>Results/DocumentParser</c> and converts every block that has text into document contents.
/// </summary>
/// <param name="document">Document whose <c>Path</c> is analysed and whose <c>Id</c> is stamped on each content.</param>
/// <returns>The contents built from the Textract blocks; empty when no block carries text.</returns>
private async Task<List<DocumentContentVM>> ExtractTextractContentsAsync(Models.Document document)
{
    List<DocumentContentVM> documentContentVMs = new List<DocumentContentVM>();

    this.logger.LogInformation($"Amazon Textract - Start text detection of document with ID: {document.Id}");

    string bucketName = this.configuration.GetSection("AWS")
                        .GetSection("S3")
                        .GetSection("Bucket")
                        .GetValue<string>("Name");

    string jobId = await this.awsTextractService.StartDocumentTextDetection(bucketName, document.Path);
    this.logger.LogInformation($"Amazon Textract - Created a new job with ID: {jobId}");

    this.logger.LogInformation("Amazon Textract - Waiting for job completion");
    await this.awsTextractService.WaitForJobCompletion(jobId);
    this.logger.LogInformation($"Amazon Textract - Job with ID: {jobId} is completed");

    this.logger.LogInformation($"Amazon Textract - Retrieving results for job with ID: {jobId}");
    List<GetDocumentTextDetectionResponse> textDetectionResponses = await this.awsTextractService.GetJobResult(jobId);

    this.logger.LogInformation("Amazon Textract - Saving results to JSON text file");
    string jsonResponse = JsonConvert.SerializeObject(textDetectionResponses, Formatting.Indented);

    // Make sure the target directory exists; writing would otherwise throw DirectoryNotFoundException
    Directory.CreateDirectory("Results/DocumentParser");
    string timestamp = DateTime.Now.ToString("yyyyMMdd_HHmmss");
    await File.WriteAllTextAsync($"Results/DocumentParser/{timestamp}_{document.Id}.textracted.json", jsonResponse);

    this.logger.LogInformation("Amazon Textract - Converting results to document contents");
    foreach (GetDocumentTextDetectionResponse textDetectionResponse in textDetectionResponses)
    {
        foreach (Block block in textDetectionResponse.Blocks)
        {
            // Blocks without text produce no content
            if (string.IsNullOrEmpty(block.Text))
            {
                continue;
            }

            DocumentContentVM newDocumentContentVM = new DocumentContentVM()
            {
                DocumentId = document.Id,
                Text = block.Text,
                Confidence = block.Confidence
            };

            // Any other block type keeps the view model's default TextType
            if (block.BlockType.Equals("WORD"))
            {
                newDocumentContentVM.TextType = DocumentContentTextType.WORD;
            }

            if (block.BlockType.Equals("LINE"))
            {
                newDocumentContentVM.TextType = DocumentContentTextType.LINE;
            }

            documentContentVMs.Add(newDocumentContentVM);
        }
    }

    return documentContentVMs;
}