Esempio n. 1
0
        public async Task<ActionResult<DocumentVM>> CreateDocumentContents([FromRoute] Guid id, [FromBody] DocumentContentVM documentContentVM)
        {
            // Flow: validate the request, map the view model to the domain model, hand it to the
            // business layer together with the route id, then map the resulting document back
            // to a view model for the response.

            // Reject requests that failed model binding/validation with a 400
            if (!ModelState.IsValid)
            {
                return BadRequest(ModelState);
            }

            // View model -> domain model
            DocumentContent contentToCreate = this.mapper.Map<DocumentContentVM, DocumentContent>(documentContentVM);

            Document resultDocument = await this.bll.CreateDocumentContentAsync(id, contentToCreate);

            // Domain model -> view model
            DocumentVM responseBody = this.mapper.Map<Document, DocumentVM>(resultDocument);

            return Ok(responseBody);
        }
Esempio n. 2
0
        /// <summary>
        /// Parses a document message, extracts the text contents of the referenced file and
        /// stores them through the API.
        /// </summary>
        /// <remarks>
        /// Plain-text documents are downloaded and split into lines and words; PDF, JPEG and PNG
        /// documents are processed with Amazon Textract. Any other MIME type yields no contents.
        /// </remarks>
        /// <param name="content">JSON representation of a <c>Models.Document</c>.</param>
        /// <exception cref="InvalidOperationException">The payload deserializes to <c>null</c>.</exception>
        private async Task HandleMessage(string content)
        {
            this.logger.LogInformation($"Received content: {content}");
            this.logger.LogInformation("Parsing content to Document");

            Models.Document document = JsonConvert.DeserializeObject<Models.Document>(content);

            // DeserializeObject can return null (e.g. for a literal "null" payload); fail with a
            // clear message instead of a NullReferenceException on the first property access.
            if (document == null)
            {
                throw new InvalidOperationException("Message content could not be parsed to a Document.");
            }

            this.logger.LogInformation($"Content parsed to Document model: {document}");
            this.logger.LogInformation($"Document mime type: {document.MimeType}");

            List<DocumentContentVM> documentContentVMs;

            switch (document.MimeType)
            {
                // Text
                case "text/plain":
                    documentContentVMs = await this.ReadPlainTextContentsAsync(document);
                    break;

                // PDF, JPG, PNG
                case "application/pdf":
                case "image/jpeg":
                case "image/png":
                    documentContentVMs = await this.DetectTextWithTextractAsync(document);
                    break;

                // Anything else is not parsed; make that visible in the logs
                default:
                    this.logger.LogWarning($"Unsupported mime type: {document.MimeType} for document with ID: {document.Id}");
                    documentContentVMs = new List<DocumentContentVM>();
                    break;
            }

            if (documentContentVMs.Count > 0)
            {
                this.logger.LogInformation($"Text detected in document with ID: {document.Id}");

                this.logger.LogInformation($"Saving results in database through API");

                using (HttpResponseMessage httpResponseMessage = await this.apiService.DocumentsCreateDocumentContents(document.Id, documentContentVMs))
                {
                    if (httpResponseMessage.IsSuccessStatusCode)
                    {
                        this.logger.LogInformation($"Saved in database");
                    }
                    else
                    {
                        // Previously a failed save went completely unnoticed
                        this.logger.LogError($"Saving results for document with ID: {document.Id} failed with status code: {(int)httpResponseMessage.StatusCode}");
                    }
                }
            }
            else
            {
                this.logger.LogInformation($"No text detected in document with ID: {document.Id}");
            }

            this.logger.LogInformation($"Document with ID: {document.Id} parsed");
        }

        /// <summary>
        /// Downloads a plain-text document and converts its non-empty lines and words to document contents.
        /// </summary>
        /// <param name="document">Document whose <c>Path</c> identifies the file to download.</param>
        /// <returns>
        /// For every non-empty line: a LINE entry followed by a WORD entry per non-empty word when the
        /// line splits into several parts on spaces, otherwise a single WORD entry holding the line.
        /// </returns>
        private async Task<List<DocumentContentVM>> ReadPlainTextContentsAsync(Models.Document document)
        {
            List<DocumentContentVM> documentContentVMs = new List<DocumentContentVM>();

            this.logger.LogInformation($"Start text detection of document with ID: {document.Id}");

            // Retrieve file; the response owns the underlying stream, so dispose it when done
            using (GetObjectResponse response = await this.awsS3Service.DownloadFile(document.Path))
            {
                this.logger.LogInformation("File downloaded from AWS S3 storage");
                this.logger.LogInformation("Reading contents of file");

                // Read contents
                using (StreamReader streamReader = new StreamReader(response.ResponseStream))
                {
                    // ReadLineAsync returns null only at the real end of the stream. Peek() can
                    // report -1 early on a network stream whose buffer is momentarily empty.
                    string line;
                    while ((line = await streamReader.ReadLineAsync()) != null)
                    {
                        // Skip blank lines
                        if (string.IsNullOrEmpty(line))
                        {
                            continue;
                        }

                        // Checking for words in line
                        string[] words = line.Split(" ");

                        if (words.Length > 1)
                        {
                            // If multiple words in the line, add line as Content
                            documentContentVMs.Add(new DocumentContentVM()
                            {
                                DocumentId = document.Id,
                                Text       = line,
                                TextType   = DocumentContentTextType.LINE
                            });

                            // Add the words separately; consecutive spaces produce empty entries
                            foreach (string word in words)
                            {
                                if (string.IsNullOrEmpty(word))
                                {
                                    continue;
                                }

                                documentContentVMs.Add(new DocumentContentVM()
                                {
                                    DocumentId = document.Id,
                                    Text       = word,
                                    TextType   = DocumentContentTextType.WORD
                                });
                            }
                        }
                        else
                        {
                            // If only 1 word in the line, add word as Content
                            documentContentVMs.Add(new DocumentContentVM()
                            {
                                DocumentId = document.Id,
                                Text       = line,
                                TextType   = DocumentContentTextType.WORD
                            });
                        }
                    }
                }
            }

            return documentContentVMs;
        }

        /// <summary>
        /// Runs an Amazon Textract text detection job for the document, stores the raw result as a
        /// JSON file and converts every block that carries text to document contents.
        /// </summary>
        /// <param name="document">Document whose <c>Path</c> identifies the object in the configured bucket.</param>
        /// <returns>One entry per Textract block with non-empty text, including its confidence.</returns>
        private async Task<List<DocumentContentVM>> DetectTextWithTextractAsync(Models.Document document)
        {
            const string resultsDirectory = "Results/DocumentParser";

            List<DocumentContentVM> documentContentVMs = new List<DocumentContentVM>();

            this.logger.LogInformation($"Amazon Textract - Start text detection of document with ID: {document.Id}");

            string bucketName = this.configuration.GetSection("AWS")
                                .GetSection("S3")
                                .GetSection("Bucket")
                                .GetValue<string>("Name");

            string jobId = await this.awsTextractService.StartDocumentTextDetection(bucketName, document.Path);

            this.logger.LogInformation($"Amazon Textract - Created a new job with ID: {jobId}");
            this.logger.LogInformation("Amazon Textract - Waiting for job completion");

            await this.awsTextractService.WaitForJobCompletion(jobId);

            this.logger.LogInformation($"Amazon Textract - Job with ID: {jobId} is completed");
            this.logger.LogInformation($"Amazon Textract - Retrieving results for job with ID: {jobId}");

            List<GetDocumentTextDetectionResponse> textDetectionResponses = await this.awsTextractService.GetJobResult(jobId);

            this.logger.LogInformation("Amazon Textract - Saving results to JSON text file");

            string jsonResponse = JsonConvert.SerializeObject(textDetectionResponses, Formatting.Indented);

            // WriteAllText does not create missing directories; CreateDirectory is a no-op when it exists
            Directory.CreateDirectory(resultsDirectory);
            File.WriteAllText($"{resultsDirectory}/{DateTime.Now:yyyyMMdd_HHmmss}_{document.Id}.textracted.json", jsonResponse);

            this.logger.LogInformation("Amazon Textract - Converting results to document contents");

            foreach (GetDocumentTextDetectionResponse textDetectionResponse in textDetectionResponses)
            {
                foreach (Block block in textDetectionResponse.Blocks)
                {
                    // Blocks without text carry nothing to store
                    if (string.IsNullOrEmpty(block.Text))
                    {
                        continue;
                    }

                    DocumentContentVM newDocumentContentVM = new DocumentContentVM()
                    {
                        DocumentId = document.Id,
                        Text       = block.Text,
                        Confidence = block.Confidence
                    };

                    // Any other block type keeps the default TextType
                    if (block.BlockType.Equals("WORD"))
                    {
                        newDocumentContentVM.TextType = DocumentContentTextType.WORD;
                    }
                    if (block.BlockType.Equals("LINE"))
                    {
                        newDocumentContentVM.TextType = DocumentContentTextType.LINE;
                    }

                    documentContentVMs.Add(newDocumentContentVM);
                }
            }

            return documentContentVMs;
        }