/// <summary>
/// Renders every form field found in a Textract analysis response as
/// tab-separated text: a "Key"/"Value" header line followed by one line per field.
/// </summary>
/// <remarks>
/// The result can be stored as a .tsv file. A tab is used as the separator
/// because "," and ";" may appear inside keys or values.
/// </remarks>
/// <param name="response">The document analysis response to read the form fields from.</param>
/// <returns>The header line plus one "key&lt;TAB&gt;value" line for each field, in page order.</returns>
public string CreateKeyValueText(GetDocumentAnalysisResponse response)
{
    const string separator = "\t";

    var output = new StringBuilder();
    output.Append("Key").Append(separator).AppendLine("Value");

    var document = new TextractDocument(response);
    foreach (var page in document.Pages)
    {
        foreach (var field in page.Form.Fields)
        {
            // Append(object) uses ToString() and writes nothing for null,
            // which matches plain string concatenation.
            output.Append(field.Key).Append(separator).Append(field.Value).AppendLine();
        }
    }

    return output.ToString();
}
/// <summary>
/// Runs a Textract "FORMS" analysis on <paramref name="formFile"/> and writes a copy of the
/// local image in which the value of every form field whose key contains "address"
/// (case-insensitive) is covered by a black rectangle.
/// </summary>
/// <param name="bucketName">Bucket passed to the analysis service.</param>
/// <param name="formFile">Document name passed to the analysis service; also used to name the output file.</param>
/// <param name="localFolder">Folder in which the "redacted-" copy is written.</param>
/// <param name="localFile">Path of the local image that corresponds to the analysed document.</param>
internal void Handle(string bucketName, string formFile, string localFolder, string localFile)
{
    // NOTE: .Result blocks the calling thread; kept because this method is synchronous.
    var task = textractAnalysisService.StartDocumentAnalysis(bucketName, formFile, "FORMS");
    var jobId = task.Result;
    textractAnalysisService.WaitForJobCompletion(jobId);
    var results = textractAnalysisService.GetJobResults(jobId);

    var redactableImage = Path.Join(localFolder, "redacted-" + formFile);

    // Always leave a copy behind, even when nothing ends up being redacted.
    File.Copy(localFile, redactableImage, true);

    // Load from the source file rather than from the copy: Image.FromFile keeps its file
    // open, and Image.Save is not allowed to write to the file the image was created from.
    using (var image = Image.FromFile(localFile))
    using (var graphics = Graphics.FromImage(image))
    using (var brush = new SolidBrush(Color.Black))
    {
        var height = image.Height;
        var width = image.Width;
        Console.WriteLine("image dimensions: {0}x{1}", width, height);

        var redacted = false;
        var document = new TextractDocument(results);
        foreach (var page in document.Pages)
        {
            foreach (var field in page.Form.Fields)
            {
                var keyText = field.Key?.Text;
                if (keyText == null || keyText.IndexOf("address", StringComparison.OrdinalIgnoreCase) < 0)
                {
                    continue;
                }

                if (field.Value == null)
                {
                    // A key without a detected value has nothing to cover.
                    continue;
                }

                Console.WriteLine("Redacting Key: {0}, Value: {1}", field.Key.Text, field.Value.Text);

                // The bounding box is scaled by the image size below, i.e. it is treated
                // as a fraction of the page width/height.
                var bb = field.Value.Geometry.BoundingBox;
                Console.WriteLine(bb);

                var x1 = bb.Left * width;
                var y1 = bb.Top * height - 2;
                var rectWidth = bb.Width * width + 2;
                var rectHeight = bb.Height * height + 2;
                Console.WriteLine("x1: {0}, x2: {1}, y1: {2}, y2: {3}", x1, rectWidth, y1, rectHeight);

                graphics.FillRectangle(brush, x1, y1, rectWidth, rectHeight);
                redacted = true;
            }
        }

        if (redacted)
        {
            // Save once, after all rectangles are drawn, over the plain copy made above.
            image.Save(redactableImage);
            Console.WriteLine("redacted image saved at: {0}", redactableImage);
        }
    }
}
/// <summary>
/// Runs a "TABLES" analysis on the given document and returns the text of every table
/// cell, one line per cell, formatted as "Table [row][column] = text".
/// </summary>
/// <param name="documentKey">Key of the document to analyse in the configured bucket.</param>
/// <returns>
/// The formatted cell listing, or an empty string when the analysis job reports
/// <c>JobStatus.FAILED</c>.
/// </returns>
public async Task<string> ReadDocumentTable(string documentKey)
{
    _logger.Information("Started analyzing document");

    var jobId = await _analysisService.StartDocumentAnalysis(
        _s3Settings.S3BucketName ?? string.Empty,
        documentKey,
        "TABLES");
    await _analysisService.WaitForJobCompletion(jobId);
    var results = await _analysisService.GetJobResults(jobId);

    _logger.Information("Finished analyzing document");

    if (results.JobStatus == JobStatus.FAILED)
    {
        return string.Empty;
    }

    var output = new StringBuilder();
    var document = new TextractDocument(results);

    foreach (var page in document.Pages)
    {
        foreach (var table in page.Tables)
        {
            // Row and column numbers are 1-based and restart for every table.
            var rowNumber = 1;
            foreach (var row in table.Rows)
            {
                var columnNumber = 1;
                foreach (var cell in row.Cells)
                {
                    output.AppendLine($"Table [{rowNumber}][{columnNumber}] = {cell.Text}");
                    columnNumber++;
                }

                rowNumber++;
            }
        }
    }

    return output.ToString();
}
/// <summary>
/// Runs a "FORMS" analysis on the given document and prints every form field of every
/// page to the console, followed by a lookup of the "Phone Number:" field on that page.
/// </summary>
/// <param name="bucketName">Bucket passed to the analysis service.</param>
/// <param name="formFile">Document name passed to the analysis service.</param>
internal void Handle(string bucketName, string formFile)
{
    const string lookupKey = "Phone Number:";

    var jobId = textractAnalysisService.StartDocumentAnalysis(bucketName, formFile, "FORMS").Result;
    textractAnalysisService.WaitForJobCompletion(jobId);
    var analysis = textractAnalysisService.GetJobResults(jobId);

    var document = new TextractDocument(analysis);
    foreach (var page in document.Pages)
    {
        // Dump all fields first ...
        foreach (var entry in page.Form.Fields)
        {
            Console.WriteLine("Key: {0}, Value {1}", entry.Key, entry.Value);
        }

        // ... then demonstrate a lookup by key on the same page.
        Console.WriteLine("Get Field by Key:");
        var match = page.Form.GetFieldByKey(lookupKey);
        if (match == null)
        {
            continue;
        }

        Console.WriteLine("Key: {0}, Value: {1}", match.Key, match.Value);
    }
}
/// <summary>
/// Runs a "TABLES" analysis on an expense document, prints every table cell, and then
/// prints a warning for each row whose fifth cell parses as an amount greater than 100.
/// The first cell of the row is used as the item name in the warning.
/// </summary>
/// <param name="bucketName">Bucket passed to the analysis service.</param>
/// <param name="expenseFile">Document name passed to the analysis service.</param>
internal void Handle(string bucketName, string expenseFile)
{
    const int itemNameColumn = 1;
    const int expenseColumn = 5;
    const float expenseLimit = 100; // the warning text below hard-codes "$100"

    // Superset of the styles float.TryParse uses by default (Float | AllowThousands):
    // additionally accepts the current culture's currency symbol, so "$120.00" is not
    // silently skipped on a culture whose currency symbol is "$".
    const System.Globalization.NumberStyles amountStyles =
        System.Globalization.NumberStyles.Float
        | System.Globalization.NumberStyles.AllowThousands
        | System.Globalization.NumberStyles.AllowCurrencySymbol;

    var task = textractAnalysisService.StartDocumentAnalysis(bucketName, expenseFile, "TABLES");
    var jobId = task.Result;
    textractAnalysisService.WaitForJobCompletion(jobId);
    var results = textractAnalysisService.GetJobResults(jobId);

    var warnings = new StringBuilder();
    var document = new TextractDocument(results);

    foreach (var page in document.Pages)
    {
        foreach (var table in page.Tables)
        {
            var r = 0;
            foreach (var row in table.Rows)
            {
                r++;
                var itemName = string.Empty;
                var c = 0;
                foreach (var cell in row.Cells)
                {
                    c++;
                    Console.WriteLine("Table [{0}][{1}] = {2}", r, c, cell.Text);

                    if (c == itemNameColumn)
                    {
                        itemName = cell.Text;
                        continue;
                    }

                    if (c != expenseColumn)
                    {
                        continue;
                    }

                    float expense;
                    var parsed = float.TryParse(
                        cell.Text,
                        amountStyles,
                        System.Globalization.CultureInfo.CurrentCulture,
                        out expense);
                    if (parsed && expense > expenseLimit)
                    {
                        warnings.AppendFormat("{0} is greater than $100{1}", itemName, Environment.NewLine);
                    }
                }
            }
        }
    }

    Console.WriteLine(string.Format("{0}===Warnings==={0}{1}===", Environment.NewLine, warnings));
}
/// <summary>
/// Runs a "TABLES" analysis on the given document and prints the text of every table
/// cell to the console as "Table [row][column] = text".
/// </summary>
/// <param name="bucketName">Bucket passed to the analysis service.</param>
/// <param name="formFile">Document name passed to the analysis service.</param>
internal void Handle(string bucketName, string formFile)
{
    var jobId = textractAnalysisService.StartDocumentAnalysis(bucketName, formFile, "TABLES").Result;
    textractAnalysisService.WaitForJobCompletion(jobId);
    var analysis = textractAnalysisService.GetJobResults(jobId);

    var document = new TextractDocument(analysis);
    foreach (var page in document.Pages)
    {
        foreach (var table in page.Tables)
        {
            // Row and column numbers are 1-based and restart for every table.
            var rowNumber = 0;
            foreach (var row in table.Rows)
            {
                rowNumber++;
                var columnNumber = 0;
                foreach (var cell in row.Cells)
                {
                    columnNumber++;
                    Console.WriteLine("Table [{0}][{1}] = {2}", rowNumber, columnNumber, cell.Text);
                }
            }
        }
    }
}
/// <summary>
/// Runs a "TABLES" analysis on the given document and sums, per header text, the numeric
/// values found in the table columns whose header contains one of the words in
/// <c>ColumnTextsToCheck</c>.
/// </summary>
/// <remarks>
/// For each table, the first row containing at least one matching cell is treated as the
/// header row; every following row is treated as data. Cells that do not parse as a
/// <see cref="double"/> are ignored. Columns with the same header text (within one table
/// or across tables and pages) accumulate into the same entry.
/// </remarks>
/// <param name="documentKey">Key of the document to analyse in the configured bucket.</param>
/// <returns>
/// Header text mapped to the sum of its column values; empty when the analysis job
/// reports <c>JobStatus.FAILED</c> or nothing matched.
/// </returns>
public async Task<Dictionary<string, double>> ReadDocumentData(string documentKey)
{
    _logger.Information("Started analyzing document");
    var jobId = await _analysisService.StartDocumentAnalysis(_s3Settings.S3BucketName ?? string.Empty, documentKey, "TABLES");
    await _analysisService.WaitForJobCompletion(jobId);
    var results = await _analysisService.GetJobResults(jobId);
    _logger.Information("Finished analyzing document");

    var result = new Dictionary<string, double>();
    if (results.JobStatus == JobStatus.FAILED)
    {
        return result;
    }

    var document = new TextractDocument(results);
    foreach (var page in document.Pages)
    {
        foreach (var table in page.Tables)
        {
            // (1-based column index, header text) of each column to sum. The header text
            // is captured from the row in which it was detected, which is not necessarily
            // the first row of the table.
            var headerColumns = new List<KeyValuePair<int, string>>();

            foreach (var row in table.Rows)
            {
                if (headerColumns.Count == 0)
                {
                    // Still looking for the header row.
                    foreach (var cell in row.Cells)
                    {
                        var matches = cell.Text.Split(" ").Any(word => ColumnTextsToCheck.Contains(word.ToLower()));
                        if (matches && !headerColumns.Any(column => column.Key == cell.ColumnIndex))
                        {
                            headerColumns.Add(new KeyValuePair<int, string>(cell.ColumnIndex, cell.Text));
                        }
                    }

                    continue;
                }

                foreach (var column in headerColumns)
                {
                    var position = column.Key - 1;
                    if (position < 0 || position >= row.Cells.Count)
                    {
                        // Row is shorter than the header row; nothing to add for this column.
                        continue;
                    }

                    if (!double.TryParse(row.Cells[position].Text, out var value))
                    {
                        continue;
                    }

                    // TryGetValue leaves 'total' at 0 when the key is new.
                    result.TryGetValue(column.Value, out var total);
                    result[column.Value] = total + value;
                }
            }
        }
    }

    return result;
}
/*******************************************************************************
 * Static Methods
 *******************************************************************************/

/// <summary>
/// Builds a form object from an analysed document: a <c>TimesheetForm</c> for
/// <c>OR526_ATTENDANT</c> and <c>OR507_RELIEF</c>, a <c>MileageForm</c> for
/// <c>OR004_MILEAGE</c>.
/// </summary>
/// <remarks>
/// The front page is the first page with a line containing the text
/// "vice Delivered O" (a fragment of "Service Delivered On:"); every other page is
/// treated as a back page. The first seven form items of the front page fill the
/// header fields. If the front page has no tables, the form is returned with only
/// those header fields set and no back form attached.
/// </remarks>
/// <param name="doc">The analysed document; must have at least two pages.</param>
/// <param name="formType">Which kind of form the document contains.</param>
/// <returns>The populated form object.</returns>
/// <exception cref="ArgumentException">
/// The document has fewer than two pages, <paramref name="formType"/> is not one of the
/// supported values, or no front page could be identified.
/// </exception>
public static AbstractFormObject FromTextract(TextractDocument doc, FormType formType)
{
    // Partial match so that a slightly misread first or last character still matches.
    const string frontPageMarker = "vice Delivered O";

    if (doc.PageCount() < 2)
    {
        throw new ArgumentException("The document must contain at least two pages (front and back).", nameof(doc));
    }

    AbstractFormObject form;
    switch (formType)
    {
        case FormType.OR526_ATTENDANT:
        case FormType.OR507_RELIEF:
            form = new TimesheetForm();
            break;
        case FormType.OR004_MILEAGE:
            form = new MileageForm();
            break;
        default:
            throw new ArgumentException("Unsupported form type: " + formType, nameof(formType));
    }

    // Placeholder only; it is replaced by the real front page below, and we throw if
    // no front page is found.
    Page frontpage = doc.GetPage(0);
    bool frontfound = false;
    List<Page> backpages = new List<Page>();

    foreach (var page in doc.Pages)
    {
        bool isFront = false;
        if (!frontfound)
        {
            // Every form has "Service Delivered On:" on its front page, so that text
            // distinguishes the front from the back.
            foreach (var line in page.GetLines())
            {
                if (line.ToString().Contains(frontPageMarker))
                {
                    isFront = true;
                    break;
                }
            }
        }

        if (isFront)
        {
            frontpage = page;
            frontfound = true;
        }
        else
        {
            backpages.Add(page);
        }
    }

    if (!frontfound)
    {
        throw new ArgumentException("No page containing the front-page marker text was found.", nameof(doc));
    }

    var formitems = frontpage.GetFormItems();

    // Top form information
    form.clientName = formitems[0].Value.ToString().Trim();        // Customer Name
    form.prime = formitems[1].Value.ToString().Trim();             // Prime
    form.providerName = formitems[2].Value.ToString().Trim();      // Provider Name
    form.providerNum = formitems[3].Value.ToString().Trim();       // Provider Num
    form.brokerage = formitems[4].Value.ToString().Trim();         // CM Organization
    form.scpaName = formitems[5].Value.ToString().Trim();          // SC/PA Name
    form.serviceAuthorized = formitems[6].Value.ToString().Trim(); // Service

    // Table
    var tables = frontpage.GetTables();
    if (tables.Count == 0)
    {
        Console.WriteLine("No Table Information");
        return form;
    }

    form.AddTables(tables);

    // Populate back form objects. At least one back page exists here: the document has
    // two or more pages and exactly one of them was taken as the front page.
    form.AddBackForm(backpages[0]);
    return form;
}