// The result can be stored in a .tsv file
        // (tab-separated, so it does not rely on ',' or ';' as separators - both of which can appear in a key/value)
        /// <summary>
        /// Renders the form fields of an analysed document as tab-separated text:
        /// a "Key"/"Value" header line followed by one line per form field.
        /// </summary>
        /// <param name="response">Document-analysis response the fields are read from.</param>
        /// <returns>The header line plus one key/value line for every field on every page.</returns>
        public string CreateKeyValueText(GetDocumentAnalysisResponse response)
        {
            const string tab = "\t";
            var output = new StringBuilder();

            output.AppendLine("Key" + tab + "Value");

            var parsed = new TextractDocument(response);
            foreach (var page in parsed.Pages)
            {
                foreach (var field in page.Form.Fields)
                {
                    // Interpolation formats key and value exactly as concatenation would
                    // (ToString() on each, empty text for null).
                    output.AppendLine($"{field.Key}{tab}{field.Value}");
                }
            }

            return output.ToString();
        }
        /// <summary>
        /// Runs a "FORMS" analysis on <paramref name="formFile"/> and produces a redacted copy of
        /// <paramref name="localFile"/>: the value of every form field whose key contains
        /// "address" (case-insensitive) is painted over with a black rectangle.
        /// </summary>
        /// <param name="bucketName">Bucket passed to the analysis service.</param>
        /// <param name="formFile">Document name passed to the analysis service; also used to name the output file.</param>
        /// <param name="localFolder">Folder in which "redacted-{formFile}" is written.</param>
        /// <param name="localFile">Local image that is copied and then redacted.</param>
        internal void Handle(string bucketName, string formFile, string localFolder, string localFile)
        {
            var task  = textractAnalysisService.StartDocumentAnalysis(bucketName, formFile, "FORMS");
            var jobId = task.Result;

            textractAnalysisService.WaitForJobCompletion(jobId);
            var results = textractAnalysisService.GetJobResults(jobId);

            var redactableImage = Path.Join(localFolder, "redacted-" + formFile);

            if (File.Exists(redactableImage))
            {
                File.Delete(redactableImage);
            }
            File.Copy(localFile, redactableImage);

            // Decode from an in-memory copy instead of Image.FromFile: FromFile keeps the file
            // locked until the Image is disposed, which makes saving back to the same path fail.
            using (var buffer = new MemoryStream(File.ReadAllBytes(redactableImage)))
            using (var image = Image.FromStream(buffer))
            {
                var height = image.Height;
                var width  = image.Width;

                Console.WriteLine("image dimensions: {0}x{1}", width, height);

                var document      = new TextractDocument(results);
                var redactedCount = 0;

                using (var graphics = Graphics.FromImage(image))
                {
                    foreach (var page in document.Pages)
                    {
                        foreach (var field in page.Form.Fields)
                        {
                            var keyText = field.Key?.Text;
                            if (keyText == null || keyText.IndexOf("address", StringComparison.OrdinalIgnoreCase) < 0)
                            {
                                continue;
                            }

                            // A key detected without a value has no area to paint over.
                            if (field.Value == null)
                            {
                                continue;
                            }

                            Console.WriteLine("Redacting Key: {0}, Value: {1}", keyText, field.Value.Text);
                            var bb = field.Value.Geometry.BoundingBox;
                            Console.WriteLine(bb);

                            // The bounding box is scaled by the image dimensions to get pixel
                            // coordinates; the +/- 2 pads the rectangle slightly.
                            var left       = bb.Left * width;
                            var top        = bb.Top * height - 2;
                            var rectWidth  = bb.Width * width + 2;
                            var rectHeight = bb.Height * height + 2;

                            // Labels kept as-is for log compatibility: "x2"/"y2" are the width/height.
                            Console.WriteLine("x1: {0}, x2: {1}, y1: {2}, y2: {3}", left, rectWidth, top, rectHeight);
                            graphics.FillRectangle(Brushes.Black, left, top, rectWidth, rectHeight);
                            redactedCount++;
                        }
                    }
                }

                // Write the file once, after all rectangles are drawn, rather than once per field.
                if (redactedCount > 0)
                {
                    image.Save(redactableImage);
                    Console.WriteLine("redacted image saved at: {0}", redactableImage);
                }
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Runs a "TABLES" analysis for <paramref name="documentKey"/> and lists every table cell
        /// as a "Table [row][column] = text" line, with 1-based row and column numbers.
        /// </summary>
        /// <param name="documentKey">Key of the document handed to the analysis service.</param>
        /// <returns>
        /// One line per cell across all pages and tables, or an empty string when the job
        /// status is <c>FAILED</c>.
        /// </returns>
        public async Task<string> ReadDocumentTable(string documentKey)
        {
            _logger.Information("Started analyzing document");

            var bucket = _s3Settings.S3BucketName ?? string.Empty;
            var jobId  = await _analysisService.StartDocumentAnalysis(bucket, documentKey, "TABLES");

            await _analysisService.WaitForJobCompletion(jobId);

            var analysis = await _analysisService.GetJobResults(jobId);

            _logger.Information("Finished analyzing document");

            if (analysis.JobStatus == JobStatus.FAILED)
            {
                return string.Empty;
            }

            var output = new StringBuilder();
            var parsed = new TextractDocument(analysis);

            foreach (var page in parsed.Pages)
            {
                foreach (var table in page.Tables)
                {
                    // Row numbering restarts at 1 for every table.
                    var rowNumber = 1;
                    foreach (var row in table.Rows)
                    {
                        var columnNumber = 1;
                        foreach (var cell in row.Cells)
                        {
                            output.AppendLine($"Table [{rowNumber}][{columnNumber}] = {cell.Text}");
                            columnNumber++;
                        }

                        rowNumber++;
                    }
                }
            }

            return output.ToString();
        }
        /// <summary>
        /// Runs a "FORMS" analysis on <paramref name="formFile"/> and prints, for each page, every
        /// form field followed by the result of looking up the "Phone Number:" key.
        /// </summary>
        /// <param name="bucketName">Bucket passed to the analysis service.</param>
        /// <param name="formFile">Document name passed to the analysis service.</param>
        internal void Handle(string bucketName, string formFile)
        {
            const string lookupKey = "Phone Number:";

            var jobId = textractAnalysisService.StartDocumentAnalysis(bucketName, formFile, "FORMS").Result;

            textractAnalysisService.WaitForJobCompletion(jobId);
            var analysis = textractAnalysisService.GetJobResults(jobId);
            var parsed   = new TextractDocument(analysis);

            foreach (var page in parsed.Pages)
            {
                foreach (var entry in page.Form.Fields)
                {
                    Console.WriteLine("Key: {0}, Value {1}", entry.Key, entry.Value);
                }

                // The lookup is repeated on every page; pages without a match print only the heading.
                Console.WriteLine("Get Field by Key:");
                var match = page.Form.GetFieldByKey(lookupKey);
                if (match == null)
                {
                    continue;
                }

                Console.WriteLine("Key: {0}, Value: {1}", match.Key, match.Value);
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Runs a "TABLES" analysis on <paramref name="expenseFile"/>, prints every table cell, and
        /// then prints a warning for each row whose fifth cell parses to an amount above 100.
        /// The first cell of the row is used as the item name in the warning.
        /// </summary>
        /// <param name="bucketName">Bucket passed to the analysis service.</param>
        /// <param name="expenseFile">Document name passed to the analysis service.</param>
        internal void Handle(string bucketName, string expenseFile)
        {
            const int   itemNameColumn = 1;
            const int   expenseColumn  = 5;
            const float expenseLimit   = 100;

            var task  = textractAnalysisService.StartDocumentAnalysis(bucketName, expenseFile, "TABLES");
            var jobId = task.Result;

            textractAnalysisService.WaitForJobCompletion(jobId);
            var results  = textractAnalysisService.GetJobResults(jobId);
            var warnings = new StringBuilder();
            var document = new TextractDocument(results);

            foreach (var page in document.Pages)
            {
                foreach (var table in page.Tables)
                {
                    var r = 0;
                    foreach (var row in table.Rows)
                    {
                        r++;
                        var itemName = string.Empty;
                        var c        = 0;
                        foreach (var cell in row.Cells)
                        {
                            c++;
                            Console.WriteLine("Table [{0}][{1}] = {2}", r, c, cell.Text);

                            if (c == itemNameColumn)
                            {
                                itemName = cell.Text;
                                continue;
                            }

                            if (c != expenseColumn)
                            {
                                continue;
                            }

                            // Parse with the invariant culture (same number styles as the default
                            // overload) so "123.45" is read the same way regardless of the machine's
                            // regional settings; the warning text shows the amounts are in dollars.
                            var parsed = float.TryParse(
                                cell.Text,
                                System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                                System.Globalization.CultureInfo.InvariantCulture,
                                out var expense);

                            if (parsed && expense > expenseLimit)
                            {
                                warnings.AppendFormat("{0} is greater than $100{1}", itemName, Environment.NewLine);
                            }
                        }
                    }
                }
            }

            Console.WriteLine("{0}===Warnings==={0}{1}===", Environment.NewLine, warnings);
        }
        /// <summary>
        /// Runs a "TABLES" analysis on <paramref name="formFile"/> and prints every table cell as
        /// "Table [row][column] = text", with 1-based row and column numbers.
        /// </summary>
        /// <param name="bucketName">Bucket passed to the analysis service.</param>
        /// <param name="formFile">Document name passed to the analysis service.</param>
        internal void Handle(string bucketName, string formFile)
        {
            var jobId = textractAnalysisService.StartDocumentAnalysis(bucketName, formFile, "TABLES").Result;

            textractAnalysisService.WaitForJobCompletion(jobId);
            var analysis = textractAnalysisService.GetJobResults(jobId);
            var parsed   = new TextractDocument(analysis);

            foreach (var page in parsed.Pages)
            {
                foreach (var table in page.Tables)
                {
                    // Row numbering restarts at 1 for every table.
                    var rowNumber = 1;
                    foreach (var row in table.Rows)
                    {
                        var columnNumber = 1;
                        foreach (var cell in row.Cells)
                        {
                            Console.WriteLine("Table [{0}][{1}] = {2}", rowNumber, columnNumber, cell.Text);
                            columnNumber++;
                        }

                        rowNumber++;
                    }
                }
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Runs a "TABLES" analysis for <paramref name="documentKey"/> and sums the numeric values
        /// of selected table columns.
        /// </summary>
        /// <remarks>
        /// For each table, the first row containing a cell with a word listed in
        /// <c>ColumnTextsToCheck</c> is treated as the header row. Every matching cell in that row
        /// selects a column, and the cell's text becomes the result key. Values in those columns on
        /// the following rows are added to the key's total when they parse as a number.
        /// </remarks>
        /// <param name="documentKey">Key of the document handed to the analysis service.</param>
        /// <returns>
        /// Totals keyed by header text, accumulated across all tables and pages; empty when the
        /// job status is <c>FAILED</c> or nothing matched.
        /// </returns>
        public async Task<Dictionary<string, double>> ReadDocumentData(string documentKey)
        {
            _logger.Information("Started analyzing document");

            var jobId = await _analysisService.StartDocumentAnalysis(_s3Settings.S3BucketName ?? string.Empty, documentKey, "TABLES");

            await _analysisService.WaitForJobCompletion(jobId);

            var results = await _analysisService.GetJobResults(jobId);

            _logger.Information("Finished analyzing document");

            var result = new Dictionary<string, double>();

            if (results.JobStatus == JobStatus.FAILED)
            {
                return result;
            }

            var document = new TextractDocument(results);

            foreach (var page in document.Pages)
            {
                foreach (var table in page.Tables)
                {
                    // Columns selected for this table, paired with the header text captured from
                    // the row they were found in (not necessarily the table's first row).
                    var trackedColumns = new List<(int ColumnIndex, string Key)>();

                    foreach (var row in table.Rows)
                    {
                        if (trackedColumns.Count == 0)
                        {
                            // Still looking for the header row.
                            foreach (var cell in row.Cells)
                            {
                                var matches = cell.Text.Split(" ").Any(word => ColumnTextsToCheck.Contains(word.ToLower()));
                                if (matches && !trackedColumns.Any(tracked => tracked.ColumnIndex == cell.ColumnIndex))
                                {
                                    trackedColumns.Add((cell.ColumnIndex, cell.Text));
                                }
                            }

                            continue;
                        }

                        foreach (var column in trackedColumns)
                        {
                            // Look the cell up by its column index rather than by position, so a
                            // row with fewer cells is skipped instead of throwing.
                            var valueCell = row.Cells.FirstOrDefault(candidate => candidate.ColumnIndex == column.ColumnIndex);
                            if (valueCell == null || !double.TryParse(valueCell.Text, out var value))
                            {
                                continue;
                            }

                            // A missing key leaves runningTotal at 0, so this both adds and accumulates.
                            result.TryGetValue(column.Key, out var runningTotal);
                            result[column.Key] = runningTotal + value;
                        }
                    }
                }
            }

            return result;
        }
Ejemplo n.º 8
0
    /*******************************************************************************
    *  /// Static Methods
    *******************************************************************************/

    /// <summary>
    /// Builds a timesheet or mileage form object from an analysed document.
    /// </summary>
    /// <remarks>
    /// The first page containing the text "vice Delivered O" (part of "Service Delivered On:")
    /// is treated as the front page; every other page is collected as a back page. The header
    /// fields are read from the front page's form items by position. If the front page has no
    /// tables, the form is returned with only the header fields populated.
    /// </remarks>
    /// <param name="doc">Analysed document; must contain at least two pages.</param>
    /// <param name="formType">Selects which concrete form object is created.</param>
    /// <returns>The populated form object.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="doc"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// The document has fewer than two pages, <paramref name="formType"/> is not supported,
    /// or no front page could be identified.
    /// </exception>
    public static AbstractFormObject FromTextract(TextractDocument doc, FormType formType)
    {
        if (doc == null)
        {
            throw new ArgumentNullException(nameof(doc));
        }

        // A usable form needs a front page and at least one back page.
        if (doc.PageCount() < 2)
        {
            throw new ArgumentException("Document must contain at least two pages (front and back).", nameof(doc));
        }

        AbstractFormObject form;

        switch (formType)
        {
        case FormType.OR526_ATTENDANT:
        case FormType.OR507_RELIEF:
            form = new TimesheetForm();
            break;

        case FormType.OR004_MILEAGE:
            form = new MileageForm();
            break;

        default:
            throw new ArgumentException("Unsupported form type: " + formType, nameof(formType));
        }

        // Placeholder so the variable is definitely assigned; it is replaced once the real
        // front page is found, and never used otherwise.
        Page       frontpage  = doc.GetPage(0);
        bool       frontfound = false;
        List<Page> backpages  = new List<Page>();

        foreach (var page in doc.Pages)
        {
            var isFront = false;

            if (!frontfound)
            {
                // Every form has "Service Delivered On:" on the front page, so a partial match
                // on that text distinguishes the front from the back.
                foreach (var line in page.GetLines())
                {
                    if (line.ToString().Contains("vice Delivered O"))
                    {
                        isFront = true;
                        break;
                    }
                }
            }

            if (isFront)
            {
                frontpage  = page;
                frontfound = true;
            }
            else
            {
                // Pages before the front page, and all pages after it, count as back pages.
                backpages.Add(page);
            }
        }

        if (!frontfound)
        {
            throw new ArgumentException("No front page containing \"Service Delivered On:\" was found.", nameof(doc));
        }
        var formitems = frontpage.GetFormItems();

        // Top Form Information
        // NOTE(review): relies on the front page yielding at least seven form items in this
        // fixed order - confirm against GetFormItems().

        form.clientName        = formitems[0].Value.ToString().Trim(); // Customer Name
        form.prime             = formitems[1].Value.ToString().Trim(); // Prime
        form.providerName      = formitems[2].Value.ToString().Trim(); // Provider Name
        form.providerNum       = formitems[3].Value.ToString().Trim(); // Provider Num
        form.brokerage         = formitems[4].Value.ToString().Trim(); // CM Organization
        form.scpaName          = formitems[5].Value.ToString().Trim(); // SC/PA Name
        form.serviceAuthorized = formitems[6].Value.ToString().Trim(); // Service

        // Table
        var tables = frontpage.GetTables();

        if (tables.Count == 0)
        {
            Console.WriteLine("No Table Information");
            return form;
        }
        form.AddTables(tables);

        // Populate back form objects. With at least two pages and exactly one front page,
        // backpages always holds at least one entry here.
        form.AddBackForm(backpages[0]);

        return form;
    }