Ejemplo n.º 1
0
 /// <summary>
 /// Creates a table from a service table description and the read result of its page.
 /// </summary>
 /// <param name="table">Service table that supplies the column count, row count and cells.</param>
 /// <param name="readResult">
 /// Read result whose page number is handed to the base constructor; it is also forwarded to the cell conversion.
 /// </param>
 internal FormTable(DataTable_internal table, ReadResult_internal readResult)
     : base(null, readResult.Page, null) // TODO: retrieve text and bounding box.
 {
     // Table dimensions are copied straight from the service model.
     this.ColumnCount = table.Columns;
     this.RowCount = table.Rows;

     // Cells are converted last, with the same read result the base constructor saw.
     var serviceCells = table.Cells;
     this.Cells = ConvertCells(serviceCells, readResult);
 }
Ejemplo n.º 2
0
        //private const string SegmentReadResults = "readResults";
        //private const string SegmentLines = "lines";
        //private const string SegmentWords = "words";

        /// <summary>
        /// Resolves a text reference of the form <c>#/readResults/3/lines/7/words/12</c> to the word it points at.
        /// </summary>
        /// <remarks>
        /// The reference is split on '/', which places the line index in segment 4 and the word index in segment 6.
        /// The read-result index (segment 2) is not consulted: the lookup is always performed against
        /// <paramref name="readResult"/>.
        /// </remarks>
        /// <param name="readResult">Read result whose lines and words are indexed.</param>
        /// <param name="reference">Text reference string to resolve.</param>
        /// <returns>A <see cref="RawExtractedWord"/> built from the referenced word.</returns>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="readResult"/> or <paramref name="reference"/> is null.
        /// </exception>
        /// <exception cref="System.FormatException">
        /// <paramref name="reference"/> has fewer than seven segments, or its line/word index is not an integer.
        /// </exception>
        private static RawExtractedItem ResolveTextReference(ReadResult_internal readResult, string reference)
        {
            if (readResult == null)
            {
                throw new System.ArgumentNullException(nameof(readResult));
            }

            if (reference == null)
            {
                throw new System.ArgumentNullException(nameof(reference));
            }

            // TODO: Add additional validations here (segment names, read-result index).
            // https://github.com/Azure/azure-sdk-for-net/issues/10363

            // Example: the following should result in lineIndex = 7, wordIndex = 12
            // "#/readResults/3/lines/7/words/12"
            string[] segments = reference.Split('/');

            // References are machine-generated, so the indices are parsed with the invariant culture
            // instead of whatever culture the calling thread happens to use.
            const System.Globalization.NumberStyles indexStyle = System.Globalization.NumberStyles.Integer;
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            // TODO: Support case where text reference is lines only, without word segment
            // https://github.com/Azure/azure-sdk-for-net/issues/10364
            if (segments.Length < 7
                || !int.TryParse(segments[4], indexStyle, invariant, out int lineIndex)
                || !int.TryParse(segments[6], indexStyle, invariant, out int wordIndex))
            {
                throw new System.FormatException(
                    $"Text reference '{reference}' is not of the form '#/readResults/<page>/lines/<line>/words/<word>'.");
            }

            // ElementAt avoids copying each collection into a temporary list just to index it once.
            return new RawExtractedWord(readResult.Lines.ElementAt(lineIndex).Words.ElementAt(wordIndex));
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Creates a table from a service table description and the read result selected by <paramref name="pageIndex"/>.
        /// </summary>
        /// <param name="table">Service table that supplies the column count, row count and cells.</param>
        /// <param name="readResults">All read results; the full list is forwarded to the cell conversion.</param>
        /// <param name="pageIndex">Index into <paramref name="readResults"/> of the read result for this table's page.</param>
        internal FormTable(DataTable_internal table, IReadOnlyList<ReadResult_internal> readResults, int pageIndex)
        {
            // The page number is read once and used both for this table and for its cells.
            var pageNumber = readResults[pageIndex].Page;

            this.PageNumber = pageNumber;
            this.ColumnCount = table.Columns;
            this.RowCount = table.Rows;
            this.Cells = ConvertCells(table.Cells, readResults, pageNumber);
        }
        /// <summary>
        /// Builds a <see cref="ReadResult_internal"/> from a JSON object by copying the recognized properties.
        /// </summary>
        /// <param name="element">JSON object to read; properties with other names are ignored.</param>
        /// <returns>
        /// The populated read result. <c>language</c> and <c>lines</c> are left untouched when their JSON value is null.
        /// </returns>
        internal static ReadResult_internal DeserializeReadResult_internal(JsonElement element)
        {
            var readResult = new ReadResult_internal();

            foreach (JsonProperty member in element.EnumerateObject())
            {
                JsonElement value = member.Value;

                switch (member.Name)
                {
                    case "page":
                        readResult.Page = value.GetInt32();
                        break;

                    case "angle":
                        readResult.Angle = value.GetSingle();
                        break;

                    case "width":
                        readResult.Width = value.GetSingle();
                        break;

                    case "height":
                        readResult.Height = value.GetSingle();
                        break;

                    case "unit":
                        readResult.Unit = value.GetString().ToLengthUnit();
                        break;

                    case "language":
                        // An explicit JSON null leaves the property at its default.
                        if (value.ValueKind != JsonValueKind.Null)
                        {
                            readResult.Language = new Language_internal(value.GetString());
                        }
                        break;

                    case "lines":
                        // An explicit JSON null leaves the property at its default.
                        if (value.ValueKind != JsonValueKind.Null)
                        {
                            readResult.Lines = new List<TextLine_internal>();
                            foreach (JsonElement lineElement in value.EnumerateArray())
                            {
                                readResult.Lines.Add(TextLine_internal.DeserializeTextLine_internal(lineElement));
                            }
                        }
                        break;
                }
            }

            return readResult;
        }
Ejemplo n.º 5
0
        // TODO: Refactor to move OCR code to a common file, rather than it living in this file.
        /// <summary>
        /// Resolves every text reference against the given read result, keeping the order of <paramref name="references"/>.
        /// </summary>
        /// <param name="readResult">Read result passed to each individual resolution.</param>
        /// <param name="references">Text reference strings to resolve.</param>
        /// <returns>One resolved item per reference.</returns>
        internal static IReadOnlyList<RawExtractedItem> ConvertTextReferences(ReadResult_internal readResult, ICollection<string> references)
        {
            var resolvedItems = new List<RawExtractedItem>(references.Count);

            foreach (string textReference in references)
            {
                RawExtractedItem item = ResolveTextReference(readResult, textReference);
                resolvedItems.Add(item);
            }

            return resolvedItems;
        }
        /// <summary>
        /// Creates a layout page from a page result and, when present, its read result.
        /// </summary>
        /// <param name="pageResult">Page result supplying the page number and the tables.</param>
        /// <param name="readResult">
        /// Read result forwarded to the table conversion; when it is null, <c>RawExtractedPage</c> is not assigned.
        /// </param>
        internal ExtractedLayoutPage(PageResult_internal pageResult, ReadResult_internal readResult)
        {
            this.PageNumber = pageResult.Page;
            this.Tables = ConvertTables(pageResult.Tables, readResult);

            // Nothing more to do without a read result.
            if (readResult == null)
            {
                return;
            }

            this.RawExtractedPage = new RawExtractedPage(readResult);
        }
        // Unsupervised
        /// <summary>
        /// Creates a page from a page result's key-value pairs and tables and, when present, its read result.
        /// </summary>
        /// <param name="pageResult">Page result supplying the page number, key-value pairs and tables.</param>
        /// <param name="readResult">
        /// Read result forwarded to the field and table conversions; when it is null, <c>RawExtractedPage</c> is not assigned.
        /// </param>
        internal ExtractedPage(PageResult_internal pageResult, ReadResult_internal readResult)
        {
            this.PageNumber = pageResult.Page;
            this.Fields = ConvertFields(pageResult.KeyValuePairs, readResult);
            this.Tables = ExtractedLayoutPage.ConvertTables(pageResult.Tables, readResult);

            // Nothing more to do without a read result.
            if (readResult == null)
            {
                return;
            }

            this.RawExtractedPage = new RawExtractedPage(readResult);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Creates a receipt from a document result and, when present, a read result.
        /// </summary>
        /// <param name="documentResult">
        /// Document result supplying the page range (first element becomes the start page, last element the end page)
        /// and the fields passed to <c>SetReceiptValues</c>.
        /// </param>
        /// <param name="readResult">Read result wrapped in <c>RawExtractedPage</c>; skipped when null.</param>
        internal ExtractedReceipt(DocumentResult_internal documentResult, ReadResult_internal readResult)
        {
            var pageRange = documentResult.PageRange;
            this.StartPageNumber = pageRange.First();
            this.EndPageNumber = pageRange.Last();

            SetReceiptValues(documentResult.Fields);

            // Nothing more to do without a read result.
            if (readResult == null)
            {
                return;
            }

            this.RawExtractedPage = new RawExtractedPage(readResult);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Creates a raw page by copying page number, angle, dimensions and unit from a read result.
        /// </summary>
        /// <param name="readResult">
        /// Read result to copy from; its lines are converted only when they are not null.
        /// </param>
        internal RawExtractedPage(ReadResult_internal readResult)
        {
            this.Page = readResult.Page;
            this.Angle = readResult.Angle;
            this.Width = readResult.Width;
            this.Height = readResult.Height;
            this.Unit = readResult.Unit;

            // Lines stay unassigned when the read result carries none.
            var sourceLines = readResult.Lines;
            if (sourceLines == null)
            {
                return;
            }

            this.Lines = ConvertLines(sourceLines);
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Creates a raw page by copying page number, text angle, dimensions and unit from a read result.
        /// </summary>
        /// <param name="readResult">
        /// Read result to copy from; its page number is also passed to the base constructor, and its lines are
        /// converted only when they are not null.
        /// </param>
        internal RawExtractedPage(ReadResult_internal readResult)
            : base(default, readResult.Page, default /* TODO */)
        {
            this.Page = readResult.Page;
            this.TextAngle = readResult.Angle;
            this.Width = readResult.Width;
            this.Height = readResult.Height;
            this.Unit = readResult.Unit;

            // Lines stay unassigned when the read result carries none.
            var sourceLines = readResult.Lines;
            if (sourceLines == null)
            {
                return;
            }

            this.Lines = ConvertLines(sourceLines, Page);
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Creates a form page from a read result and the tables recognized on that page.
        /// </summary>
        /// <param name="tablesResult">Service tables handed to <c>ExtractedLayoutPage.ConvertTables</c>.</param>
        /// <param name="readResult">
        /// Read result supplying angle, dimensions, unit and lines; its page number is passed to the base constructor.
        /// </param>
        internal FormPage(IReadOnlyList<DataTable_internal> tablesResult, ReadResult_internal readResult)
            : base(null, readResult.Page, null) // TODO: retrieve text and bounding box.
        {
            this.TextAngle = readResult.Angle;
            this.Width = readResult.Width;
            this.Height = readResult.Height;
            this.Unit = readResult.Unit;

            // Lines stay unassigned when the read result carries none.
            var sourceLines = readResult.Lines;
            if (sourceLines != null)
            {
                this.Lines = RawExtractedPage.ConvertLines(sourceLines, PageNumber);
            }

            this.Tables = ExtractedLayoutPage.ConvertTables(tablesResult, readResult);
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Creates a table cell from a service cell, passing its bounding box, the page number and its text to the base constructor.
        /// </summary>
        /// <param name="dataTableCell">Service cell supplying position, spans, header/footer flags and confidence.</param>
        /// <param name="readResult">Read result supplying the page number and used to resolve <paramref name="references"/>.</param>
        /// <param name="references">Text references for the cell content; when null, <c>TextContent</c> is not assigned.</param>
        internal FormTableCell(DataTableCell_internal dataTableCell, ReadResult_internal readResult, IReadOnlyList<string> references)
            : base(new BoundingBox(dataTableCell.BoundingBox), readResult.Page, dataTableCell.Text)
        {
            // Position within the table; a missing span counts as a single row/column.
            this.RowIndex = dataTableCell.RowIndex;
            this.RowSpan = dataTableCell.RowSpan.GetValueOrDefault(1);
            this.ColumnIndex = dataTableCell.ColumnIndex;
            this.ColumnSpan = dataTableCell.ColumnSpan.GetValueOrDefault(1);

            // Missing header/footer flags are treated as false.
            this.IsHeader = dataTableCell.IsHeader.GetValueOrDefault();
            this.IsFooter = dataTableCell.IsFooter.GetValueOrDefault();

            this.Confidence = dataTableCell.Confidence;

            if (references == null)
            {
                return;
            }

            this.TextContent = ExtractedField.ConvertTextReferences(readResult, references);
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Creates a form page from the read result at <paramref name="pageIndex"/> and the matching page result.
        /// </summary>
        /// <param name="pageResult">Page result supplying the tables; may be null, in which case <c>Tables</c> is empty.</param>
        /// <param name="readResults">All read results; also forwarded to the table conversion.</param>
        /// <param name="pageIndex">Index into <paramref name="readResults"/> of the read result for this page.</param>
        internal FormPage(PageResult_internal pageResult, IReadOnlyList <ReadResult_internal> readResults, int pageIndex)
        {
            ReadResult_internal readResult = readResults[pageIndex];

            PageNumber = readResult.Page;

            // Workaround because the service can sometimes return angles between 180 and 360 (bug).
            // Currently tracked by: https://github.com/Azure/azure-sdk-for-net/issues/12319
            // Values above 180 are shifted down by a full turn so they are never reported above 180.
            TextAngle = readResult.Angle <= 180.0f ? readResult.Angle : readResult.Angle - 360.0f;

            Width  = readResult.Width;
            Height = readResult.Height;
            Unit   = readResult.Unit;

            // Missing lines or tables become empty lists rather than null.
            Lines = readResult.Lines != null
                ? ConvertLines(readResult.Lines, readResult.Page)
                : new List <FormLine>();

            Tables = pageResult?.Tables != null
                ? ConvertTables(pageResult, readResults, pageIndex)
                : new List <FormTable>();
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Creates a table cell from a service cell.
        /// </summary>
        /// <param name="dataTableCell">Service cell supplying bounding box, text, position, spans, flags and confidence.</param>
        /// <param name="readResult">Read result used to resolve <paramref name="references"/>.</param>
        /// <param name="references">Text references for the cell content; when null, <c>RawExtractedItems</c> is not assigned.</param>
        internal ExtractedTableCell(DataTableCell_internal dataTableCell, ReadResult_internal readResult, IReadOnlyList<string> references)
        {
            this.BoundingBox = new BoundingBox(dataTableCell.BoundingBox);
            this.Text = dataTableCell.Text;
            this.Confidence = dataTableCell.Confidence;

            // Position within the table; a missing span counts as a single row/column.
            this.RowIndex = dataTableCell.RowIndex;
            this.RowSpan = dataTableCell.RowSpan.GetValueOrDefault(1);
            this.ColumnIndex = dataTableCell.ColumnIndex;
            this.ColumnSpan = dataTableCell.ColumnSpan.GetValueOrDefault(1);

            // Missing header/footer flags are treated as false.
            this.IsHeader = dataTableCell.IsHeader.GetValueOrDefault();
            this.IsFooter = dataTableCell.IsFooter.GetValueOrDefault();

            if (references == null)
            {
                return;
            }

            this.RawExtractedItems = ExtractedField.ConvertTextReferences(readResult, references);
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Creates a field from a service key-value pair: the key becomes the label and the value becomes the value.
        /// </summary>
        /// <param name="field">Key-value pair supplying confidence, texts, bounding boxes and text references.</param>
        /// <param name="readResult">Read result used to resolve the key and value text references.</param>
        internal ExtractedField(KeyValuePair_internal field, ReadResult_internal readResult)
        {
            // Unsupervised
            Confidence = field.Confidence;
            Label      = field.Key.Text;

            // A missing bounding box is surfaced as null instead of being handed to the BoundingBox constructor.
            LabelBoundingBox = field.Key.BoundingBox == null ? null : new BoundingBox(field.Key.BoundingBox);
            if (field.Key.Elements != null)
            {
                LabelRawExtractedItems = ConvertTextReferences(readResult, field.Key.Elements);
            }

            Value = field.Value.Text;

            // Same null handling as the label, so key and value are treated consistently.
            ValueBoundingBox = field.Value.BoundingBox == null ? null : new BoundingBox(field.Value.BoundingBox);

            if (field.Value.Elements != null)
            {
                ValueRawExtractedItems = ConvertTextReferences(readResult, field.Value.Elements);
            }
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Creates a form page from the read result at <paramref name="pageIndex"/> and the matching page result.
        /// </summary>
        /// <param name="pageResult">Page result supplying the tables; may be null, in which case <c>Tables</c> is empty.</param>
        /// <param name="readResults">All read results; also forwarded to the table conversion.</param>
        /// <param name="pageIndex">Index into <paramref name="readResults"/> of the read result for this page.</param>
        internal FormPage(PageResult_internal pageResult, IReadOnlyList<ReadResult_internal> readResults, int pageIndex)
        {
            ReadResult_internal source = readResults[pageIndex];

            this.PageNumber = source.Page;

            // Workaround because the service can sometimes return angles between 180 and 360 (bug).
            // Currently tracked by: https://github.com/Azure/azure-sdk-for-net/issues/12319
            var angle = source.Angle;
            if (angle > 180.0f)
            {
                angle -= 360.0f;
            }
            this.TextAngle = angle;

            this.Width = source.Width;
            this.Height = source.Height;
            this.Unit = source.Unit;

            // Missing lines become an empty list rather than null.
            if (source.Lines != null)
            {
                this.Lines = ConvertLines(source.Lines, source.Page);
            }
            else
            {
                this.Lines = new List<FormLine>();
            }

            // Missing page result or tables become an empty list rather than null.
            if (pageResult?.Tables != null)
            {
                this.Tables = ConvertTables(pageResult, readResults, pageIndex);
            }
            else
            {
                this.Tables = new List<FormTable>();
            }
        }
Ejemplo n.º 17
0
#pragma warning disable CA1801
        /// <summary>
        /// Placeholder constructor: the whole body is commented out, so neither argument is read and no member is assigned.
        /// </summary>
        /// <remarks>
        /// NOTE(review): CA1801 is presumably suppressed around the signature because both parameters are unused
        /// while the body is disabled; confirm and drop the suppression once the body is restored.
        /// </remarks>
        /// <param name="field">Currently unused.</param>
        /// <param name="readResult">Currently unused.</param>
        internal FormField(KeyValuePair_internal field, ReadResult_internal readResult)
        {
#pragma warning restore CA1801
            // Disabled implementation, kept as written:
            //Confidence = field.Confidence;

            //Name = field.Key.Text;
            //NameBoundingBox = new BoundingBox(field.Key.BoundingBox);

            //if (field.Key.Elements != null)
            //{
            //    NameTextElements = ConvertTextReferences(readResult, field.Key.Elements);
            //}

            //Value = field.Value.Text;
            //ValueBoundingBox = new BoundingBox(field.Value.BoundingBox);

            //if (field.Value.Elements != null)
            //{
            //    ValueTextElements = ConvertTextReferences(readResult, field.Value.Elements);
            //}
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Pairs each page result with the read result at the same position and builds one page per pair.
        /// </summary>
        /// <param name="pageResults">Page results; each one's cluster id is passed to <c>SetLearnedFormType</c>.</param>
        /// <param name="readResults">Read results, looked up by the position of the matching page result.</param>
        /// <returns>The pages, in the order of <paramref name="pageResults"/>.</returns>
        private IReadOnlyList<ExtractedPage> SetPages(ICollection<PageResult_internal> pageResults, ICollection<ReadResult_internal> readResults)
        {
            // TODO: Add validation and appropriate exception if these don't match.
            // https://github.com/Azure/azure-sdk-for-net/issues/10366
            Debug.Assert(pageResults.Count == readResults.Count);

            var extractedPages = new List<ExtractedPage>();

            int position = 0;
            foreach (PageResult_internal currentPageResult in pageResults)
            {
                ReadResult_internal currentReadResult = readResults.ElementAt(position);
                position++;

                SetLearnedFormType(currentPageResult.ClusterId);

                extractedPages.Add(new ExtractedPage(currentPageResult, currentReadResult));
            }

            return extractedPages;
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Builds an <see cref="AnalyzeResult_internal"/> from a JSON object by copying the recognized properties.
        /// </summary>
        /// <param name="element">JSON object to read; properties with other names are ignored.</param>
        /// <returns>
        /// The populated result. <c>pageResults</c>, <c>documentResults</c> and <c>errors</c> are left untouched when
        /// their JSON value is null; <c>readResults</c> items are appended to the collection the result already exposes.
        /// </returns>
        internal static AnalyzeResult_internal DeserializeAnalyzeResult_internal(JsonElement element)
        {
            var analyzeResult = new AnalyzeResult_internal();

            foreach (JsonProperty member in element.EnumerateObject())
            {
                JsonElement value = member.Value;

                switch (member.Name)
                {
                    case "version":
                        analyzeResult.Version = value.GetString();
                        break;

                    case "readResults":
                        // No null check and no new list here: items go into the existing collection.
                        foreach (JsonElement entry in value.EnumerateArray())
                        {
                            analyzeResult.ReadResults.Add(ReadResult_internal.DeserializeReadResult_internal(entry));
                        }
                        break;

                    case "pageResults":
                        if (value.ValueKind != JsonValueKind.Null)
                        {
                            analyzeResult.PageResults = new List<PageResult_internal>();
                            foreach (JsonElement entry in value.EnumerateArray())
                            {
                                analyzeResult.PageResults.Add(PageResult_internal.DeserializePageResult_internal(entry));
                            }
                        }
                        break;

                    case "documentResults":
                        if (value.ValueKind != JsonValueKind.Null)
                        {
                            analyzeResult.DocumentResults = new List<DocumentResult_internal>();
                            foreach (JsonElement entry in value.EnumerateArray())
                            {
                                analyzeResult.DocumentResults.Add(DocumentResult_internal.DeserializeDocumentResult_internal(entry));
                            }
                        }
                        break;

                    case "errors":
                        if (value.ValueKind != JsonValueKind.Null)
                        {
                            analyzeResult.Errors = new List<FormRecognizerError>();
                            foreach (JsonElement entry in value.EnumerateArray())
                            {
                                analyzeResult.Errors.Add(FormRecognizerError.DeserializeFormRecognizerError(entry));
                            }
                        }
                        break;
                }
            }

            return analyzeResult;
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Reads the recognized properties of a JSON object and passes them to the <see cref="AnalyzeResult_internal"/> constructor.
        /// </summary>
        /// <param name="element">JSON object to read; properties with other names are ignored.</param>
        /// <returns>
        /// A result built from the collected values. A value stays at its default when its property is absent or, for
        /// <c>pageResults</c>, <c>documentResults</c> and <c>errors</c>, when the property is JSON null. Null array
        /// items are kept as null entries.
        /// </returns>
        internal static AnalyzeResult_internal DeserializeAnalyzeResult_internal(JsonElement element)
        {
            string version = default;
            IReadOnlyList<ReadResult_internal> readResults = default;
            IReadOnlyList<PageResult_internal> pageResults = default;
            IReadOnlyList<DocumentResult_internal> documentResults = default;
            IReadOnlyList<FormRecognizerError> errors = default;

            foreach (JsonProperty member in element.EnumerateObject())
            {
                JsonElement value = member.Value;

                switch (member.Name)
                {
                    case "version":
                        version = value.GetString();
                        break;

                    case "readResults":
                    {
                        // Unlike the other arrays, this one is enumerated without a null check on the property itself.
                        var items = new List<ReadResult_internal>();
                        foreach (JsonElement entry in value.EnumerateArray())
                        {
                            items.Add(entry.ValueKind == JsonValueKind.Null
                                ? null
                                : ReadResult_internal.DeserializeReadResult_internal(entry));
                        }
                        readResults = items;
                        break;
                    }

                    case "pageResults":
                    {
                        if (value.ValueKind == JsonValueKind.Null)
                        {
                            break;
                        }
                        var items = new List<PageResult_internal>();
                        foreach (JsonElement entry in value.EnumerateArray())
                        {
                            items.Add(entry.ValueKind == JsonValueKind.Null
                                ? null
                                : PageResult_internal.DeserializePageResult_internal(entry));
                        }
                        pageResults = items;
                        break;
                    }

                    case "documentResults":
                    {
                        if (value.ValueKind == JsonValueKind.Null)
                        {
                            break;
                        }
                        var items = new List<DocumentResult_internal>();
                        foreach (JsonElement entry in value.EnumerateArray())
                        {
                            items.Add(entry.ValueKind == JsonValueKind.Null
                                ? null
                                : DocumentResult_internal.DeserializeDocumentResult_internal(entry));
                        }
                        documentResults = items;
                        break;
                    }

                    case "errors":
                    {
                        if (value.ValueKind == JsonValueKind.Null)
                        {
                            break;
                        }
                        var items = new List<FormRecognizerError>();
                        foreach (JsonElement entry in value.EnumerateArray())
                        {
                            items.Add(entry.ValueKind == JsonValueKind.Null
                                ? null
                                : FormRecognizerError.DeserializeFormRecognizerError(entry));
                        }
                        errors = items;
                        break;
                    }
                }
            }

            return new AnalyzeResult_internal(version, readResults, pageResults, documentResults, errors);
        }
        /// <summary>
        /// Converts service key-value pairs into extracted fields, keeping their order.
        /// </summary>
        /// <param name="keyValuePairs">Key-value pairs to convert; null is treated as "no fields".</param>
        /// <param name="readResult">Read result passed to each <see cref="ExtractedField"/> constructor.</param>
        /// <returns>One field per key-value pair; an empty list when <paramref name="keyValuePairs"/> is null.</returns>
        private static IReadOnlyList <ExtractedField> ConvertFields(ICollection <KeyValuePair_internal> keyValuePairs, ReadResult_internal readResult)
        {
            List <ExtractedField> fields = new List <ExtractedField>();

            // A page without key-value pairs yields an empty list instead of a NullReferenceException.
            if (keyValuePairs == null)
            {
                return(fields);
            }

            foreach (var kvp in keyValuePairs)
            {
                ExtractedField field = new ExtractedField(kvp, readResult);
                fields.Add(field);
            }
            return(fields);
        }
Ejemplo n.º 22
0
        /// <summary>
        /// Converts service table cells into extracted table cells, keeping their order.
        /// </summary>
        /// <param name="cellsResult">Service cells to convert; each cell's own elements are passed along as its text references.</param>
        /// <param name="readResult">Read result passed to each <see cref="ExtractedTableCell"/> constructor.</param>
        /// <returns>One extracted cell per service cell.</returns>
        private static IReadOnlyList<ExtractedTableCell> ConvertCells(ICollection<DataTableCell_internal> cellsResult, ReadResult_internal readResult)
        {
            var converted = new List<ExtractedTableCell>();

            foreach (DataTableCell_internal serviceCell in cellsResult)
            {
                var cell = new ExtractedTableCell(serviceCell, readResult, serviceCell.Elements);
                converted.Add(cell);
            }

            return converted;
        }
Ejemplo n.º 23
0
 /// <summary>
 /// Creates a table from a service table description.
 /// </summary>
 /// <param name="table">Service table that supplies the column count, row count and cells.</param>
 /// <param name="readResult">Read result forwarded to the cell conversion.</param>
 internal ExtractedTable(DataTable_internal table, ReadResult_internal readResult)
 {
     // Table dimensions are copied straight from the service model.
     this.ColumnCount = table.Columns;
     this.RowCount = table.Rows;

     var serviceCells = table.Cells;
     this.Cells = ConvertCells(serviceCells, readResult);
 }
        /// <summary>
        /// Converts service tables into extracted tables, keeping their order.
        /// </summary>
        /// <param name="tablesResult">Service tables to convert; null is treated as "no tables".</param>
        /// <param name="readResult">Read result passed to each <see cref="ExtractedTable"/> constructor.</param>
        /// <returns>One extracted table per service table; an empty list when <paramref name="tablesResult"/> is null.</returns>
        internal static IReadOnlyList <ExtractedTable> ConvertTables(IReadOnlyList <DataTable_internal> tablesResult, ReadResult_internal readResult)
        {
            List <ExtractedTable> tables = new List <ExtractedTable>();

            // A page without tables yields an empty list instead of a NullReferenceException.
            if (tablesResult == null)
            {
                return(tables);
            }

            foreach (var result in tablesResult)
            {
                tables.Add(new ExtractedTable(result, readResult));
            }

            return(tables);
        }
        // Supervised
        /// <summary>
        /// Creates a page from an explicit page number and field list, plus the page result's tables and, when present, its read result.
        /// </summary>
        /// <param name="pageNumber">Page number stored on the page as-is.</param>
        /// <param name="fields">Fields passed through the single-argument <c>ConvertFields</c> overload.</param>
        /// <param name="pageResult">Page result supplying the tables.</param>
        /// <param name="readResult">
        /// Read result forwarded to the table conversion; when it is null, <c>RawExtractedPage</c> is not assigned.
        /// </param>
        internal ExtractedPage(int pageNumber, List<ExtractedField> fields, PageResult_internal pageResult, ReadResult_internal readResult)
        {
            this.PageNumber = pageNumber;
            this.Fields = ConvertFields(fields);
            this.Tables = ExtractedLayoutPage.ConvertTables(pageResult.Tables, readResult);

            // Nothing more to do without a read result.
            if (readResult == null)
            {
                return;
            }

            this.RawExtractedPage = new RawExtractedPage(readResult);
        }