/// <summary>
/// Creates a table from the service's table payload, converting its cells with the page's read result.
/// </summary>
/// <param name="table">Service table whose column count, row count and cells are copied.</param>
/// <param name="readResult">Read result of the page; its page number is passed to the base constructor and it is forwarded to the cell conversion.</param>
internal FormTable(DataTable_internal table, ReadResult_internal readResult)
    : base(null, readResult.Page, null) // TODO: retrieve text and bounding box.
{
    ColumnCount = table.Columns;
    RowCount = table.Rows;

    var convertedCells = ConvertCells(table.Cells, readResult);
    Cells = convertedCells;
}
/// <summary>
/// Resolves a text reference of the form "#/readResults/{page}/lines/{line}/words/{word}"
/// to the word it designates within <paramref name="readResult"/>.
/// </summary>
/// <remarks>
/// Example: "#/readResults/3/lines/7/words/12" resolves to line index 7, word index 12.
/// The page segment is not inspected; <paramref name="readResult"/> is used as given.
/// </remarks>
/// <param name="readResult">Read result whose lines and words are indexed.</param>
/// <param name="reference">Reference string split on '/'; segment 4 is the line index and segment 6 the word index.</param>
/// <returns>A <see cref="RawExtractedWord"/> wrapping the referenced word.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="reference"/> is null.</exception>
/// <exception cref="System.FormatException">
/// The reference has fewer than seven '/'-separated segments, or an index segment is not an integer.
/// </exception>
private static RawExtractedItem ResolveTextReference(ReadResult_internal readResult, string reference)
{
    // Positions of the index segments after splitting "#/readResults/3/lines/7/words/12" on '/'.
    const int LineIndexSegment = 4;
    const int WordIndexSegment = 6;

    if (reference == null)
    {
        throw new System.ArgumentNullException(nameof(reference));
    }

    // TODO: Add additional validations here.
    // https://github.com/Azure/azure-sdk-for-net/issues/10363
    string[] segments = reference.Split('/');

    // TODO: Support case where text reference is lines only, without word segment
    // https://github.com/Azure/azure-sdk-for-net/issues/10364
    if (segments.Length <= WordIndexSegment)
    {
        throw new System.FormatException($"Text reference '{reference}' does not contain both a line and a word index.");
    }

    // The reference is machine-generated, so parse it independently of the current culture.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
    int lineIndex = int.Parse(segments[LineIndexSegment], invariant);
    int wordIndex = int.Parse(segments[WordIndexSegment], invariant);

    // ElementAt avoids copying the whole collection just to index into it once.
    var line = readResult.Lines.ElementAt(lineIndex);
    var word = line.Words.ElementAt(wordIndex);

    return new RawExtractedWord(word);
}
/// <summary>
/// Creates a table for the page at <paramref name="pageIndex"/> within <paramref name="readResults"/>.
/// </summary>
/// <param name="table">Service table whose column count, row count and cells are copied.</param>
/// <param name="readResults">Read results for all pages; forwarded to the cell conversion.</param>
/// <param name="pageIndex">Index into <paramref name="readResults"/> of the page that owns this table.</param>
internal FormTable(DataTable_internal table, IReadOnlyList<ReadResult_internal> readResults, int pageIndex)
{
    ReadResult_internal owningPage = readResults[pageIndex];
    var pageNumber = owningPage.Page;

    PageNumber = pageNumber;
    ColumnCount = table.Columns;
    RowCount = table.Rows;
    Cells = ConvertCells(table.Cells, readResults, pageNumber);
}
/// <summary>
/// Builds a <see cref="ReadResult_internal"/> from a JSON object, reading the
/// "page", "angle", "width", "height", "unit", "language" and "lines" properties.
/// </summary>
/// <param name="element">JSON object to read; unrecognized properties are ignored.</param>
/// <returns>The populated read result. "language" and "lines" are left untouched when their JSON value is null.</returns>
internal static ReadResult_internal DeserializeReadResult_internal(JsonElement element)
{
    var readResult = new ReadResult_internal();

    foreach (JsonProperty member in element.EnumerateObject())
    {
        JsonElement value = member.Value;

        switch (member.Name)
        {
            case "page":
                readResult.Page = value.GetInt32();
                break;

            case "angle":
                readResult.Angle = value.GetSingle();
                break;

            case "width":
                readResult.Width = value.GetSingle();
                break;

            case "height":
                readResult.Height = value.GetSingle();
                break;

            case "unit":
                readResult.Unit = value.GetString().ToLengthUnit();
                break;

            case "language":
                if (value.ValueKind != JsonValueKind.Null)
                {
                    readResult.Language = new Language_internal(value.GetString());
                }
                break;

            case "lines":
                if (value.ValueKind != JsonValueKind.Null)
                {
                    readResult.Lines = new List<TextLine_internal>();
                    foreach (JsonElement lineElement in value.EnumerateArray())
                    {
                        readResult.Lines.Add(TextLine_internal.DeserializeTextLine_internal(lineElement));
                    }
                }
                break;
        }
    }

    return readResult;
}
// TODO: Refactor to move OCR code to a common file, rather than it living in this file.
/// <summary>
/// Resolves each text reference against <paramref name="readResult"/>, preserving the input order.
/// </summary>
/// <param name="readResult">Read result passed to <c>ResolveTextReference</c> for every reference.</param>
/// <param name="references">Reference strings to resolve.</param>
/// <returns>One resolved item per reference, in enumeration order.</returns>
internal static IReadOnlyList<RawExtractedItem> ConvertTextReferences(ReadResult_internal readResult, ICollection<string> references)
{
    var resolvedItems = new List<RawExtractedItem>();

    foreach (string textReference in references)
    {
        RawExtractedItem resolved = ResolveTextReference(readResult, textReference);
        resolvedItems.Add(resolved);
    }

    return resolvedItems;
}
/// <summary>
/// Creates a layout page from a page result and, when available, its read result.
/// </summary>
/// <param name="pageResult">Supplies the page number and the tables to convert.</param>
/// <param name="readResult">Forwarded to the table conversion; when non-null it is also wrapped in a <see cref="RawExtractedPage"/>.</param>
internal ExtractedLayoutPage(PageResult_internal pageResult, ReadResult_internal readResult)
{
    PageNumber = pageResult.Page;
    Tables = ConvertTables(pageResult.Tables, readResult);

    // No read result means there is no raw page to expose; RawExtractedPage is left unassigned.
    if (readResult == null)
    {
        return;
    }

    RawExtractedPage = new RawExtractedPage(readResult);
}
// Unsupervised
/// <summary>
/// Creates a page from a page result's key/value pairs and tables.
/// </summary>
/// <param name="pageResult">Supplies the page number, key/value pairs and tables.</param>
/// <param name="readResult">Forwarded to the field and table conversions; when non-null it is also wrapped in a <see cref="RawExtractedPage"/>.</param>
internal ExtractedPage(PageResult_internal pageResult, ReadResult_internal readResult)
{
    PageNumber = pageResult.Page;
    Fields = ConvertFields(pageResult.KeyValuePairs, readResult);
    Tables = ExtractedLayoutPage.ConvertTables(pageResult.Tables, readResult);

    // No read result means there is no raw page to expose; RawExtractedPage is left unassigned.
    if (readResult == null)
    {
        return;
    }

    RawExtractedPage = new RawExtractedPage(readResult);
}
/// <summary>
/// Creates a receipt from a document result and, when available, its read result.
/// </summary>
/// <param name="documentResult">Supplies the page range (first and last entries) and the fields passed to <c>SetReceiptValues</c>.</param>
/// <param name="readResult">When non-null, wrapped in a <see cref="RawExtractedPage"/>.</param>
internal ExtractedReceipt(DocumentResult_internal documentResult, ReadResult_internal readResult)
{
    StartPageNumber = documentResult.PageRange.First();
    EndPageNumber = documentResult.PageRange.Last();

    SetReceiptValues(documentResult.Fields);

    // No read result means there is no raw page to expose; RawExtractedPage is left unassigned.
    if (readResult == null)
    {
        return;
    }

    RawExtractedPage = new RawExtractedPage(readResult);
}
/// <summary>
/// Creates a raw page by copying the page number, angle, dimensions and unit from a read result.
/// </summary>
/// <param name="readResult">Source read result; its lines are converted only when present.</param>
internal RawExtractedPage(ReadResult_internal readResult)
{
    Page = readResult.Page;
    Angle = readResult.Angle;
    Width = readResult.Width;
    Height = readResult.Height;
    Unit = readResult.Unit;

    var sourceLines = readResult.Lines;
    if (sourceLines == null)
    {
        // Nothing to convert; Lines is left unassigned.
        return;
    }

    Lines = ConvertLines(sourceLines);
}
/// <summary>
/// Creates a raw page by copying the page number, text angle, dimensions and unit from a read result.
/// </summary>
/// <param name="readResult">Source read result; its page number is also passed to the base constructor, and its lines are converted only when present.</param>
internal RawExtractedPage(ReadResult_internal readResult)
    : base(default, readResult.Page, default /* TODO */)
{
    Page = readResult.Page;
    TextAngle = readResult.Angle;
    Width = readResult.Width;
    Height = readResult.Height;
    Unit = readResult.Unit;

    var sourceLines = readResult.Lines;
    if (sourceLines == null)
    {
        // Nothing to convert; Lines is left unassigned.
        return;
    }

    // Page must already be assigned here: it is handed to the line conversion.
    Lines = ConvertLines(sourceLines, Page);
}
/// <summary>
/// Creates a form page from a read result and the tables recognized on that page.
/// </summary>
/// <param name="tablesResult">Service tables converted through <c>ExtractedLayoutPage.ConvertTables</c>.</param>
/// <param name="readResult">Supplies the page number (via the base constructor), angle, dimensions, unit and, when present, the lines.</param>
internal FormPage(IReadOnlyList<DataTable_internal> tablesResult, ReadResult_internal readResult)
    : base(null, readResult.Page, null) // TODO: retrieve text and bounding box.
{
    TextAngle = readResult.Angle;
    Width = readResult.Width;
    Height = readResult.Height;
    Unit = readResult.Unit;

    var sourceLines = readResult.Lines;
    if (sourceLines != null)
    {
        Lines = RawExtractedPage.ConvertLines(sourceLines, PageNumber);
    }

    Tables = ExtractedLayoutPage.ConvertTables(tablesResult, readResult);
}
/// <summary>
/// Creates a table cell from the service's cell payload.
/// </summary>
/// <param name="dataTableCell">Service cell; a missing column span or row span becomes 1, and a missing header/footer flag becomes false.</param>
/// <param name="readResult">Supplies the page number for the base constructor and is used to resolve <paramref name="references"/>.</param>
/// <param name="references">Text references for the cell content; when null, TextContent is left unassigned.</param>
internal FormTableCell(DataTableCell_internal dataTableCell, ReadResult_internal readResult, IReadOnlyList<string> references)
    : base(new BoundingBox(dataTableCell.BoundingBox), readResult.Page, dataTableCell.Text)
{
    ColumnIndex = dataTableCell.ColumnIndex;
    ColumnSpan = dataTableCell.ColumnSpan ?? 1;
    Confidence = dataTableCell.Confidence;
    IsFooter = dataTableCell.IsFooter ?? false;
    IsHeader = dataTableCell.IsHeader ?? false;
    RowIndex = dataTableCell.RowIndex;
    RowSpan = dataTableCell.RowSpan ?? 1;

    if (references == null)
    {
        return;
    }

    TextContent = ExtractedField.ConvertTextReferences(readResult, references);
}
/// <summary>
/// Creates a form page for the entry at <paramref name="pageIndex"/> within <paramref name="readResults"/>.
/// </summary>
/// <param name="pageResult">Page result that may carry tables; when it or its tables are null, Tables is an empty list.</param>
/// <param name="readResults">Read results for all pages; forwarded to the table conversion.</param>
/// <param name="pageIndex">Index into <paramref name="readResults"/> of the page to build.</param>
internal FormPage(PageResult_internal pageResult, IReadOnlyList<ReadResult_internal> readResults, int pageIndex)
{
    ReadResult_internal readResult = readResults[pageIndex];

    PageNumber = readResult.Page;

    // Workaround because the service can sometimes return angles between 180 and 360 (bug).
    // Currently tracked by: https://github.com/Azure/azure-sdk-for-net/issues/12319
    // Values above 180 are shifted down by a full turn; all other values are copied unchanged.
    TextAngle = readResult.Angle <= 180.0f ? readResult.Angle : readResult.Angle - 360.0f;

    Width = readResult.Width;
    Height = readResult.Height;
    Unit = readResult.Unit;

    Lines = readResult.Lines != null
        ? ConvertLines(readResult.Lines, readResult.Page)
        : new List<FormLine>();

    Tables = pageResult?.Tables != null
        ? ConvertTables(pageResult, readResults, pageIndex)
        : new List<FormTable>();
}
/// <summary>
/// Creates a table cell from the service's cell payload.
/// </summary>
/// <param name="dataTableCell">Service cell; a missing column span or row span becomes 1, and a missing header/footer flag becomes false.</param>
/// <param name="readResult">Read result used to resolve <paramref name="references"/>.</param>
/// <param name="references">Text references for the cell content; when null, RawExtractedItems is left unassigned.</param>
internal ExtractedTableCell(DataTableCell_internal dataTableCell, ReadResult_internal readResult, IReadOnlyList<string> references)
{
    BoundingBox = new BoundingBox(dataTableCell.BoundingBox);
    ColumnIndex = dataTableCell.ColumnIndex;
    ColumnSpan = dataTableCell.ColumnSpan ?? 1;
    Confidence = dataTableCell.Confidence;
    IsFooter = dataTableCell.IsFooter ?? false;
    IsHeader = dataTableCell.IsHeader ?? false;
    RowIndex = dataTableCell.RowIndex;
    RowSpan = dataTableCell.RowSpan ?? 1;
    Text = dataTableCell.Text;

    if (references == null)
    {
        return;
    }

    RawExtractedItems = ExtractedField.ConvertTextReferences(readResult, references);
}
/// <summary>
/// Creates a field from a key/value pair: the key becomes the label, the value becomes the value.
/// </summary>
/// <param name="field">Key/value pair supplying confidence, texts, bounding boxes and text references.</param>
/// <param name="readResult">Read result used to resolve the key's and value's text references.</param>
internal ExtractedField(KeyValuePair_internal field, ReadResult_internal readResult)
{
    // Unsupervised
    Confidence = field.Confidence;

    Label = field.Key.Text;
    LabelBoundingBox = field.Key.BoundingBox == null ? null : new BoundingBox(field.Key.BoundingBox);

    if (field.Key.Elements != null)
    {
        LabelRawExtractedItems = ConvertTextReferences(readResult, field.Key.Elements);
    }

    Value = field.Value.Text;

    // Guard the value's bounding box the same way as the label's, so a missing box yields null
    // instead of being passed to the BoundingBox constructor.
    ValueBoundingBox = field.Value.BoundingBox == null ? null : new BoundingBox(field.Value.BoundingBox);

    if (field.Value.Elements != null)
    {
        ValueRawExtractedItems = ConvertTextReferences(readResult, field.Value.Elements);
    }
}
/// <summary>
/// Creates a form page for the entry at <paramref name="pageIndex"/> within <paramref name="readResults"/>.
/// </summary>
/// <param name="pageResult">Page result that may carry tables; when it or its tables are null, Tables is an empty list.</param>
/// <param name="readResults">Read results for all pages; forwarded to the table conversion.</param>
/// <param name="pageIndex">Index into <paramref name="readResults"/> of the page to build.</param>
internal FormPage(PageResult_internal pageResult, IReadOnlyList<ReadResult_internal> readResults, int pageIndex)
{
    ReadResult_internal readResult = readResults[pageIndex];

    PageNumber = readResult.Page;

    // Workaround because the service can sometimes return angles between 180 and 360 (bug).
    // Currently tracked by: https://github.com/Azure/azure-sdk-for-net/issues/12319
    var angle = readResult.Angle;
    if (angle > 180.0f)
    {
        angle -= 360.0f;
    }
    TextAngle = angle;

    Width = readResult.Width;
    Height = readResult.Height;
    Unit = readResult.Unit;

    if (readResult.Lines != null)
    {
        Lines = ConvertLines(readResult.Lines, readResult.Page);
    }
    else
    {
        Lines = new List<FormLine>();
    }

    if (pageResult?.Tables != null)
    {
        Tables = ConvertTables(pageResult, readResults, pageIndex);
    }
    else
    {
        Tables = new List<FormTable>();
    }
}
#pragma warning disable CA1801
// Placeholder constructor: the body performs no assignments, so neither parameter is read yet.
// CA1801 is suppressed around the signature, presumably for that reason (confirm before removing).
// The commented-out statements below sketch the intended mapping from the key/value pair.
internal FormField(KeyValuePair_internal field, ReadResult_internal readResult)
{
#pragma warning restore CA1801
    //Confidence = field.Confidence;

    //Name = field.Key.Text;
    //NameBoundingBox = new BoundingBox(field.Key.BoundingBox);

    //if (field.Key.Elements != null)
    //{
    //    NameTextElements = ConvertTextReferences(readResult, field.Key.Elements);
    //}

    //Value = field.Value.Text;
    //ValueBoundingBox = new BoundingBox(field.Value.BoundingBox);

    //if (field.Value.Elements != null)
    //{
    //    ValueTextElements = ConvertTextReferences(readResult, field.Value.Elements);
    //}
}
/// <summary>
/// Pairs each page result with the read result at the same position and builds one
/// <see cref="ExtractedPage"/> per pair. <c>SetLearnedFormType</c> is called with each page's
/// cluster id before that page is constructed.
/// </summary>
/// <param name="pageResults">Page results, enumerated in order.</param>
/// <param name="readResults">Read results, enumerated in lockstep with <paramref name="pageResults"/>.</param>
/// <returns>The extracted pages, in the order of <paramref name="pageResults"/>.</returns>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="readResults"/> has fewer elements than <paramref name="pageResults"/>.
/// </exception>
private IReadOnlyList<ExtractedPage> SetPages(ICollection<PageResult_internal> pageResults, ICollection<ReadResult_internal> readResults)
{
    // TODO: Add validation and appropriate exception if these don't match.
    // https://github.com/Azure/azure-sdk-for-net/issues/10366
    Debug.Assert(pageResults.Count == readResults.Count);

    List<ExtractedPage> pages = new List<ExtractedPage>(pageResults.Count);

    // Walk both collections together instead of calling ElementAt(i) per iteration,
    // which re-enumerates from the start for collections that are not lists.
    using (IEnumerator<ReadResult_internal> readEnumerator = readResults.GetEnumerator())
    {
        foreach (PageResult_internal pageResult in pageResults)
        {
            if (!readEnumerator.MoveNext())
            {
                throw new System.ArgumentOutOfRangeException(
                    nameof(readResults),
                    "There are fewer read results than page results.");
            }

            ReadResult_internal rawExtractedPage = readEnumerator.Current;

            SetLearnedFormType(pageResult.ClusterId);

            pages.Add(new ExtractedPage(pageResult, rawExtractedPage));
        }
    }

    return pages;
}
/// <summary>
/// Builds an <see cref="AnalyzeResult_internal"/> from a JSON object, reading the "version",
/// "readResults", "pageResults", "documentResults" and "errors" properties.
/// </summary>
/// <param name="element">JSON object to read; unrecognized properties are ignored.</param>
/// <returns>
/// The populated result. "pageResults", "documentResults" and "errors" are left untouched when
/// their JSON value is null; "readResults" items are appended to the existing collection.
/// </returns>
internal static AnalyzeResult_internal DeserializeAnalyzeResult_internal(JsonElement element)
{
    var analyzeResult = new AnalyzeResult_internal();

    foreach (JsonProperty member in element.EnumerateObject())
    {
        JsonElement value = member.Value;

        switch (member.Name)
        {
            case "version":
                analyzeResult.Version = value.GetString();
                break;

            case "readResults":
                foreach (JsonElement entry in value.EnumerateArray())
                {
                    analyzeResult.ReadResults.Add(ReadResult_internal.DeserializeReadResult_internal(entry));
                }
                break;

            case "pageResults":
                if (value.ValueKind != JsonValueKind.Null)
                {
                    analyzeResult.PageResults = new List<PageResult_internal>();
                    foreach (JsonElement entry in value.EnumerateArray())
                    {
                        analyzeResult.PageResults.Add(PageResult_internal.DeserializePageResult_internal(entry));
                    }
                }
                break;

            case "documentResults":
                if (value.ValueKind != JsonValueKind.Null)
                {
                    analyzeResult.DocumentResults = new List<DocumentResult_internal>();
                    foreach (JsonElement entry in value.EnumerateArray())
                    {
                        analyzeResult.DocumentResults.Add(DocumentResult_internal.DeserializeDocumentResult_internal(entry));
                    }
                }
                break;

            case "errors":
                if (value.ValueKind != JsonValueKind.Null)
                {
                    analyzeResult.Errors = new List<FormRecognizerError>();
                    foreach (JsonElement entry in value.EnumerateArray())
                    {
                        analyzeResult.Errors.Add(FormRecognizerError.DeserializeFormRecognizerError(entry));
                    }
                }
                break;
        }
    }

    return analyzeResult;
}
/// <summary>
/// Builds an <see cref="AnalyzeResult_internal"/> from a JSON object, reading the "version",
/// "readResults", "pageResults", "documentResults" and "errors" properties.
/// </summary>
/// <param name="element">JSON object to read; unrecognized properties are ignored.</param>
/// <returns>
/// A result constructed from the values read. A collection stays at its default when its property
/// is absent, or (for all but "readResults") when its JSON value is null. A null array item is
/// stored as a null entry.
/// </returns>
internal static AnalyzeResult_internal DeserializeAnalyzeResult_internal(JsonElement element)
{
    string version = default;
    IReadOnlyList<ReadResult_internal> readResults = default;
    IReadOnlyList<PageResult_internal> pageResults = default;
    IReadOnlyList<DocumentResult_internal> documentResults = default;
    IReadOnlyList<FormRecognizerError> errors = default;

    foreach (JsonProperty member in element.EnumerateObject())
    {
        JsonElement value = member.Value;

        switch (member.Name)
        {
            case "version":
                version = value.GetString();
                break;

            case "readResults":
            {
                var items = new List<ReadResult_internal>();
                foreach (JsonElement entry in value.EnumerateArray())
                {
                    items.Add(entry.ValueKind == JsonValueKind.Null
                        ? null
                        : ReadResult_internal.DeserializeReadResult_internal(entry));
                }
                readResults = items;
                break;
            }

            case "pageResults":
            {
                if (value.ValueKind == JsonValueKind.Null)
                {
                    break;
                }

                var items = new List<PageResult_internal>();
                foreach (JsonElement entry in value.EnumerateArray())
                {
                    items.Add(entry.ValueKind == JsonValueKind.Null
                        ? null
                        : PageResult_internal.DeserializePageResult_internal(entry));
                }
                pageResults = items;
                break;
            }

            case "documentResults":
            {
                if (value.ValueKind == JsonValueKind.Null)
                {
                    break;
                }

                var items = new List<DocumentResult_internal>();
                foreach (JsonElement entry in value.EnumerateArray())
                {
                    items.Add(entry.ValueKind == JsonValueKind.Null
                        ? null
                        : DocumentResult_internal.DeserializeDocumentResult_internal(entry));
                }
                documentResults = items;
                break;
            }

            case "errors":
            {
                if (value.ValueKind == JsonValueKind.Null)
                {
                    break;
                }

                var items = new List<FormRecognizerError>();
                foreach (JsonElement entry in value.EnumerateArray())
                {
                    items.Add(entry.ValueKind == JsonValueKind.Null
                        ? null
                        : FormRecognizerError.DeserializeFormRecognizerError(entry));
                }
                errors = items;
                break;
            }
        }
    }

    return new AnalyzeResult_internal(version, readResults, pageResults, documentResults, errors);
}
/// <summary>
/// Wraps each key/value pair in an <see cref="ExtractedField"/>, preserving enumeration order.
/// </summary>
/// <param name="keyValuePairs">Key/value pairs to convert.</param>
/// <param name="readResult">Read result passed to every <see cref="ExtractedField"/> constructor.</param>
/// <returns>One field per key/value pair.</returns>
private static IReadOnlyList<ExtractedField> ConvertFields(ICollection<KeyValuePair_internal> keyValuePairs, ReadResult_internal readResult)
{
    var converted = new List<ExtractedField>();

    foreach (KeyValuePair_internal pair in keyValuePairs)
    {
        converted.Add(new ExtractedField(pair, readResult));
    }

    return converted;
}
/// <summary>
/// Wraps each service cell in an <see cref="ExtractedTableCell"/>, preserving enumeration order.
/// </summary>
/// <param name="cellsResult">Service cells to convert; each cell's own Elements are passed as its text references.</param>
/// <param name="readResult">Read result passed to every <see cref="ExtractedTableCell"/> constructor.</param>
/// <returns>One converted cell per service cell.</returns>
private static IReadOnlyList<ExtractedTableCell> ConvertCells(ICollection<DataTableCell_internal> cellsResult, ReadResult_internal readResult)
{
    var converted = new List<ExtractedTableCell>();

    foreach (DataTableCell_internal sourceCell in cellsResult)
    {
        var cell = new ExtractedTableCell(sourceCell, readResult, sourceCell.Elements);
        converted.Add(cell);
    }

    return converted;
}
/// <summary>
/// Creates a table from the service's table payload, converting its cells with the page's read result.
/// </summary>
/// <param name="table">Service table whose column count, row count and cells are copied.</param>
/// <param name="readResult">Read result forwarded to the cell conversion.</param>
internal ExtractedTable(DataTable_internal table, ReadResult_internal readResult)
{
    ColumnCount = table.Columns;
    RowCount = table.Rows;

    var convertedCells = ConvertCells(table.Cells, readResult);
    Cells = convertedCells;
}
/// <summary>
/// Wraps each service table in an <see cref="ExtractedTable"/>, preserving enumeration order.
/// </summary>
/// <param name="tablesResult">Service tables to convert.</param>
/// <param name="readResult">Read result passed to every <see cref="ExtractedTable"/> constructor.</param>
/// <returns>One converted table per service table.</returns>
internal static IReadOnlyList<ExtractedTable> ConvertTables(IReadOnlyList<DataTable_internal> tablesResult, ReadResult_internal readResult)
{
    var converted = new List<ExtractedTable>();

    foreach (DataTable_internal sourceTable in tablesResult)
    {
        var table = new ExtractedTable(sourceTable, readResult);
        converted.Add(table);
    }

    return converted;
}
// Supervised
/// <summary>
/// Creates a page from an explicit page number and an already-extracted list of fields.
/// </summary>
/// <param name="pageNumber">Page number assigned to the page.</param>
/// <param name="fields">Fields passed through <c>ConvertFields</c>.</param>
/// <param name="pageResult">Supplies the tables to convert.</param>
/// <param name="readResult">Forwarded to the table conversion; when non-null it is also wrapped in a <see cref="RawExtractedPage"/>.</param>
internal ExtractedPage(int pageNumber, List<ExtractedField> fields, PageResult_internal pageResult, ReadResult_internal readResult)
{
    PageNumber = pageNumber;
    Fields = ConvertFields(fields);
    Tables = ExtractedLayoutPage.ConvertTables(pageResult.Tables, readResult);

    // No read result means there is no raw page to expose; RawExtractedPage is left unassigned.
    if (readResult == null)
    {
        return;
    }

    RawExtractedPage = new RawExtractedPage(readResult);
}