//private const string SegmentReadResults = "readResults";
//private const string SegmentLines = "lines";
//private const string SegmentWords = "words";

/// <summary>
/// Resolves a text reference such as "#/readResults/3/lines/7/words/12" to the word it
/// points at inside <paramref name="readResult"/>.
/// </summary>
/// <param name="readResult">
/// The read result to look the line and word up in. The "readResults" index contained in the
/// reference is not consulted; this read result is used as given.
/// </param>
/// <param name="reference">The text reference to resolve.</param>
/// <returns>A <see cref="FormWord"/> wrapping the referenced word and the read result's page.</returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="readResult"/> or <paramref name="reference"/> is null.
/// </exception>
/// <exception cref="System.FormatException">
/// The reference has too few '/'-separated segments, or a line/word index is not an integer.
/// </exception>
/// <exception cref="System.ArgumentOutOfRangeException">
/// The line or word index does not exist in <paramref name="readResult"/>.
/// </exception>
private static FormContent ResolveTextReference(ReadResult_internal readResult, string reference)
{
    if (readResult == null)
    {
        throw new System.ArgumentNullException(nameof(readResult));
    }

    if (reference == null)
    {
        throw new System.ArgumentNullException(nameof(reference));
    }

    // TODO: Add additional validations here (e.g. verify the segment names themselves).
    // https://github.com/Azure/azure-sdk-for-net/issues/10363

    // Positions of the numeric segments after splitting on '/'.
    // Example: "#/readResults/3/lines/7/words/12" splits into
    // [ "#", "readResults", "3", "lines", "7", "words", "12" ]
    // which should result in LineIndex = 7, WordIndex = 12.
    const int lineIndexSegment = 4;
    const int wordIndexSegment = 6;

    string[] segments = reference.Split('/');

    // TODO: Support case where text reference is lines only, without word segment
    // https://github.com/Azure/azure-sdk-for-net/issues/10364
    if (segments.Length <= wordIndexSegment)
    {
        // Fail with a message that names the offending reference instead of letting the
        // array indexer throw a bare IndexOutOfRangeException.
        throw new System.FormatException(
            "Text reference '" + reference + "' is not of the form '#/readResults/N/lines/N/words/N'.");
    }

    int lineIndex = int.Parse(segments[lineIndexSegment], CultureInfo.InvariantCulture);
    int wordIndex = int.Parse(segments[wordIndexSegment], CultureInfo.InvariantCulture);

    // ElementAt avoids copying the whole line and word collections for every lookup; like the
    // list indexer it throws ArgumentOutOfRangeException when the index does not exist.
    var word = readResult.Lines.ElementAt(lineIndex).Words.ElementAt(wordIndex);

    return new FormWord(word, readResult.Page);

    // Code from Chris Stone below

    //if (!string.IsNullOrEmpty(reference) && reference.Length > 2 && reference[0] == '#')
    //{
    //    // offset by 2 to skip the '#/' prefix
    //    var segments = reference.Substring(2).Split('/');

    //    // must have an even number of segments
    //    if (segments.Length % 2 == 0)
    //    {
    //        int offset;
    //        for (var i = 0; i < segments.Length; i += 2)
    //        {
    //            // the next segment must be an integer
    //            if (int.TryParse(segments[i + 1], out offset))
    //            {
    //                var segment = segments[i];

    //                // We assume we're already on the correct page element
    //                //// this is the root page element
    //                //if (segment == SegmentReadResults)
    //                //{
    //                //    readResult = results[offset];
    //                //}
    //
    //                // this is a text element
    //                if (readResult != default)
    //                {
    //                    if (segment == SegmentLines)
    //                    {
    //                        textElement = new RawExtractedLine(readResult.Lines.ToList()[offset]);
    //                    }
    //                    else if (segment == SegmentWords && textElement is RawExtractedLine)
    //                    {
    //                        textElement = (textElement as RawExtractedLine).Words[offset];
    //                    }
    //                }
    //            }
    //        }
    //    }
    //}
}
// TODO: Refactor to move OCR code to a common file, rather than it living in this file.

/// <summary>
/// Resolves every text reference in <paramref name="references"/> against
/// <paramref name="readResult"/>, preserving the order of the input list.
/// </summary>
/// <param name="readResult">The read result each reference is resolved against.</param>
/// <param name="references">The text references to resolve.</param>
/// <returns>One <see cref="FormContent"/> per reference, in input order.</returns>
internal static IReadOnlyList<FormContent> ConvertTextReferences(ReadResult_internal readResult, IReadOnlyList<string> references)
{
    var resolved = new List<FormContent>();

    for (int i = 0; i < references.Count; i++)
    {
        FormContent content = ResolveTextReference(readResult, references[i]);
        resolved.Add(content);
    }

    return resolved;
}
// TODO: Refactor to move OCR code to a common file, rather than it living in this file.

/// <summary>
/// Resolves every text reference in <paramref name="references"/> against
/// <paramref name="readResult"/>, in the collection's enumeration order.
/// </summary>
/// <param name="readResult">The read result each reference is resolved against.</param>
/// <param name="references">The text references to resolve.</param>
/// <returns>One <see cref="RawExtractedItem"/> per reference, in enumeration order.</returns>
internal static IReadOnlyList<RawExtractedItem> ConvertTextReferences(ReadResult_internal readResult, ICollection<string> references)
{
    var items = new List<RawExtractedItem>();

    using (IEnumerator<string> cursor = references.GetEnumerator())
    {
        while (cursor.MoveNext())
        {
            items.Add(ResolveTextReference(readResult, cursor.Current));
        }
    }

    return items;
}
// Unsupervised

/// <summary>
/// Builds a page from a page result: copies the page number and cluster id, converts the
/// key/value pairs and tables, and attaches the raw extracted page when a read result is supplied.
/// </summary>
/// <param name="pageResult">The page result providing page number, cluster id, key/value pairs and tables.</param>
/// <param name="readResult">The read result for the page; may be null, in which case no raw page is created.</param>
internal ExtractedPage(PageResult_internal pageResult, ReadResult_internal readResult)
{
    PageNumber = pageResult.Page;
    FormTypeId = pageResult.ClusterId;

    Fields = ConvertFields(pageResult.KeyValuePairs, readResult);
    Tables = ExtractedLayoutPage.ConvertTables(pageResult.Tables, readResult);

    // Without a read result there is nothing to build the raw page from.
    if (readResult == null)
    {
        return;
    }

    RawExtractedPage = new RawExtractedPage(readResult);
}
/// <summary>
/// Builds a field from a key/value pair: copies the confidence, the key and value text, and their
/// bounding boxes (left null when absent), and resolves the key and value element references
/// against <paramref name="readResult"/> when they are present.
/// </summary>
/// <param name="field">The key/value pair to convert.</param>
/// <param name="readResult">The read result used to resolve element references.</param>
internal ExtractedField(KeyValuePair_internal field, ReadResult_internal readResult)
{
    Confidence = field.Confidence;

    // Key side.
    var key = field.Key;
    Name = key.Text;

    if (key.BoundingBox == null)
    {
        NameBoundingBox = null;
    }
    else
    {
        NameBoundingBox = new BoundingBox(key.BoundingBox);
    }

    if (key.Elements != null)
    {
        NameRawExtractedItems = ConvertTextReferences(readResult, key.Elements);
    }

    // Value side.
    var value = field.Value;
    Value = value.Text;

    if (value.BoundingBox == null)
    {
        ValueBoundingBox = null;
    }
    else
    {
        ValueBoundingBox = new BoundingBox(value.BoundingBox);
    }

    if (value.Elements != null)
    {
        ValueRawExtractedItems = ConvertTextReferences(readResult, value.Elements);
    }
}
// Placeholder constructor: the body currently performs no assignments; the intended mapping of
// confidence, name/value text, bounding boxes and text elements is kept below as commented-out code.
// CA1801 (unused parameter) is suppressed around the signature because neither parameter is read yet.
#pragma warning disable CA1801
internal FormField(KeyValuePair_internal field, ReadResult_internal readResult)
{
#pragma warning restore CA1801
    //Confidence = field.Confidence;

    //Name = field.Key.Text;
    //NameBoundingBox = new BoundingBox(field.Key.BoundingBox);

    //if (field.Key.Elements != null)
    //{
    //    NameTextElements = ConvertTextReferences(readResult, field.Key.Elements);
    //}

    //Value = field.Value.Text;
    //ValueBoundingBox = new BoundingBox(field.Value.BoundingBox);

    //if (field.Value.Elements != null)
    //{
    //    ValueTextElements = ConvertTextReferences(readResult, field.Value.Elements);
    //}
}
// Creates a labeled table by forwarding the table and read result to the base constructor;
// no additional state is set in this constructor.
internal ExtractedLabeledTable(DataTable_internal table, ReadResult_internal readResult)
    : base(table, readResult)
{
}
/// <summary>
/// Wraps each key/value pair in an <see cref="ExtractedField"/>, preserving input order.
/// </summary>
/// <param name="keyValuePairs">The key/value pairs to convert.</param>
/// <param name="readResult">The read result passed through to each <see cref="ExtractedField"/>.</param>
/// <returns>One <see cref="ExtractedField"/> per key/value pair, in input order.</returns>
private static IReadOnlyList<ExtractedField> ConvertFields(IReadOnlyList<KeyValuePair_internal> keyValuePairs, ReadResult_internal readResult)
{
    var converted = new List<ExtractedField>();

    for (int i = 0; i < keyValuePairs.Count; i++)
    {
        converted.Add(new ExtractedField(keyValuePairs[i], readResult));
    }

    return converted;
}
/// <summary>
/// Creates a labeled table by forwarding the table and read result to the base constructor,
/// then records the page number the table belongs to.
/// </summary>
/// <param name="table">The table data passed to the base constructor.</param>
/// <param name="readResult">The read result passed to the base constructor.</param>
/// <param name="pageNumber">The value stored in <c>PageNumber</c>.</param>
internal ExtractedLabeledTable(DataTable_internal table, ReadResult_internal readResult, int pageNumber)
    : base(table, readResult)
    => PageNumber = pageNumber;