Esempio n. 1
0
        public async Task StartRecognizeCustomFormsWithoutLabelsCanParseBlankPage()
        {
            var client  = CreateFormRecognizerClient();
            var options = new RecognizeCustomFormsOptions()
            {
                IncludeFieldElements = true
            };
            RecognizeCustomFormsOperation operation;

            await using var trainedModel = await CreateDisposableTrainedModelAsync(useTrainingLabels : false);

            using var stream = FormRecognizerTestEnvironment.CreateStream(TestFile.Blank);
            using (Recording.DisableRequestBodyRecording())
            {
                operation = await client.StartRecognizeCustomFormsAsync(trainedModel.ModelId, stream, options);
            }

            RecognizedFormCollection recognizedForms = await operation.WaitForCompletionAsync();

            var blankForm = recognizedForms.Single();

            ValidateModelWithNoLabelsForm(
                blankForm,
                trainedModel.ModelId,
                includeFieldElements: true,
                expectedFirstPageNumber: 1,
                expectedLastPageNumber: 1);

            Assert.AreEqual(0, blankForm.Fields.Count);

            var blankPage = blankForm.Pages.Single();

            Assert.AreEqual(0, blankPage.Lines.Count);
            Assert.AreEqual(0, blankPage.Tables.Count);
        }
Esempio n. 2
0
        public async Task StartRecognizeCustomFormsWithLabelsAndSelectionMarks(bool includeFieldElements)
        {
            var client  = CreateFormRecognizerClient();
            var options = new RecognizeCustomFormsOptions {
                IncludeFieldElements = includeFieldElements
            };
            RecognizeCustomFormsOperation operation;

            await using var trainedModel = await CreateDisposableTrainedModelAsync(useTrainingLabels : true, ContainerType.SelectionMarks);

            using var stream = FormRecognizerTestEnvironment.CreateStream(TestFile.FormSelectionMarks);
            using (Recording.DisableRequestBodyRecording())
            {
                operation = await client.StartRecognizeCustomFormsAsync(trainedModel.ModelId, stream, options);
            }

            await operation.WaitForCompletionAsync();

            Assert.IsTrue(operation.HasValue);
            Assert.GreaterOrEqual(operation.Value.Count, 1);

            RecognizedForm form = operation.Value.Single();

            ValidateRecognizedForm(form, includeFieldElements: includeFieldElements,
                                   expectedFirstPageNumber: 1, expectedLastPageNumber: 1);

            // Testing that we shuffle things around correctly so checking only once per property.
            Assert.IsNotEmpty(form.FormType);
            Assert.IsNotNull(form.Fields);
            var name = "AMEX_SELECTION_MARK";

            Assert.IsNotNull(form.Fields[name]);
            Assert.AreEqual(FieldValueType.SelectionMark, form.Fields[name].Value.ValueType);
            Assert.AreEqual(SelectionMarkState.Selected, form.Fields[name].Value.AsSelectionMarkState());
        }
Esempio n. 3
0
        public async Task StartRecognizeCustomFormsWithoutLabels(bool useStream, bool includeFieldElements)
        {
            var client  = CreateFormRecognizerClient();
            var options = new RecognizeCustomFormsOptions {
                IncludeFieldElements = includeFieldElements
            };
            RecognizeCustomFormsOperation operation;

            await using var trainedModel = await CreateDisposableTrainedModelAsync(useTrainingLabels : false);

            if (useStream)
            {
                using var stream = FormRecognizerTestEnvironment.CreateStream(TestFile.Form1);
                using (Recording.DisableRequestBodyRecording())
                {
                    operation = await client.StartRecognizeCustomFormsAsync(trainedModel.ModelId, stream, options);
                }
            }
            else
            {
                var uri = FormRecognizerTestEnvironment.CreateUri(TestFile.Form1);
                operation = await client.StartRecognizeCustomFormsFromUriAsync(trainedModel.ModelId, uri, options);
            }

            await operation.WaitForCompletionAsync();

            Assert.IsTrue(operation.HasValue);
            Assert.GreaterOrEqual(operation.Value.Count, 1);

            RecognizedForm form = operation.Value.Single();

            ValidateModelWithNoLabelsForm(
                form,
                trainedModel.ModelId,
                includeFieldElements: includeFieldElements,
                expectedFirstPageNumber: 1,
                expectedLastPageNumber: 1);

            //testing that we shuffle things around correctly so checking only once per property

            Assert.AreEqual("form-0", form.FormType);
            Assert.IsFalse(form.FormTypeConfidence.HasValue);
            Assert.AreEqual(1, form.Pages.Count);
            Assert.AreEqual(2200, form.Pages[0].Height);
            Assert.AreEqual(1, form.Pages[0].PageNumber);
            Assert.AreEqual(LengthUnit.Pixel, form.Pages[0].Unit);
            Assert.AreEqual(1700, form.Pages[0].Width);

            Assert.IsNotNull(form.Fields);
            var name = "field-0";

            Assert.IsNotNull(form.Fields[name]);
            Assert.IsNotNull(form.Fields[name].LabelData.Text);
            Assert.AreEqual(FieldValueType.String, form.Fields[name].Value.ValueType);

            // Disable this verification for now.
            // Issue https://github.com/Azure/azure-sdk-for-net/issues/15075
            // Assert.AreEqual("Hero Limited", form.Fields[name].LabelData.Text);
        }
Esempio n. 4
0
        public async Task StartRecognizeCustomFormsWithLabelsCanParseMultipageForm(bool useStream)
        {
            var client  = CreateFormRecognizerClient();
            var options = new RecognizeCustomFormsOptions()
            {
                IncludeFieldElements = true
            };
            RecognizeCustomFormsOperation operation;

            await using var trainedModel = await CreateDisposableTrainedModelAsync(useTrainingLabels : true, ContainerType.MultipageFiles);

            if (useStream)
            {
                using var stream = FormRecognizerTestEnvironment.CreateStream(TestFile.InvoiceMultipage);
                using (Recording.DisableRequestBodyRecording())
                {
                    operation = await client.StartRecognizeCustomFormsAsync(trainedModel.ModelId, stream, options);
                }
            }
            else
            {
                var uri = FormRecognizerTestEnvironment.CreateUri(TestFile.InvoiceMultipage);
                operation = await client.StartRecognizeCustomFormsFromUriAsync(trainedModel.ModelId, uri, options);
            }

            RecognizedFormCollection recognizedForms = await operation.WaitForCompletionAsync();

            var recognizedForm = recognizedForms.Single();

            ValidateModelWithLabelsForm(
                recognizedForm,
                trainedModel.ModelId,
                includeFieldElements: true,
                expectedFirstPageNumber: 1,
                expectedLastPageNumber: 2,
                isComposedModel: false);

            // Check some values to make sure that fields from both pages are being populated.

            Assert.AreEqual("*****@*****.**", recognizedForm.Fields["Contact"].Value.AsString());
            Assert.AreEqual("Southridge Video", recognizedForm.Fields["CompanyName"].Value.AsString());
            Assert.AreEqual("$1,500", recognizedForm.Fields["Gold"].Value.AsString());
            Assert.AreEqual("$1,000", recognizedForm.Fields["Bronze"].Value.AsString());

            Assert.AreEqual(2, recognizedForm.Pages.Count);

            for (int pageIndex = 0; pageIndex < recognizedForm.Pages.Count; pageIndex++)
            {
                var formPage = recognizedForm.Pages[pageIndex];

                // Basic sanity test to make sure pages are ordered correctly.

                var sampleLine   = formPage.Lines[1];
                var expectedText = pageIndex == 0 ? "Vendor Registration" : "Vendor Details:";

                Assert.AreEqual(expectedText, sampleLine.Text);
            }
        }
Esempio n. 5
0
        public async Task StartRecognizeCustomFormsWithLabels(bool useStream, bool includeFieldElements)
        {
            var client  = CreateFormRecognizerClient();
            var options = new RecognizeCustomFormsOptions {
                IncludeFieldElements = includeFieldElements
            };
            RecognizeCustomFormsOperation operation;

            await using var trainedModel = await CreateDisposableTrainedModelAsync(useTrainingLabels : true);

            if (useStream)
            {
                using var stream = FormRecognizerTestEnvironment.CreateStream(TestFile.Form1);
                using (Recording.DisableRequestBodyRecording())
                {
                    operation = await client.StartRecognizeCustomFormsAsync(trainedModel.ModelId, stream, options);
                }
            }
            else
            {
                var uri = FormRecognizerTestEnvironment.CreateUri(TestFile.Form1);
                operation = await client.StartRecognizeCustomFormsFromUriAsync(trainedModel.ModelId, uri, options);
            }

            await operation.WaitForCompletionAsync();

            Assert.IsTrue(operation.HasValue);
            Assert.GreaterOrEqual(operation.Value.Count, 1);

            RecognizedForm form = operation.Value.Single();

            ValidateModelWithLabelsForm(
                form,
                trainedModel.ModelId,
                includeFieldElements: includeFieldElements,
                expectedFirstPageNumber: 1,
                expectedLastPageNumber: 1,
                isComposedModel: false);

            // Testing that we shuffle things around correctly so checking only once per property.

            Assert.IsNotEmpty(form.FormType);
            Assert.IsTrue(form.FormTypeConfidence.HasValue);
            Assert.AreEqual(1, form.Pages.Count);
            Assert.AreEqual(2200, form.Pages[0].Height);
            Assert.AreEqual(1, form.Pages[0].PageNumber);
            Assert.AreEqual(LengthUnit.Pixel, form.Pages[0].Unit);
            Assert.AreEqual(1700, form.Pages[0].Width);

            Assert.IsNotNull(form.Fields);
            var name = "PurchaseOrderNumber";

            Assert.IsNotNull(form.Fields[name]);
            Assert.AreEqual(FieldValueType.String, form.Fields[name].Value.ValueType);
            Assert.AreEqual("948284", form.Fields[name].ValueData.Text);
        }
Esempio n. 6
0
        public async Task StartRecognizeCustomFormsWithoutLabelsCanParseMultipageForm(bool useStream)
        {
            var client  = CreateFormRecognizerClient();
            var options = new RecognizeCustomFormsOptions()
            {
                IncludeFieldElements = true
            };
            RecognizeCustomFormsOperation operation;

            await using var trainedModel = await CreateDisposableTrainedModelAsync(useTrainingLabels : false, ContainerType.MultipageFiles);

            if (useStream)
            {
                using var stream = FormRecognizerTestEnvironment.CreateStream(TestFile.InvoiceMultipage);
                using (Recording.DisableRequestBodyRecording())
                {
                    operation = await client.StartRecognizeCustomFormsAsync(trainedModel.ModelId, stream, options);
                }
            }
            else
            {
                var uri = FormRecognizerTestEnvironment.CreateUri(TestFile.InvoiceMultipage);
                operation = await client.StartRecognizeCustomFormsFromUriAsync(trainedModel.ModelId, uri, options);
            }

            RecognizedFormCollection recognizedForms = await operation.WaitForCompletionAsync();

            Assert.AreEqual(2, recognizedForms.Count);

            for (int formIndex = 0; formIndex < recognizedForms.Count; formIndex++)
            {
                var recognizedForm     = recognizedForms[formIndex];
                var expectedPageNumber = formIndex + 1;

                ValidateModelWithNoLabelsForm(
                    recognizedForm,
                    trainedModel.ModelId,
                    includeFieldElements: true,
                    expectedFirstPageNumber: expectedPageNumber,
                    expectedLastPageNumber: expectedPageNumber);
            }

            // Basic sanity test to make sure pages are ordered correctly.

            FormPage  firstFormPage  = recognizedForms[0].Pages.Single();
            FormTable firstFormTable = firstFormPage.Tables.Single();

            Assert.True(firstFormTable.Cells.Any(c => c.Text == "Gold Sponsor"));

            FormField secondFormFieldInPage = recognizedForms[1].Fields.Values.Where(field => field.LabelData.Text.Contains("Company Name:")).FirstOrDefault();

            Assert.IsNotNull(secondFormFieldInPage);
            Assert.IsNotNull(secondFormFieldInPage.ValueData);
            Assert.AreEqual("Southridge Video", secondFormFieldInPage.ValueData.Text);
        }
Esempio n. 7
0
        public async Task StartRecognizeCustomFormsWithLabelsCanParseMultipageFormWithBlankPage(bool useStream)
        {
            var client  = CreateFormRecognizerClient();
            var options = new RecognizeCustomFormsOptions()
            {
                IncludeFieldElements = true
            };
            RecognizeCustomFormsOperation operation;

            await using var trainedModel = await CreateDisposableTrainedModelAsync(useTrainingLabels : true);

            if (useStream)
            {
                using var stream = FormRecognizerTestEnvironment.CreateStream(TestFile.InvoiceMultipageBlank);
                using (Recording.DisableRequestBodyRecording())
                {
                    operation = await client.StartRecognizeCustomFormsAsync(trainedModel.ModelId, stream, options);
                }
            }
            else
            {
                var uri = FormRecognizerTestEnvironment.CreateUri(TestFile.InvoiceMultipageBlank);
                operation = await client.StartRecognizeCustomFormsFromUriAsync(trainedModel.ModelId, uri, options);
            }

            RecognizedFormCollection recognizedForms = await operation.WaitForCompletionAsync();

            var recognizedForm = recognizedForms.Single();

            ValidateModelWithLabelsForm(
                recognizedForm,
                trainedModel.ModelId,
                includeFieldElements: true,
                expectedFirstPageNumber: 1,
                expectedLastPageNumber: 3,
                isComposedModel: false);

            for (int pageIndex = 0; pageIndex < recognizedForm.Pages.Count; pageIndex++)
            {
                if (pageIndex == 0 || pageIndex == 2)
                {
                    var formPage     = recognizedForm.Pages[pageIndex];
                    var sampleLine   = formPage.Lines[3];
                    var expectedText = pageIndex == 0 ? "Bilbo Baggins" : "Frodo Baggins";

                    Assert.AreEqual(expectedText, sampleLine.Text);
                }
            }

            var blankPage = recognizedForm.Pages[1];

            Assert.AreEqual(0, blankPage.Lines.Count);
            Assert.AreEqual(0, blankPage.Tables.Count);
        }
        public void StartRecognizeCustomFormsValidatesTheModelIdFormat()
        {
            var client = CreateClient();

            using var stream = new MemoryStream(Array.Empty <byte>());
            var options = new RecognizeCustomFormsOptions {
                ContentType = FormContentType.Jpeg
            };

            Assert.ThrowsAsync <ArgumentException>(async() => await client.StartRecognizeCustomFormsAsync("1975-04-04", stream, options));
        }
        public void StartRecognizeCustomFormsRespectsTheCancellationToken()
        {
            var client  = CreateInstrumentedClient();
            var options = new RecognizeCustomFormsOptions {
                ContentType = FormContentType.Pdf
            };

            using var stream             = new MemoryStream(Array.Empty <byte>());
            using var cancellationSource = new CancellationTokenSource();
            cancellationSource.Cancel();

            Assert.ThrowsAsync(Is.InstanceOf <OperationCanceledException>(), async() => await client.StartRecognizeCustomFormsAsync("00000000-0000-0000-0000-000000000000", stream, recognizeCustomFormsOptions: options, cancellationToken: cancellationSource.Token));
        }
        private static async Task AnalyzeDynamicCustomForm(FormRecognizerClient recognizerClient, string trainingFileUrl, string formUrl)
        {
            RecognizeCustomFormsOptions options = new RecognizeCustomFormsOptions();

            //train model
            FormTrainingClient trainingClient = new FormTrainingClient(new Uri(endpoint), new AzureKeyCredential(apiKey));
            CustomFormModel    model          = await trainingClient.StartTrainingAsync(new Uri(trainingFileUrl), useTrainingLabels : false,
                                                                                        $"VIS-Dynamic-Model-{DateTime.Now.ToShortDateString()}-{DateTime.Now.ToLongTimeString()}").WaitForCompletionAsync();

            //string modelId = inModelID;
            string modelId = model.ModelId;

            //recognize form
            RecognizeCustomFormsOperation operation = await recognizerClient.StartRecognizeCustomFormsFromUriAsync(modelId, new Uri(formUrl));

            Response <RecognizedFormCollection> operationResponse = await operation.WaitForCompletionAsync();

            RecognizedFormCollection forms = operationResponse.Value;

            foreach (RecognizedForm form in forms)
            {
                returnString += $"Form of type: {form.FormType}{Environment.NewLine}";
                foreach (FormField field in form.Fields.Values)
                {
                    returnString += $"Field '{field.Name}: ";

                    if (field.LabelData != null)
                    {
                        returnString += $"    Label: '{field.LabelData.Text}";
                    }

                    returnString += $"    Value: '{field.ValueData.Text}";
                    returnString += $"    Confidence: '{field.Confidence}{Environment.NewLine}";
                }
                returnString += $"Table data:{Environment.NewLine}";
                foreach (FormPage page in form.Pages)
                {
                    for (int i = 0; i < page.Tables.Count; i++)
                    {
                        FormTable table = page.Tables[i];
                        //Console.WriteLine($"Table {i} has {table.RowCount} rows and {table.ColumnCount} columns.");
                        foreach (FormTableCell cell in table.Cells)
                        {
                            returnString += $"    Cell ({cell.RowIndex}, {cell.ColumnIndex}) contains {(cell.IsHeader ? "header" : "text")}: '{cell.Text}'{Environment.NewLine}";
                        }
                    }
                }
            }
            // Delete the model on completion to clean environment.
            await trainingClient.DeleteModelAsync(model.ModelId);
        }
        public void StartRecognizeCustomFormsRequiresTheModelId(string modelId)
        {
            var client       = CreateInstrumentedClient();
            var expectedType = modelId == null
                ? typeof(ArgumentNullException)
                : typeof(ArgumentException);

            using var stream = new MemoryStream(Array.Empty <byte>());
            var options = new RecognizeCustomFormsOptions {
                ContentType = FormContentType.Jpeg
            };

            Assert.ThrowsAsync(expectedType, async() => await client.StartRecognizeCustomFormsAsync(modelId, stream, options));
        }
        public async Task StartRecognizeCustomFormsSendsUserSpecifiedContentType()
        {
            var mockResponse = new MockResponse(202);
            mockResponse.AddHeader(new HttpHeader(Constants.OperationLocationHeader, "host/custom/models/00000000000000000000000000000000/analyzeResults/00000000000000000000000000000000"));

            var mockTransport = new MockTransport(new[] { mockResponse, mockResponse });
            var options = new FormRecognizerClientOptions() { Transport = mockTransport };
            var client = CreateInstrumentedClient(options);

            using var stream = FormRecognizerTestEnvironment.CreateStream(TestFile.InvoiceLeTiff);
            var recognizeOptions = new RecognizeCustomFormsOptions { ContentType = FormContentType.Jpeg };
            await client.StartRecognizeCustomFormsAsync("00000000000000000000000000000000", stream, recognizeOptions);

            var request = mockTransport.Requests.Single();

            Assert.True(request.Headers.TryGetValue("Content-Type", out var contentType));
            Assert.AreEqual("image/jpeg", contentType);
        }
        public async Task StartRecognizeCustomFormsWithLabelsAndSelectionMarks(bool includeFieldElements)
        {
            var client  = CreateFormRecognizerClient();
            var options = new RecognizeCustomFormsOptions {
                IncludeFieldElements = includeFieldElements
            };
            RecognizeCustomFormsOperation operation;

            await using var trainedModel = await CreateDisposableTrainedModelAsync(useTrainingLabels : true, ContainerType.SelectionMarks);

            using var stream = FormRecognizerTestEnvironment.CreateStream(TestFile.FormSelectionMarks);
            using (Recording.DisableRequestBodyRecording())
            {
                operation = await client.StartRecognizeCustomFormsAsync(trainedModel.ModelId, stream, options);
            }

            await operation.WaitForCompletionAsync();

            Assert.IsTrue(operation.HasValue);
            Assert.GreaterOrEqual(operation.Value.Count, 1);

            RecognizedForm form = operation.Value.Single();

            ValidateRecognizedForm(form, includeFieldElements: includeFieldElements,
                                   expectedFirstPageNumber: 1, expectedLastPageNumber: 1);

            // Testing that we shuffle things around correctly so checking only once per property.
            Assert.IsNotEmpty(form.FormType);
            Assert.IsNotNull(form.Fields);
            var name = "AMEX_SELECTION_MARK";

            Assert.IsNotNull(form.Fields[name]);
            Assert.AreEqual(FieldValueType.SelectionMark, form.Fields[name].Value.ValueType);

            // If this assertion is failing after a recent update in the generated models, please remember
            // to update the manually added FieldValue_internal constructor in src/FieldValue_internal.cs if
            // necessary. The service originally returns "selected" and "unselected" as lowercase strings,
            // but we overwrite these values there. Consider removing this comment when:
            // https://github.com/Azure/azure-sdk-for-net/issues/17814 is fixed and the manually added constructor
            // is not needed anymore.
            Assert.AreEqual("Selected", form.Fields[name].ValueData.Text);
        }
        private static async Task AnalyzeCustomForm(FormRecognizerClient recognizerClient, string modelID, string formUrl)
        {
            RecognizeCustomFormsOptions options = new RecognizeCustomFormsOptions();

            //recognize form
            RecognizeCustomFormsOperation operation = await recognizerClient.StartRecognizeCustomFormsFromUriAsync(modelID, new Uri(formUrl));

            Response <RecognizedFormCollection> operationResponse = await operation.WaitForCompletionAsync();

            RecognizedFormCollection forms = operationResponse.Value;

            foreach (RecognizedForm form in forms)
            {
                returnString += $"Form of type: {form.FormType}{Environment.NewLine}";
                foreach (FormField field in form.Fields.Values)
                {
                    returnString += $"Field '{field.Name}: ";

                    if (field.LabelData != null)
                    {
                        returnString += $"    Label: '{field.LabelData.Text}";
                    }

                    returnString += $"    Value: '{field.ValueData.Text}";
                    returnString += $"    Confidence: '{field.Confidence}{Environment.NewLine}";
                }
                returnString += $"Table data:{Environment.NewLine}";
                foreach (FormPage page in form.Pages)
                {
                    for (int i = 0; i < page.Tables.Count; i++)
                    {
                        FormTable table = page.Tables[i];
                        //Console.WriteLine($"Table {i} has {table.RowCount} rows and {table.ColumnCount} columns.");
                        foreach (FormTableCell cell in table.Cells)
                        {
                            returnString += $"    Cell ({cell.RowIndex}, {cell.ColumnIndex}) contains {(cell.IsHeader ? "header" : "text")}: '{cell.Text}'{Environment.NewLine}";
                        }
                    }
                }
            }
        }
Esempio n. 15
0
        public async Task RecognizeCustomFormsFromFile()
        {
            string endpoint        = TestEnvironment.Endpoint;
            string apiKey          = TestEnvironment.ApiKey;
            string trainingFileUrl = TestEnvironment.BlobContainerSasUrl;

            // Firstly, create a trained model we can use to recognize the custom form. Please note that
            // models can also be trained using a graphical user interface such as the Form Recognizer
            // Labeling Tool found here:
            // https://docs.microsoft.com/azure/cognitive-services/form-recognizer/label-tool?tabs=v2-1

            FormTrainingClient trainingClient = new FormTrainingClient(new Uri(endpoint), new AzureKeyCredential(apiKey));
            CustomFormModel    model          = await trainingClient.StartTrainingAsync(new Uri(trainingFileUrl), useTrainingLabels : false, "My Model").WaitForCompletionAsync();

            // Proceed with the custom form recognition.

            FormRecognizerClient client = new FormRecognizerClient(new Uri(endpoint), new AzureKeyCredential(apiKey));

            #region Snippet:FormRecognizerRecognizeCustomFormsFromFile
#if SNIPPET
            string modelId  = "<modelId>";
            string filePath = "<filePath>";
#else
            string filePath = FormRecognizerTestEnvironment.CreatePath("Form_1.jpg");
            string modelId  = model.ModelId;
#endif

            using var stream = new FileStream(filePath, FileMode.Open);
            var options = new RecognizeCustomFormsOptions()
            {
                IncludeFieldElements = true
            };

            RecognizeCustomFormsOperation operation = await client.StartRecognizeCustomFormsAsync(modelId, stream, options);

            Response <RecognizedFormCollection> operationResponse = await operation.WaitForCompletionAsync();

            RecognizedFormCollection forms = operationResponse.Value;

            foreach (RecognizedForm form in forms)
            {
                Console.WriteLine($"Form of type: {form.FormType}");
                Console.WriteLine($"Form was analyzed with model with ID: {form.ModelId}");
                foreach (FormField field in form.Fields.Values)
                {
                    Console.WriteLine($"Field '{field.Name}': ");

                    if (field.LabelData != null)
                    {
                        Console.WriteLine($"  Label: '{field.LabelData.Text}'");
                    }

                    Console.WriteLine($"  Value: '{field.ValueData.Text}'");
                    Console.WriteLine($"  Confidence: '{field.Confidence}'");
                }

                // Iterate over tables, lines, and selection marks on each page
                foreach (var page in form.Pages)
                {
                    for (int i = 0; i < page.Tables.Count; i++)
                    {
                        Console.WriteLine($"Table {i+1} on page {page.Tables[i].PageNumber}");
                        foreach (var cell in page.Tables[i].Cells)
                        {
                            Console.WriteLine($"  Cell[{cell.RowIndex}][{cell.ColumnIndex}] has text '{cell.Text}' with confidence {cell.Confidence}");
                        }
                    }
                    Console.WriteLine($"Lines found on page {page.PageNumber}");
                    foreach (var line in page.Lines)
                    {
                        Console.WriteLine($"  Line {line.Text}");
                    }

                    if (page.SelectionMarks.Count != 0)
                    {
                        Console.WriteLine($"Selection marks found on page {page.PageNumber}");
                        foreach (var selectionMark in page.SelectionMarks)
                        {
                            Console.WriteLine($"  Selection mark is '{selectionMark.State}' with confidence {selectionMark.Confidence}");
                        }
                    }
                }
            }
            #endregion

            // Delete the model on completion to clean environment.
            await trainingClient.DeleteModelAsync(model.ModelId);
        }
Esempio n. 16
0
        public async Task StartRecognizeCustomFormsWithoutLabelsCanParseMultipageFormWithBlankPage(bool useStream)
        {
            var client  = CreateFormRecognizerClient();
            var options = new RecognizeCustomFormsOptions()
            {
                IncludeFieldElements = true
            };
            RecognizeCustomFormsOperation operation;

            await using var trainedModel = await CreateDisposableTrainedModelAsync(useTrainingLabels : false);

            if (useStream)
            {
                using var stream = FormRecognizerTestEnvironment.CreateStream(TestFile.InvoiceMultipageBlank);
                using (Recording.DisableRequestBodyRecording())
                {
                    operation = await client.StartRecognizeCustomFormsAsync(trainedModel.ModelId, stream, options);
                }
            }
            else
            {
                var uri = FormRecognizerTestEnvironment.CreateUri(TestFile.InvoiceMultipageBlank);
                operation = await client.StartRecognizeCustomFormsFromUriAsync(trainedModel.ModelId, uri, options);
            }

            RecognizedFormCollection recognizedForms = await operation.WaitForCompletionAsync();

            Assert.AreEqual(3, recognizedForms.Count);

            for (int formIndex = 0; formIndex < recognizedForms.Count; formIndex++)
            {
                var recognizedForm     = recognizedForms[formIndex];
                var expectedPageNumber = formIndex + 1;

                ValidateModelWithNoLabelsForm(
                    recognizedForm,
                    trainedModel.ModelId,
                    includeFieldElements: true,
                    expectedFirstPageNumber: expectedPageNumber,
                    expectedLastPageNumber: expectedPageNumber);

                // Basic sanity test to make sure pages are ordered correctly.

                if (formIndex == 0 || formIndex == 2)
                {
                    var expectedValueData = formIndex == 0 ? "300.00" : "3000.00";

                    FormField fieldInPage = recognizedForm.Fields.Values.Where(field => field.LabelData.Text.Contains("Subtotal:")).FirstOrDefault();
                    Assert.IsNotNull(fieldInPage);
                    Assert.IsNotNull(fieldInPage.ValueData);
                    Assert.AreEqual(expectedValueData, fieldInPage.ValueData.Text);
                }
            }

            var blankForm = recognizedForms[1];

            Assert.AreEqual(0, blankForm.Fields.Count);

            var blankPage = blankForm.Pages.Single();

            Assert.AreEqual(0, blankPage.Lines.Count);
            Assert.AreEqual(0, blankPage.Tables.Count);
        }
        private static async Task AnalyzeCustomFormReturnObj(FormRecognizerClient recognizerClient, string modelID, string formUrl)
        {
            RecognizeCustomFormsOptions options = new RecognizeCustomFormsOptions();

            //recognize form
            RecognizeCustomFormsOperation operation = await recognizerClient.StartRecognizeCustomFormsFromUriAsync(modelID, new Uri(formUrl));

            Response <RecognizedFormCollection> operationResponse = await operation.WaitForCompletionAsync();

            RecognizedFormCollection forms = operationResponse.Value;

            foreach (RecognizedForm form in forms)
            {
                FormResponseObj rObj = new FormResponseObj();

                foreach (FormField field in form.Fields.Values)
                {
                    switch (field.Name)
                    {
                    case "Birthdate":
                    {
                        rObj.Birthdate = field.ValueData.Text;
                    }
                    break;

                    case "Breed":
                    {
                        rObj.Breed = field.ValueData.Text;
                    }
                    break;

                    case "Color":
                    {
                        rObj.Color = field.ValueData.Text;
                    }
                    break;

                    case "DateOfVaccination":
                    {
                        rObj.DateOfVaccination = field.ValueData.Text;
                    }
                    break;

                    case "DueDate":
                    {
                        rObj.DueDate = field.ValueData.Text;
                    }
                    break;

                    case "Duration":
                    {
                        rObj.Duration = field.ValueData.Text;
                    }
                    break;

                    case "LotExpiration":
                    {
                        rObj.LotExpiration = field.ValueData.Text;
                    }
                    break;

                    case "Manufacturer":
                    {
                        rObj.Manufacturer = field.ValueData.Text;
                    }
                    break;

                    case "MicrochipNumber":
                    {
                        rObj.MicrochipNumber = field.ValueData.Text;
                    }
                    break;

                    case "Owner":
                    {
                        rObj.Owner = field.ValueData.Text;
                    }
                    break;

                    case "OwnerAddress":
                    {
                        rObj.OwnerAddress = field.ValueData.Text;
                    }
                    break;

                    case "OwnerPhone":
                    {
                        rObj.OwnerPhone = field.ValueData.Text;
                    }
                    break;

                    case "PetsName":
                    {
                        rObj.PetsName = field.ValueData.Text;
                    }
                    break;

                    case "SerialNumber":
                    {
                        rObj.SerialNumber = field.ValueData.Text;
                    }
                    break;

                    case "Sex":
                    {
                        rObj.Sex = field.ValueData.Text;
                    }
                    break;

                    case "Species":
                    {
                        rObj.Species = field.ValueData.Text;
                    }
                    break;

                    case "TagNumber":
                    {
                        rObj.TagNumber = field.ValueData.Text;
                    }
                    break;

                    case "VaccinationLocation":
                    {
                        rObj.VaccinationLocation = field.ValueData.Text;
                    }
                    break;

                    case "Weight":
                    {
                        rObj.Weight = field.ValueData.Text;
                    }
                    break;
                    }
                }

                returnObjects.Add(rObj);
            }
        }
Esempio n. 18
0
        public static async Task RunAsync([EventGridTrigger] EventGridEvent eventGridEvent, ILogger log)
        {
            //Extracting content type and url of the blob triggering the function
            var jsondata = JsonConvert.SerializeObject(eventGridEvent.Data);
            var tmp      = new { contentType = "", url = "" };
            var data     = JsonConvert.DeserializeAnonymousType(jsondata, tmp);

            //Checking if the trigger was iniatiated for a PDF File.
            if (data.contentType == "application/pdf")
            {
                var pdfUrl = data.url;

                string endpoint = System.Environment.GetEnvironmentVariable("FormsRecognizerEndpoint", EnvironmentVariableTarget.Process);
                string apiKey   = System.Environment.GetEnvironmentVariable("FormsRecognizerKey", EnvironmentVariableTarget.Process);
                string contosoStorageConnectionString = System.Environment.GetEnvironmentVariable("ContosoStorageConnectionString", EnvironmentVariableTarget.Process);
                string cosmosEndpointUrl = System.Environment.GetEnvironmentVariable("CosmosDBEndpointUrl", EnvironmentVariableTarget.Process);
                string cosmosPrimaryKey  = System.Environment.GetEnvironmentVariable("CosmosDBPrimaryKey", EnvironmentVariableTarget.Process);

                //Create a SAS Link to give Forms Recognizer read access to the document
                BlobServiceClient   blobServiceClient = new BlobServiceClient(contosoStorageConnectionString);
                BlobContainerClient container         = new BlobContainerClient(contosoStorageConnectionString, "claims");
                string         blobName   = pdfUrl.Split('/').Last();
                BlobClient     blob       = container.GetBlobClient(blobName);
                BlobSasBuilder sasBuilder = new BlobSasBuilder()
                {
                    BlobContainerName = blob.GetParentBlobContainerClient().Name,
                    BlobName          = blob.Name,
                    Resource          = "b"
                };
                sasBuilder.ExpiresOn = DateTimeOffset.UtcNow.AddHours(1);
                sasBuilder.SetPermissions(BlobSasPermissions.Read);
                Uri sasUri = blob.GenerateSasUri(sasBuilder);

                var credential = new AzureKeyCredential(apiKey);

                //Get the latest trained model
                var formTrainingClient = new FormTrainingClient(new Uri(endpoint), credential);
                Pageable <CustomFormModelInfo> formsModels = formTrainingClient.GetCustomModels();
                var latestModel = (from inc in formsModels orderby inc.TrainingCompletedOn descending select inc).FirstOrDefault();

                //Run the document through the model
                var formRecognizerClient = new FormRecognizerClient(new Uri(endpoint), credential);
                var options = new RecognizeCustomFormsOptions()
                {
                    IncludeFieldElements = true
                };
                RecognizeCustomFormsOperation             operation         = formRecognizerClient.StartRecognizeCustomFormsFromUri(latestModel.ModelId, sasUri, options);
                Azure.Response <RecognizedFormCollection> operationResponse = await operation.WaitForCompletionAsync();

                RecognizedFormCollection forms = operationResponse.Value;

                //Insert documents into CosmosDB
                var cosmosClient    = new CosmosClient(cosmosEndpointUrl, cosmosPrimaryKey);
                var cosmosDatabase  = (await cosmosClient.CreateDatabaseIfNotExistsAsync("Contoso")).Database;
                var cosmosContainer = (await cosmosDatabase.CreateContainerIfNotExistsAsync("Claims", "/InsuredID")).Container;

                Model.ClaimsDocument processedDocument = new Model.ClaimsDocument();
                processedDocument.DocumentDate = new DateTime(int.Parse(blobName.Substring(0, 4)),
                                                              int.Parse(blobName.Substring(4, 2)), int.Parse(blobName.Substring(6, 2)));
                processedDocument.PatientName = forms[0].Fields["PatientName"].ValueData?.Text;
                processedDocument.InsuredID   = forms[0].Fields["InsuredID"].ValueData?.Text;
                processedDocument.Diagnosis   = forms[0].Fields["Diagnosis"].ValueData?.Text;
                decimal.TryParse(forms[0].Fields["TotalCharges"].ValueData?.Text, out decimal totalCharges);
                processedDocument.TotalCharges = totalCharges;
                DateTime.TryParse(forms[0].Fields["PatientBirthDate"].ValueData?.Text, out DateTime patientBirthDate);
                processedDocument.PatientBirthDate = patientBirthDate;
                decimal.TryParse(forms[0].Fields["AmountPaid"].ValueData?.Text, out decimal amountPaid);
                processedDocument.AmountPaid = amountPaid;
                decimal.TryParse(forms[0].Fields["AmountDue"].ValueData?.Text, out decimal amountDue);
                processedDocument.AmountDue = amountDue;
                processedDocument.FileName  = blobName;
                if (processedDocument.InsuredID == null)
                {
                    processedDocument.Id = Guid.NewGuid().ToString();
                }
                else
                {
                    processedDocument.Id = processedDocument.InsuredID;
                }

                try
                {
                    ItemResponse <Model.ClaimsDocument> cosmosResponse = await
                                                                         cosmosContainer.CreateItemAsync(processedDocument, new PartitionKey(processedDocument.InsuredID));
                }
                catch (CosmosException ex) when(ex.StatusCode == System.Net.HttpStatusCode.Conflict)
                {
                    //Conflicting EnsurerID is silently ignored for demo purposes.
                }
            }
            log.LogInformation(eventGridEvent.Data.ToString());
        }