/// <summary>
        /// Files a line under its axis value in the line dictionary and merges it with any
        /// lines already stored under that value which it overlaps.
        /// </summary>
        /// <param name="newLine">The line to add.</param>
        /// <param name="lineDic">Dictionary of line lists keyed by axis value.</param>
        void AddLineToList(ref FormLine newLine, ref SortedDictionary <double, FormLineList> lineDic)
        {
            // A transverse line is keyed by its y value; any other line by its x value.
            double axisValue;
            if (newLine.IsTransverseLine)
            {
                axisValue = newLine.StartPoint.y;
            }
            else
            {
                axisValue = newLine.StartPoint.x;
            }

            double[] knownAxisValues = lineDic.Keys.ToArray();
            double   matchedKey      = HalfFind(0, knownAxisValues.Length - 1, axisValue, knownAxisValues);

            // HalfFind is defined elsewhere; a result of -1 is treated here as "no existing entry".
            // NOTE(review): an axis value of exactly -1 would be indistinguishable from that sentinel - confirm.
            if (matchedKey == -1)
            {
                FormLineList freshLevel = new FormLineList();
                freshLevel.Add(newLine);
                lineDic.Add(axisValue, freshLevel);
                return;
            }

            FormLineList levelLines = lineDic[matchedKey];
            levelLines.Add(newLine);

            // Copy the ref parameter into a local so it can be captured by the lambda below.
            FormLine addedLine = newLine;

            // Collect every line on this level that overlaps the new one (the list already contains it).
            FormLineList overlapping = new FormLineList();
            foreach (FormLine candidate in levelLines.Where(existing => HasRepeatPart(existing, addedLine)))
            {
                overlapping.Add(candidate);
            }

            MergeLines(overlapping, levelLines);
        }
        /// <summary>
        /// Runs content recognition on the form at <paramref name="formUrl"/> and appends a
        /// description of every recognized line and table cell to <c>returnString</c>.
        /// </summary>
        /// <param name="recognizerClient">Client used to start the recognition operation.</param>
        /// <param name="formUrl">URL of the form to analyze.</param>
        private static async Task RecognizeContent(FormRecognizerClient recognizerClient, string formUrl)
        {
            Uri formLocation = new Uri(formUrl);
            FormPageCollection formPages = await recognizerClient
                                           .StartRecognizeContentFromUri(formLocation)
                                           .WaitForCompletionAsync();

            foreach (FormPage page in formPages)
            {
                // One entry per recognized text line, numbered from zero within the page.
                int lineNumber = 0;
                foreach (FormLine line in page.Lines)
                {
                    string pluralSuffix = line.Words.Count > 1 ? "s" : "";
                    returnString += $"    Line {lineNumber} has {line.Words.Count} word{pluralSuffix}, and text: '{line.Text}'.{Environment.NewLine}";
                    lineNumber++;
                }

                // One entry per cell of every table on the page.
                foreach (FormTable table in page.Tables)
                {
                    foreach (FormTableCell cell in table.Cells)
                    {
                        returnString += $"    Cell ({cell.RowIndex}, {cell.ColumnIndex}) contains text: '{cell.Text}'.{Environment.NewLine}";
                    }
                }
            }
        }
    // </snippet_calls>

    // <snippet_getcontent_call>
    /// <summary>
    /// Runs content recognition on the document at <paramref name="invoiceUri"/> and writes
    /// its pages, lines, tables and table cells to the console.
    /// </summary>
    /// <param name="recognizerClient">Client used to start the recognition operation.</param>
    /// <param name="invoiceUri">URL of the document to analyze.</param>
    private static async Task GetContent(
        FormRecognizerClient recognizerClient, string invoiceUri)
    {
        Uri documentLocation = new Uri(invoiceUri);
        Response<FormPageCollection> formPages = await recognizerClient
            .StartRecognizeContentFromUri(documentLocation)
            .WaitForCompletionAsync();
        // </snippet_getcontent_call>

        // <snippet_getcontent_print>
        foreach (FormPage page in formPages.Value)
        {
            Console.WriteLine($"Form Page {page.PageNumber} has {page.Lines.Count} lines.");

            int lineNumber = 0;
            foreach (FormLine line in page.Lines)
            {
                string pluralSuffix = line.Words.Count > 1 ? "s" : "";
                Console.WriteLine($"    Line {lineNumber} has {line.Words.Count} word{pluralSuffix}, and text: '{line.Text}'.");
                lineNumber++;
            }

            int tableNumber = 0;
            foreach (FormTable table in page.Tables)
            {
                Console.WriteLine($"Table {tableNumber} has {table.RowCount} rows and {table.ColumnCount} columns.");
                foreach (FormTableCell cell in table.Cells)
                {
                    Console.WriteLine($"    Cell ({cell.RowIndex}, {cell.ColumnIndex}) contains text: '{cell.Text}'.");
                }
                tableNumber++;
            }
        }
    }
        /// <summary>
        /// Collapses a group of lines into the first line of the group: that line is stretched
        /// to span the extent of all of them and the remaining lines are removed from
        /// <paramref name="sameLevelLines"/>.
        /// </summary>
        /// <param name="mergeLines">Lines to merge. Nothing happens when null or fewer than two.</param>
        /// <param name="sameLevelLines">List from which every merged line except the first is removed.</param>
        void  MergeLines(FormLineList mergeLines, FormLineList sameLevelLines)
        {
            bool nothingToMerge = mergeLines == null || mergeLines.Count < 2;
            if (nothingToMerge)
            {
                return;
            }

            FormLine survivor = mergeLines[0];

            if (survivor.IsTransverseLine)
            {
                // Gather both end points of every line, then stretch the survivor across all of them.
                double[] xExtents = mergeLines
                                    .SelectMany(line => new[] { line.StartPoint.x, line.EndPoint.x })
                                    .ToArray();
                double lowestX  = xExtents.Min();
                double highestX = xExtents.Max();

                survivor.StartPoint.x = lowestX;
                survivor.EndPoint.x   = highestX;
            }
            else
            {
                double[] yExtents = mergeLines
                                    .SelectMany(line => new[] { line.StartPoint.y, line.EndPoint.y })
                                    .ToArray();
                double lowestY  = yExtents.Min();
                double highestY = yExtents.Max();

                survivor.StartPoint.y = lowestY;
                survivor.EndPoint.y   = highestY;
            }

            // Every line after the first has been absorbed into the survivor.
            int index = 1;
            while (index < mergeLines.Count)
            {
                sameLevelLines.Remove(mergeLines[index]);
                index++;
            }
        }
Exemple #5
0
        /// <summary>
        /// Runs content recognition on a fixed sample invoice PDF and writes its pages, lines,
        /// tables and table cells to the console.
        /// </summary>
        /// <param name="recognizerClient">Client used to start the recognition operation.</param>
        private static async Task RecognizeContent(FormRecognizerClient recognizerClient)
        {
            Uri sampleInvoice = new Uri("https://raw.githubusercontent.com/Azure/azure-sdk-for-python/master/sdk/formrecognizer/azure-ai-formrecognizer/tests/sample_forms/forms/Invoice_1.pdf");
            FormPageCollection formPages = await recognizerClient
                                           .StartRecognizeContentFromUri(sampleInvoice)
                                           .WaitForCompletionAsync();

            foreach (FormPage page in formPages)
            {
                Console.WriteLine($"Form Page {page.PageNumber} has {page.Lines.Count} lines.");

                int lineNumber = 0;
                foreach (FormLine line in page.Lines)
                {
                    string pluralSuffix = line.Words.Count > 1 ? "s" : "";
                    Console.WriteLine($"    Line {lineNumber} has {line.Words.Count} word{pluralSuffix}, and text: '{line.Text}'.");
                    lineNumber++;
                }

                int tableNumber = 0;
                foreach (FormTable table in page.Tables)
                {
                    Console.WriteLine($"Table {tableNumber} has {table.RowCount} rows and {table.ColumnCount} columns.");
                    foreach (FormTableCell cell in table.Cells)
                    {
                        Console.WriteLine($"    Cell ({cell.RowIndex}, {cell.ColumnIndex}) contains text: '{cell.Text}'.");
                    }
                    tableNumber++;
                }
            }
        }
        /// <summary>
        /// Sample: runs content recognition on the local file "Invoice_1.pdf" and writes its
        /// pages, lines, tables and table cells to the console.
        /// </summary>
        public async Task RecognizeContentFromFile()
        {
            Uri serviceEndpoint = new Uri(TestEnvironment.Endpoint);
            AzureKeyCredential keyCredential = new AzureKeyCredential(TestEnvironment.ApiKey);

            FormRecognizerClient client = new FormRecognizerClient(serviceEndpoint, keyCredential);

            string invoiceFilePath = FormRecognizerTestEnvironment.CreatePath("Invoice_1.pdf");

            // The stream stays open until the recognition operation has completed and been printed.
            using (FileStream stream = new FileStream(invoiceFilePath, FileMode.Open))
            {
                FormPageCollection formPages = await client.StartRecognizeContent(stream).WaitForCompletionAsync();

                foreach (FormPage page in formPages)
                {
                    Console.WriteLine($"Form Page {page.PageNumber} has {page.Lines.Count} lines.");

                    int lineNumber = 0;
                    foreach (FormLine line in page.Lines)
                    {
                        string pluralSuffix = line.Words.Count > 1 ? "s" : "";
                        Console.WriteLine($"    Line {lineNumber} has {line.Words.Count} word{pluralSuffix}, and text: '{line.Text}'.");
                        lineNumber++;
                    }

                    int tableNumber = 0;
                    foreach (FormTable table in page.Tables)
                    {
                        Console.WriteLine($"Table {tableNumber} has {table.RowCount} rows and {table.ColumnCount} columns.");
                        foreach (FormTableCell cell in table.Cells)
                        {
                            Console.WriteLine($"    Cell ({cell.RowIndex}, {cell.ColumnIndex}) contains text: '{cell.Text}'.");
                        }
                        tableNumber++;
                    }
                }
            }
        }
    // </snippet_auth_training>

    // <snippet_getcontent_call>
    /// <summary>
    /// Runs content recognition on a fixed sample invoice image and writes its pages, lines,
    /// tables and table cells to the console.
    /// </summary>
    /// <param name="recognizerClient">Client used to start the recognition operation.</param>
    private static async Task RecognizeContent(FormRecognizerClient recognizerClient)
    {
        Uri sampleInvoice = new Uri("https://raw.githubusercontent.com/Azure-Samples/cognitive-services-REST-api-samples/master/curl/form-recognizer/simple-invoice.png");
        FormPageCollection formPages = await recognizerClient
                                       .StartRecognizeContentFromUri(sampleInvoice)
                                       .WaitForCompletionAsync();

        // </snippet_getcontent_call>

        // <snippet_getcontent_print>
        foreach (FormPage page in formPages)
        {
            Console.WriteLine($"Form Page {page.PageNumber} has {page.Lines.Count} lines.");

            int lineNumber = 0;
            foreach (FormLine line in page.Lines)
            {
                string pluralSuffix = line.Words.Count > 1 ? "s" : "";
                Console.WriteLine($"    Line {lineNumber} has {line.Words.Count} word{pluralSuffix}, and text: '{line.Text}'.");
                lineNumber++;
            }

            int tableNumber = 0;
            foreach (FormTable table in page.Tables)
            {
                Console.WriteLine($"Table {tableNumber} has {table.RowCount} rows and {table.ColumnCount} columns.");
                foreach (FormTableCell cell in table.Cells)
                {
                    Console.WriteLine($"    Cell ({cell.RowIndex}, {cell.ColumnIndex}) contains text: '{cell.Text}'.");
                }
                tableNumber++;
            }
        }
    }
Exemple #8
0
        /// <summary>
        /// Sample: runs content recognition on the local file "Invoice_1.pdf" and writes each
        /// recognized line together with the four corner points of its bounding box to the console.
        /// </summary>
        public async Task FieldBoundingBoxSample()
        {
            Uri serviceEndpoint = new Uri(TestEnvironment.Endpoint);
            AzureKeyCredential keyCredential = new AzureKeyCredential(TestEnvironment.ApiKey);

            FormRecognizerClient client = new FormRecognizerClient(serviceEndpoint, keyCredential);

            string invoiceFilePath = FormRecognizerTestEnvironment.CreatePath("Invoice_1.pdf");

            // Labels for bounding box indices 0..3, in the order they are printed.
            string[] cornerLabels = { "Upper left", "Upper right", "Lower right", "Lower left" };

            using (FileStream stream = new FileStream(invoiceFilePath, FileMode.Open))
            {
                FormPageCollection formPages = await client.StartRecognizeContentAsync(stream).WaitForCompletionAsync();

                foreach (FormPage page in formPages)
                {
                    Console.WriteLine($"Form Page {page.PageNumber} has {page.Lines.Count} lines.");

                    int lineNumber = 0;
                    foreach (FormLine line in page.Lines)
                    {
                        Console.WriteLine($"    Line {lineNumber} with text: '{line.Text}'.");

                        Console.WriteLine("        Its bounding box is:");
                        for (int corner = 0; corner < cornerLabels.Length; corner++)
                        {
                            var point = line.BoundingBox[corner];
                            Console.WriteLine($"        {cornerLabels[corner]} => X: {point.X}, Y= {point.Y}");
                        }

                        lineNumber++;
                    }
                }
            }
        }
Exemple #9
0
        /// <summary>
        /// Runs content recognition on the picture at <paramref name="pPictureUri"/>, prints every
        /// recognized line, and tries to pick up a receipt number and an amount: when the previously
        /// read line resembles one of the labels "lfd. Nr./Zähler" or "Menge", the current line's text
        /// is stored in <c>newReceiptNo</c> / <c>newAmount</c> (both defined outside this method).
        /// </summary>
        /// <param name="recognizerClient">Client used to start the recognition operation.</param>
        /// <param name="pPictureUri">URL of the picture to analyze.</param>
        private async Task RecognizeContent(FormRecognizerClient recognizerClient, string pPictureUri)
        {
            // Text of the previously processed line; starts empty and is NOT reset between pages.
            string             xLine     = "";
            FormPageCollection formPages = await recognizerClient
                                           .StartRecognizeContentFromUri(new Uri(pPictureUri))
                                           .WaitForCompletionAsync();

            foreach (FormPage page in formPages)
            {
                Console.WriteLine($"Form Page {page.PageNumber} has {page.Lines.Count} lines.");
                for (int i = 0; i < page.Lines.Count; i++)
                {
                    FormLine line = page.Lines[i];
                    Console.WriteLine($"    Line {i} has {line.Words.Count} word{(line.Words.Count > 1 ? "s" : "")}, and text: '{line.Text}'.");
                    // Previous line longer than 15 characters: compare its prefixes against both labels.
                    // NOTE(review): a previous line of exactly 15 characters (the length of
                    // "lfd. Nr./Zähler") fails this test and falls into the branch below - confirm intended.
                    if (xLine.Length > 15)
                    {
                        // CalcLevenshteinDistance is defined elsewhere; presumably an edit distance - TODO confirm.
                        int distanceReceiptNo = CalcLevenshteinDistance(xLine.Substring(0, 15), "lfd. Nr./Zähler");
                        int distanceAmount    = CalcLevenshteinDistance(xLine.Substring(0, 5), "Menge");
                        // NOTE(review): a distance of 0 (exact match) is rejected by both conditions here;
                        // such lines are only caught by the Contains(...) fallbacks further down - confirm intended.
                        if ((distanceReceiptNo != 0) && (distanceReceiptNo < 6))
                        {
                            newReceiptNo = line.Text;
                        }
                        else if ((distanceAmount != 0) && (distanceAmount < 2))
                        {
                            // Only the first space-separated token of the current line is kept as the amount.
                            string[] amounts = line.Text.Split(" ");
                            newAmount = amounts[0];
                        }
                    }
                    // Previous line of 5 to 15 characters: compare the whole line against "Menge" only.
                    else if (xLine.Length >= 5)
                    {
                        int distanceAmount = CalcLevenshteinDistance(xLine, "Menge");
                        if ((distanceAmount != 0) && (distanceAmount < 2))
                        {
                            string[] amounts = line.Text.Split(" ");
                            newAmount = amounts[0];
                        }
                    }

                    // Fallbacks, applied only while the respective value is still empty:
                    // a previous line containing "eng" yields the amount ...
                    if (newAmount == "")
                    {
                        if (xLine.Contains("eng"))
                        {
                            string[] amounts = line.Text.Split(" ");
                            newAmount = amounts[0];
                        }
                    }
                    // ... and a previous line containing "hle" yields the receipt number.
                    if (newReceiptNo == "")
                    {
                        if (xLine.Contains("hle"))
                        {
                            newReceiptNo = line.Text;
                        }
                    }
                    // Remember this line so the next iteration can treat it as a potential label.
                    xLine = line.Text;
                }
            }
        }
Exemple #10
0
        /// <summary>
        /// Sample: runs content recognition on the local file "Invoice_1.pdf" and writes its lines,
        /// tables and selection marks (including bounding boxes) to the console.
        /// </summary>
        public async Task RecognizeContentFromFile()
        {
            Uri serviceEndpoint = new Uri(TestEnvironment.Endpoint);
            AzureKeyCredential keyCredential = new AzureKeyCredential(TestEnvironment.ApiKey);

            FormRecognizerClient client = new FormRecognizerClient(serviceEndpoint, keyCredential);

            string filePath = FormRecognizerTestEnvironment.CreatePath("Invoice_1.pdf");

            #region Snippet:FormRecognizerRecognizeFormContentFromFile
            //@@ string filePath = "filePath";
            using var stream = new FileStream(filePath, FileMode.Open);

            Response <FormPageCollection> response = await client.StartRecognizeContentAsync(stream).WaitForCompletionAsync();

            FormPageCollection formPages = response.Value;

            // Labels for bounding box indices 0..3, in the order they are printed.
            string[] cornerLabels = { "Upper left", "Upper right", "Lower right", "Lower left" };

            foreach (FormPage page in formPages)
            {
                Console.WriteLine($"Form Page {page.PageNumber} has {page.Lines.Count} lines.");

                int lineNumber = 0;
                foreach (FormLine line in page.Lines)
                {
                    string wordNoun = line.Words.Count == 1 ? "word" : "words";
                    Console.WriteLine($"  Line {lineNumber} has {line.Words.Count} {wordNoun}, and text: '{line.Text}'.");

                    Console.WriteLine("    Its bounding box is:");
                    for (int corner = 0; corner < cornerLabels.Length; corner++)
                    {
                        var point = line.BoundingBox[corner];
                        Console.WriteLine($"    {cornerLabels[corner]} => X: {point.X}, Y= {point.Y}");
                    }

                    lineNumber++;
                }

                int tableNumber = 0;
                foreach (FormTable table in page.Tables)
                {
                    Console.WriteLine($"  Table {tableNumber} has {table.RowCount} rows and {table.ColumnCount} columns.");
                    foreach (FormTableCell cell in table.Cells)
                    {
                        Console.WriteLine($"    Cell ({cell.RowIndex}, {cell.ColumnIndex}) contains text: '{cell.Text}'.");
                    }
                    tableNumber++;
                }

                int markNumber = 0;
                foreach (FormSelectionMark selectionMark in page.SelectionMarks)
                {
                    Console.WriteLine($"  Selection Mark {markNumber} is {selectionMark.State}.");
                    Console.WriteLine("    Its bounding box is:");
                    for (int corner = 0; corner < cornerLabels.Length; corner++)
                    {
                        var point = selectionMark.BoundingBox[corner];
                        Console.WriteLine($"      {cornerLabels[corner]} => X: {point.X}, Y= {point.Y}");
                    }
                    markNumber++;
                }
            }

            #endregion
        }
 /// <summary>
 /// Routes a line to the horizontal line dictionary when it is transverse,
 /// otherwise to the vertical line dictionary.
 /// </summary>
 /// <param name="newLine">The line to store.</param>
 void AddLine(FormLine newLine)
 {
     if (!newLine.IsTransverseLine)
     {
         AddLineToList(ref newLine, ref verticalLines);
         return;
     }

     AddLineToList(ref newLine, ref horizontalLines);
 }
        /// <summary>
        /// Handles a draw-line operation: builds a line between the two points and stores it,
        /// provided both points pass <c>ValidateInScale</c>.
        /// </summary>
        /// <param name="fromPoint">One end point of the line.</param>
        /// <param name="toPoint">The other end point of the line.</param>
        void DealDrawLine(Point fromPoint, Point toPoint)
        {
            // Both end points must validate; an additional same-point check is currently disabled.
            bool bothInScale = ValidateInScale(fromPoint) && ValidateInScale(toPoint);
            if (bothInScale)
            {
                AddLine(new FormLine(fromPoint, toPoint, true));
            }
        }
        /// <summary>
        /// Tells whether two lines sit on the same level, i.e. whether the end of the first line
        /// and the start of the second differ by less than <c>lengthError</c> on the relevant axis.
        /// </summary>
        /// <param name="line1">The first line; its orientation selects the axis that is compared.</param>
        /// <param name="line2">The second line.</param>
        /// <returns>True when the two lines are on the same level; otherwise false.</returns>
        bool OnSameLine(FormLine line1, FormLine line2)
        {
            double firstCoordinate;
            double secondCoordinate;

            // Transverse lines are compared on y, all other lines on x.
            if (line1.IsTransverseLine)
            {
                firstCoordinate  = line1.EndPoint.y;
                secondCoordinate = line2.StartPoint.y;
            }
            else
            {
                firstCoordinate  = line1.EndPoint.x;
                secondCoordinate = line2.StartPoint.x;
            }

            double gap = Math.Abs(firstCoordinate - secondCoordinate);
            return gap < lengthError;
        }
 /// <summary>
 /// Tells whether two lines overlap along their running axis: true when an end point of either
 /// line satisfies <c>IsBetween</c> with respect to the two end points of the other line.
 /// </summary>
 /// <param name="line1">The first line; its orientation selects the axis that is compared.</param>
 /// <param name="line2">The second line.</param>
 /// <returns>True when the two lines share a common part; otherwise false.</returns>
 bool HasRepeatPart(FormLine line1, FormLine line2)
 {
     double firstStart;
     double firstEnd;
     double secondStart;
     double secondEnd;

     // Transverse lines are compared on x, all other lines on y.
     if (line1.IsTransverseLine)
     {
         firstStart  = line1.StartPoint.x;
         firstEnd    = line1.EndPoint.x;
         secondStart = line2.StartPoint.x;
         secondEnd   = line2.EndPoint.x;
     }
     else
     {
         firstStart  = line1.StartPoint.y;
         firstEnd    = line1.EndPoint.y;
         secondStart = line2.StartPoint.y;
         secondEnd   = line2.EndPoint.y;
     }

     return IsBetween(firstStart, secondStart, secondEnd) ||
            IsBetween(firstEnd, secondStart, secondEnd) ||
            IsBetween(secondStart, firstStart, firstEnd) ||
            IsBetween(secondEnd, firstStart, firstEnd);
 }
Exemple #15
0
        /// <summary>
        /// Sample: runs content recognition on "Invoice_1.pdf" addressed by URI and writes its lines,
        /// tables and selection marks (with bounding boxes) to the console.
        /// </summary>
        public async Task RecognizeContentFromUri()
        {
            Uri serviceEndpoint = new Uri(TestEnvironment.Endpoint);
            AzureKeyCredential keyCredential = new AzureKeyCredential(TestEnvironment.ApiKey);

            FormRecognizerClient client = new FormRecognizerClient(serviceEndpoint, keyCredential);

            Uri invoiceUri = FormRecognizerTestEnvironment.CreateUri("Invoice_1.pdf");

            #region Snippet:FormRecognizerSampleRecognizeContentFromUri

            FormPageCollection formPages = await client.StartRecognizeContentFromUriAsync(invoiceUri).WaitForCompletionAsync();

            // Labels for bounding box indices 0..3, in the order they are printed.
            string[] cornerLabels = { "Upper left", "Upper right", "Lower right", "Lower left" };

            foreach (FormPage page in formPages)
            {
                Console.WriteLine($"Form Page {page.PageNumber} has {page.Lines.Count} lines.");

                int lineNumber = 0;
                foreach (FormLine line in page.Lines)
                {
                    string pluralSuffix = line.Words.Count > 1 ? "s" : "";
                    Console.WriteLine($"    Line {lineNumber} has {line.Words.Count} word{pluralSuffix}, and text: '{line.Text}'.");
                    lineNumber++;
                }

                int tableNumber = 0;
                foreach (FormTable table in page.Tables)
                {
                    Console.WriteLine($"Table {tableNumber} has {table.RowCount} rows and {table.ColumnCount} columns.");
                    foreach (FormTableCell cell in table.Cells)
                    {
                        Console.WriteLine($"    Cell ({cell.RowIndex}, {cell.ColumnIndex}) contains text: '{cell.Text}'.");
                    }
                    tableNumber++;
                }

                int markNumber = 0;
                foreach (FormSelectionMark selectionMark in page.SelectionMarks)
                {
                    string stateText = selectionMark.State.ToString();
                    Console.WriteLine($"Selection Mark {markNumber} is {stateText}.");
                    Console.WriteLine("        Its bounding box is:");
                    for (int corner = 0; corner < cornerLabels.Length; corner++)
                    {
                        var point = selectionMark.BoundingBox[corner];
                        Console.WriteLine($"        {cornerLabels[corner]} => X: {point.X}, Y= {point.Y}");
                    }
                    markNumber++;
                }
            }

            #endregion
        }
Exemple #16
0
        private static async Task RecognizeContent(FormRecognizerClient recognizerClient)
        {
            var invoiceUri = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-REST-api-samples/master/curl/form-recognizer/simple-invoice.png";
            FormPageCollection formPages = await recognizerClient
                                           .StartRecognizeContentFromUri(new Uri(invoiceUri))
                                           .WaitForCompletionAsync();

            foreach (FormPage page in formPages)
            {
                Console.WriteLine($"Form Page {page.PageNumber} has {page.Lines.Count} lines.");

                for (int i = 0; i < page.Lines.Count; i++)
                {
                    FormLine line = page.Lines[i];
                    Console.WriteLine($"    Line {i} has {line.Words.Count} word{(line.Words.Count > 1 ? "
            s " : "
            ")}, and text: '{line.Text}'.");
Exemple #17
0
        /// <summary>
        /// Analyzes an expected-layout file and a file under validation, then asserts that both have
        /// the same number of pages, the same number of lines on every page, and that the four
        /// bounding box corners of each corresponding line agree within <paramref name="delta"/>.
        /// Details of every compared line are written to the logger.
        /// </summary>
        /// <param name="expectedLayoutFilePath">Path of the file that defines the expected layout.</param>
        /// <param name="fileToBeValidatedPath">Path of the file whose layout is validated.</param>
        /// <param name="delta">Maximum allowed difference per bounding box coordinate.</param>
        /// <returns>The analysis result of the file being validated.</returns>
        public FormPageCollection ValidateFormLayout(string expectedLayoutFilePath, string fileToBeValidatedPath, double delta = 0.1)
        {
            var expectedAnalyzedFile = AnalyzeFile(expectedLayoutFilePath);
            var actualAnalyzedFile   = AnalyzeFile(fileToBeValidatedPath);

            // Fail with a clear message instead of indexing past the end of the actual pages
            // (or silently ignoring surplus pages).
            Assert.AreEqual(expectedAnalyzedFile.Count, actualAnalyzedFile.Count, "The number of pages are different.");

            for (int i = 0; i < expectedAnalyzedFile.Count; i++)
            {
                FormPage expectedPage = expectedAnalyzedFile[i];
                FormPage actualPage   = actualAnalyzedFile[i];

                Assert.AreEqual(expectedPage.Lines.Count, actualPage.Lines.Count, "The number of lines are different.");

                Logger.LogInformation($"Form Page {expectedPage.PageNumber} has {expectedPage.Lines.Count} lines.");

                // The loop must be bounded by the line index 'l'; bounding it by the page index 'i'
                // never terminates normally and runs off the end of the line list.
                for (int l = 0; l < expectedPage.Lines.Count; l++)
                {
                    FormLine expectedLine = expectedPage.Lines[l];
                    LogLineDetails("Expected", l, expectedLine);

                    FormLine actualLine = actualPage.Lines[l];
                    LogLineDetails("Actual", l, actualLine);

                    // Corners 0..3 are upper left, upper right, lower right, lower left.
                    for (int corner = 0; corner < 4; corner++)
                    {
                        Assert.AreEqual(expectedLine.BoundingBox[corner].X, actualLine.BoundingBox[corner].X, delta);
                        Assert.AreEqual(expectedLine.BoundingBox[corner].Y, actualLine.BoundingBox[corner].Y, delta);
                    }
                }
            }

            return(actualAnalyzedFile);
        }

        /// <summary>
        /// Logs the word count, text and bounding box corners of one line, prefixed with a label
        /// that tells whether it belongs to the expected or the actual document.
        /// </summary>
        private void LogLineDetails(string label, int lineIndex, FormLine line)
        {
            Logger.LogInformation($"  {label} Line {lineIndex} has {line.Words.Count} {(line.Words.Count == 1 ? "word" : "words")}, and text: '{line.Text}'.");
            Logger.LogInformation("    Its bounding box is:");
            Logger.LogInformation($"    Upper left => X: {line.BoundingBox[0].X}, Y= {line.BoundingBox[0].Y}");
            Logger.LogInformation($"    Upper right => X: {line.BoundingBox[1].X}, Y= {line.BoundingBox[1].Y}");
            Logger.LogInformation($"    Lower right => X: {line.BoundingBox[2].X}, Y= {line.BoundingBox[2].Y}");
            Logger.LogInformation($"    Lower left => X: {line.BoundingBox[3].X}, Y= {line.BoundingBox[3].Y}");
        }
Exemple #18
0
        /// <summary>
        /// Writes the pages, lines, tables and table cells of a recognition result to the console.
        /// </summary>
        /// <param name="formPages">The recognized pages to print.</param>
        public static void PrintForm(FormPageCollection formPages)
        {
            foreach (FormPage page in formPages)
            {
                Console.WriteLine($"Form Page {page.PageNumber} has {page.Lines.Count} lines.");

                int lineNumber = 0;
                foreach (FormLine line in page.Lines)
                {
                    string pluralSuffix = line.Words.Count > 1 ? "s" : "";
                    Console.WriteLine($"    Line {lineNumber} has {line.Words.Count} word{pluralSuffix}, and text: '{line.Text}'.");
                    lineNumber++;
                }

                int tableNumber = 0;
                foreach (FormTable table in page.Tables)
                {
                    Console.WriteLine($"Table {tableNumber} has {table.RowCount} rows and {table.ColumnCount} columns.");
                    foreach (FormTableCell cell in table.Cells)
                    {
                        Console.WriteLine($"    Cell ({cell.RowIndex}, {cell.ColumnIndex}) contains text: '{cell.Text}'.");
                    }
                    tableNumber++;
                }
            }
        }
        //public async Task RunFormRecognizerClient()
        //{


        //    string trainingDataUrl = "<SAS-URL-of-your-form-folder-in-blob-storage>";
        //    //string formUrl = "<SAS-URL-of-a-form-in-blob-storage>";

        //    string receiptUrl = "https://docs.microsoft.com/azure/cognitive-services/form-recognizer/media"
        //    + "/contoso-allinone.jpg";

        //    // Call Form Recognizer scenarios:
        //    Console.WriteLine("Get form content...");
        //    await GetContent(recognizerClient, formUrl);

        //    Console.WriteLine("Analyze receipt...");
        //    await AnalyzeReceipt(recognizerClient, receiptUrl);

        //    //Console.WriteLine("Train Model with training data...");
        //    //Guid modelId = await TrainModel(trainingClient, trainingDataUrl);

        //    //Console.WriteLine("Analyze PDF form...");
        //    //await AnalyzePdfForm(recognizerClient, modelId, formUrl);

        //    //Console.WriteLine("Manage models...");
        //    //await ManageModels(trainingClient, trainingDataUrl);
        //}

        /// <summary>
        /// Runs content recognition on the form at <paramref name="formurl"/> and returns a textual
        /// description of its pages, lines, tables and table cells, wrapped between a start and an
        /// end HTML heading entry.
        /// </summary>
        /// <param name="formurl">URL of the form to analyze.</param>
        /// <returns>The description, one list entry per output line.</returns>
        public async Task <List <string> > ParseForm(string formurl)
        {
            List <string> output = new List <string> { "<h3>starting Output Rendering</h3>" };

            FormRecognizerClient recognizerClient = new FormRecognizerClient(new Uri(endpoint), credential);
            Uri formLocation = new Uri(formurl);
            var formPages = await recognizerClient.StartRecognizeContentFromUri(formLocation).WaitForCompletionAsync();

            foreach (FormPage page in formPages.Value)
            {
                output.Add($"Form Page {page.PageNumber} has {page.Lines.Count} lines.");

                int lineNumber = 0;
                foreach (FormLine line in page.Lines)
                {
                    string pluralSuffix = line.Words.Count > 1 ? "s" : "";
                    output.Add($"    Line {lineNumber} has {line.Words.Count} word{pluralSuffix}, and text: '{line.Text}'.");
                    lineNumber++;
                }

                int tableNumber = 0;
                foreach (FormTable table in page.Tables)
                {
                    output.Add($"Table {tableNumber} has {table.RowCount} rows and {table.ColumnCount} columns.");
                    foreach (FormTableCell cell in table.Cells)
                    {
                        output.Add($"    Cell ({cell.RowIndex}, {cell.ColumnIndex}) contains text: '{cell.Text}'.");
                    }
                    tableNumber++;
                }
            }

            output.Add("<h3>End Output Rendering</h3>");
            return output;
        }
        /// <summary>
        /// Sample: runs content recognition on "Invoice_1.pdf" addressed by URI and writes its
        /// pages, lines, tables and table cells to the console.
        /// </summary>
        public async Task RecognizeContentFromUri()
        {
            Uri serviceEndpoint = new Uri(TestEnvironment.Endpoint);
            AzureKeyCredential keyCredential = new AzureKeyCredential(TestEnvironment.ApiKey);

            FormRecognizerClient client = new FormRecognizerClient(serviceEndpoint, keyCredential);

            string invoiceUri = FormRecognizerTestEnvironment.CreateUri("Invoice_1.pdf");

            #region Snippet:FormRecognizerSampleRecognizeContentFromUri

            Response <IReadOnlyList <FormPage> > formPages = await client.StartRecognizeContentFromUri(new Uri(invoiceUri)).WaitForCompletionAsync();

            foreach (FormPage page in formPages.Value)
            {
                Console.WriteLine($"Form Page {page.PageNumber} has {page.Lines.Count} lines.");

                int lineNumber = 0;
                foreach (FormLine line in page.Lines)
                {
                    string pluralSuffix = line.Words.Count > 1 ? "s" : "";
                    Console.WriteLine($"    Line {lineNumber} has {line.Words.Count} word{pluralSuffix}, and text: '{line.Text}'.");
                    lineNumber++;
                }

                int tableNumber = 0;
                foreach (FormTable table in page.Tables)
                {
                    Console.WriteLine($"Table {tableNumber} has {table.RowCount} rows and {table.ColumnCount} columns.");
                    foreach (FormTableCell cell in table.Cells)
                    {
                        Console.WriteLine($"    Cell ({cell.RowIndex}, {cell.ColumnIndex}) contains text: '{cell.Text}'.");
                    }
                    tableNumber++;
                }
            }

            #endregion
        }
        /// <summary>
        /// Sample: recognizes the content of a local form file and writes every page's lines
        /// (with handwriting hint and bounding box), table cells and selection marks to the console.
        /// </summary>
        /// <returns>A task that completes once the recognition result has been printed.</returns>
        public async Task RecognizeContentFromFile()
        {
            string endpoint = TestEnvironment.Endpoint;
            string apiKey   = TestEnvironment.ApiKey;

            FormRecognizerClient client = new FormRecognizerClient(new Uri(endpoint), new AzureKeyCredential(apiKey));

            #region Snippet:FormRecognizerRecognizeFormContentFromFile
#if SNIPPET
            string filePath = "<filePath>";
#else
            string filePath = FormRecognizerTestEnvironment.CreatePath("Invoice_1.pdf");
#endif
            // The file is only read; requesting read access avoids failures on read-only files,
            // which the default read/write access of FileStream(path, mode) would cause.
            using var stream = new FileStream(filePath, FileMode.Open, FileAccess.Read);

            Response <FormPageCollection> response = await client.StartRecognizeContentAsync(stream).WaitForCompletionAsync();

            FormPageCollection formPages = response.Value;

            foreach (FormPage page in formPages)
            {
                Console.WriteLine($"Form Page {page.PageNumber} has {page.Lines.Count} lines.");

                for (int i = 0; i < page.Lines.Count; i++)
                {
                    FormLine line = page.Lines[i];
                    Console.WriteLine($"  Line {i} has {line.Words.Count} {(line.Words.Count == 1 ? "word" : "words")}, and text: '{line.Text}'.");

                    if (line.Appearance != null)
                    {
                        // Check the style and style confidence to see if text is handwritten.
                        // Note that value '0.8' is used as an example.
                        if (line.Appearance.Style.Name == TextStyleName.Handwriting && line.Appearance.Style.Confidence > 0.8)
                        {
                            Console.WriteLine("The text is handwritten");
                        }
                    }

                    // Bounding box points are printed in the order: upper left, upper right, lower right, lower left.
                    Console.WriteLine("    Its bounding box is:");
                    Console.WriteLine($"    Upper left => X: {line.BoundingBox[0].X}, Y= {line.BoundingBox[0].Y}");
                    Console.WriteLine($"    Upper right => X: {line.BoundingBox[1].X}, Y= {line.BoundingBox[1].Y}");
                    Console.WriteLine($"    Lower right => X: {line.BoundingBox[2].X}, Y= {line.BoundingBox[2].Y}");
                    Console.WriteLine($"    Lower left => X: {line.BoundingBox[3].X}, Y= {line.BoundingBox[3].Y}");
                }

                for (int i = 0; i < page.Tables.Count; i++)
                {
                    FormTable table = page.Tables[i];
                    Console.WriteLine($"  Table {i} has {table.RowCount} rows and {table.ColumnCount} columns.");
                    foreach (FormTableCell cell in table.Cells)
                    {
                        Console.WriteLine($"    Cell ({cell.RowIndex}, {cell.ColumnIndex}) contains text: '{cell.Text}'.");
                    }
                }

                for (int i = 0; i < page.SelectionMarks.Count; i++)
                {
                    FormSelectionMark selectionMark = page.SelectionMarks[i];
                    Console.WriteLine($"  Selection Mark {i} is {selectionMark.State}.");
                    Console.WriteLine("    Its bounding box is:");
                    Console.WriteLine($"      Upper left => X: {selectionMark.BoundingBox[0].X}, Y= {selectionMark.BoundingBox[0].Y}");
                    Console.WriteLine($"      Upper right => X: {selectionMark.BoundingBox[1].X}, Y= {selectionMark.BoundingBox[1].Y}");
                    Console.WriteLine($"      Lower right => X: {selectionMark.BoundingBox[2].X}, Y= {selectionMark.BoundingBox[2].Y}");
                    Console.WriteLine($"      Lower left => X: {selectionMark.BoundingBox[3].X}, Y= {selectionMark.BoundingBox[3].Y}");
                }
            }

            #endregion
        }
Exemple #22
0
 /// <summary>
 /// Creates an instance that keeps the supplied form line in <c>_formLine</c>.
 /// </summary>
 /// <param name="formLine">The form line to store.</param>
 public AssertedTableFormLine(FormLine formLine) => _formLine = formLine;
        /// <summary>
        /// Removes, in place, the line groups that span (almost) the whole page and the
        /// line groups that are too short from the line dictionary.
        /// </summary>
        /// <remarks>
        /// When at most one line remains after the page-spanning groups have been removed,
        /// the dictionary is cleared entirely.
        /// </remarks>
        /// <param name="page">Page handed to the PdfTron helpers to obtain the page size and the text bounds.</param>
        /// <param name="diclines">Line dictionary; entries (and lines inside entries) are removed in place.</param>
        /// <param name="isHorizontial">
        /// Selects the rule set and which page-size component is the reference length
        /// (index 0 when true, index 1 when false — presumably width and height).
        /// </param>
        /// <param name="posRect">Rectangle whose height is the reference length when <paramref name="isHorizontial"/> is false.</param>
        void RemoveTooShortAndTooLongLines(Page page, SortedDictionary <double, FormLineList> diclines, bool isHorizontial, Rect posRect)
        {
            double[] pageSize   = PdfTronHelper.GetPageSize(page);
            double   pageLength = isHorizontial ? pageSize[0] : pageSize[1];

            // Drop every group that holds a line whose length is within 3 units of the page dimension.
            diclines.Where(pair => pair.Value.Exists(line => Math.Abs(line.Length - pageLength) < 3))
            .Select(pair => pair.Key).ToList()
            .ForEach(key => diclines.Remove(key));

            FormLineList remaining = new FormLineList(diclines.SelectMany(pair => pair.Value).ToList());

            // With zero or one line left there is nothing to compare against: discard everything.
            if (remaining.Count <= 1)
            {
                diclines.Clear();
                return;
            }

            if (isHorizontial)
            {
                // First pass: a group must cover at least half of the distance between the text bounds.
                double[] textLeftRightXValue = pdfTronHelper.GetLeftRightTextBounds(page);
                double   textWidth           = textLeftRightXValue[1] - textLeftRightXValue[0];
                RemoveGroupsShorterThan(textWidth * 0.5);

                // Inside each multi-line group keep only lines of at least 70% of the group's longest line.
                foreach (double axisValue in diclines.Keys.ToArray())
                {
                    FormLineList lines = diclines[axisValue];
                    if (lines.Count < 2)
                    {
                        continue;
                    }

                    double longest = lines.Max(line => line.Length);
                    lines.Where(line => line.Length < (longest * 0.7)).ToList().ForEach(line => lines.Remove(line));
                }

                // Second pass: a group must reach 40% of the longest surviving line.
                FormLineList templines = new FormLineList(diclines.SelectMany(pair => pair.Value).ToList());
                if (templines.Count > 1)
                {
                    double longestLine = templines.Select(line => line.Length).Max();
                    RemoveGroupsShorterThan(longestLine * 0.4);
                }
            }
            else
            {
                // Reference is the rectangle height, unless it is below 300, in which case
                // the longest remaining line is used instead.
                double rectHeight      = posRect.Height();
                double referenceLength = rectHeight < 300
                    ? remaining.Select(line => line.Length).Max()
                    : rectHeight;

                // A group must reach 40% of the reference length, with a floor of 9.
                double minLength = referenceLength * 0.4;
                if (minLength < 9)
                {
                    minLength = 9;
                }

                RemoveGroupsShorterThan(minLength);
            }

            // Removes every dictionary entry whose lines sum up to less than the given length.
            // The keys are materialized first so the dictionary is not modified while being enumerated.
            void RemoveGroupsShorterThan(double minTotalLength)
            {
                diclines.Where(entry => entry.Value.Sum(line => line.Length) < minTotalLength)
                .Select(entry => entry.Key)
                .ToList()
                .ForEach(shortKey => diclines.Remove(shortKey));
            }
        }