/// <summary>
/// Files a line into the coordinate-keyed line dictionary. When an existing key
/// matches the line's axis coordinate, the line joins that key's list and is merged
/// with every line in the list that it overlaps; otherwise a new entry is created.
/// </summary>
/// <param name="newLine">The line to insert.</param>
/// <param name="lineDic">
/// Line lists keyed by axis coordinate: the start y for transverse lines,
/// the start x for all other lines.
/// </param>
void AddLineToList(ref FormLine newLine, ref SortedDictionary<double, FormLineList> lineDic)
{
    double axisValue;
    if (newLine.IsTransverseLine)
    {
        axisValue = newLine.StartPoint.y;
    }
    else
    {
        axisValue = newLine.StartPoint.x;
    }

    double[] knownKeys = lineDic.Keys.ToArray();
    double matchedKey = HalfFind(0, knownKeys.Length - 1, axisValue, knownKeys);

    // A result of -1 is treated as "no existing key matched": start a new list.
    if (matchedKey == -1)
    {
        FormLineList freshList = new FormLineList();
        freshList.Add(newLine);
        lineDic.Add(axisValue, freshList);
        return;
    }

    FormLineList bucket = lineDic[matchedKey];
    bucket.Add(newLine);

    // Collect every line in the bucket (the new one included) that overlaps the new line.
    FormLineList overlapping = new FormLineList();
    foreach (FormLine candidate in bucket)
    {
        if (HasRepeatPart(candidate, newLine))
        {
            overlapping.Add(candidate);
        }
    }

    MergeLines(overlapping, bucket);
}
/// <summary>
/// Recognizes the content of the form at <paramref name="formUrl"/> and appends a
/// description of every recognized line and table cell to <c>returnString</c>.
/// </summary>
/// <param name="recognizerClient">Client used to start the recognition operation.</param>
/// <param name="formUrl">Absolute URL of the form to analyze.</param>
private static async Task RecognizeContent(FormRecognizerClient recognizerClient, string formUrl)
{
    Uri formUri = new Uri(formUrl);
    FormPageCollection formPages = await recognizerClient
        .StartRecognizeContentFromUri(formUri)
        .WaitForCompletionAsync();

    foreach (FormPage page in formPages)
    {
        // Text lines, numbered per page.
        int lineIndex = 0;
        foreach (FormLine line in page.Lines)
        {
            int wordCount = line.Words.Count;
            string pluralSuffix = wordCount > 1 ? "s" : "";
            returnString += $" Line {lineIndex} has {wordCount} word{pluralSuffix}, and text: '{line.Text}'.{Environment.NewLine}";
            lineIndex++;
        }

        // Table cells, in the order the service returns them.
        foreach (FormTable table in page.Tables)
        {
            foreach (FormTableCell cell in table.Cells)
            {
                returnString += $" Cell ({cell.RowIndex}, {cell.ColumnIndex}) contains text: '{cell.Text}'.{Environment.NewLine}";
            }
        }
    }
}
// </snippet_calls>
// <snippet_getcontent_call>
// Recognizes the content of the document at invoiceUri and prints its lines and tables.
private static async Task GetContent(
    FormRecognizerClient recognizerClient, string invoiceUri)
{
    Response<FormPageCollection> formPages = await recognizerClient
        .StartRecognizeContentFromUri(new Uri(invoiceUri))
        .WaitForCompletionAsync();
    // </snippet_getcontent_call>

    // <snippet_getcontent_print>
    foreach (FormPage page in formPages.Value)
    {
        Console.WriteLine($"Form Page {page.PageNumber} has {page.Lines.Count} lines.");

        int lineIndex = 0;
        foreach (FormLine line in page.Lines)
        {
            int wordCount = line.Words.Count;
            string pluralSuffix = wordCount > 1 ? "s" : "";
            Console.WriteLine($" Line {lineIndex} has {wordCount} word{pluralSuffix}, and text: '{line.Text}'.");
            lineIndex++;
        }

        int tableIndex = 0;
        foreach (FormTable table in page.Tables)
        {
            Console.WriteLine($"Table {tableIndex} has {table.RowCount} rows and {table.ColumnCount} columns.");
            foreach (FormTableCell cell in table.Cells)
            {
                Console.WriteLine($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) contains text: '{cell.Text}'.");
            }
            tableIndex++;
        }
    }
}
/// <summary>
/// Collapses a group of lines into the first line of the group. The first line is
/// stretched to span the smallest and largest coordinate of the whole group along its
/// direction, and every other line of the group is removed from
/// <paramref name="sameLevelLines"/>.
/// </summary>
/// <param name="mergeLines">Lines to merge; nothing happens when null or fewer than two.</param>
/// <param name="sameLevelLines">List from which the absorbed lines are removed.</param>
void MergeLines(FormLineList mergeLines, FormLineList sameLevelLines)
{
    bool nothingToMerge = mergeLines == null || mergeLines.Count < 2;
    if (nothingToMerge)
    {
        return;
    }

    FormLine survivor = mergeLines[0];
    if (survivor.IsTransverseLine)
    {
        // Gather all x coordinates first: the survivor is itself part of the group.
        double[] xs = mergeLines
            .SelectMany(line => new[] { line.StartPoint.x, line.EndPoint.x })
            .ToArray();
        survivor.StartPoint.x = xs.Min();
        survivor.EndPoint.x = xs.Max();
    }
    else
    {
        double[] ys = mergeLines
            .SelectMany(line => new[] { line.StartPoint.y, line.EndPoint.y })
            .ToArray();
        survivor.StartPoint.y = ys.Min();
        survivor.EndPoint.y = ys.Max();
    }

    // Drop everything except the survivor from the same-level list.
    int index = 1;
    while (index < mergeLines.Count)
    {
        sameLevelLines.Remove(mergeLines[index]);
        index++;
    }
}
/// <summary>
/// Recognizes the content of a fixed sample invoice PDF and writes the lines and
/// table cells of every recognized page to the console.
/// </summary>
/// <param name="recognizerClient">Client used to start the recognition operation.</param>
private static async Task RecognizeContent(FormRecognizerClient recognizerClient)
{
    const string SampleInvoiceUrl = "https://raw.githubusercontent.com/Azure/azure-sdk-for-python/master/sdk/formrecognizer/azure-ai-formrecognizer/tests/sample_forms/forms/Invoice_1.pdf";

    FormPageCollection formPages = await recognizerClient
        .StartRecognizeContentFromUri(new Uri(SampleInvoiceUrl))
        .WaitForCompletionAsync();

    foreach (FormPage page in formPages)
    {
        Console.WriteLine($"Form Page {page.PageNumber} has {page.Lines.Count} lines.");

        int lineIndex = 0;
        foreach (FormLine line in page.Lines)
        {
            int wordCount = line.Words.Count;
            string pluralSuffix = wordCount > 1 ? "s" : "";
            Console.WriteLine($" Line {lineIndex} has {wordCount} word{pluralSuffix}, and text: '{line.Text}'.");
            lineIndex++;
        }

        int tableIndex = 0;
        foreach (FormTable table in page.Tables)
        {
            Console.WriteLine($"Table {tableIndex} has {table.RowCount} rows and {table.ColumnCount} columns.");
            foreach (FormTableCell cell in table.Cells)
            {
                Console.WriteLine($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) contains text: '{cell.Text}'.");
            }
            tableIndex++;
        }
    }
}
/// <summary>
/// Sample: recognizes the content of the local file "Invoice_1.pdf" and prints the
/// lines and table cells of each recognized page to the console.
/// </summary>
public async Task RecognizeContentFromFile()
{
    string endpoint = TestEnvironment.Endpoint;
    string apiKey = TestEnvironment.ApiKey;

    var serviceUri = new Uri(endpoint);
    var keyCredential = new AzureKeyCredential(apiKey);
    FormRecognizerClient client = new FormRecognizerClient(serviceUri, keyCredential);

    string invoiceFilePath = FormRecognizerTestEnvironment.CreatePath("Invoice_1.pdf");

    using (FileStream stream = new FileStream(invoiceFilePath, FileMode.Open))
    {
        FormPageCollection formPages = await client.StartRecognizeContent(stream).WaitForCompletionAsync();

        foreach (FormPage page in formPages)
        {
            Console.WriteLine($"Form Page {page.PageNumber} has {page.Lines.Count} lines.");

            int lineIndex = 0;
            foreach (FormLine line in page.Lines)
            {
                int wordCount = line.Words.Count;
                string pluralSuffix = wordCount > 1 ? "s" : "";
                Console.WriteLine($" Line {lineIndex} has {wordCount} word{pluralSuffix}, and text: '{line.Text}'.");
                lineIndex++;
            }

            int tableIndex = 0;
            foreach (FormTable table in page.Tables)
            {
                Console.WriteLine($"Table {tableIndex} has {table.RowCount} rows and {table.ColumnCount} columns.");
                foreach (FormTableCell cell in table.Cells)
                {
                    Console.WriteLine($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) contains text: '{cell.Text}'.");
                }
                tableIndex++;
            }
        }
    }
}
// </snippet_auth_training>
// <snippet_getcontent_call>
// Recognizes the content of a sample invoice image and prints its lines and tables.
private static async Task RecognizeContent(FormRecognizerClient recognizerClient)
{
    var invoiceUri = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-REST-api-samples/master/curl/form-recognizer/simple-invoice.png";
    FormPageCollection formPages = await recognizerClient
        .StartRecognizeContentFromUri(new Uri(invoiceUri))
        .WaitForCompletionAsync();
    // </snippet_getcontent_call>

    // <snippet_getcontent_print>
    foreach (FormPage page in formPages)
    {
        Console.WriteLine($"Form Page {page.PageNumber} has {page.Lines.Count} lines.");

        int lineIndex = 0;
        foreach (FormLine line in page.Lines)
        {
            int wordCount = line.Words.Count;
            string pluralSuffix = wordCount > 1 ? "s" : "";
            Console.WriteLine($" Line {lineIndex} has {wordCount} word{pluralSuffix}, and text: '{line.Text}'.");
            lineIndex++;
        }

        int tableIndex = 0;
        foreach (FormTable table in page.Tables)
        {
            Console.WriteLine($"Table {tableIndex} has {table.RowCount} rows and {table.ColumnCount} columns.");
            foreach (FormTableCell cell in table.Cells)
            {
                Console.WriteLine($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) contains text: '{cell.Text}'.");
            }
            tableIndex++;
        }
    }
}
/// <summary>
/// Sample: recognizes the content of the local file "Invoice_1.pdf" and prints, for
/// every recognized line, its text and the four corner points of its bounding box.
/// </summary>
public async Task FieldBoundingBoxSample()
{
    string endpoint = TestEnvironment.Endpoint;
    string apiKey = TestEnvironment.ApiKey;
    FormRecognizerClient client = new FormRecognizerClient(new Uri(endpoint), new AzureKeyCredential(apiKey));

    string invoiceFilePath = FormRecognizerTestEnvironment.CreatePath("Invoice_1.pdf");

    // Corner labels in the order the sample indexes the bounding box (0..3).
    string[] cornerLabels = { "Upper left", "Upper right", "Lower right", "Lower left" };

    using (FileStream stream = new FileStream(invoiceFilePath, FileMode.Open))
    {
        FormPageCollection formPages = await client.StartRecognizeContentAsync(stream).WaitForCompletionAsync();

        foreach (FormPage page in formPages)
        {
            Console.WriteLine($"Form Page {page.PageNumber} has {page.Lines.Count} lines.");

            int lineIndex = 0;
            foreach (FormLine line in page.Lines)
            {
                Console.WriteLine($" Line {lineIndex} with text: '{line.Text}'.");
                Console.WriteLine(" Its bounding box is:");

                for (int corner = 0; corner < cornerLabels.Length; corner++)
                {
                    Console.WriteLine($" {cornerLabels[corner]} => X: {line.BoundingBox[corner].X}, Y= {line.BoundingBox[corner].Y}");
                }

                lineIndex++;
            }
        }
    }
}
/// <summary>
/// Recognizes the content of the picture at <paramref name="pPictureUri"/>, prints every
/// recognized line, and scans consecutive line pairs for a receipt-number label
/// ("lfd. Nr./Zähler") and an amount label ("Menge"). When the previous line looks like
/// one of those labels, the current line's text is stored in <c>newReceiptNo</c> or
/// (its first space-separated token) in <c>newAmount</c>.
/// </summary>
/// <param name="recognizerClient">Client used to start the recognition operation.</param>
/// <param name="pPictureUri">Absolute URI of the picture to analyze.</param>
private async Task RecognizeContent(FormRecognizerClient recognizerClient, string pPictureUri)
{
    // Text of the previously processed line; it is not reset between pages.
    string xLine = "";
    FormPageCollection formPages = await recognizerClient
        .StartRecognizeContentFromUri(new Uri(pPictureUri))
        .WaitForCompletionAsync();
    foreach (FormPage page in formPages)
    {
        Console.WriteLine($"Form Page {page.PageNumber} has {page.Lines.Count} lines.");
        for (int i = 0; i < page.Lines.Count; i++)
        {
            FormLine line = page.Lines[i];
            Console.WriteLine($" Line {i} has {line.Words.Count} word{(line.Words.Count > 1 ? "s" : "")}, and text: '{line.Text}'.");
            if (xLine.Length > 15)
            {
                // Long previous line: compare its first 15 / first 5 characters with the labels.
                int distanceReceiptNo = CalcLevenshteinDistance(xLine.Substring(0, 15), "lfd. Nr./Zähler");
                int distanceAmount = CalcLevenshteinDistance(xLine.Substring(0, 5), "Menge");
                // NOTE(review): a distance of 0 (exact label match) is rejected here and only
                // caught by the Contains() fallbacks below - confirm this is intentional.
                if ((distanceReceiptNo != 0) && (distanceReceiptNo < 6))
                {
                    newReceiptNo = line.Text;
                }
                else if ((distanceAmount != 0) && (distanceAmount < 2))
                {
                    // Only the first space-separated token of the current line is kept.
                    string[] amounts = line.Text.Split(" ");
                    newAmount = amounts[0];
                }
            }
            else if (xLine.Length >= 5)
            {
                // Previous line of 5 to 15 characters: compare it as a whole with "Menge".
                int distanceAmount = CalcLevenshteinDistance(xLine, "Menge");
                if ((distanceAmount != 0) && (distanceAmount < 2))
                {
                    string[] amounts = line.Text.Split(" ");
                    newAmount = amounts[0];
                }
            }
            // Fallbacks, used only while the respective field is still empty:
            // a plain substring test on the previous line.
            if (newAmount == "")
            {
                if (xLine.Contains("eng"))
                {
                    string[] amounts = line.Text.Split(" ");
                    newAmount = amounts[0];
                }
            }
            if (newReceiptNo == "")
            {
                if (xLine.Contains("hle"))
                {
                    newReceiptNo = line.Text;
                }
            }
            // The current line becomes the "previous line" for the next iteration.
            xLine = line.Text;
        }
    }
}
/// <summary>
/// Sample: recognizes the content of the local file "Invoice_1.pdf" and prints each
/// page's lines (with bounding boxes), table cells, and selection marks (with
/// bounding boxes) to the console.
/// </summary>
public async Task RecognizeContentFromFile()
{
    string endpoint = TestEnvironment.Endpoint;
    string apiKey = TestEnvironment.ApiKey;

    FormRecognizerClient client = new FormRecognizerClient(new Uri(endpoint), new AzureKeyCredential(apiKey));

    string filePath = FormRecognizerTestEnvironment.CreatePath("Invoice_1.pdf");

    #region Snippet:FormRecognizerRecognizeFormContentFromFile
    //@@ string filePath = "filePath";
    using var stream = new FileStream(filePath, FileMode.Open);

    Response<FormPageCollection> response = await client.StartRecognizeContentAsync(stream).WaitForCompletionAsync();
    FormPageCollection formPages = response.Value;

    foreach (FormPage page in formPages)
    {
        Console.WriteLine($"Form Page {page.PageNumber} has {page.Lines.Count} lines.");

        int lineIndex = 0;
        foreach (FormLine line in page.Lines)
        {
            int wordCount = line.Words.Count;
            string wordNoun = wordCount == 1 ? "word" : "words";
            Console.WriteLine($" Line {lineIndex} has {wordCount} {wordNoun}, and text: '{line.Text}'.");

            Console.WriteLine(" Its bounding box is:");
            Console.WriteLine($" Upper left => X: {line.BoundingBox[0].X}, Y= {line.BoundingBox[0].Y}");
            Console.WriteLine($" Upper right => X: {line.BoundingBox[1].X}, Y= {line.BoundingBox[1].Y}");
            Console.WriteLine($" Lower right => X: {line.BoundingBox[2].X}, Y= {line.BoundingBox[2].Y}");
            Console.WriteLine($" Lower left => X: {line.BoundingBox[3].X}, Y= {line.BoundingBox[3].Y}");
            lineIndex++;
        }

        int tableIndex = 0;
        foreach (FormTable table in page.Tables)
        {
            Console.WriteLine($" Table {tableIndex} has {table.RowCount} rows and {table.ColumnCount} columns.");
            foreach (FormTableCell cell in table.Cells)
            {
                Console.WriteLine($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) contains text: '{cell.Text}'.");
            }
            tableIndex++;
        }

        int markIndex = 0;
        foreach (FormSelectionMark selectionMark in page.SelectionMarks)
        {
            Console.WriteLine($" Selection Mark {markIndex} is {selectionMark.State}.");
            Console.WriteLine(" Its bounding box is:");
            Console.WriteLine($" Upper left => X: {selectionMark.BoundingBox[0].X}, Y= {selectionMark.BoundingBox[0].Y}");
            Console.WriteLine($" Upper right => X: {selectionMark.BoundingBox[1].X}, Y= {selectionMark.BoundingBox[1].Y}");
            Console.WriteLine($" Lower right => X: {selectionMark.BoundingBox[2].X}, Y= {selectionMark.BoundingBox[2].Y}");
            Console.WriteLine($" Lower left => X: {selectionMark.BoundingBox[3].X}, Y= {selectionMark.BoundingBox[3].Y}");
            markIndex++;
        }
    }
    #endregion
}
/// <summary>
/// Routes a line to the matching line dictionary: transverse lines go to
/// <c>horizontalLines</c>, all others to <c>verticalLines</c>.
/// </summary>
/// <param name="newLine">The line to register.</param>
void AddLine(FormLine newLine)
{
    if (!newLine.IsTransverseLine)
    {
        AddLineToList(ref newLine, ref verticalLines);
        return;
    }

    AddLineToList(ref newLine, ref horizontalLines);
}
/// <summary>
/// Handles a "draw line" operation: when both end points pass
/// <c>ValidateInScale</c>, a new line between them is created and registered.
/// The same-point check (<c>IsSamePoint</c>) is currently disabled.
/// </summary>
/// <param name="fromPoint">One end point of the line.</param>
/// <param name="toPoint">The other end point of the line.</param>
void DealDrawLine(Point fromPoint, Point toPoint)
{
    bool bothInScale = ValidateInScale(fromPoint) && ValidateInScale(toPoint);
    if (bothInScale)
    {
        AddLine(new FormLine(fromPoint, toPoint, true));
    }
}
/// <summary>
/// Tells whether two lines sit on the same level, i.e. whether the end coordinate of
/// <paramref name="line1"/> and the start coordinate of <paramref name="line2"/>
/// differ by less than <c>lengthError</c>. The y coordinate is compared when
/// <paramref name="line1"/> is transverse, the x coordinate otherwise.
/// </summary>
/// <param name="line1">First line; its direction selects the compared axis.</param>
/// <param name="line2">Second line.</param>
/// <returns>True when the two lines are on the same level; otherwise false.</returns>
bool OnSameLine(FormLine line1, FormLine line2)
{
    double first;
    double second;

    if (line1.IsTransverseLine)
    {
        first = line1.EndPoint.y;
        second = line2.StartPoint.y;
    }
    else
    {
        first = line1.EndPoint.x;
        second = line2.StartPoint.x;
    }

    return Math.Abs(first - second) < lengthError;
}
/// <summary>
/// Tells whether two lines overlap along their direction: true when an end point of
/// either line lies between the end points of the other (as decided by
/// <c>IsBetween</c>). x coordinates are compared when <paramref name="line1"/> is
/// transverse, y coordinates otherwise.
/// </summary>
/// <param name="line1">First line; its direction selects the compared axis.</param>
/// <param name="line2">Second line.</param>
/// <returns>True when the lines share a part; otherwise false.</returns>
bool HasRepeatPart(FormLine line1, FormLine line2)
{
    bool alongX = line1.IsTransverseLine;

    double start1 = alongX ? line1.StartPoint.x : line1.StartPoint.y;
    double end1 = alongX ? line1.EndPoint.x : line1.EndPoint.y;
    double start2 = alongX ? line2.StartPoint.x : line2.StartPoint.y;
    double end2 = alongX ? line2.EndPoint.x : line2.EndPoint.y;

    return IsBetween(start1, start2, end2)
        || IsBetween(end1, start2, end2)
        || IsBetween(start2, start1, end1)
        || IsBetween(end2, start1, end1);
}
/// <summary>
/// Sample: recognizes the content of "Invoice_1.pdf" from a URI and prints each
/// page's lines, table cells, and selection marks (with bounding boxes) to the console.
/// </summary>
public async Task RecognizeContentFromUri()
{
    string endpoint = TestEnvironment.Endpoint;
    string apiKey = TestEnvironment.ApiKey;

    FormRecognizerClient client = new FormRecognizerClient(new Uri(endpoint), new AzureKeyCredential(apiKey));

    Uri invoiceUri = FormRecognizerTestEnvironment.CreateUri("Invoice_1.pdf");

    #region Snippet:FormRecognizerSampleRecognizeContentFromUri
    FormPageCollection formPages = await client.StartRecognizeContentFromUriAsync(invoiceUri).WaitForCompletionAsync();

    foreach (FormPage page in formPages)
    {
        Console.WriteLine($"Form Page {page.PageNumber} has {page.Lines.Count} lines.");

        int lineIndex = 0;
        foreach (FormLine line in page.Lines)
        {
            int wordCount = line.Words.Count;
            string pluralSuffix = wordCount > 1 ? "s" : "";
            Console.WriteLine($" Line {lineIndex} has {wordCount} word{pluralSuffix}, and text: '{line.Text}'.");
            lineIndex++;
        }

        int tableIndex = 0;
        foreach (FormTable table in page.Tables)
        {
            Console.WriteLine($"Table {tableIndex} has {table.RowCount} rows and {table.ColumnCount} columns.");
            foreach (FormTableCell cell in table.Cells)
            {
                Console.WriteLine($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) contains text: '{cell.Text}'.");
            }
            tableIndex++;
        }

        int markIndex = 0;
        foreach (FormSelectionMark selectionMark in page.SelectionMarks)
        {
            Console.WriteLine($"Selection Mark {markIndex} is {selectionMark.State.ToString()}.");
            Console.WriteLine(" Its bounding box is:");
            Console.WriteLine($" Upper left => X: {selectionMark.BoundingBox[0].X}, Y= {selectionMark.BoundingBox[0].Y}");
            Console.WriteLine($" Upper right => X: {selectionMark.BoundingBox[1].X}, Y= {selectionMark.BoundingBox[1].Y}");
            Console.WriteLine($" Lower right => X: {selectionMark.BoundingBox[2].X}, Y= {selectionMark.BoundingBox[2].Y}");
            Console.WriteLine($" Lower left => X: {selectionMark.BoundingBox[3].X}, Y= {selectionMark.BoundingBox[3].Y}");
            markIndex++;
        }
    }
    #endregion
}
private static async Task RecognizeContent(FormRecognizerClient recognizerClient) { var invoiceUri = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-REST-api-samples/master/curl/form-recognizer/simple-invoice.png"; FormPageCollection formPages = await recognizerClient .StartRecognizeContentFromUri(new Uri(invoiceUri)) .WaitForCompletionAsync(); foreach (FormPage page in formPages) { Console.WriteLine($"Form Page {page.PageNumber} has {page.Lines.Count} lines."); for (int i = 0; i < page.Lines.Count; i++) { FormLine line = page.Lines[i]; Console.WriteLine($" Line {i} has {line.Words.Count} word{(line.Words.Count > 1 ? " s " : " ")}, and text: '{line.Text}'.");
/// <summary>
/// Analyzes an expected-layout file and a file under validation, then asserts that,
/// page by page, both contain the same number of lines and that the bounding box
/// corners of corresponding lines match within <paramref name="delta"/>.
/// Details of every compared line are written to the log.
/// </summary>
/// <param name="expectedLayoutFilePath">Path of the file that defines the expected layout.</param>
/// <param name="fileToBeValidatedPath">Path of the file whose layout is validated.</param>
/// <param name="delta">Maximum allowed difference for each bounding box coordinate.</param>
/// <returns>The analysis result of the validated file.</returns>
public FormPageCollection ValidateFormLayout(string expectedLayoutFilePath, string fileToBeValidatedPath, double delta = 0.1)
{
    var expectedAnalyzedFile = AnalyzeFile(expectedLayoutFilePath);
    var actualAnalyzedFile = AnalyzeFile(fileToBeValidatedPath);

    // Fail with a readable message instead of an index-out-of-range exception when
    // the validated file has fewer pages than the expected layout.
    Assert.IsTrue(
        actualAnalyzedFile.Count >= expectedAnalyzedFile.Count,
        "The validated file has fewer pages than the expected layout.");

    for (int i = 0; i < expectedAnalyzedFile.Count; i++)
    {
        Assert.AreEqual(expectedAnalyzedFile[i].Lines.Count, actualAnalyzedFile[i].Lines.Count, "The number of lines are different.");

        Logger.LogInformation($"Form Page {expectedAnalyzedFile[i].PageNumber} has {expectedAnalyzedFile[i].Lines.Count} lines.");

        // The loop condition must test the line index 'l'; testing the page index 'i'
        // never terminates on the line count and runs off the end of the line list.
        for (int l = 0; l < expectedAnalyzedFile[i].Lines.Count; l++)
        {
            FormLine expectedLine = expectedAnalyzedFile[i].Lines[l];
            LogFormLine("Expected", l, expectedLine);

            FormLine actualLine = actualAnalyzedFile[i].Lines[l];
            LogFormLine("Actual", l, actualLine);

            AssertSameBoundingBox(expectedLine, actualLine, delta);
        }
    }

    return actualAnalyzedFile;
}

// Logs the word count, text and the four bounding box corners of one line.
private void LogFormLine(string label, int index, FormLine line)
{
    string[] cornerLabels = { "Upper left", "Upper right", "Lower right", "Lower left" };

    Logger.LogInformation($" {label} Line {index} has {line.Words.Count} {(line.Words.Count == 1 ? "word" : "words")}, and text: '{line.Text}'.");
    Logger.LogInformation(" Its bounding box is:");
    for (int corner = 0; corner < cornerLabels.Length; corner++)
    {
        Logger.LogInformation($" {cornerLabels[corner]} => X: {line.BoundingBox[corner].X}, Y= {line.BoundingBox[corner].Y}");
    }
}

// Asserts that all four bounding box corners of two lines match within delta.
private void AssertSameBoundingBox(FormLine expectedLine, FormLine actualLine, double delta)
{
    for (int corner = 0; corner < 4; corner++)
    {
        Assert.AreEqual(expectedLine.BoundingBox[corner].X, actualLine.BoundingBox[corner].X, delta);
        Assert.AreEqual(expectedLine.BoundingBox[corner].Y, actualLine.BoundingBox[corner].Y, delta);
    }
}
/// <summary>
/// Writes the lines and table cells of every page in <paramref name="formPages"/>
/// to the console.
/// </summary>
/// <param name="formPages">Recognized pages to print.</param>
public static void PrintForm(FormPageCollection formPages)
{
    foreach (FormPage page in formPages)
    {
        Console.WriteLine($"Form Page {page.PageNumber} has {page.Lines.Count} lines.");

        int lineIndex = 0;
        foreach (FormLine line in page.Lines)
        {
            int wordCount = line.Words.Count;
            string pluralSuffix = wordCount > 1 ? "s" : "";
            Console.WriteLine($" Line {lineIndex} has {wordCount} word{pluralSuffix}, and text: '{line.Text}'.");
            lineIndex++;
        }

        int tableIndex = 0;
        foreach (FormTable table in page.Tables)
        {
            Console.WriteLine($"Table {tableIndex} has {table.RowCount} rows and {table.ColumnCount} columns.");
            foreach (FormTableCell cell in table.Cells)
            {
                Console.WriteLine($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) contains text: '{cell.Text}'.");
            }
            tableIndex++;
        }
    }
}
//public async Task RunFormRecognizerClient()
//{
//    string trainingDataUrl = "<SAS-URL-of-your-form-folder-in-blob-storage>";
//    //string formUrl = "<SAS-URL-of-a-form-in-blob-storage>";
//    string receiptUrl = "https://docs.microsoft.com/azure/cognitive-services/form-recognizer/media"
//    + "/contoso-allinone.jpg";
//    // Call Form Recognizer scenarios:
//    Console.WriteLine("Get form content...");
//    await GetContent(recognizerClient, formUrl);
//    Console.WriteLine("Analyze receipt...");
//    await AnalyzeReceipt(recognizerClient, receiptUrl);
//    //Console.WriteLine("Train Model with training data...");
//    //Guid modelId = await TrainModel(trainingClient, trainingDataUrl);
//    //Console.WriteLine("Analyze PDF form...");
//    //await AnalyzePdfForm(recognizerClient, modelId, formUrl);
//    //Console.WriteLine("Manage models...");
//    //await ManageModels(trainingClient, trainingDataUrl);
//}

/// <summary>
/// Recognizes the content of the form at <paramref name="formurl"/> and returns a
/// textual description of its pages, lines and table cells, framed by a starting and
/// an ending HTML heading entry.
/// </summary>
/// <param name="formurl">Absolute URL of the form to analyze.</param>
/// <returns>The description, one list entry per output line.</returns>
public async Task<List<string>> ParseForm(string formurl)
{
    List<string> retstr = new List<string> { "<h3>starting Output Rendering</h3>" };

    FormRecognizerClient recognizerClient = new FormRecognizerClient(new Uri(endpoint), credential);
    var formPages = await recognizerClient.StartRecognizeContentFromUri(new Uri(formurl)).WaitForCompletionAsync();

    foreach (FormPage page in formPages.Value)
    {
        retstr.Add($"Form Page {page.PageNumber} has {page.Lines.Count} lines.");

        int lineIndex = 0;
        foreach (FormLine line in page.Lines)
        {
            int wordCount = line.Words.Count;
            string pluralSuffix = wordCount > 1 ? "s" : "";
            retstr.Add($" Line {lineIndex} has {wordCount} word{pluralSuffix}, and text: '{line.Text}'.");
            lineIndex++;
        }

        int tableIndex = 0;
        foreach (FormTable table in page.Tables)
        {
            retstr.Add($"Table {tableIndex} has {table.RowCount} rows and {table.ColumnCount} columns.");
            foreach (FormTableCell cell in table.Cells)
            {
                retstr.Add($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) contains text: '{cell.Text}'.");
            }
            tableIndex++;
        }
    }

    retstr.Add("<h3>End Output Rendering</h3>");
    return retstr;
}
/// <summary>
/// Sample: recognizes the content of "Invoice_1.pdf" from a URI and prints the lines
/// and table cells of each recognized page to the console.
/// </summary>
public async Task RecognizeContentFromUri()
{
    string endpoint = TestEnvironment.Endpoint;
    string apiKey = TestEnvironment.ApiKey;

    FormRecognizerClient client = new FormRecognizerClient(new Uri(endpoint), new AzureKeyCredential(apiKey));

    string invoiceUri = FormRecognizerTestEnvironment.CreateUri("Invoice_1.pdf");

    #region Snippet:FormRecognizerSampleRecognizeContentFromUri
    Response<IReadOnlyList<FormPage>> formPages = await client.StartRecognizeContentFromUri(new Uri(invoiceUri)).WaitForCompletionAsync();

    foreach (FormPage page in formPages.Value)
    {
        Console.WriteLine($"Form Page {page.PageNumber} has {page.Lines.Count} lines.");

        int lineIndex = 0;
        foreach (FormLine line in page.Lines)
        {
            int wordCount = line.Words.Count;
            string pluralSuffix = wordCount > 1 ? "s" : "";
            Console.WriteLine($" Line {lineIndex} has {wordCount} word{pluralSuffix}, and text: '{line.Text}'.");
            lineIndex++;
        }

        int tableIndex = 0;
        foreach (FormTable table in page.Tables)
        {
            Console.WriteLine($"Table {tableIndex} has {table.RowCount} rows and {table.ColumnCount} columns.");
            foreach (FormTableCell cell in table.Cells)
            {
                Console.WriteLine($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) contains text: '{cell.Text}'.");
            }
            tableIndex++;
        }
    }
    #endregion
}
/// <summary>
/// Sample: recognizes the content of a local file and prints each page's lines
/// (flagging handwritten text and listing bounding boxes), table cells, and
/// selection marks (with bounding boxes) to the console.
/// </summary>
public async Task RecognizeContentFromFile()
{
    string endpoint = TestEnvironment.Endpoint;
    string apiKey = TestEnvironment.ApiKey;

    FormRecognizerClient client = new FormRecognizerClient(new Uri(endpoint), new AzureKeyCredential(apiKey));

    #region Snippet:FormRecognizerRecognizeFormContentFromFile
#if SNIPPET
    string filePath = "<filePath>";
#else
    string filePath = FormRecognizerTestEnvironment.CreatePath("Invoice_1.pdf");
#endif
    using var stream = new FileStream(filePath, FileMode.Open);

    Response<FormPageCollection> response = await client.StartRecognizeContentAsync(stream).WaitForCompletionAsync();
    FormPageCollection formPages = response.Value;

    foreach (FormPage page in formPages)
    {
        Console.WriteLine($"Form Page {page.PageNumber} has {page.Lines.Count} lines.");

        int lineIndex = 0;
        foreach (FormLine line in page.Lines)
        {
            int wordCount = line.Words.Count;
            string wordNoun = wordCount == 1 ? "word" : "words";
            Console.WriteLine($" Line {lineIndex} has {wordCount} {wordNoun}, and text: '{line.Text}'.");

            // Check the style and style confidence to see if text is handwritten.
            // Note that value '0.8' is used as an example.
            if (line.Appearance != null
                && line.Appearance.Style.Name == TextStyleName.Handwriting
                && line.Appearance.Style.Confidence > 0.8)
            {
                Console.WriteLine("The text is handwritten");
            }

            Console.WriteLine(" Its bounding box is:");
            Console.WriteLine($" Upper left => X: {line.BoundingBox[0].X}, Y= {line.BoundingBox[0].Y}");
            Console.WriteLine($" Upper right => X: {line.BoundingBox[1].X}, Y= {line.BoundingBox[1].Y}");
            Console.WriteLine($" Lower right => X: {line.BoundingBox[2].X}, Y= {line.BoundingBox[2].Y}");
            Console.WriteLine($" Lower left => X: {line.BoundingBox[3].X}, Y= {line.BoundingBox[3].Y}");
            lineIndex++;
        }

        int tableIndex = 0;
        foreach (FormTable table in page.Tables)
        {
            Console.WriteLine($" Table {tableIndex} has {table.RowCount} rows and {table.ColumnCount} columns.");
            foreach (FormTableCell cell in table.Cells)
            {
                Console.WriteLine($" Cell ({cell.RowIndex}, {cell.ColumnIndex}) contains text: '{cell.Text}'.");
            }
            tableIndex++;
        }

        int markIndex = 0;
        foreach (FormSelectionMark selectionMark in page.SelectionMarks)
        {
            Console.WriteLine($" Selection Mark {markIndex} is {selectionMark.State}.");
            Console.WriteLine(" Its bounding box is:");
            Console.WriteLine($" Upper left => X: {selectionMark.BoundingBox[0].X}, Y= {selectionMark.BoundingBox[0].Y}");
            Console.WriteLine($" Upper right => X: {selectionMark.BoundingBox[1].X}, Y= {selectionMark.BoundingBox[1].Y}");
            Console.WriteLine($" Lower right => X: {selectionMark.BoundingBox[2].X}, Y= {selectionMark.BoundingBox[2].Y}");
            Console.WriteLine($" Lower left => X: {selectionMark.BoundingBox[3].X}, Y= {selectionMark.BoundingBox[3].Y}");
            markIndex++;
        }
    }
    #endregion
}
/// <summary>
/// Creates a wrapper around the given form line.
/// </summary>
/// <param name="formLine">The form line stored by this instance.</param>
public AssertedTableFormLine(FormLine formLine) => _formLine = formLine;
/// <summary>
/// Filters a line dictionary in place, dropping entries whose lines are too long
/// (about as long as the page dimension) or too short relative to the other lines.
/// When at most one line survives the "too long" filter, the dictionary is cleared.
/// </summary>
/// <param name="page">Page the lines were detected on; used to query page size and text bounds.</param>
/// <param name="diclines">Line lists keyed by axis coordinate; modified in place.</param>
/// <param name="isHorizontial">True when <paramref name="diclines"/> holds horizontal lines.</param>
/// <param name="posRect">Rectangle whose height is the reference length for vertical lines.</param>
void RemoveTooShortAndTooLongLines(Page page, SortedDictionary<double, FormLineList> diclines, bool isHorizontial, Rect posRect)
{
    // Reference length: pageSize[0] for horizontal lines, pageSize[1] for vertical ones
    // (presumably page width / height - confirm against PdfTronHelper.GetPageSize).
    double[] pageSize = PdfTronHelper.GetPageSize(page);
    double maxLength = isHorizontial ? pageSize[0] : pageSize[1];

    // Drop every key holding a line whose length is within 3 units of that reference.
    diclines.Where(pair => pair.Value.Exists(line => Math.Abs(line.Length - maxLength) < 3))
        .Select(pair => pair.Key).ToList()
        .ForEach(key => diclines.Remove(key));

    FormLineList _lines = new FormLineList(diclines.SelectMany(pair => pair.Value).ToList());
    if (_lines.Count > 1)
    {
        if (isHorizontial)
        {
            // Keys whose summed line length is below half of the text extent are removed.
            double[] textLeftRightXValue = pdfTronHelper.GetLeftRightTextBounds(page);
            maxLength = (textLeftRightXValue[1] - textLeftRightXValue[0]);
            diclines.Where(x => x.Value.Sum(line => line.Length) < maxLength * 0.5)
                .Select(x => x.Key).ToList()
                .ForEach(key => diclines.Remove(key));

            // Within each remaining key, drop lines shorter than 70% of that key's longest line.
            foreach (double key in diclines.Keys.ToArray())
            {
                FormLineList lines = diclines[key];
                if (lines.Count < 2)
                {
                    continue;
                }

                double _maxLength = lines.Max(line => line.Length);
                lines.Where(line => line.Length < (_maxLength * 0.7)).ToList().ForEach(line => lines.Remove(line));
            }

            // Finally drop keys whose summed length is below 40% of the longest remaining line.
            FormLineList templines = new FormLineList(diclines.SelectMany(pair => pair.Value).ToList());
            if (templines.Count > 1)
            {
                maxLength = templines.Select(line => line.Length).Max();
                double scale = 0.4;
                double minLength = maxLength * scale;
                IEnumerable<double> shortLineKeys = diclines.Where(
                    x => x.Value.Sum(line => line.Length) < minLength
                    ).Select(x => x.Key);
                shortLineKeys.ToList().ForEach(key => diclines.Remove(key));
            }
        }
        else
        {
            // Vertical lines: the reference is the rectangle height, or the longest line
            // when the rectangle is lower than 300 units.
            maxLength = posRect.Height();
            if (posRect.Height() < 300)
            {
                maxLength = _lines.Select(line => line.Length).Max();
            }

            // Threshold is 40% of the reference, but never below 9.
            double minLength = maxLength * 0.4;
            if (minLength < 9)
            {
                minLength = 9;
            }

            IEnumerable<double> shortLineKeys = diclines.Where(
                x => x.Value.Sum(line => line.Length) < minLength
                ).Select(x => x.Key);
            shortLineKeys.ToList().ForEach(key => diclines.Remove(key));
        }
    }
    else
    {
        diclines.Clear();
    }
}