/// <summary>
/// Looks up every configured search key in the recognised lines and, for each
/// line that contains a key, collects the words lying inside the rectangle
/// described by that key (margins, width and height relative to the line).
/// </summary>
/// <param name="lines">Recognised lines to search. A null array is treated as empty.</param>
/// <returns>
/// One entry per (key, matching line) pair, formatted as
/// "&lt;line text&gt; - &lt;words joined by a single space&gt;";
/// <c>null</c> when no search keys are configured.
/// </returns>
public static List<string> ExtractKeyValuePairs(Line[] lines)
{
    // Retrieve all key-fields as reference.
    List<TextExtract> searchKeyList = OCRHelper.RetrieveAllSearchTextKeyFields();

    // Callers receive null (not an empty list) when there is nothing to search for.
    if (searchKeyList == null || searchKeyList.Count == 0)
    {
        return null;
    }

    // Gather the lines that can be matched and every word that can be a value.
    // Entries without text or without at least two bounding-box values are
    // skipped, because the region test below reads BoundingBox[0] and [1].
    List<Line> candidateLines = new List<Line>();
    List<Word> candidateWords = new List<Word>();

    foreach (Line line in lines ?? new Line[0])
    {
        if (line == null)
        {
            continue;
        }

        if (line.Text != null && line.BoundingBox != null && line.BoundingBox.Length >= 2)
        {
            candidateLines.Add(line);
        }

        if (line.Words == null)
        {
            continue;
        }

        foreach (Word word in line.Words)
        {
            if (word != null && word.BoundingBox != null && word.BoundingBox.Length >= 2)
            {
                candidateWords.Add(word);
            }
        }
    }

    List<string> result = new List<string>();

    // Search key-value pairs inside the document.
    foreach (TextExtract key in searchKeyList)
    {
        if (key == null || key.Text == null)
        {
            continue;
        }

        foreach (Line keyLine in candidateLines.Where(l => l.Text.Contains(key.Text)))
        {
            // BoundingBox[0] / [1] are presumably the x / y of the line's
            // top-left corner — TODO confirm against the OCR response format.
            int left = keyLine.BoundingBox[0] + key.MarginX;
            int right = left + key.Width;
            int top = keyLine.BoundingBox[1] + key.MarginY;

            // NOTE(review): unlike the X axis, the lower Y bound does not add
            // MarginY. This mirrors the existing behaviour on purpose; confirm
            // whether "+ key.MarginY" was intended before changing it.
            int bottom = keyLine.BoundingBox[1] + key.Height;

            IEnumerable<string> valueWords = candidateWords
                .Where(w => w.BoundingBox[0] >= left
                         && w.BoundingBox[0] <= right
                         && w.BoundingBox[1] >= top
                         && w.BoundingBox[1] <= bottom)
                .Select(w => w.Text);

            result.Add(keyLine.Text + " - " + string.Join(" ", valueWords));
        }
    }

    return result;
}
/// <summary>
/// Posts the image in <paramref name="stream"/> to the text-recognition REST
/// endpoint, polls the returned operation URL until the response reports
/// "Succeeded" (at most 10 attempts, one second apart) and extracts the
/// configured key/value pairs from the recognised lines.
/// </summary>
/// <param name="stream">Image data; read from its current position to the end. The stream is left open.</param>
/// <param name="subscriptionKey">Value sent in the "Ocp-Apim-Subscription-Key" request header.</param>
/// <param name="uriEndPoint">Endpoint URI; "?mode=Printed" is appended to it.</param>
/// <returns>
/// The result of <c>OCRHelper.ExtractKeyValuePairs</c> for the recognised lines, or
/// <c>null</c> when the submission fails, no operation URL is returned, or the
/// final response contains no recognition lines.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
public static async Task<List<string>> MakeAnalysisWithImage(Stream stream, string subscriptionKey, string uriEndPoint)
{
    const int maxPollAttempts = 10;
    const int pollDelayMilliseconds = 1000;
    const string succeededMarker = "\"status\":\"Succeeded\"";

    if (stream == null)
    {
        throw new ArgumentNullException("stream");
    }

    // Buffer the image without relying on stream.Length, so non-seekable
    // streams work and the caller's stream is not closed.
    byte[] byteData;
    using (MemoryStream buffer = new MemoryStream())
    {
        await stream.CopyToAsync(buffer);
        byteData = buffer.ToArray();
    }

    // NOTE(review): a client per call is kept because the subscription key is
    // set as a default header; consider a shared HttpClient with per-request
    // headers if this method is called frequently.
    using (HttpClient client = new HttpClient())
    {
        // Request headers.
        client.DefaultRequestHeaders.Add("Ocp-Apim-Subscription-Key", subscriptionKey);

        // Assemble the URI for the REST API method.
        string uri = uriEndPoint + "?mode=Printed";

        // The response to the submission carries the URI from which the
        // result of the process is retrieved.
        string operationLocation = null;
        using (ByteArrayContent content = new ByteArrayContent(byteData))
        {
            content.Headers.ContentType = new MediaTypeHeaderValue("application/octet-stream");

            using (HttpResponseMessage submitResponse = await client.PostAsync(uri, content))
            {
                IEnumerable<string> locations;
                if (submitResponse.IsSuccessStatusCode
                    && submitResponse.Headers.TryGetValues("Operation-Location", out locations))
                {
                    operationLocation = locations.FirstOrDefault();
                }
            }
        }

        // Without an operation URL there is nothing to poll.
        if (string.IsNullOrEmpty(operationLocation))
        {
            return null;
        }

        // Poll until the service reports success or the attempts run out.
        string contentString = null;
        for (int attempt = 0; attempt < maxPollAttempts; attempt++)
        {
            await Task.Delay(pollDelayMilliseconds);

            using (HttpResponseMessage pollResponse = await client.GetAsync(operationLocation))
            {
                contentString = await pollResponse.Content.ReadAsStringAsync();
            }

            if (contentString.IndexOf(succeededMarker, StringComparison.Ordinal) != -1)
            {
                break;
            }
        }

        // Strip surrounding array brackets, if any, before deserialising.
        string json = contentString.TrimStart('[').TrimEnd(']');
        RecognizeText ocrOutput = JsonConvert.DeserializeObject<RecognizeText>(json);

        if (ocrOutput == null
            || ocrOutput.RecognitionResult == null
            || ocrOutput.RecognitionResult.Lines == null)
        {
            return null;
        }

        return OCRHelper.ExtractKeyValuePairs(ocrOutput.RecognitionResult.Lines);
    }
}