/// <summary>
/// Runs Amazon Textract text detection (eu-west-1) on an image and returns one
/// <see cref="TextBlock"/> per detected WORD block.
/// </summary>
/// <param name="image">Raw bytes of the image to analyse.</param>
/// <returns>
/// Results whose <c>Blocks</c> hold the non-blank words; each bounding box is the Textract
/// box multiplied by <c>this.Width</c> / <c>this.Height</c>.
/// </returns>
/// <exception cref="System.ArgumentNullException"><paramref name="image"/> is null.</exception>
public TextExtractionResults Extract(byte[] image)
{
    if (image == null)
    {
        throw new System.ArgumentNullException(nameof(image));
    }

    var credentials = new Amazon.Runtime.BasicAWSCredentials(this.AmazonAccessKey, this.AmazonSecretKey);
    var request = new Amazon.Textract.Model.DetectDocumentTextRequest
    {
        Document = new Amazon.Textract.Model.Document()
    };

    Amazon.Textract.Model.DetectDocumentTextResponse response;

    // The client is created for this call only, so it is disposed here as well;
    // the original left it (and its underlying HTTP resources) to the finalizer.
    using (var client = new Amazon.Textract.AmazonTextractClient(credentials, Amazon.RegionEndpoint.EUWest1))
    using (var imageStream = new System.IO.MemoryStream(image))
    {
        request.Document.Bytes = imageStream;

        // Blocking wait: the method signature is synchronous and must stay that way for callers.
        response = client.DetectDocumentTextAsync(request).Result;
    }

    // NOTE(review): presumably fills this.Width / this.Height from the image bytes - confirm.
    DetermineDimensions(image);

    var words = new List<TextBlock>();
    if (response.Blocks != null)
    {
        foreach (var block in response.Blocks)
        {
            // Blocks without text (the original comment notes the first one is always empty) are skipped.
            if (string.IsNullOrWhiteSpace(block.Text))
            {
                continue;
            }

            // Only individual words are reported; other block types are ignored.
            if (block.BlockType != Amazon.Textract.BlockType.WORD)
            {
                continue;
            }

            var box = block.Geometry.BoundingBox;
            double left = box.Left * this.Width;
            double top = box.Top * this.Height;

            words.Add(new TextBlock
            {
                Text = block.Text,
                X1 = left,
                Y1 = top,
                X2 = left + box.Width * this.Width,
                Y2 = top + box.Height * this.Height
            });
        }
    }

    TextExtractionResults rs = new TextExtractionResults();
    rs.Blocks = words.ToArray();
    return rs;
}
/// <summary>
/// Serialises the OCR results as indented JSON and displays the text in <c>ctlJsonViewer</c>.
/// Any exception is shown to the user in a message box instead of propagating.
/// </summary>
/// <param name="lastOcrResults">The results object to serialise.</param>
private void RenderJson(TextExtractionResults lastOcrResults)
{
    try
    {
        string json = Newtonsoft.Json.JsonConvert.SerializeObject(
            lastOcrResults,
            new Newtonsoft.Json.JsonSerializerSettings
            {
                Formatting = Newtonsoft.Json.Formatting.Indented,
            });

        // Clear the viewer first, then show the freshly rendered document.
        ctlJsonViewer.Text = "";
        ctlJsonViewer.Text = json;
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.ToString());
    }
}
/// <summary>
/// Posts the image to the OCR.space parse endpoint (English, overlay requested) and returns
/// one <see cref="TextBlock"/> per word found in the returned text overlay.
/// </summary>
/// <param name="image">Raw bytes of the image; uploaded under the file name "image.jpg".</param>
/// <returns>Results whose <c>Blocks</c> hold every word with its bounding box.</returns>
/// <exception cref="ArgumentNullException"><paramref name="image"/> is null.</exception>
public TextExtractionResults Extract(byte[] image)
{
    if (image == null)
    {
        throw new ArgumentNullException(nameof(image));
    }

    string responseBody;

    // The client, the multipart form (which owns its parts) and the response are all disposable;
    // the original never released any of them, leaking connections on every call.
    // NOTE(review): a shared HttpClient instance would be preferable if this is called often.
    using (var httpClient = new HttpClient())
    using (var form = new MultipartFormDataContent())
    {
        httpClient.Timeout = new TimeSpan(1, 1, 1);

        form.Add(new StringContent(this.ApiKey), "apikey");
        form.Add(new StringContent("eng"), "language");
        form.Add(new StringContent("True"), "isOverlayRequired");
        form.Add(new ByteArrayContent(image, 0, image.Length), "image", "image.jpg");

        // Blocking waits: the method signature is synchronous and must stay that way for callers.
        using (HttpResponseMessage response = httpClient.PostAsync("https://api.ocr.space/Parse/Image", form).Result)
        {
            responseBody = response.Content.ReadAsStringAsync().Result;
        }
    }

    entity.Rootobject ocrResult = JsonConvert.DeserializeObject<entity.Rootobject>(responseBody);

    // Flatten parsed results -> overlay lines -> words, mapping each word to a block.
    TextBlock[] blocks = ocrResult.ParsedResults
        .SelectMany(parsed => parsed.TextOverlay.Lines)
        .SelectMany(line => line.Words)
        .Select(word => new TextBlock
        {
            Text = word.WordText,
            X1 = word.Left,
            Y1 = word.Top,
            X2 = word.Left + word.Width,
            Y2 = word.Top + word.Height
        })
        .ToArray();

    TextExtractionResults rs = new TextExtractionResults();
    rs.Blocks = blocks;
    return rs;
}
/// <summary>
/// Runs Amazon Rekognition text detection on an image and returns one <see cref="TextBlock"/>
/// per detection, with the bounding box multiplied by the image's pixel width and height.
/// </summary>
/// <param name="image">Raw bytes of an image that System.Drawing can decode.</param>
/// <returns>Results whose <c>Blocks</c> hold every text detection in the response.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="image"/> is null.</exception>
public TextExtractionResults Extract(byte[] image)
{
    if (image == null)
    {
        throw new System.ArgumentNullException(nameof(image));
    }

    // NOTE(review): ownership of the client returned by CreateAwsClient() is not visible here,
    // so it is deliberately not disposed in this method - confirm whether it should be.
    AmazonRekognitionClient rekognitionClient = CreateAwsClient();

    // Decode the image once just to learn its pixel size; release the bitmap and stream right away.
    double imgWidth, imgHeight;
    using (var sizeStream = new System.IO.MemoryStream(image))
    using (var bmp = new System.Drawing.Bitmap(sizeStream))
    {
        imgWidth = bmp.Width;
        imgHeight = bmp.Height;
    }

    Amazon.Rekognition.Model.DetectTextResponse detectTextResponse;

    // The request gets its own stream, disposed once the call returns
    // (the original created this stream and never disposed it).
    using (var payload = new System.IO.MemoryStream(image))
    {
        var req = new Amazon.Rekognition.Model.DetectTextRequest
        {
            Image = new Amazon.Rekognition.Model.Image { Bytes = payload }
        };

        // Blocking wait: the method signature is synchronous and must stay that way for callers.
        detectTextResponse = rekognitionClient.DetectTextAsync(req).Result;
    }

    List<TextBlock> rsTextBoxes = new List<TextBlock>();
    foreach (Amazon.Rekognition.Model.TextDetection detection in detectTextResponse.TextDetections)
    {
        var box = detection.Geometry.BoundingBox;
        double left = box.Left * imgWidth;
        double top = box.Top * imgHeight;

        rsTextBoxes.Add(new TextBlock
        {
            Text = detection.DetectedText,
            X1 = left,
            X2 = left + box.Width * imgWidth,
            Y1 = top,
            Y2 = top + box.Height * imgHeight
        });
    }

    TextExtractionResults rs = new TextExtractionResults();
    rs.Blocks = rsTextBoxes.ToArray();
    return rs;
}
/// <summary>
/// Decodes the image bytes, draws the OCR overlay on top (a filled rectangle per block, an
/// underline per sentence, a rectangle per paragraph) and shows the result in <c>_picBox</c>.
/// Also writes the block count to <c>ctlStatusPanel0</c>.
/// </summary>
/// <param name="raw">Encoded image bytes.</param>
/// <param name="lastOcrResults">OCR results to overlay; null draws the bare image.</param>
private void RenderImage(byte[] raw, TextExtractionResults lastOcrResults)
{
    // Fall back to an empty result when none was supplied. The original tested for null and
    // then dereferenced the null reference to build the status text, which always threw.
    Contracts.entity.TextExtractionResults ocr = lastOcrResults
        ?? new Contracts.entity.TextExtractionResults { Blocks = new Contracts.entity.TextBlock[] { } };

    int blockCount = ocr.Blocks == null ? 0 : ocr.Blocks.Length;
    ctlStatusPanel0.Text = $"Found {blockCount} text objects";

    // Copy the decoded image into a standalone bitmap. An Image created by FromStream needs its
    // stream to stay open for the image's whole lifetime, but the stream is disposed here while
    // the picture box keeps using the image.
    System.Drawing.Bitmap canvas;
    using (var memStm = new System.IO.MemoryStream(raw))
    using (var decoded = System.Drawing.Image.FromStream(memStm))
    {
        canvas = new System.Drawing.Bitmap(decoded);
    }

    try
    {
        using (var penBlock = new System.Drawing.Pen(System.Drawing.Color.Black))
        using (var penSentence = new System.Drawing.Pen(System.Drawing.Color.Orange, 3))
        using (var penPara = new System.Drawing.Pen(System.Drawing.Color.Blue, 3))
        using (var highlight = new System.Drawing.SolidBrush(
            System.Drawing.Color.FromArgb(100, System.Drawing.Color.Yellow)))
        using (var g = System.Drawing.Graphics.FromImage(canvas))
        {
            if (ocr.Blocks != null)
            {
                foreach (var box in ocr.Blocks)
                {
                    // Normalise the corners so the rectangle is valid whichever way X1/X2 and Y1/Y2 are ordered.
                    int xUpperLeft = (int)Math.Min(box.X1, box.X2);
                    int yUpperLeft = (int)Math.Min(box.Y1, box.Y2);
                    float width = (float)Math.Abs(box.X1 - box.X2);
                    float ht = (float)Math.Abs(box.Y1 - box.Y2);

                    g.DrawRectangle(penBlock, xUpperLeft, yUpperLeft, width, ht);
                    g.FillRectangle(highlight, xUpperLeft, yUpperLeft, width, ht);
                }
            }

            // Sentences and paragraphs are optional (the empty fallback above leaves them unset).
            if (ocr.Sentences != null)
            {
                foreach (var sentence in ocr.Sentences)
                {
                    // Underline: a line along the bottom edge of the sentence rectangle.
                    g.DrawLine(
                        penSentence,
                        sentence.Rectangle.X,
                        sentence.Rectangle.Bottom,
                        sentence.Rectangle.Right,
                        sentence.Rectangle.Bottom);
                }
            }

            if (ocr.Paragraphs != null)
            {
                foreach (var para in ocr.Paragraphs)
                {
                    g.DrawRectangle(
                        penPara,
                        para.Rectangle.Left,
                        para.Rectangle.Top,
                        para.Rectangle.Width,
                        para.Rectangle.Height);
                }
            }
        }
    }
    catch
    {
        // The bitmap has not been handed to the picture box yet, so release it before rethrowing.
        canvas.Dispose();
        throw;
    }

    // NOTE(review): the image previously shown by _picBox is not disposed here because its
    // ownership is not visible from this method - confirm and dispose it if this control owns it.
    _picBox.Image = canvas;
}
/// <summary>
/// Builds extraction results from an OCR JSON document: every entry under a "regions" array
/// becomes a paragraph, every entry under a "lines" array becomes a sentence, and the words of
/// each line become text blocks (converted by <c>CreateFromJtoken</c>).
/// </summary>
/// <param name="jsonText">JSON text containing "regions", "lines" and "words" arrays.</param>
/// <returns>Results with <c>Paragraphs</c>, <c>Sentences</c> and <c>Blocks</c> populated.</returns>
public TextExtractionResults ExtractEntitiesFromJson(string jsonText)
{
    JObject json = JObject.Parse(jsonText);
    TextExtractionResults rs = new TextExtractionResults();

    // Paragraphs: one per element of every "regions" array, at any depth.
    rs.Paragraphs = json
        .SelectTokens("$..regions")
        .SelectMany(regions => regions)
        .Select(region => new Paragraph { Rectangle = ParseBoundingBox(region) })
        .ToArray();

    // Sentences: one per element of every "lines" array; each also contributes its words as blocks.
    List<TextBlock> allBlocks = new List<TextBlock>();
    List<Sentence> allSentences = new List<Sentence>();

    JToken[] lines = json
        .SelectTokens("$..lines")
        .SelectMany(l => l)
        .ToArray();

    foreach (JToken line in lines)
    {
        TextBlock[] wordBlocks = line
            .SelectTokens("$..words")
            .SelectMany(words => words)
            .Select(word => CreateFromJtoken(word))
            .ToArray();

        allSentences.Add(new Sentence
        {
            Rectangle = ParseBoundingBox(line),
            Blocks = wordBlocks
        });
        allBlocks.AddRange(wordBlocks);
    }

    rs.Sentences = allSentences.ToArray();
    rs.Blocks = allBlocks.ToArray();
    return rs;
}

/// <summary>
/// Reads the token's "boundingBox" string (four comma-separated numbers) into a rectangle,
/// passing the numbers to the RectangleF constructor in the order they appear.
/// </summary>
/// <exception cref="System.FormatException">The value does not hold at least four numbers.</exception>
private static System.Drawing.RectangleF ParseBoundingBox(JToken token)
{
    string raw = token["boundingBox"].Value<string>();

    // The JSON is machine-generated, so parse with the invariant culture; the current culture
    // could treat '.' as a group separator and silently produce wrong coordinates.
    float[] parts = raw
        .Split(',')
        .Select(fragment => float.Parse(fragment, System.Globalization.CultureInfo.InvariantCulture))
        .ToArray();

    if (parts.Length < 4)
    {
        throw new System.FormatException("boundingBox must contain four comma-separated numbers: " + raw);
    }

    return new System.Drawing.RectangleF(parts[0], parts[1], parts[2], parts[3]);
}