예제 #1
0
        /// <summary>
        /// Sends the image to Amazon Textract (DetectDocumentText, region eu-west-1) and
        /// returns the detected words with their bounding boxes scaled by this.Width / this.Height.
        /// </summary>
        /// <param name="image">Raw bytes of the image to analyse.</param>
        /// <returns>
        /// A result whose Blocks array holds one entry per WORD block with non-blank text;
        /// all other block types are ignored.
        /// </returns>
        /// <exception cref="System.ArgumentNullException">Thrown when <paramref name="image"/> is null.</exception>
        public TextExtractionResults Extract(byte[] image)
        {
            if (image == null)
            {
                throw new System.ArgumentNullException("image");
            }

            Amazon.Runtime.BasicAWSCredentials awsCreds =
                new Amazon.Runtime.BasicAWSCredentials(this.AmazonAccessKey, this.AmazonSecretKey);

            Amazon.Textract.Model.DetectDocumentTextResponse result;
            // The client and the upload stream are both disposable; release them as soon as
            // the (blocking) service call has completed.
            using (var client = new Amazon.Textract.AmazonTextractClient(awsCreds, Amazon.RegionEndpoint.EUWest1))
            using (var memstm = new System.IO.MemoryStream(image))
            {
                var request = new Amazon.Textract.Model.DetectDocumentTextRequest
                {
                    Document = new Amazon.Textract.Model.Document
                    {
                        Bytes = memstm
                    }
                };
                // NOTE(review): .Result blocks the calling thread and wraps failures in
                // AggregateException; kept because the method signature is synchronous.
                result = client.DetectDocumentTextAsync(request).Result;
            }

            TextExtractionResults rs           = new TextExtractionResults();
            List <TextBlock>      lstTextBoxes = new List <TextBlock>();

            // NOTE(review): presumably populates this.Width / this.Height from the image - confirm.
            DetermineDimensions(image);
            if (result.Blocks != null)
            {
                foreach (var block in result.Blocks)
                {
                    // Blocks without text (the first block returned appears to be one) and
                    // non-WORD blocks (lines, pages, ...) are not reported.
                    if (string.IsNullOrWhiteSpace(block.Text))
                    {
                        continue;
                    }
                    if (block.BlockType != Amazon.Textract.BlockType.WORD)
                    {
                        continue;
                    }

                    // Bounding box values are multiplied by the image dimensions to obtain
                    // the corner coordinates.
                    var       box    = block.Geometry.BoundingBox;
                    TextBlock rsText = new TextBlock();
                    rsText.Text = block.Text;
                    rsText.X1   = box.Left * this.Width;
                    rsText.Y1   = box.Top * this.Height;
                    rsText.X2   = rsText.X1 + box.Width * this.Width;
                    rsText.Y2   = rsText.Y1 + box.Height * this.Height;
                    lstTextBoxes.Add(rsText);
                }
            }
            rs.Blocks = lstTextBoxes.ToArray();
            return(rs);
        }
예제 #2
0
 /// <summary>
 /// Serializes the OCR results as indented JSON and places the text in ctlJsonViewer.
 /// Any exception raised while doing so is shown to the user in a message box.
 /// </summary>
 /// <param name="lastOcrResults">The OCR results to display.</param>
 private void RenderJson(TextExtractionResults lastOcrResults)
 {
     try
     {
         var serializerSettings = new Newtonsoft.Json.JsonSerializerSettings();
         serializerSettings.Formatting = Newtonsoft.Json.Formatting.Indented;

         string indentedJson = Newtonsoft.Json.JsonConvert.SerializeObject(
             lastOcrResults,
             serializerSettings);

         // The viewer is cleared first and then given the new text.
         ctlJsonViewer.Text = "";
         ctlJsonViewer.Text = indentedJson;
     }
     catch (Exception failure)
     {
         MessageBox.Show(failure.ToString());
     }
 }
예제 #3
0
        /// <summary>
        /// Posts the image to the OCR.space Parse/Image endpoint (language "eng", overlay
        /// requested) and converts every word of the returned text overlay into a TextBlock.
        /// </summary>
        /// <param name="image">Raw bytes of the image; uploaded under the file name "image.jpg".</param>
        /// <returns>A result whose Blocks array holds one entry per recognised word.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="image"/> is null.</exception>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the response does not contain any ParsedResults.
        /// </exception>
        public TextExtractionResults Extract(byte[] image)
        {
            if (image == null)
            {
                throw new ArgumentNullException("image");
            }

            string strContent;
            // The client, the form and the response all hold disposable resources.
            // NOTE(review): a client shared across calls would be preferable to one per call,
            // but that needs a field outside this method.
            using (HttpClient httpClient = new HttpClient())
            using (MultipartFormDataContent form = new MultipartFormDataContent())
            {
                httpClient.Timeout = new TimeSpan(1, 1, 1);

                form.Add(new StringContent(this.ApiKey), "apikey"); //Added api key in form data
                form.Add(new StringContent("eng"), "language");
                form.Add(new StringContent("True"), "isOverlayRequired");
                form.Add(new ByteArrayContent(image, 0, image.Length), "image", "image.jpg");

                // NOTE(review): .Result blocks the calling thread; kept because the method
                // signature is synchronous.
                using (HttpResponseMessage response =
                           httpClient.PostAsync("https://api.ocr.space/Parse/Image", form).Result)
                {
                    strContent = response.Content.ReadAsStringAsync().Result;
                }
            }

            entity.Rootobject ocrResult = JsonConvert.DeserializeObject <entity.Rootobject>(strContent);

            // Without ParsedResults there is nothing to convert; report the raw payload
            // instead of failing later with an uninformative null-argument error.
            if (ocrResult == null || ocrResult.ParsedResults == null)
            {
                throw new InvalidOperationException(
                          "OCR.space response contained no ParsedResults: " + strContent);
            }

            List <TextBlock> lstTextWords = new List <TextBlock>();

            foreach (var parsed in ocrResult.ParsedResults)
            {
                // A parsed result without overlay data contributes no words.
                if (parsed == null || parsed.TextOverlay == null || parsed.TextOverlay.Lines == null)
                {
                    continue;
                }
                foreach (var line in parsed.TextOverlay.Lines)
                {
                    if (line == null || line.Words == null)
                    {
                        continue;
                    }
                    foreach (var word in line.Words)
                    {
                        TextBlock rsTxt = new TextBlock
                        {
                            Text = word.WordText,
                            X1   = word.Left,
                            Y1   = word.Top
                        };
                        rsTxt.X2 = word.Left + word.Width;
                        rsTxt.Y2 = word.Top + word.Height;
                        lstTextWords.Add(rsTxt);
                    }
                }
            }

            TextExtractionResults rs = new TextExtractionResults();
            rs.Blocks = lstTextWords.ToArray();
            return(rs);
        }
예제 #4
0
        /// <summary>
        /// Sends the image to Amazon Rekognition DetectText and returns every text detection
        /// with its bounding box scaled by the pixel dimensions of the image.
        /// </summary>
        /// <param name="image">Raw bytes of an image that System.Drawing.Bitmap can decode.</param>
        /// <returns>A result whose Blocks array holds one entry per returned text detection.</returns>
        /// <exception cref="System.ArgumentNullException">Thrown when <paramref name="image"/> is null.</exception>
        public TextExtractionResults Extract(byte[] image)
        {
            if (image == null)
            {
                throw new System.ArgumentNullException("image");
            }

            TextExtractionResults   rs                = new TextExtractionResults();
            List <TextBlock>        rsTextBoxes       = new List <TextBlock>();
            // NOTE(review): ownership of the client returned by CreateAwsClient() is not
            // visible here, so it is deliberately not disposed - confirm.
            AmazonRekognitionClient rekognitionClient = CreateAwsClient();

            // Decode the image only to learn its pixel dimensions; the stream and bitmap
            // are released before the service call is made.
            double imgWidth, imgHeight;
            using (var sizeStream = new System.IO.MemoryStream(image))
            using (var bmp = new System.Drawing.Bitmap(sizeStream))
            {
                imgWidth  = bmp.Width;
                imgHeight = bmp.Height;
            }

            Amazon.Rekognition.Model.DetectTextResponse detectTextResponse;
            using (var uploadStream = new System.IO.MemoryStream(image))
            {
                var req = new Amazon.Rekognition.Model.DetectTextRequest
                {
                    Image = new Amazon.Rekognition.Model.Image
                    {
                        Bytes = uploadStream
                    }
                };
                // NOTE(review): .Result blocks the calling thread; kept because the method
                // signature is synchronous.
                detectTextResponse = rekognitionClient.DetectTextAsync(req).Result;
            }

            if (detectTextResponse.TextDetections != null)
            {
                // NOTE(review): every detection is reported without filtering by type; if the
                // service returns both line-level and word-level entries the boxes will
                // overlap - confirm whether only words are wanted.
                foreach (Amazon.Rekognition.Model.TextDetection textResult in detectTextResponse.TextDetections)
                {
                    var       box    = textResult.Geometry.BoundingBox;
                    TextBlock rsText = new TextBlock();
                    rsText.Text = textResult.DetectedText;
                    // Bounding box values are multiplied by the image size to get pixel corners.
                    rsText.X1   = box.Left * imgWidth;
                    rsText.X2   = rsText.X1 + box.Width * imgWidth;
                    rsText.Y1   = box.Top * imgHeight;
                    rsText.Y2   = rsText.Y1 + box.Height * imgHeight;
                    rsTextBoxes.Add(rsText);
                }
            }
            rs.Blocks = rsTextBoxes.ToArray();
            return(rs);
        }
예제 #5
0
 /// <summary>
 /// Decodes the raw image, draws the OCR annotations on it and shows the result in _picBox.
 /// Each block gets a black outline with a translucent yellow fill, each sentence an orange
 /// line along the bottom edge of its rectangle, and each paragraph a blue outline.
 /// The number of blocks is written to ctlStatusPanel0.
 /// </summary>
 /// <param name="raw">Encoded image bytes.</param>
 /// <param name="lastOcrResults">OCR results to overlay; null draws the image without annotations.</param>
 private void RenderImage(byte[] raw, TextExtractionResults lastOcrResults)
 {
     Contracts.entity.TextExtractionResults ocr = lastOcrResults;
     if (ocr == null)
     {
         //Create an empty object if none was specified
         ocr = new Contracts.entity.TextExtractionResults
         {
             Blocks = new Contracts.entity.TextBlock[]
             {
             }
         };
     }
     // Report the count from the object actually rendered, so a null argument yields 0
     // instead of dereferencing null.
     int blockCount = ocr.Blocks?.Length ?? 0;
     ctlStatusPanel0.Text = $"Found {blockCount} text objects";

     System.Drawing.Bitmap annotated;
     using (var memStm = new System.IO.MemoryStream(raw))
     using (var decoded = System.Drawing.Image.FromStream(memStm))
     {
         // An Image created by FromStream needs its stream for as long as it lives, so copy
         // the pixels into a bitmap that no longer depends on the stream before closing it.
         annotated = new System.Drawing.Bitmap(decoded);
     }

     try
     {
         using (var penBlock = new System.Drawing.Pen(System.Drawing.Color.Black))
         using (var penSentence = new System.Drawing.Pen(System.Drawing.Color.Orange, 3))
         using (var penPara = new System.Drawing.Pen(System.Drawing.Color.Blue, 3))
         using (var fill = new System.Drawing.SolidBrush(
                    System.Drawing.Color.FromArgb(100, System.Drawing.Color.Yellow)))
         using (var g = System.Drawing.Graphics.FromImage(annotated))
         {
             if (ocr.Blocks != null)
             {
                 foreach (var box in ocr.Blocks)
                 {
                     // Normalise the corners so the rectangle is valid whichever corner
                     // was stored first.
                     int   xUpperLeft = (int)Math.Min(box.X1, box.X2);
                     int   yUpperLeft = (int)Math.Min(box.Y1, box.Y2);
                     float width      = (float)Math.Abs(box.X1 - box.X2);
                     float ht         = (float)Math.Abs(box.Y1 - box.Y2);
                     g.DrawRectangle(penBlock, xUpperLeft, yUpperLeft, width, ht);
                     g.FillRectangle(fill, xUpperLeft, yUpperLeft, width, ht);
                 }
             }
             // Sentences and Paragraphs may be unset (the fallback object above sets only Blocks).
             if (ocr.Sentences != null)
             {
                 foreach (var sentence in ocr.Sentences)
                 {
                     g.DrawLine(
                         penSentence,
                         sentence.Rectangle.X, sentence.Rectangle.Bottom,
                         sentence.Rectangle.Right, sentence.Rectangle.Bottom);
                 }
             }
             if (ocr.Paragraphs != null)
             {
                 foreach (var para in ocr.Paragraphs)
                 {
                     g.DrawRectangle(
                         penPara,
                         para.Rectangle.Left, para.Rectangle.Top,
                         para.Rectangle.Width, para.Rectangle.Height);
                 }
             }
         }
     }
     catch
     {
         // The bitmap never reached the picture box, so it must be released here.
         annotated.Dispose();
         throw;
     }

     // Show the finished image only after all annotations are drawn, then release the
     // image it replaces.
     System.Drawing.Image previous = _picBox.Image;
     _picBox.Image = annotated;
     if (previous != null)
     {
         previous.Dispose();
     }
 }
예제 #6
0
        /// <summary>
        /// Builds extraction results from an OCR JSON document: every element under a
        /// "regions" property becomes a Paragraph, every element under a "lines" property
        /// becomes a Sentence, and the words of each line become TextBlocks.
        /// </summary>
        /// <param name="jsonText">JSON text whose regions and lines carry a "boundingBox" string.</param>
        /// <returns>Results with Paragraphs, Sentences and the flattened Blocks of all sentences.</returns>
        /// <exception cref="System.FormatException">
        /// Thrown when a bounding box has fewer than four values or a value is not a number.
        /// </exception>
        public TextExtractionResults ExtractEntitiesFromJson(string jsonText)
        {
            TextExtractionResults rs   = new TextExtractionResults();
            JObject               json = JObject.Parse(jsonText);

            // Paragraphs: one per element of every "regions" array, at any depth.
            List <Paragraph>     lstParas = new List <Paragraph>();
            IEnumerable <JToken> sections = json.
                                            SelectTokens("$..regions").
                                            SelectMany(l => l).ToArray();

            foreach (JToken tSection in sections)
            {
                Paragraph paraNew = new Paragraph();
                paraNew.Rectangle = ParseBoundingBoxRectangle(tSection["boundingBox"].Value <string>());
                lstParas.Add(paraNew);
            }
            rs.Paragraphs = lstParas.ToArray();

            // Sentences: one per element of every "lines" array; each keeps its own words and
            // the words are also collected into one flat list.
            IEnumerable <JToken> lines = json.
                                         SelectTokens("$..lines").
                                         SelectMany(l => l).ToArray();
            List <TextBlock> lstAllBlocks    = new List <TextBlock>();
            List <Sentence>  lstAllSentences = new List <Sentence>();

            foreach (JToken line in lines)
            {
                Sentence sentNew = new Sentence();
                sentNew.Rectangle = ParseBoundingBoxRectangle(line["boundingBox"].Value <string>());

                TextBlock[] blockFromWords = line.
                                             SelectTokens("$..words").
                                             SelectMany(t => t).
                                             Select(r => CreateFromJtoken(r)).
                                             ToArray();
                sentNew.Blocks = blockFromWords;
                lstAllSentences.Add(sentNew);
                lstAllBlocks.AddRange(blockFromWords);
            }
            rs.Sentences = lstAllSentences.ToArray();
            rs.Blocks    = lstAllBlocks.ToArray();
            return(rs);
        }

        /// <summary>
        /// Parses a comma-separated bounding box string into a rectangle, passing the first
        /// four values to the RectangleF constructor in order (x, y, width, height).
        /// </summary>
        private static System.Drawing.RectangleF ParseBoundingBoxRectangle(string boundingBox)
        {
            // The text is machine-generated, so parse it culture-independently rather than
            // with the UI culture of the current thread.
            float[] coordinates =
                boundingBox.Split(',').
                Select(frag => float.Parse(frag, System.Globalization.CultureInfo.InvariantCulture)).
                ToArray();

            if (coordinates.Length < 4)
            {
                throw new System.FormatException(
                          "Bounding box must contain four comma-separated values: " + boundingBox);
            }
            return(new System.Drawing.RectangleF(
                       coordinates[0], coordinates[1],
                       coordinates[2], coordinates[3]));
        }