Example #1
0
        /// <summary>
        /// Runs OCR over the input element, publishes the recognised items and schedules the loop
        /// body for the first one.
        /// </summary>
        /// <param name="context">Activity context used to read the arguments and store the results.</param>
        protected override void StartLoop(NativeActivityContext context)
        {
            var    wordlimit     = WordLimit.Get(context);
            var    lang          = Config.local.ocrlanguage;
            var    casesensitive = CaseSensitive.Get(context);
            string basepath      = Interfaces.Extensions.DataDirectory;
            string path          = System.IO.Path.Combine(basepath, "tessdata");

            ocr.TesseractDownloadLangFile(path, Config.local.ocrlanguage);
            ocr.TesseractDownloadLangFile(path, "osd");
            var ele = Element.Get(context);

            ImageElement[] result;

            // The Tesseract engine wraps native resources, so release it as soon as recognition is
            // done instead of leaving it to the finalizer.
            using (var _ocr = new Emgu.CV.OCR.Tesseract(path, lang.ToString(), Emgu.CV.OCR.OcrEngineMode.TesseractLstmCombined))
            {
                // NOTE(review): the constructor above presumably initialises the engine already;
                // confirm whether this explicit Init is still required.
                _ocr.Init(path, lang.ToString(), Emgu.CV.OCR.OcrEngineMode.TesseractLstmCombined);
                _ocr.PageSegMode = Emgu.CV.OCR.PageSegMode.SparseText;

                // Use the element's own bitmap when it is an image element, otherwise grab the
                // screen area the element covers.
                Bitmap sourceimg;
                var    imageElement = ele as ImageElement;

                if (imageElement != null)
                {
                    sourceimg = imageElement.element;
                }
                else
                {
                    sourceimg = Interfaces.Image.Util.Screenshot(ele.Rectangle.X, ele.Rectangle.Y, ele.Rectangle.Width, ele.Rectangle.Height);
                }
                using (var img = new Emgu.CV.Image <Emgu.CV.Structure.Bgr, byte>(sourceimg))
                {
                    result = ocr.OcrImage2(_ocr, img.Mat, wordlimit, casesensitive);
                }
            }

            // Shift every result by the element's top-left corner so the rectangles are expressed
            // in the same coordinate space as the element itself.
            Log.Debug("adding element cords to results: " + ele.Rectangle.ToString());
            foreach (var R in result)
            {
                var rect = new System.Drawing.Rectangle(R.Rectangle.X + ele.Rectangle.X, R.Rectangle.Y + ele.Rectangle.Y, R.Rectangle.Width, R.Rectangle.Height);
                R.Rectangle = rect;
                Log.Debug("Found: '" + R.Text + "' at " + R.Rectangle.ToString());
            }
            context.SetValue(Result, result);

            IEnumerator <ImageElement> _enum = result.ToList().GetEnumerator();

            context.SetValue(_elements, _enum);
            bool more = _enum.MoveNext();

            if (more)
            {
                IncIndex(context);
                SetTotal(context, result.Length);
                context.ScheduleAction(Body, _enum.Current, OnBodyComplete);
            }
        }
        /// <summary>
        /// Wraps the engine's recognised text, split on line feeds, in a <c>Receipt</c>.
        /// </summary>
        /// <param name="source">Tesseract engine whose UTF-8 text output is read.</param>
        /// <returns>A receipt whose <c>LinesOfText</c> is null when the engine returns no text.</returns>
        public static Receipt GetReceipt(this Emgu.CV.OCR.Tesseract source)
        {
            var recognisedText = source.GetUTF8Text();
            var receipt        = new Receipt();

            // Keep the null when there is no text at all rather than substituting an empty list.
            receipt.LinesOfText = recognisedText == null ? null : recognisedText.Split('\n').ToList();
            return receipt;
        }
Example #3
0
 /// <summary>
 /// Recognises the text in an image, retrying on a grey-scale and then a thresholded version
 /// when a pass finds no characters.
 /// </summary>
 /// <param name="_ocr">Tesseract engine used for every pass.</param>
 /// <param name="image">Image to recognise; single-channel input is expanded to BGR for the first pass.</param>
 /// <returns>The engine's UTF-8 text with trailing new-line characters removed.</returns>
 /// <exception cref="Exception">Thrown when the engine reports a recognition failure.</exception>
 public static string OcrImage(Emgu.CV.OCR.Tesseract _ocr, Emgu.CV.Mat image)
 {
     using (var imageColor = new Mat())
     using (Mat imgGrey = new Mat())
     using (Mat imgThresholded = new Mat())
     {
         if (image.NumberOfChannels == 1)
         {
             CvInvoke.CvtColor(image, imageColor, ColorConversion.Gray2Bgr);
         }
         else
         {
             image.CopyTo(imageColor);
         }

         Emgu.CV.OCR.Tesseract.Character[] characters = RecognizeCharacters(_ocr, imageColor);
         Log.Debug("GetCharacters found " + characters.Length + " with colors");
         if (characters.Length == 0)
         {
             // Convert from imageColor rather than image: imageColor is multi-channel even when
             // the caller passed a single-channel image, which a BGR-to-grey conversion rejects.
             CvInvoke.CvtColor(imageColor, imgGrey, ColorConversion.Bgr2Gray);
             characters = RecognizeCharacters(_ocr, imgGrey);
             Log.Debug("GetCharacters found " + characters.Length + " with grey scaled");
             if (characters.Length == 0)
             {
                 CvInvoke.Threshold(imgGrey, imgThresholded, 65, 255, ThresholdType.Binary);
                 characters = RecognizeCharacters(_ocr, imgThresholded);
                 Log.Debug("GetCharacters found " + characters.Length + " thresholded");
             }
         }
         // The text comes from whichever pass ran last.
         return _ocr.GetUTF8Text().TrimEnd(Environment.NewLine.ToCharArray());
     }
 }

 /// <summary>
 /// Runs one layout-analysis and recognition pass over <paramref name="source"/> and returns its characters.
 /// </summary>
 private static Emgu.CV.OCR.Tesseract.Character[] RecognizeCharacters(Emgu.CV.OCR.Tesseract _ocr, Mat source)
 {
     _ocr.SetImage(source);
     _ocr.AnalyseLayout();
     if (_ocr.Recognize() != 0)
     {
         throw new Exception("Failed to recognizer image");
     }
     return _ocr.GetCharacters();
 }
Example #4
0
        /// <summary>
        /// Runs OCR over an element using the settings stored on a designer model item.
        /// </summary>
        /// <param name="ele">Element to recognise; its bitmap is used when it is an image element, otherwise a screenshot of its rectangle.</param>
        /// <param name="model">Model item supplying the "WordLimit" and "CaseSensitive" values.</param>
        /// <returns>The recognised items, with rectangles offset by the element's position.</returns>
        public static ImageElement[] Execute(IElement ele, System.Activities.Presentation.Model.ModelItem model)
        {
            var wordlimit     = model.GetValue <string>("WordLimit");
            var casesensitive = model.GetValue <bool>("CaseSensitive");
            var lang          = Config.local.ocrlanguage;

            string basepath = Interfaces.Extensions.DataDirectory;
            string path     = System.IO.Path.Combine(basepath, "tessdata");

            ocr.TesseractDownloadLangFile(path, Config.local.ocrlanguage);
            ocr.TesseractDownloadLangFile(path, "osd");

            ImageElement[] result;

            // The Tesseract engine wraps native resources, so release it as soon as recognition is
            // done instead of leaving it to the finalizer.
            using (var _ocr = new Emgu.CV.OCR.Tesseract(path, lang.ToString(), Emgu.CV.OCR.OcrEngineMode.TesseractLstmCombined))
            {
                // NOTE(review): the constructor above presumably initialises the engine already;
                // confirm whether this explicit Init is still required.
                _ocr.Init(path, lang.ToString(), Emgu.CV.OCR.OcrEngineMode.TesseractLstmCombined);
                _ocr.PageSegMode = Emgu.CV.OCR.PageSegMode.SparseText;

                Bitmap sourceimg;
                var    imageElement = ele as ImageElement;

                if (imageElement != null)
                {
                    sourceimg = imageElement.element;
                }
                else
                {
                    sourceimg = Interfaces.Image.Util.Screenshot(ele.Rectangle.X, ele.Rectangle.Y, ele.Rectangle.Width, ele.Rectangle.Height);
                }
                using (var img = new Emgu.CV.Image <Emgu.CV.Structure.Bgr, byte>(sourceimg))
                {
                    result = ocr.OcrImage2(_ocr, img.Mat, wordlimit, casesensitive);
                }
            }

            // Shift every result by the element's top-left corner so the rectangles are expressed
            // in the same coordinate space as the element itself.
            Log.Debug("adding element cords to results: " + ele.Rectangle.ToString());
            foreach (var R in result)
            {
                var rect = new System.Drawing.Rectangle(R.Rectangle.X + ele.Rectangle.X, R.Rectangle.Y + ele.Rectangle.Y, R.Rectangle.Width, R.Rectangle.Height);
                R.Rectangle = rect;
                Log.Debug("Found: '" + R.Text + "' at " + R.Rectangle.ToString());
            }
            return result;
        }
Example #5
0
        /// <summary>
        /// Disposes any existing engine and creates a new Tesseract engine for the requested languages.
        /// </summary>
        /// <param name="f_OCR">Engine reference; replaced by the new engine, or set to null when initialisation fails.</param>
        /// <param name="f_Folder">Folder holding the language data; normalised by CheckDirectoyFormat.</param>
        /// <param name="f_Lang">Languages joined with '+'. Null, empty or blank falls back to "eng".</param>
        /// <param name="f_OcrEngineMode">Engine mode passed to the Tesseract constructor.</param>
        public static void InitialOCR(ref Emgu.CV.OCR.Tesseract f_OCR, String f_Folder, String f_Lang, Emgu.CV.OCR.OcrEngineMode f_OcrEngineMode = Emgu.CV.OCR.OcrEngineMode.TesseractLstmCombined)
        {
            try
            {
                // Release the previous engine before building a new one.
                if (f_OCR != null)
                {
                    f_OCR.Dispose();
                    f_OCR = null;
                }

                String t_Folder = CheckDirectoyFormat(f_Folder);

                // Work out which language files are needed. A missing language used to raise a
                // NullReferenceException that surfaced as a message box; default to English instead.
                List<String> t_DownloadLangFile = new List<String>();
                if (String.IsNullOrWhiteSpace(f_Lang))
                {
                    f_Lang = "eng";
                    t_DownloadLangFile.Add(f_Lang);
                }
                else
                {
                    t_DownloadLangFile.AddRange(f_Lang.Split('+'));
                }
                TesseractDownloadLangFile(t_Folder, t_DownloadLangFile);

                f_OCR = new Emgu.CV.OCR.Tesseract(t_Folder, f_Lang, f_OcrEngineMode);
            }
            catch (Exception e)
            {
                // Leave the caller with no engine rather than a half-initialised one.
                f_OCR = null;
                MessageBox.Show(e.Message, "Failed to initialize tesseract OCR engine", MessageBoxButtons.OK);
            }
        }
Example #6
0
        /// <summary>
        /// Runs Tesseract OCR on an image and returns the recognised text.
        /// </summary>
        /// <param name="filteredPlate">Image to recognise; a clone is handed to the engine and disposed before returning.</param>
        /// <param name="_ocr">Tesseract engine used for the recognition pass.</param>
        /// <returns>A builder holding the engine's UTF-8 text output.</returns>
        public static StringBuilder GetText(UMat filteredPlate, Emgu.CV.OCR.Tesseract _ocr)
        {
            StringBuilder strBuilder = new StringBuilder();

            using (UMat tmp = filteredPlate.Clone())
            {
                _ocr.SetImage(tmp);

                // NOTE(review): the status code returned by Recognize is ignored, so a failed pass
                // is not reported to the caller - confirm this best-effort behaviour is intended.
                _ocr.Recognize();

                strBuilder.Append(_ocr.GetUTF8Text());
            }

            return strBuilder;
        }
Example #7
0
        /// <summary>
        /// Recognises an image and returns either every whitespace-delimited word or, when a word
        /// limit is given, every non-overlapping occurrence of that text.
        /// </summary>
        /// <param name="_ocr">Tesseract engine used for recognition.</param>
        /// <param name="image">Image to recognise; converted to grey scale unless it is already single channel.</param>
        /// <param name="wordlimit">Text to search for. Null or empty returns all words instead.</param>
        /// <param name="casesensitive">Whether the word limit comparison distinguishes case.</param>
        /// <returns>One element per word or per match, with rectangles relative to <paramref name="image"/>.</returns>
        /// <exception cref="Exception">Thrown when the engine reports a recognition failure.</exception>
        public static ImageElement[] OcrImage2(Emgu.CV.OCR.Tesseract _ocr, Emgu.CV.Mat image, string wordlimit, bool casesensitive)
        {
            Emgu.CV.OCR.Tesseract.Character[] characters;

            using (Mat imgGrey = new Mat())
            {
                // A BGR-to-grey conversion rejects single-channel input, so an image that is
                // already grey is copied as-is.
                if (image.NumberOfChannels == 1)
                {
                    image.CopyTo(imgGrey);
                }
                else
                {
                    CvInvoke.CvtColor(image, imgGrey, ColorConversion.Bgr2Gray);
                }
                _ocr.SetImage(imgGrey);
                _ocr.AnalyseLayout();
                if (_ocr.Recognize() != 0)
                {
                    throw new Exception("Failed to recognizer image");
                }
                characters = _ocr.GetCharacters();
            }

            if (!string.IsNullOrEmpty(wordlimit))
            {
                Rectangle imagerect = new Rectangle(0, 0, image.Width, image.Height);
                return FindWordLimitMatches(characters, wordlimit, casesensitive, imagerect);
            }
            return SplitIntoWords(characters);
        }

        /// <summary>
        /// Finds every non-overlapping run of characters that spells <paramref name="wordlimit"/>.
        /// </summary>
        private static ImageElement[] FindWordLimitMatches(Emgu.CV.OCR.Tesseract.Character[] characters, string wordlimit, bool casesensitive, Rectangle imagerect)
        {
            Rectangle        desktop    = new Rectangle(0, 0, System.Windows.Forms.Screen.PrimaryScreen.Bounds.Width, System.Windows.Forms.Screen.PrimaryScreen.Bounds.Height);
            StringComparison comparison = casesensitive ? StringComparison.Ordinal : StringComparison.OrdinalIgnoreCase;
            var              matches    = new List<ImageElement>();
            int              start      = 0;

            // Test every start position so that a partial match does not hide a real one that
            // begins inside it (for example "ab" inside "aab").
            while (start + wordlimit.Length <= characters.Length)
            {
                bool isMatch = true;
                for (int offset = 0; offset < wordlimit.Length; offset++)
                {
                    if (!string.Equals(characters[start + offset].Text, wordlimit[offset].ToString(), comparison))
                    {
                        isMatch = false;
                        break;
                    }
                }
                if (!isMatch)
                {
                    start++;
                    continue;
                }

                var wordchars = new List<Emgu.CV.OCR.Tesseract.Character>(wordlimit.Length);
                for (int offset = 0; offset < wordlimit.Length; offset++)
                {
                    wordchars.Add(characters[start + offset]);
                }
                var res = CreateElementFromCharacters(wordchars);
                matches.Add(res);
                if (!desktop.Contains(res.Rectangle))
                {
                    Log.Error("Found element outside desktop !!!!!");
                }
                if (!imagerect.Contains(res.Rectangle))
                {
                    Log.Error("Found element outside image !!!!!");
                }
                Log.Debug("Found: " + res.Text + " at " + res.Rectangle.ToString());

                // Matches do not overlap: resume after the characters just consumed.
                start += wordlimit.Length;
            }
            return matches.ToArray();
        }

        /// <summary>
        /// Groups the characters into words, using space, carriage return and line feed as separators.
        /// </summary>
        private static ImageElement[] SplitIntoWords(Emgu.CV.OCR.Tesseract.Character[] characters)
        {
            var words   = new List<ImageElement>();
            var current = new List<Emgu.CV.OCR.Tesseract.Character>();

            foreach (var character in characters)
            {
                bool isSeparator = character.Text == " " || character.Text == "\r" || character.Text == "\n";
                if (!isSeparator)
                {
                    current.Add(character);
                    continue;
                }
                if (current.Count > 0)
                {
                    words.Add(CreateElementFromCharacters(current));
                    current.Clear();
                }
            }
            // Flush the trailing word when the text does not end with a separator.
            if (current.Count > 0)
            {
                words.Add(CreateElementFromCharacters(current));
            }
            return words.ToArray();
        }

        /// <summary>
        /// Builds one element from a non-empty run of characters: concatenated text, the first
        /// character's cost as confidence, and a rectangle spanning the first to the last character.
        /// </summary>
        private static ImageElement CreateElementFromCharacters(List<Emgu.CV.OCR.Tesseract.Character> chars)
        {
            var res = new ImageElement(Rectangle.Empty);
            foreach (var character in chars)
            {
                res.Text += character.Text;
            }
            res.Confidence = chars[0].Cost;

            Rectangle first = chars[0].Region;
            Rectangle last  = chars[chars.Count - 1].Region;
            res.Rectangle = new Rectangle(first.X, first.Y, (last.X - first.X) + last.Width, (last.Y - first.Y) + last.Height);
            return res;
        }
Example #8
0
        /// <summary>
        /// Recognises an image and returns its hOCR output, its non-empty text lines and one
        /// bounding rectangle per line of recognised characters.
        /// </summary>
        /// <param name="f_OCR">Tesseract engine used for recognition.</param>
        /// <param name="f_Mat">Image to recognise; multi-channel input is converted to 8-bit depth, single-channel input is copied.</param>
        /// <returns>The populated result structure.</returns>
        public static S_OCR_Result DoReconizeOCR(ref Emgu.CV.OCR.Tesseract f_OCR, Emgu.CV.Mat f_Mat)
        {
            String t_HOCR;
            String t_GetUTF8Text;
            Emgu.CV.OCR.Tesseract.Character[] t_Characters;

            // The working copy is only needed while the engine runs, so dispose it afterwards
            // instead of leaking the native buffer.
            using (Emgu.CV.Mat t_Mat = new Emgu.CV.Mat())
            {
                if (f_Mat.NumberOfChannels != 1)
                {
                    f_Mat.ConvertTo(t_Mat, Emgu.CV.CvEnum.DepthType.Cv8U);
                }
                else
                {
                    f_Mat.CopyTo(t_Mat);
                }
                f_OCR.SetImage(t_Mat);
                f_OCR.Recognize();

                t_HOCR        = f_OCR.GetHOCRText();
                t_GetUTF8Text = f_OCR.GetUTF8Text();
                t_Characters  = f_OCR.GetCharacters();
            }

            S_OCR_Result t_OCR_Result;

            t_OCR_Result.s_LineRectangle = new List<Rectangle>();
            t_OCR_Result.s_LineString    = new List<string>();
            t_OCR_Result.s_HOCR          = t_HOCR;

            // Keep only the lines that still contain text after trimming.
            foreach (String t_Line in t_GetUTF8Text.Split(System.Environment.NewLine.ToCharArray()))
            {
                String t_Trimmed = t_Line.Trim();
                if (t_Trimmed.Length > 0)
                {
                    t_OCR_Result.s_LineString.Add(t_Trimmed);
                }
            }

            // Grow one bounding box per line from its visible characters and emit it at each line
            // break. Whitespace is skipped so that its region cannot distort the box.
            bool      t_HasLine    = false;
            Rectangle t_LineBounds = Rectangle.Empty;

            foreach (Emgu.CV.OCR.Tesseract.Character t_Character in t_Characters)
            {
                String t_Text = t_Character.Text;
                if (String.IsNullOrWhiteSpace(t_Text))
                {
                    bool t_IsLineBreak = t_Text != null && t_Text.IndexOfAny(new[] { '\r', '\n' }) >= 0;
                    if (t_IsLineBreak && t_HasLine)
                    {
                        t_OCR_Result.s_LineRectangle.Add(t_LineBounds);
                        t_HasLine = false;
                    }
                    continue;
                }
                t_LineBounds = t_HasLine ? Rectangle.Union(t_LineBounds, t_Character.Region) : t_Character.Region;
                t_HasLine    = true;
            }
            // The last line may not be followed by a line break.
            if (t_HasLine)
            {
                t_OCR_Result.s_LineRectangle.Add(t_LineBounds);
            }

            return t_OCR_Result;
        }
Example #9
0
 /// <summary>
 /// Returns the engine's recognised text split on line feeds.
 /// </summary>
 /// <param name="source">Tesseract engine whose UTF-8 text output is read.</param>
 /// <returns>The lines of text, or null when the engine returns no text.</returns>
 public static List <string> GetLinesOfText(this Emgu.CV.OCR.Tesseract source)
 {
     string recognisedText = source.GetUTF8Text();

     if (recognisedText == null)
     {
         return null;
     }

     string[] segments = recognisedText.Split('\n');
     return segments.ToList();
 }