/// <summary>
/// Runs OCR on the element bound to <c>Element</c>, stores every recognized text
/// region in <c>Result</c> and, when at least one region was found, schedules
/// <c>Body</c> for the first of them.
/// </summary>
/// <param name="context">Activity context used to read the arguments, store the results and schedule the body.</param>
protected override void StartLoop(NativeActivityContext context)
{
    var wordlimit = WordLimit.Get(context);
    var lang = Config.local.ocrlanguage;
    var casesensitive = CaseSensitive.Get(context);
    string basepath = Interfaces.Extensions.DataDirectory;
    string path = System.IO.Path.Combine(basepath, "tessdata");

    // Called for the configured language and for "osd" before the engine is created
    // (presumably fetches missing data files into the tessdata folder).
    ocr.TesseractDownloadLangFile(path, Config.local.ocrlanguage);
    ocr.TesseractDownloadLangFile(path, "osd");

    var ele = Element.Get(context);
    ImageElement[] result;

    // The engine is only needed while OcrImage2 runs; dispose it afterwards so the
    // native Tesseract instance is not leaked on every execution of the activity.
    using (var _ocr = new Emgu.CV.OCR.Tesseract(path, lang.ToString(), Emgu.CV.OCR.OcrEngineMode.TesseractLstmCombined))
    {
        _ocr.Init(path, lang.ToString(), Emgu.CV.OCR.OcrEngineMode.TesseractLstmCombined);
        _ocr.PageSegMode = Emgu.CV.OCR.PageSegMode.SparseText;

        // Use the element's own bitmap when it is an image element, otherwise
        // capture the screen area the element occupies.
        // NOTE(review): a bitmap obtained from Screenshot is never disposed here -
        // confirm who owns it before adding a Dispose call.
        Bitmap sourceimg = null;
        if (ele is ImageElement)
        {
            sourceimg = ((ImageElement)ele).element;
        }
        else
        {
            sourceimg = Interfaces.Image.Util.Screenshot(ele.Rectangle.X, ele.Rectangle.Y, ele.Rectangle.Width, ele.Rectangle.Height);
        }

        using (var img = new Emgu.CV.Image<Emgu.CV.Structure.Bgr, byte>(sourceimg))
        {
            result = ocr.OcrImage2(_ocr, img.Mat, wordlimit, casesensitive);
        }
    }

    // OCR rectangles are relative to the analysed image; shift them by the
    // element's position so they share the element's coordinate space.
    Log.Debug("adding element cords to results: " + ele.Rectangle.ToString());
    foreach (var R in result)
    {
        var rect = new System.Drawing.Rectangle(R.Rectangle.X + ele.Rectangle.X, R.Rectangle.Y + ele.Rectangle.Y, R.Rectangle.Width, R.Rectangle.Height);
        R.Rectangle = rect;
        Log.Debug("Found: '" + R.Text + "' at " + R.Rectangle.ToString());
    }

    context.SetValue(Result, result);

    // Keep the enumerator in the context so later iterations can continue from it.
    IEnumerator<ImageElement> _enum = result.ToList().GetEnumerator();
    context.SetValue(_elements, _enum);
    bool more = _enum.MoveNext();
    if (more)
    {
        IncIndex(context);
        SetTotal(context, result.Length);
        context.ScheduleAction(Body, _enum.Current, OnBodyComplete);
    }
}
/// <summary>
/// Wraps the engine's current UTF-8 text output in a <c>Receipt</c>, one entry per line.
/// </summary>
/// <param name="source">Tesseract engine whose recognized text is read.</param>
/// <returns>
/// A new <c>Receipt</c> whose <c>LinesOfText</c> holds the text split on '\n',
/// or <c>null</c> when the engine returned no text.
/// </returns>
public static Receipt GetReceipt(this Emgu.CV.OCR.Tesseract source)
{
    string recognized = source.GetUTF8Text();
    var lines = recognized == null ? null : recognized.Split('\n').ToList();
    return new Receipt { LinesOfText = lines };
}
/// <summary>
/// Recognizes the text in <paramref name="image"/>. Recognition is first attempted on a
/// colour version of the image; when no characters are found it is retried on a
/// greyscale version, and finally on a binary-thresholded version.
/// </summary>
/// <param name="_ocr">Tesseract engine used for recognition.</param>
/// <param name="image">Image to analyse; single-channel input is expanded to BGR first.</param>
/// <returns>The engine's UTF-8 text with trailing line-break characters removed.</returns>
/// <exception cref="Exception">Thrown when the engine's <c>Recognize</c> call returns a non-zero value.</exception>
public static string OcrImage(Emgu.CV.OCR.Tesseract _ocr, Emgu.CV.Mat image)
{
    using (var imageColor = new Mat())
    using (Mat imgGrey = new Mat())
    using (Mat imgThresholded = new Mat())
    {
        if (image.NumberOfChannels == 1)
        {
            CvInvoke.CvtColor(image, imageColor, ColorConversion.Gray2Bgr);
        }
        else
        {
            image.CopyTo(imageColor);
        }

        Emgu.CV.OCR.Tesseract.Character[] characters = OcrImageRecognizePass(_ocr, imageColor);
        Log.Debug("GetCharacters found " + characters.Length + " with colors");

        if (characters.Length == 0)
        {
            // Convert from imageColor rather than from the raw input: the raw input may be
            // single-channel, which is not a valid source for a BGR-to-grey conversion.
            CvInvoke.CvtColor(imageColor, imgGrey, ColorConversion.Bgr2Gray);
            characters = OcrImageRecognizePass(_ocr, imgGrey);
            Log.Debug("GetCharacters found " + characters.Length + " with grey scaled");

            if (characters.Length == 0)
            {
                CvInvoke.Threshold(imgGrey, imgThresholded, 65, 255, ThresholdType.Binary);
                characters = OcrImageRecognizePass(_ocr, imgThresholded);
                Log.Debug("GetCharacters found " + characters.Length + " thresholded");
            }
        }

        // Read the text while the images handed to the engine are still alive.
        return _ocr.GetUTF8Text().TrimEnd(Environment.NewLine.ToCharArray());
    }
}

/// <summary>
/// Runs one layout-analysis and recognition pass over <paramref name="input"/> and returns the characters found.
/// </summary>
private static Emgu.CV.OCR.Tesseract.Character[] OcrImageRecognizePass(Emgu.CV.OCR.Tesseract engine, Mat input)
{
    engine.SetImage(input);
    engine.AnalyseLayout();
    if (engine.Recognize() != 0)
    {
        throw new Exception("Failed to recognizer image");
    }
    return engine.GetCharacters();
}
/// <summary>
/// Runs OCR on <paramref name="ele"/> using the <c>WordLimit</c> and <c>CaseSensitive</c>
/// values read from <paramref name="model"/> and returns the recognized text regions,
/// with their rectangles offset by the element's position.
/// </summary>
/// <param name="ele">Element to analyse; an <c>ImageElement</c> supplies its own bitmap, anything else is captured from the screen.</param>
/// <param name="model">Model item the "WordLimit" and "CaseSensitive" values are read from.</param>
/// <returns>The elements produced by <c>ocr.OcrImage2</c>, translated by the element's X/Y position.</returns>
public static ImageElement[] Execute(IElement ele, System.Activities.Presentation.Model.ModelItem model)
{
    var wordlimit = model.GetValue<string>("WordLimit");
    var casesensitive = model.GetValue<bool>("CaseSensitive");
    var lang = Config.local.ocrlanguage;
    string basepath = Interfaces.Extensions.DataDirectory;
    string path = System.IO.Path.Combine(basepath, "tessdata");

    // Called for the configured language and for "osd" before the engine is created
    // (presumably fetches missing data files into the tessdata folder).
    ocr.TesseractDownloadLangFile(path, Config.local.ocrlanguage);
    ocr.TesseractDownloadLangFile(path, "osd");

    ImageElement[] result;

    // Dispose the engine once recognition is done so the native Tesseract
    // instance is not leaked on every call.
    using (var _ocr = new Emgu.CV.OCR.Tesseract(path, lang.ToString(), Emgu.CV.OCR.OcrEngineMode.TesseractLstmCombined))
    {
        _ocr.Init(path, lang.ToString(), Emgu.CV.OCR.OcrEngineMode.TesseractLstmCombined);
        _ocr.PageSegMode = Emgu.CV.OCR.PageSegMode.SparseText;

        // NOTE(review): a bitmap obtained from Screenshot is never disposed here -
        // confirm who owns it before adding a Dispose call.
        Bitmap sourceimg = null;
        if (ele is ImageElement)
        {
            sourceimg = ((ImageElement)ele).element;
        }
        else
        {
            sourceimg = Interfaces.Image.Util.Screenshot(ele.Rectangle.X, ele.Rectangle.Y, ele.Rectangle.Width, ele.Rectangle.Height);
        }

        using (var img = new Emgu.CV.Image<Emgu.CV.Structure.Bgr, byte>(sourceimg))
        {
            result = ocr.OcrImage2(_ocr, img.Mat, wordlimit, casesensitive);
        }
    }

    // OCR rectangles are relative to the analysed image; shift them by the
    // element's position so they share the element's coordinate space.
    Log.Debug("adding element cords to results: " + ele.Rectangle.ToString());
    foreach (var R in result)
    {
        var rect = new System.Drawing.Rectangle(R.Rectangle.X + ele.Rectangle.X, R.Rectangle.Y + ele.Rectangle.Y, R.Rectangle.Width, R.Rectangle.Height);
        R.Rectangle = rect;
        Log.Debug("Found: '" + R.Text + "' at " + R.Rectangle.ToString());
    }

    return result;
}
/// <summary>
/// (Re)creates the Tesseract engine referenced by <paramref name="f_OCR"/>. Any existing
/// engine is disposed first; on failure the reference is set to <c>null</c> and the error
/// is shown in a message box.
/// </summary>
/// <param name="f_OCR">Engine reference to replace; receives the new engine, or <c>null</c> on failure.</param>
/// <param name="f_Folder">Data folder; passed through <c>CheckDirectoyFormat</c> before use.</param>
/// <param name="f_Lang">Language specification, several languages joined with '+'. Null or empty selects "eng".</param>
/// <param name="f_OcrEngineMode">Engine mode passed to the Tesseract constructor.</param>
public static void InitialOCR(ref Emgu.CV.OCR.Tesseract f_OCR, String f_Folder, String f_Lang, Emgu.CV.OCR.OcrEngineMode f_OcrEngineMode = Emgu.CV.OCR.OcrEngineMode.TesseractLstmCombined)
{
    try
    {
        // Release a previously created engine before building a new one.
        if (f_OCR != null)
        {
            f_OCR.Dispose();
            f_OCR = null;
        }

        String t_Folder = CheckDirectoyFormat(f_Folder);

        // Work out which language files have to be available.
        List<String> t_DownloadLangFile = new List<String>();
        if (String.IsNullOrEmpty(f_Lang))
        {
            // Treat a missing language like an empty one instead of failing on null.
            f_Lang = "eng";
            t_DownloadLangFile.Add("eng");
        }
        else
        {
            t_DownloadLangFile.AddRange(f_Lang.Split('+'));
        }

        TesseractDownloadLangFile(t_Folder, t_DownloadLangFile);
        f_OCR = new Emgu.CV.OCR.Tesseract(t_Folder, f_Lang, f_OcrEngineMode);
    }
    catch (Exception e)
    {
        f_OCR = null;
        MessageBox.Show(e.Message, "Failed to initialize tesseract OCR engine", MessageBoxButtons.OK);
    }
}
/// <summary>
/// Runs Tesseract recognition on a clone of the supplied image and returns the recognized text.
/// </summary>
/// <param name="filteredPlate">Image to recognize; it is cloned and the clone is handed to the engine.</param>
/// <param name="_ocr">Tesseract engine used for recognition.</param>
/// <returns>A <see cref="StringBuilder"/> containing the engine's UTF-8 text output.</returns>
public static StringBuilder GetText(UMat filteredPlate, Emgu.CV.OCR.Tesseract _ocr)
{
    var recognized = new StringBuilder();

    using (UMat working = filteredPlate.Clone())
    {
        _ocr.SetImage(working);
        _ocr.Recognize();

        string text = _ocr.GetUTF8Text();
        recognized.Append(text);

        // The character list is not used; the call is kept so the sequence of
        // engine calls stays the same.
        _ocr.GetCharacters();
    }

    return recognized;
}
/// <summary>
/// Recognizes the text in <paramref name="image"/> and returns it as positioned elements.
/// When <paramref name="wordlimit"/> is null or empty, one element is returned per run of
/// characters delimited by a space, '\r' or '\n'. Otherwise one element is returned per
/// non-overlapping occurrence of <paramref name="wordlimit"/> in the recognized character sequence.
/// </summary>
/// <param name="_ocr">Tesseract engine used for recognition.</param>
/// <param name="image">Image to analyse; it is converted to greyscale unless it already has a single channel.</param>
/// <param name="wordlimit">Text to search for, or null/empty to return every word.</param>
/// <param name="casesensitive">When false, <paramref name="wordlimit"/> is also matched on lower-cased text.</param>
/// <returns>The elements found; rectangles are relative to <paramref name="image"/>.</returns>
/// <exception cref="Exception">Thrown when the engine's <c>Recognize</c> call returns a non-zero value.</exception>
public static ImageElement[] OcrImage2(Emgu.CV.OCR.Tesseract _ocr, Emgu.CV.Mat image, string wordlimit, bool casesensitive)
{
    using (Mat imgGrey = new Mat())
    {
        if (image.NumberOfChannels == 1)
        {
            // Already greyscale: a BGR-to-grey conversion is not valid for single-channel input.
            image.CopyTo(imgGrey);
        }
        else
        {
            CvInvoke.CvtColor(image, imgGrey, ColorConversion.Bgr2Gray);
        }

        _ocr.SetImage(imgGrey);
        _ocr.AnalyseLayout();
        if (_ocr.Recognize() != 0)
        {
            throw new Exception("Failed to recognizer image");
        }
        Emgu.CV.OCR.Tesseract.Character[] characters = _ocr.GetCharacters();

        if (!string.IsNullOrEmpty(wordlimit))
        {
            Rectangle imagerect = new Rectangle(0, 0, image.Width, image.Height);
            return FindWordLimitMatches(characters, wordlimit, casesensitive, imagerect).ToArray();
        }
        return GroupCharactersIntoWords(characters).ToArray();
    }
}

/// <summary>
/// Scans the characters left to right and builds one element per non-overlapping occurrence of <paramref name="wordlimit"/>.
/// </summary>
private static List<ImageElement> FindWordLimitMatches(Emgu.CV.OCR.Tesseract.Character[] characters, string wordlimit, bool casesensitive, Rectangle imagerect)
{
    var matches = new List<ImageElement>();
    Rectangle desktop = new Rectangle(0, 0, System.Windows.Forms.Screen.PrimaryScreen.Bounds.Width, System.Windows.Forms.Screen.PrimaryScreen.Bounds.Height);

    int start = 0;
    while (start + wordlimit.Length <= characters.Length)
    {
        // Every start position is tried, so an occurrence that begins in the middle
        // of a failed partial match (e.g. "ab" in "aab") is still found.
        if (!IsWordLimitAt(characters, start, wordlimit, casesensitive))
        {
            start++;
            continue;
        }

        var matched = new List<Emgu.CV.OCR.Tesseract.Character>(wordlimit.Length);
        for (int offset = 0; offset < wordlimit.Length; offset++)
        {
            matched.Add(characters[start + offset]);
        }

        ImageElement element = CreateElementFromCharacters(matched);
        matches.Add(element);

        if (!desktop.Contains(element.Rectangle))
        {
            Log.Error("Found element outside desktop !!!!!");
        }
        if (!imagerect.Contains(element.Rectangle))
        {
            Log.Error("Found element outside image !!!!!");
        }
        Log.Debug("Found: " + element.Text + " at " + element.Rectangle.ToString());

        // Continue after the match so occurrences never overlap.
        start += wordlimit.Length;
    }

    return matches;
}

/// <summary>
/// Tells whether the characters starting at <paramref name="start"/> spell <paramref name="wordlimit"/>, one recognized character per letter.
/// </summary>
private static bool IsWordLimitAt(Emgu.CV.OCR.Tesseract.Character[] characters, int start, string wordlimit, bool casesensitive)
{
    for (int offset = 0; offset < wordlimit.Length; offset++)
    {
        string actual = characters[start + offset].Text;
        string expected = wordlimit[offset].ToString();
        if (actual == expected)
        {
            continue;
        }
        if (!casesensitive && actual.ToLower() == expected.ToLower())
        {
            continue;
        }
        return false;
    }
    return true;
}

/// <summary>
/// Splits the characters into words at space, '\r' and '\n' characters and builds one element per word.
/// </summary>
private static List<ImageElement> GroupCharactersIntoWords(Emgu.CV.OCR.Tesseract.Character[] characters)
{
    var words = new List<ImageElement>();
    var pending = new List<Emgu.CV.OCR.Tesseract.Character>();

    foreach (var character in characters)
    {
        if (character.Text == " " || character.Text == "\r" || character.Text == "\n")
        {
            if (pending.Count > 0)
            {
                words.Add(CreateElementFromCharacters(pending));
            }
            pending.Clear();
            continue;
        }
        pending.Add(character);
    }

    // Flush the last word when the text does not end with a separator.
    if (pending.Count > 0)
    {
        words.Add(CreateElementFromCharacters(pending));
    }

    return words;
}

/// <summary>
/// Builds an element whose text is the concatenation of the characters, whose confidence is the first
/// character's cost and whose rectangle runs from the first character's origin to the last character's far edge.
/// </summary>
private static ImageElement CreateElementFromCharacters(List<Emgu.CV.OCR.Tesseract.Character> chars)
{
    var element = new ImageElement(Rectangle.Empty);
    foreach (var character in chars)
    {
        element.Text += character.Text;
    }
    element.Confidence = chars[0].Cost;

    Rectangle first = chars[0].Region;
    Rectangle last = chars[chars.Count - 1].Region;
    element.Rectangle = new Rectangle(
        first.X,
        first.Y,
        (last.X - first.X) + last.Width,
        (last.Y - first.Y) + last.Height);

    return element;
}
/// <summary>
/// Runs recognition on <paramref name="f_Mat"/> and collects the hOCR output, the non-empty
/// text lines and one bounding rectangle per line of recognized characters.
/// </summary>
/// <param name="f_OCR">Tesseract engine used for recognition.</param>
/// <param name="f_Mat">Image to analyse; a working copy is handed to the engine.</param>
/// <returns>
/// A result whose <c>s_HOCR</c> is the engine's hOCR text, whose <c>s_LineString</c> holds the
/// trimmed non-empty lines of the UTF-8 text and whose <c>s_LineRectangle</c> holds the union of
/// the character regions of each line.
/// </returns>
public static S_OCR_Result DoReconizeOCR(ref Emgu.CV.OCR.Tesseract f_OCR, Emgu.CV.Mat f_Mat)
{
    S_OCR_Result t_OCR_Result;
    t_OCR_Result.s_LineRectangle = new List<Rectangle>();
    t_OCR_Result.s_LineString = new List<string>();

    Emgu.CV.OCR.Tesseract.Character[] t_Characters;

    // The working copy is disposed once every result has been read from the engine.
    using (Emgu.CV.Mat t_Mat = new Emgu.CV.Mat())
    {
        if (f_Mat.NumberOfChannels != 1)
        {
            f_Mat.ConvertTo(t_Mat, Emgu.CV.CvEnum.DepthType.Cv8U);
        }
        else
        {
            f_Mat.CopyTo(t_Mat);
        }

        f_OCR.SetImage(t_Mat);
        f_OCR.Recognize();

        t_OCR_Result.s_HOCR = f_OCR.GetHOCRText();

        String t_GetUTF8Text = f_OCR.GetUTF8Text();
        String[] t_SpilitUTF8Text = t_GetUTF8Text.Split(System.Environment.NewLine.ToCharArray());
        for (int i = 0; i < t_SpilitUTF8Text.Length; i++)
        {
            t_SpilitUTF8Text[i] = t_SpilitUTF8Text[i].Trim();
            if (t_SpilitUTF8Text[i].Length > 0)
            {
                t_OCR_Result.s_LineString.Add(t_SpilitUTF8Text[i]);
            }
        }

        t_Characters = f_OCR.GetCharacters();
    }

    // Accumulate the bounding box of the characters of the current line and emit it
    // at each line break, so every rectangle has the real extent of its line.
    Rectangle t_LineBounds = Rectangle.Empty;
    bool t_LineHasCharacters = false;
    for (int i = 0; i < t_Characters.Length; i++)
    {
        Emgu.CV.OCR.Tesseract.Character t_Character = t_Characters[i];

        if (t_Character.Text == " ")
        {
            continue;
        }

        // NOTE(review): assumes the engine reports line breaks as "\n", "\r" or the
        // platform newline in the character list - confirm against real output.
        if (t_Character.Text == System.Environment.NewLine || t_Character.Text == "\n" || t_Character.Text == "\r")
        {
            if (t_LineHasCharacters)
            {
                t_OCR_Result.s_LineRectangle.Add(t_LineBounds);
                t_LineHasCharacters = false;
            }
            continue;
        }

        // An all-zero region would drag the union to the origin; leave it out.
        if (t_Character.Region.IsEmpty)
        {
            continue;
        }

        if (!t_LineHasCharacters)
        {
            t_LineBounds = t_Character.Region;
            t_LineHasCharacters = true;
        }
        else
        {
            t_LineBounds = Rectangle.Union(t_LineBounds, t_Character.Region);
        }
    }

    // The last line may not be followed by a line break.
    if (t_LineHasCharacters)
    {
        t_OCR_Result.s_LineRectangle.Add(t_LineBounds);
    }

    return t_OCR_Result;
}
/// <summary>
/// Returns the engine's current UTF-8 text output split into lines on '\n'.
/// </summary>
/// <param name="source">Tesseract engine whose recognized text is read.</param>
/// <returns>The lines of text, or <c>null</c> when the engine returned no text.</returns>
public static List<string> GetLinesOfText(this Emgu.CV.OCR.Tesseract source)
{
    string recognized = source.GetUTF8Text();
    if (recognized == null)
    {
        return null;
    }

    string[] parts = recognized.Split('\n');
    return parts.ToList();
}