/// <summary>
/// Processes one queued OCR request whose id is the ASCII-encoded text in <paramref name="buf"/>.
/// </summary>
/// <remarks>
/// The request JSON is read from the "_OCR_REQUEST" hash and the image from the hash/field named by
/// the request's redis_key / redis_field. After the OCR step the request is written back to
/// "_OCR_REQUEST", its ok value to "_OCR_REQ_LOG", and its id is published on "__TESSERACT_OUT".
/// If an exception occurs once the request has been deserialized, ok is set to -1 and the error
/// text is stored under "_OCR_REQ_ERR". If the request could not be loaded at all, nothing is
/// written or published.
/// </remarks>
/// <param name="buf">ASCII bytes of the request id used as the field in "_OCR_REQUEST".</param>
static void __executeBackground(byte[] buf)
{
    oTesseractRequest r = null;
    string guid = Encoding.ASCII.GetString(buf);
    var redis = new RedisBase(new RedisSetting(REDIS_TYPE.ONLY_READ, 1000));

    try
    {
        string json = redis.HGET("_OCR_REQUEST", guid);
        r = JsonConvert.DeserializeObject<oTesseractRequest>(json);

        // The bitmap is only needed while the OCR step runs; dispose it afterwards so its
        // image resources are released deterministically instead of waiting for finalization.
        // A using statement tolerates a null resource, so the null check below still applies.
        using (Bitmap bitmap = redis.HGET_BITMAP(r.redis_key, r.redis_field))
        {
            if (bitmap != null)
            {
                r = __ocrExecute(r, bitmap, redis);
            }
        }
    }
    catch (Exception ex)
    {
        // Without a deserialized request there is no id to file the error under.
        if (r != null)
        {
            // The request is serialized into the error text before ok is changed,
            // so the stored snapshot shows the state at the time of the failure.
            string error = ex.Message + Environment.NewLine
                + ex.StackTrace + Environment.NewLine
                + "----------------" + Environment.NewLine
                + JsonConvert.SerializeObject(r);
            r.ok = -1;
            redis.HSET("_OCR_REQ_ERR", r.requestId, error);
        }
    }

    if (r != null)
    {
        redis.HSET("_OCR_REQUEST", r.requestId, JsonConvert.SerializeObject(r, Formatting.Indented));
        redis.HSET("_OCR_REQ_LOG", r.requestId, r.ok.ToString());
        redis.PUBLISH("__TESSERACT_OUT", r.requestId);
    }
}
/// <summary>
/// Runs page-layout analysis on <paramref name="bitmap"/> with a TesseractProcessor and records
/// the bounding box of every detected line on <paramref name="req"/>.
/// </summary>
/// <param name="req">
/// Request whose command selects the operation; its output_format, output_text, output_count
/// and ok members are updated in place.
/// </param>
/// <param name="bitmap">
/// Source image. It is converted to a grey image and thresholded with an AdaptiveThresholder
/// before being analysed.
/// </param>
/// <returns>The same <paramref name="req"/> instance that was passed in.</returns>
static oTesseractRequest __ocrExecute2(oTesseractRequest req, Bitmap bitmap)
{
    using (TesseractProcessor processor = new TesseractProcessor())
    {
        processor.InitForAnalysePage();

        using (GreyImage greyImage = GreyImage.FromImage(bitmap))
        {
            ImageThresholder thresholder = new AdaptiveThresholder();
            using (BinaryImage binImage = thresholder.Threshold(greyImage))
            {
                DocumentLayout doc = null;
                switch (req.command)
                {
                    case TESSERACT_COMMAND.GET_TEXT:
                        // Text extraction is not implemented on this path: the request is
                        // marked ok without output_text / output_count being filled in.
                        req.ok = 1;
                        break;

                    default:
                        unsafe
                        {
                            doc = processor.AnalyseLayoutBinaryImage(
                                binImage.BinaryData, greyImage.Width, greyImage.Height);
                        }

                        // When no layout is produced the request is left untouched (ok is not set).
                        if (doc != null)
                        {
                            var lineBoxes = new List<string>();
                            for (int i = 0; i < doc.Blocks.Count; i++)
                            {
                                var paragraphs = doc.Blocks[i].Paragraphs;
                                for (int j = 0; j < paragraphs.Count; j++)
                                {
                                    // Index the paragraph within block i. The previous code used
                                    // Blocks[j], which read the wrong block or ran out of range.
                                    lineBoxes.AddRange(paragraphs[j].Lines
                                        .Select(x => string.Format(
                                            "{0}_{1}_{2}_{3}", x.Left, x.Top, x.Right, x.Bottom)));
                                }
                            }

                            req.output_format = "left_top_right_bottom";
                            req.output_text = string.Join("|", lineBoxes.ToArray());
                            req.output_count = lineBoxes.Count;
                            req.ok = 1;
                        }
                        break;
                }
            }
        }
    }

    return req;
}
/// <summary>
/// Runs a TesseractEngine over <paramref name="image"/> and stores the outcome on <paramref name="req"/>.
/// </summary>
/// <remarks>
/// For GET_TEXT the trimmed recognised text and its length are stored. For every other command
/// the segmented regions are stored as "x_y_width_height" entries joined with '|', together with
/// the number of regions. The region granularity follows the command and is word level when the
/// command does not name one. The engine is created from the request's data_path, lang and mode.
/// </remarks>
/// <param name="req">Request describing the command, engine mode, data path and language; updated in place.</param>
/// <param name="image">Image to run recognition on.</param>
/// <returns>The same <paramref name="req"/> instance that was passed in.</returns>
static oTesseractRequest __ocrExecute(oTesseractRequest req, Bitmap image)
{
    var command = req.command;

    // Region granularity; anything without an explicit mapping stays at word level.
    PageIteratorLevel granularity;
    if (command == TESSERACT_COMMAND.GET_SEGMENTED_REGION_BLOCK)
    {
        granularity = PageIteratorLevel.Block;
    }
    else if (command == TESSERACT_COMMAND.GET_SEGMENTED_REGION_PARA)
    {
        granularity = PageIteratorLevel.Para;
    }
    else if (command == TESSERACT_COMMAND.GET_SEGMENTED_REGION_SYMBOL)
    {
        granularity = PageIteratorLevel.Symbol;
    }
    else if (command == TESSERACT_COMMAND.GET_SEGMENTED_REGION_TEXTLINE)
    {
        granularity = PageIteratorLevel.TextLine;
    }
    else
    {
        granularity = PageIteratorLevel.Word;
    }

    // Engine mode; anything without an explicit mapping uses the engine default.
    var requestedMode = req.mode;
    EngineMode engineMode;
    if (requestedMode == ENGINE_MODE.LSTM_ONLY)
    {
        engineMode = EngineMode.LstmOnly;
    }
    else if (requestedMode == ENGINE_MODE.TESSERACT_AND_LSTM)
    {
        engineMode = EngineMode.TesseractAndLstm;
    }
    else if (requestedMode == ENGINE_MODE.TESSERACT_ONLY)
    {
        engineMode = EngineMode.TesseractOnly;
    }
    else
    {
        engineMode = EngineMode.Default;
    }

    using (var ocrEngine = new TesseractEngine(req.data_path, req.lang, engineMode))
    {
        using (var pixImage = new BitmapToPixConverter().Convert(image))
        {
            using (var page = ocrEngine.Process(pixImage))
            {
                if (command == TESSERACT_COMMAND.GET_TEXT)
                {
                    string recognised = page.GetText().Trim();
                    req.output_text = recognised;
                    req.output_count = recognised.Length;
                    req.ok = 1;
                }
                else
                {
                    // Regions are materialised first, so a failure here leaves the request unmodified.
                    string[] regionBoxes = page.GetSegmentedRegions(granularity)
                        .Select(region => string.Format(
                            "{0}_{1}_{2}_{3}", region.X, region.Y, region.Width, region.Height))
                        .ToArray();

                    req.output_format = "x_y_width_height";
                    req.output_text = string.Join("|", regionBoxes);
                    req.output_count = regionBoxes.Length;
                    req.ok = 1;
                }
            }
        }
    }

    return req;
}
/// <summary>
/// Runs page-layout analysis on a JPEG re-encoded copy of <paramref name="bitmap"/> with a
/// TesseractProcessor and records the bounding box of every detected line on <paramref name="req"/>.
/// </summary>
/// <param name="req">
/// Request whose command selects the operation; its output_format, output_text, output_count
/// and ok members are updated in place.
/// </param>
/// <param name="bitmap">Source image; it is saved as JPEG into memory and reloaded before analysis.</param>
/// <returns>The same <paramref name="req"/> instance that was passed in.</returns>
static oTesseractRequest __ocrExecute(oTesseractRequest req, Bitmap bitmap)
{
    using (TesseractProcessor processor = new TesseractProcessor())
    {
        // SetVariable() has to be called before the image is passed to the engine
        // (api->SetImage(image)). Values of tessedit_thresholding_method:
        //   0: otsu
        //   1: isodata local adaptive
        //   2: sauvola local adaptive (not implemented yet)
        processor.SetVariable("tessedit_thresholding_method", "1");

        // NOTE(review): unlike __ocrExecute2, no Init / InitForAnalysePage call is made on this
        // processor before AnalyseLayout — confirm that this is intentional.

        using (var jpegStream = new MemoryStream())
        {
            bitmap.Save(jpegStream, ImageFormat.Jpeg);

            // Rewind so decoding starts at the first byte of the JPEG data rather than
            // depending on how the decoder treats a stream positioned at its end.
            jpegStream.Position = 0;

            // The stream stays open for the whole lifetime of the bitmap created from it.
            using (Bitmap bmp = new Bitmap(jpegStream))
            {
                DocumentLayout doc = null;
                switch (req.command)
                {
                    case TESSERACT_COMMAND.GET_TEXT:
                        // Text extraction is not implemented on this path: the request is
                        // marked ok without output_text / output_count being filled in.
                        req.ok = 1;
                        break;

                    default:
                        unsafe
                        {
                            doc = processor.AnalyseLayout(bmp);
                        }

                        // When no layout is produced the request is left untouched (ok is not set).
                        if (doc != null)
                        {
                            var lineBoxes = new List<string>();
                            for (int i = 0; i < doc.Blocks.Count; i++)
                            {
                                var paragraphs = doc.Blocks[i].Paragraphs;
                                for (int j = 0; j < paragraphs.Count; j++)
                                {
                                    // Index the paragraph within block i. The previous code used
                                    // Blocks[j], which read the wrong block or ran out of range.
                                    lineBoxes.AddRange(paragraphs[j].Lines
                                        .Select(x => string.Format(
                                            "{0}_{1}_{2}_{3}", x.Left, x.Top, x.Right, x.Bottom)));
                                }
                            }

                            req.output_format = "left_top_right_bottom";
                            req.output_text = string.Join("|", lineBoxes.ToArray());
                            req.output_count = lineBoxes.Count;
                            req.ok = 1;
                        }
                        break;
                }
            }
        }
    }

    return req;
}