예제 #1
0
    /// <summary>
    /// Handles the OCR request whose id is carried, ASCII-encoded, in <paramref name="buf"/>.
    /// </summary>
    /// <remarks>
    /// Reads the request JSON from the "_OCR_REQUEST" hash, fetches the bitmap referenced by the
    /// request's redis_key/redis_field, runs OCR when a bitmap is available, then writes the request
    /// back to "_OCR_REQUEST", stores its ok value in "_OCR_REQ_LOG" and publishes its id on
    /// "__TESSERACT_OUT". Exceptions raised while loading or processing are not rethrown; they are
    /// recorded in the "_OCR_REQ_ERR" hash.
    /// </remarks>
    /// <param name="buf">ASCII bytes of the field name under which the request is stored in "_OCR_REQUEST".</param>
    static void __executeBackground(byte[] buf)
    {
        oTesseractRequest r    = null;
        string            guid = Encoding.ASCII.GetString(buf);
        var redis = new RedisBase(new RedisSetting(REDIS_TYPE.ONLY_READ, 1000));

        try
        {
            string json = redis.HGET("_OCR_REQUEST", guid);
            r = JsonConvert.DeserializeObject <oTesseractRequest>(json);

            // Release the bitmap as soon as OCR has finished with it;
            // a using statement tolerates a null resource.
            using (Bitmap bitmap = redis.HGET_BITMAP(r.redis_key, r.redis_field))
            {
                if (bitmap != null)
                {
                    r = __ocrExecute(r, bitmap, redis);
                }
            }
        }
        catch (Exception ex)
        {
            if (r != null)
            {
                // The request is serialized before ok is set to -1, so the logged
                // snapshot shows the state at the time of the failure.
                string error = ex.Message + Environment.NewLine + ex.StackTrace
                               + Environment.NewLine + "----------------" + Environment.NewLine +
                               JsonConvert.SerializeObject(r);
                r.ok = -1;
                redis.HSET("_OCR_REQ_ERR", r.requestId, error);
            }
            else
            {
                // The request itself could not be loaded or deserialized. Record the failure
                // under the incoming id so it is not lost silently.
                string error = ex.Message + Environment.NewLine + ex.StackTrace;
                redis.HSET("_OCR_REQ_ERR", guid, error);
            }
        }

        if (r != null)
        {
            redis.HSET("_OCR_REQUEST", r.requestId, JsonConvert.SerializeObject(r, Formatting.Indented));
            redis.HSET("_OCR_REQ_LOG", r.requestId, r.ok.ToString());
            redis.PUBLISH("__TESSERACT_OUT", r.requestId);
        }
    }
예제 #2
0
    /// <summary>
    /// Runs page-layout analysis on <paramref name="bitmap"/> and stores the bounding boxes of the
    /// detected lines in <paramref name="req"/>.
    /// </summary>
    /// <remarks>
    /// The bitmap is converted to a grey image, thresholded with an <c>AdaptiveThresholder</c> and
    /// handed to <c>TesseractProcessor.AnalyseLayoutBinaryImage</c>. For
    /// <c>TESSERACT_COMMAND.GET_TEXT</c> no analysis is performed; the request is only marked ok.
    /// For every other command the line boxes of all paragraphs of all blocks are written to
    /// <c>output_text</c> as "left_top_right_bottom" entries joined with '|'. When the analysis
    /// returns no layout, the request is returned without its output fields or ok being changed.
    /// </remarks>
    /// <param name="req">Request describing the command; it is updated in place.</param>
    /// <param name="bitmap">Image to analyse.</param>
    /// <returns>The same <paramref name="req"/> instance.</returns>
    static oTesseractRequest __ocrExecute2(oTesseractRequest req, Bitmap bitmap)
    {
        using (TesseractProcessor processor = new TesseractProcessor())
        {
            processor.InitForAnalysePage();
            using (GreyImage greyImage = GreyImage.FromImage(bitmap))
            {
                ImageThresholder thresholder = new AdaptiveThresholder();
                using (BinaryImage binImage = thresholder.Threshold(greyImage))
                {
                    DocumentLayout doc = null;
                    switch (req.command)
                    {
                    case TESSERACT_COMMAND.GET_TEXT:
                        // Text extraction is not implemented on this path; only flag success.
                        req.ok = 1;
                        break;

                    default:
                        unsafe
                        {
                            doc = processor.AnalyseLayoutBinaryImage(
                                binImage.BinaryData, greyImage.Width, greyImage.Height);
                        }
                        if (doc != null)
                        {
                            var bs = new List <string>();
                            for (int i = 0; i < doc.Blocks.Count; i++)
                            {
                                var block = doc.Blocks[i];
                                for (int j = 0; j < block.Paragraphs.Count; j++)
                                {
                                    // Index the paragraph within the CURRENT block (i), not block j.
                                    bs.AddRange(block.Paragraphs[j].Lines
                                                .Select(x => string.Format(
                                                            "{0}_{1}_{2}_{3}", x.Left, x.Top, x.Right, x.Bottom)));
                                }
                            }
                            req.output_format = "left_top_right_bottom";
                            req.output_text   = string.Join("|", bs.ToArray());
                            req.output_count  = bs.Count;
                            req.ok            = 1;
                        }
                        break;
                    }
                }
            }
        }

        return(req);
    }
예제 #3
0
    /// <summary>
    /// Processes <paramref name="image"/> with a <c>TesseractEngine</c> and writes either the
    /// recognised text or the segmented region boxes into <paramref name="req"/>.
    /// </summary>
    /// <remarks>
    /// <c>req.command</c> selects the output: <c>GET_TEXT</c> stores the trimmed text and its length;
    /// any other command stores the regions at the matching iterator level as "x_y_width_height"
    /// entries joined with '|'. <c>req.mode</c> selects the engine mode, falling back to
    /// <c>EngineMode.Default</c>.
    /// </remarks>
    /// <param name="req">Request carrying data_path, lang, mode and command; it is updated in place.</param>
    /// <param name="image">Bitmap to run OCR on.</param>
    /// <returns>The same <paramref name="req"/> instance.</returns>
    static oTesseractRequest __ocrExecute(oTesseractRequest req, Bitmap image)
    {
        // Word is the level for every command that does not name a different one.
        PageIteratorLevel regionLevel = PageIteratorLevel.Word;
        switch (req.command)
        {
        case TESSERACT_COMMAND.GET_SEGMENTED_REGION_BLOCK:
            regionLevel = PageIteratorLevel.Block;
            break;

        case TESSERACT_COMMAND.GET_SEGMENTED_REGION_PARA:
            regionLevel = PageIteratorLevel.Para;
            break;

        case TESSERACT_COMMAND.GET_SEGMENTED_REGION_SYMBOL:
            regionLevel = PageIteratorLevel.Symbol;
            break;

        case TESSERACT_COMMAND.GET_SEGMENTED_REGION_TEXTLINE:
            regionLevel = PageIteratorLevel.TextLine;
            break;
        }

        // Any mode not listed below keeps the engine default.
        EngineMode engineMode = EngineMode.Default;
        switch (req.mode)
        {
        case ENGINE_MODE.LSTM_ONLY:
            engineMode = EngineMode.LstmOnly;
            break;

        case ENGINE_MODE.TESSERACT_AND_LSTM:
            engineMode = EngineMode.TesseractAndLstm;
            break;

        case ENGINE_MODE.TESSERACT_ONLY:
            engineMode = EngineMode.TesseractOnly;
            break;
        }

        using (var ocrEngine = new TesseractEngine(req.data_path, req.lang, engineMode))
        using (var pixImage = new BitmapToPixConverter().Convert(image))
        using (var page = ocrEngine.Process(pixImage))
        {
            if (req.command == TESSERACT_COMMAND.GET_TEXT)
            {
                string text = page.GetText().Trim();
                req.output_text  = text;
                req.output_count = text.Length;
                req.ok           = 1;
            }
            else
            {
                string[] regions = page.GetSegmentedRegions(regionLevel)
                                   .Select(rect => string.Format("{0}_{1}_{2}_{3}", rect.X, rect.Y, rect.Width, rect.Height))
                                   .ToArray();
                req.output_format = "x_y_width_height";
                req.output_text   = string.Join("|", regions);
                req.output_count  = regions.Length;
                req.ok            = 1;
            }
        }

        return req;
    }
예제 #4
0
    /// <summary>
    /// Runs page-layout analysis on a JPEG re-encoded copy of <paramref name="bitmap"/> and stores
    /// the bounding boxes of the detected lines in <paramref name="req"/>.
    /// </summary>
    /// <remarks>
    /// For <c>TESSERACT_COMMAND.GET_TEXT</c> no analysis is performed; the request is only marked ok.
    /// For every other command the line boxes of all paragraphs of all blocks returned by
    /// <c>TesseractProcessor.AnalyseLayout</c> are written to <c>output_text</c> as
    /// "left_top_right_bottom" entries joined with '|'. When the analysis returns no layout, the
    /// request is returned without its output fields or ok being changed.
    /// </remarks>
    /// <param name="req">Request describing the command; it is updated in place.</param>
    /// <param name="bitmap">Image to analyse.</param>
    /// <returns>The same <paramref name="req"/> instance.</returns>
    static oTesseractRequest __ocrExecute(oTesseractRequest req, Bitmap bitmap)
    {
        using (TesseractProcessor processor = new TesseractProcessor())
        {
            // call SetVariable() method before passing image(api->SetImage(image))
            // 0: otsu
            // 1: isodata local adaptive
            // 2: sauvola local adaptive => not implemented yet
            processor.SetVariable("tessedit_thresholding_method", "1");

            // Initialisation calls left disabled by the original author:
            //processor.InitForAnalysePage();
            //processor.SetPageSegMode(ePageSegMode.PSM_AUTO_ONLY);
            //var success = processor.Init(req.data_path, req.lang, (int)eOcrEngineMode.OEM_DEFAULT);

            using (var m0 = new MemoryStream())
            {
                // Round-trip through JPEG so the analysis works on a freshly decoded bitmap.
                // NOTE(review): m0 is not rewound after Save; confirm Bitmap(Stream) reads
                // from the start of the stream on the target runtime.
                bitmap.Save(m0, ImageFormat.Jpeg);
                using (Bitmap bmp = new Bitmap(m0))
                {
                    DocumentLayout doc = null;
                    switch (req.command)
                    {
                    case TESSERACT_COMMAND.GET_TEXT:
                        // Text extraction is not implemented on this path; only flag success.
                        req.ok = 1;
                        break;

                    default:
                        unsafe
                        {
                            doc = processor.AnalyseLayout(bmp);
                        }
                        if (doc != null)
                        {
                            var bs = new List <string>();
                            for (int i = 0; i < doc.Blocks.Count; i++)
                            {
                                var block = doc.Blocks[i];
                                for (int j = 0; j < block.Paragraphs.Count; j++)
                                {
                                    // Index the paragraph within the CURRENT block (i), not block j.
                                    bs.AddRange(block.Paragraphs[j].Lines
                                                .Select(x => string.Format(
                                                            "{0}_{1}_{2}_{3}", x.Left, x.Top, x.Right, x.Bottom)));
                                }
                            }
                            req.output_format = "left_top_right_bottom";
                            req.output_text   = string.Join("|", bs.ToArray());
                            req.output_count  = bs.Count;
                            req.ok            = 1;
                        }
                        break;
                    }
                }
            }
        }

        return(req);
    }