Esempio n. 1
0
        /// <summary>
        /// Runs OCR over each image in order with a freshly created Tesseract engine
        /// and concatenates the per-page output.
        /// </summary>
        /// <param name="images">Page images to recognize, processed in list order.</param>
        /// <param name="lang">Language argument passed to the <c>TesseractEngine</c> constructor.</param>
        /// <returns>
        /// The combined output of all pages with every "\n" replaced by
        /// <see cref="Environment.NewLine"/>, or an empty string as soon as any
        /// page yields a null result.
        /// </returns>
        public override string RecognizeText(IList <Image> images, string lang)
        {
            string dataPath = Path.Combine(basedir, TESSDATA);

            using (TesseractEngine ocr = new TesseractEngine(dataPath, lang, EngineMode.Default))
            {
                string hocrFlag = Hocr ? "1" : "0";
                ocr.SetVariable("tessedit_create_hocr", hocrFlag);

                // The configured segmentation mode is held as a string; parse it into the engine's enum.
                Tesseract.PageSegMode segMode = (PageSegMode)Enum.Parse(typeof(PageSegMode), PageSegMode);

                StringBuilder output = new StringBuilder();

                // Zero-based page index, passed to GetHOCRText when hOCR output is enabled.
                int pageIndex = 0;

                foreach (Image current in images)
                {
                    using (Pix pix = ConvertBitmapToPix(current))
                    using (Page page = ocr.Process(pix, segMode))
                    {
                        string pageText;
                        if (Hocr)
                        {
                            pageText = page.GetHOCRText(pageIndex);
                        }
                        else
                        {
                            pageText = page.GetText();
                        }

                        // A null page result aborts the whole run with an empty result.
                        if (pageText == null)
                        {
                            return String.Empty;
                        }

                        output.Append(pageText);
                    }

                    pageIndex++;
                }

                string combined = output.ToString();
                return combined.Replace("\n", Environment.NewLine);
            }
        }
Esempio n. 2
0
 /// <summary>
 /// Loads the base settings from the registry, then reads the stored page
 /// segmentation mode name, falling back to <c>Auto</c> when the stored value
 /// is missing or is not a usable <c>PageSegMode</c> value.
 /// </summary>
 /// <param name="regkey">Registry key the settings are read from.</param>
 protected override void LoadRegistryInfo(RegistryKey regkey)
 {
     base.LoadRegistryInfo(regkey);

     string defaultPsm = Enum.GetName(typeof(PageSegMode), Tesseract.PageSegMode.Auto);

     // "as" instead of a hard cast: a stored value that is not a string becomes
     // null here and is replaced by the default below, rather than throwing
     // InvalidCastException out of this method.
     selectedPSM = regkey.GetValue(strPSM, defaultPsm) as string;

     // Validate the stored value without using exceptions for control flow.
     // TryParse rejects null, empty and unparsable input; the parsed value itself is not needed.
     Tesseract.PageSegMode parsed;
     if (!Enum.TryParse(selectedPSM, out parsed))
     {
         selectedPSM = defaultPsm;
     }
 }
Esempio n. 3
0
 /// <summary>
 /// Recognizes a binarized character image with black text on a white background.
 /// </summary>
 /// <param name="bmp">Binarized character image with black text on a white background.</param>
 /// <param name="mode">Page analysis (segmentation) mode.</param>
 /// <returns>
 /// The recognized string with all "\n" and space characters removed; for
 /// <c>SingleBlockVertText</c> the result is additionally passed through <c>ReverseString</c>.
 /// </returns>
 /// <exception cref="InvalidOperationException">The recognition engine has not been initialized.</exception>
 protected virtual string CrackCodeCore(Bitmap bmp, PageSegMode mode)
 {
     if (_engine == null)
     {
         throw new InvalidOperationException("Tesseract识别引擎 未初始化");
     }

     // Convert the local mode enum to the engine's enum; Auto is the argument given to CastTo.
     Tesseract.PageSegMode engineMode = mode.CastTo(Tesseract.PageSegMode.Auto);

     using (Page page = _engine.Process(bmp, engineMode))
     {
         string recognized = page.GetText();
         recognized = recognized.Replace("\n", "");
         recognized = recognized.Replace(" ", "");

         return mode == PageSegMode.SingleBlockVertText
             ? recognized.ReverseString()
             : recognized;
     }
 }
Esempio n. 4
0
        /// <summary>
        /// Runs OCR over every page loaded from an image file and adds each
        /// recognized page to the given result renderer.
        /// </summary>
        /// <param name="renderer">Renderer that receives the document and its pages.</param>
        /// <param name="filename">
        /// Image file to load; its name without extension is used both as the
        /// renderer's document title and as the input name passed to the engine.
        /// </param>
        private void ProcessImageFile(IResultRenderer renderer, string filename)
        {
            IEnumerable <string> configFiles = new List <string> { CONFIGS_FILE };

            using (TesseractEngine ocr = new TesseractEngine(Datapath, Language, EngineMode, configFiles))
            {
                ControlParameters(ocr);

                // The configured segmentation mode is held as a string; parse it into the engine's enum.
                Tesseract.PageSegMode segMode = (PageSegMode)Enum.Parse(typeof(PageSegMode), PageSegMode);

                string documentName = Path.GetFileNameWithoutExtension(filename);

                using (var sourcePages = LoadPixArray(filename))
                using (renderer.BeginDocument(documentName))
                {
                    foreach (var source in sourcePages)
                    {
                        // When deskewing is off the resource is null, and a using statement
                        // over a null resource performs no Dispose call.
                        using (Pix deskewed = Deskew
                            ? source.Deskew(new ScewSweep(range: 45), Pix.DefaultBinarySearchReduction, Pix.DefaultBinaryThreshold, out Scew scew)
                            : null)
                        using (var page = ocr.Process(deskewed ?? source, documentName, segMode))
                        {
                            renderer.AddPage(page);
                        }
                    }
                }
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Runs OCR over each image in order with a freshly created, configured
        /// Tesseract engine and concatenates the per-page output.
        /// </summary>
        /// <param name="images">Page images to recognize, processed in list order.</param>
        /// <param name="inputName">Input name passed to the engine for every page.</param>
        /// <returns>
        /// The combined output of all pages with every "\n" replaced by
        /// <see cref="Environment.NewLine"/>, or an empty string as soon as any
        /// page yields a null result.
        /// </returns>
        public override string RecognizeText(IList <Image> images, string inputName)
        {
            IEnumerable <string> configFiles = new List <string> { CONFIGS_FILE };

            using (TesseractEngine ocr = new TesseractEngine(Datapath, Language, EngineMode, configFiles))
            {
                string hocrFlag = OutputFormat == "hocr" ? "1" : "0";
                ocr.SetVariable("tessedit_create_hocr", hocrFlag);
                ControlParameters(ocr);

                // The configured segmentation mode is held as a string; parse it into the engine's enum.
                Tesseract.PageSegMode segMode = (PageSegMode)Enum.Parse(typeof(PageSegMode), PageSegMode);

                StringBuilder output = new StringBuilder();

                // Zero-based page index, passed to GetHOCRText when hOCR output is requested.
                int pageIndex = 0;

                foreach (Image current in images)
                {
                    using (Pix pix = ConvertBitmapToPix(current))
                    using (Page page = ocr.Process(pix, inputName, segMode))
                    {
                        string pageText;
                        if (OutputFormat == "hocr")
                        {
                            pageText = page.GetHOCRText(pageIndex);
                        }
                        else
                        {
                            pageText = page.GetText();
                        }

                        // A null page result aborts the whole run with an empty result.
                        if (pageText == null)
                        {
                            return String.Empty;
                        }

                        output.Append(pageText);
                    }

                    pageIndex++;
                }

                string combined = output.ToString();
                return combined.Replace("\n", Environment.NewLine);
            }
        }