/// <summary>
/// Runs OCR over each image in turn and returns the accumulated text.
/// </summary>
/// <param name="images">Images to recognize; they are processed in enumeration order.</param>
/// <param name="lang">Language argument handed to the <c>TesseractEngine</c> constructor.</param>
/// <returns>
/// The text of all pages appended together, with every "\n" replaced by
/// <see cref="Environment.NewLine"/>. If any page yields <c>null</c> text, an empty
/// string is returned immediately and the text gathered so far is discarded.
/// </returns>
public override string RecognizeText(IList<Image> images, string lang)
{
    string tessdataDir = Path.Combine(basedir, TESSDATA);

    using (TesseractEngine engine = new TesseractEngine(tessdataDir, lang, EngineMode.Default))
    {
        // The engine variable mirrors the Hocr setting.
        engine.SetVariable("tessedit_create_hocr", Hocr ? "1" : "0");

        // The PageSegMode setting is stored as the enum member's name.
        Tesseract.PageSegMode segMode =
            (Tesseract.PageSegMode)Enum.Parse(typeof(Tesseract.PageSegMode), PageSegMode);

        StringBuilder collected = new StringBuilder();
        int pageIndex = 0; // zero-based page number passed to GetHOCRText

        foreach (Image image in images)
        {
            using (Pix pix = ConvertBitmapToPix(image))
            using (Page page = engine.Process(pix, segMode))
            {
                string pageText;
                if (Hocr)
                {
                    pageText = page.GetHOCRText(pageIndex);
                }
                else
                {
                    pageText = page.GetText();
                }

                if (pageText == null)
                {
                    return String.Empty;
                }

                collected.Append(pageText);
            }

            pageIndex++;
        }

        string combined = collected.ToString();
        return combined.Replace("\n", Environment.NewLine);
    }
}
/// <summary>
/// Loads settings from the registry: first the base class settings, then the
/// selected page segmentation mode (stored as the enum member's name).
/// </summary>
/// <param name="regkey">Registry key the settings are read from.</param>
/// <remarks>
/// If the stored value is missing, is not a string, or cannot be parsed as a
/// <c>Tesseract.PageSegMode</c>, <c>selectedPSM</c> falls back to the name of
/// <c>PageSegMode.Auto</c>.
/// </remarks>
protected override void LoadRegistryInfo(RegistryKey regkey)
{
    base.LoadRegistryInfo(regkey);

    string defaultPSM = Enum.GetName(typeof(Tesseract.PageSegMode), Tesseract.PageSegMode.Auto);

    // "as string" instead of a hard cast: a value stored with a non-string registry
    // type yields null here and takes the same fallback path as any other invalid
    // value, rather than throwing InvalidCastException.
    selectedPSM = regkey.GetValue(strPSM, defaultPSM) as string;

    // Validate the PSM value without using exceptions for control flow.
    // TryParse returns false for null, empty, or unrecognized names.
    Tesseract.PageSegMode parsed;
    if (!Enum.TryParse<Tesseract.PageSegMode>(selectedPSM, out parsed))
    {
        selectedPSM = defaultPSM;
    }
}
/// <summary>
/// Recognizes a binarized character image (black text on a white background).
/// </summary>
/// <param name="bmp">Binarized character image with black text on a white background.</param>
/// <param name="mode">Page analysis mode.</param>
/// <returns>
/// The recognized string with all "\n" and space characters removed. When
/// <paramref name="mode"/> is <c>PageSegMode.SingleBlockVertText</c>, the result of
/// calling <c>ReverseString()</c> on that string is returned instead.
/// </returns>
/// <exception cref="InvalidOperationException">The <c>_engine</c> field is null.</exception>
protected virtual string CrackCodeCore(Bitmap bmp, PageSegMode mode)
{
    if (_engine == null)
    {
        throw new InvalidOperationException("Tesseract识别引擎 未初始化");
    }

    // Map the local PageSegMode onto the Tesseract enum via the CastTo helper
    // (Auto is passed as its second argument, presumably as a fallback; confirm
    // against CastTo's definition).
    Tesseract.PageSegMode engineMode = mode.CastTo(Tesseract.PageSegMode.Auto);

    using (Page page = _engine.Process(bmp, engineMode))
    {
        string recognized = page.GetText();
        recognized = recognized.Replace("\n", "");
        recognized = recognized.Replace(" ", "");

        return mode == PageSegMode.SingleBlockVertText
            ? recognized.ReverseString()
            : recognized;
    }
}
/// <summary>
/// Processes an image file page by page and feeds each recognized page to a result renderer.
/// </summary>
/// <param name="renderer">Renderer that receives the document and every processed page.</param>
/// <param name="filename">
/// Path of the image file to load. Its file name without extension is used both as the
/// document title and as the input name passed to the engine.
/// </param>
private void ProcessImageFile(IResultRenderer renderer, string filename)
{
    IEnumerable<string> configFiles = new[] { CONFIGS_FILE };

    using (TesseractEngine engine = new TesseractEngine(Datapath, Language, EngineMode, configFiles))
    {
        ControlParameters(engine);

        // The PageSegMode setting is stored as the enum member's name.
        Tesseract.PageSegMode segMode =
            (Tesseract.PageSegMode)Enum.Parse(typeof(Tesseract.PageSegMode), PageSegMode);
        string documentName = Path.GetFileNameWithoutExtension(filename);

        using (var pages = LoadPixArray(filename))
        using (renderer.BeginDocument(documentName))
        {
            foreach (var pix in pages)
            {
                // Only produce a deskewed copy when the Deskew option is on; the
                // skew reported through the out parameter is not needed.
                Pix deskewed = Deskew
                    ? pix.Deskew(new ScewSweep(range: 45), Pix.DefaultBinarySearchReduction, Pix.DefaultBinaryThreshold, out Scew _)
                    : null;

                // A null resource is legal in a using statement, so the copy is
                // disposed only if it was created, after the page has been disposed.
                using (deskewed)
                using (var page = engine.Process(deskewed ?? pix, documentName, segMode))
                {
                    renderer.AddPage(page);
                }
            }
        }
    }
}
/// <summary>
/// Runs OCR over each image in turn and returns the accumulated text.
/// </summary>
/// <param name="images">Images to recognize; they are processed in enumeration order.</param>
/// <param name="inputName">Input name passed to the engine for every page.</param>
/// <returns>
/// The text of all pages appended together, with every "\n" replaced by
/// <see cref="Environment.NewLine"/>. If any page yields <c>null</c> text, an empty
/// string is returned immediately and the text gathered so far is discarded.
/// </returns>
public override string RecognizeText(IList<Image> images, string inputName)
{
    IEnumerable<string> configFiles = new[] { CONFIGS_FILE };

    using (TesseractEngine engine = new TesseractEngine(Datapath, Language, EngineMode, configFiles))
    {
        // The engine variable mirrors whether hOCR output was requested.
        engine.SetVariable("tessedit_create_hocr", OutputFormat == "hocr" ? "1" : "0");
        ControlParameters(engine);

        // The PageSegMode setting is stored as the enum member's name.
        Tesseract.PageSegMode segMode =
            (Tesseract.PageSegMode)Enum.Parse(typeof(Tesseract.PageSegMode), PageSegMode);

        StringBuilder collected = new StringBuilder();
        int pageIndex = 0; // zero-based page number passed to GetHOCRText

        foreach (Image image in images)
        {
            using (Pix pix = ConvertBitmapToPix(image))
            using (Page page = engine.Process(pix, inputName, segMode))
            {
                string pageText;
                if (OutputFormat == "hocr")
                {
                    pageText = page.GetHOCRText(pageIndex);
                }
                else
                {
                    pageText = page.GetText();
                }

                if (pageText == null)
                {
                    return String.Empty;
                }

                collected.Append(pageText);
            }

            pageIndex++;
        }

        string combined = collected.ToString();
        return combined.Replace("\n", Environment.NewLine);
    }
}