/// <summary>
/// Runs page-layout analysis on a hard-coded sample image, writes the resulting
/// layout to the console, and saves a copy of the image with every detected
/// block drawn on top of it.
/// </summary>
private static void AnalyseLayout()
{
    string imageFile = @"D:\Self-Study\OpenSources\Tesseract\original\phototest.tif";
    //imageFile = @"D:\Self-Study\OpenSources\Tesseract\original\eurotext.tif";

    // Dispose the processor when done instead of leaving it to the finalizer.
    using (TesseractProcessor processor = new TesseractProcessor())
    {
        processor.InitForAnalysePage();
        processor.SetPageSegMode(ePageSegMode.PSM_AUTO);

        using (Bitmap bmp = Bitmap.FromFile(imageFile) as Bitmap)
        {
            DocumentLayout doc = processor.AnalyseLayout(bmp);
            Console.WriteLine(doc.ToString());

            // Draw on a fresh bitmap rather than on bmp itself: a Graphics object
            // cannot be created for an image with an indexed pixel format.
            using (Image tmp = new Bitmap(bmp.Width, bmp.Height))
            {
                using (Graphics grph = Graphics.FromImage(tmp))
                {
                    Rectangle rect = new Rectangle(0, 0, tmp.Width, tmp.Height);
                    grph.DrawImage(bmp, rect, rect, GraphicsUnit.Pixel);

                    // Set after the page copy; applies to the block overlays drawn next.
                    grph.SmoothingMode = System.Drawing.Drawing2D.SmoothingMode.HighQuality;

                    foreach (Block block in doc.Blocks)
                    {
                        DrawBlock(grph, block);
                    }
                }

                // Image.Save(string) does not choose the encoder from the file
                // extension; for an in-memory bitmap it falls back to PNG. Name the
                // format explicitly so the .bmp file really contains BMP data.
                tmp.Save(@"D:\temp\page_layout_test2.bmp", System.Drawing.Imaging.ImageFormat.Bmp);
            }
        }
    }
}
/// <summary>
/// Analyses the page layout of a hard-coded sample image, prints the layout to
/// the console, and writes an annotated copy of the image in which each
/// detected block has been drawn.
/// </summary>
private static void AnalyseLayout()
{
    string imageFile = @"D:\Self-Study\OpenSources\Tesseract\original\phototest.tif";
    //imageFile = @"D:\Self-Study\OpenSources\Tesseract\original\eurotext.tif";

    // The processor is released deterministically once the analysis is finished.
    using (TesseractProcessor processor = new TesseractProcessor())
    {
        processor.InitForAnalysePage();
        processor.SetPageSegMode(ePageSegMode.PSM_AUTO);

        using (Bitmap bmp = Bitmap.FromFile(imageFile) as Bitmap)
        {
            DocumentLayout doc = processor.AnalyseLayout(bmp);
            Console.WriteLine(doc.ToString());

            // A new bitmap is used as the drawing surface because Graphics.FromImage
            // rejects images that use an indexed pixel format.
            using (Image tmp = new Bitmap(bmp.Width, bmp.Height))
            {
                using (Graphics grph = Graphics.FromImage(tmp))
                {
                    Rectangle rect = new Rectangle(0, 0, tmp.Width, tmp.Height);
                    grph.DrawImage(bmp, rect, rect, GraphicsUnit.Pixel);

                    // Only the overlays drawn below are affected by this setting.
                    grph.SmoothingMode = System.Drawing.Drawing2D.SmoothingMode.HighQuality;

                    foreach (Block block in doc.Blocks)
                    {
                        DrawBlock(grph, block);
                    }
                }

                // Without an explicit format, Save ignores the ".bmp" extension and
                // an in-memory bitmap is written with the PNG encoder instead.
                tmp.Save(@"D:\temp\page_layout_test2.bmp", System.Drawing.Imaging.ImageFormat.Bmp);
            }
        }
    }
}
/// <summary>
/// Runs OCR over each image in list order and returns the concatenated text.
/// </summary>
/// <param name="images">Images to recognize, processed in order.</param>
/// <param name="lang">Language identifier passed to the processor's Init call.</param>
/// <returns>
/// The text of all images with "\n" replaced by <see cref="Environment.NewLine"/>,
/// or an empty string as soon as recognition of any image yields null (text
/// collected from earlier images is discarded in that case).
/// </returns>
public override string RecognizeText(IList<Image> images, string lang)
{
    string tessdata = Path.Combine(basedir, TESSDATA);

    // Dispose the processor on every exit path, including the early return below.
    using (TesseractProcessor processor = new TesseractProcessor())
    {
        processor.Init(tessdata, lang, oem);
        processor.SetPageSegMode((ePageSegMode)Enum.Parse(typeof(ePageSegMode), PSM));

        StringBuilder strB = new StringBuilder();

        foreach (Image image in images)
        {
            //if (rect != Rectangle.Empty)
            //{
            //    processor.UseROI = true;
            //    processor.ROI = rect;
            //}
            string text = processor.Recognize(image);

            if (text == null)
            {
                return String.Empty;
            }

            strB.Append(text);
        }

        return strB.ToString().Replace("\n", Environment.NewLine);
    }
}
/// <summary>
/// Click handler: OCRs every .png/.jpg/.bmp file in the folder named by
/// textBox_Path and copies each one into a "new" subfolder. A file whose
/// recognized text contains a decimal amount (digits, a dot, two digits) is
/// copied under that amount as its name; a file with no match, or whose
/// recognition throws, is copied under its original name. Finally the output
/// folder path is passed to Process.Start.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    string d = textBox_Path.Text;
    if (!Directory.Exists(d))
    {
        MessageBox.Show("目录" + d + "不存在");
        return;
    }

    if (!d.EndsWith("\\"))
    {
        d += "\\";
    }

    string d1 = d + "new\\";
    if (!Directory.Exists(d1))
    {
        Directory.CreateDirectory(d1);
    }

    Regex reg = new Regex(@"\d+\.\d\d", RegexOptions.Compiled);

    foreach (string f in Directory.GetFiles(textBox_Path.Text))
    {
        // Ordinal, case-insensitive extension filter: no lower-cased copy of the
        // path and no dependence on the current culture.
        bool isImage = f.EndsWith(".png", StringComparison.OrdinalIgnoreCase)
            || f.EndsWith(".jpg", StringComparison.OrdinalIgnoreCase)
            || f.EndsWith(".bmp", StringComparison.OrdinalIgnoreCase);
        if (!isImage)
        {
            continue;
        }

        // Both objects are released for every file, even when the fallback copy
        // in the catch block itself throws.
        using (Bitmap bmp = new Bitmap(f))
        using (TesseractProcessor process = new TesseractProcessor())
        {
            // NOTE(review): the segmentation mode is set before Init here; confirm
            // that Init does not reset it.
            process.SetPageSegMode(ePageSegMode.PSM_SINGLE_LINE);
            process.Init(System.Environment.CurrentDirectory + "\\", "chi_sim", (int)eOcrEngineMode.OEM_DEFAULT);

            try
            {
                string result = process.Recognize(bmp);
                Match m = reg.Match(result);
                if (m.Success)
                {
                    // Files yielding the same amount and extension overwrite each
                    // other because the copy is made with overwrite enabled.
                    string amount = m.ToString();
                    File.Copy(f, d1 + amount + Path.GetExtension(f), true);
                }
                else
                {
                    File.Copy(f, d1 + Path.GetFileName(f), true);
                }
            }
            catch (Exception)
            {
                // Best effort: report the failure and keep the file under its own name.
                MessageBox.Show("识别文件出错:" + f);
                File.Copy(f, d1 + Path.GetFileName(f), true);
            }
        }
    }

    System.Diagnostics.Process.Start(d1);
}
/// <summary>
/// Recognizes the text of an image file using the "eng" language and
/// post-processes the result with CorrectOCRErrors and CorrectLetterCases.
/// </summary>
/// <param name="imgFile">Path of the image file handed to the processor.</param>
/// <returns>
/// The corrected text with "\n" replaced by <see cref="Environment.NewLine"/>.
/// An empty string is returned when nothing was recognized, or when an
/// exception occurred, in which case ErrMsg holds the exception text.
/// </returns>
public string UseTesseract(string imgFile)
{
    this.ErrMsg = string.Empty;
    string defLang = "eng";
    string strResult = string.Empty;

    try
    {
        using (TesseractProcessor processor = new TesseractProcessor())
        {
            processor.Init(this.TESSDATA, defLang, this.oem);
            processor.SetPageSegMode((ePageSegMode)Enum.Parse(typeof(ePageSegMode), this.pageSegMode));

#if DEBUG
            System.Diagnostics.Debug.WriteLine("processor:");
            System.Diagnostics.Debug.WriteLine(processor.GetTesseractEngineVersion());
#endif

            strResult = processor.Recognize(imgFile);

            if (!string.IsNullOrEmpty(strResult))
            {
                // correct common errors caused by OCR
                strResult = this.CorrectOCRErrors(strResult);

                // correct letter cases
                strResult = this.CorrectLetterCases(strResult);
            }
        }

        // The result may be null at this point (the guard above anticipates it).
        // Treat that as "no text" instead of letting Replace throw a
        // NullReferenceException that would be reported through ErrMsg.
        strResult = (strResult ?? string.Empty).Replace("\n", Environment.NewLine);
    }
    catch (Exception ex)
    {
        this.ErrMsg = ex.ToString();
        strResult = string.Empty;
    }

    return strResult;
}