// Name of the language data file used for recognition.
private const string m_lang = "eng";

/// <summary>
/// Runs digit-only OCR on <paramref name="image"/>.
/// </summary>
/// <remarks>
/// The image is passed through <c>ToGray</c>, <c>ConvertTo1Bpp1</c> and <c>Zoom</c> before
/// recognition, and a BMP snapshot of every stage is written under <c>./img/</c>.
/// </remarks>
/// <param name="image">Image to recognise.</param>
/// <returns>
/// The text returned by the engine, or the message "tesseract初始化失败" when the engine
/// could not be initialised.
/// </returns>
public string read2dig(Image image)
{
    // The engine is only needed for this call, so release it deterministically.
    using (TesseractProcessor processor = new TesseractProcessor())
    {
        bool succeed = processor.Init(m_path, m_lang, (int)TesseractEngineMode.DEFAULT);
        if (!succeed)
        {
            return "tesseract初始化失败";
        }

        // Restrict recognition to digits.
        processor.SetVariable("tessedit_char_whitelist", "0123456789");

        image.Save("./img/原图.bmp", System.Drawing.Imaging.ImageFormat.Bmp);

        // NOTE(review): the intermediate bitmaps are not disposed because it is not visible
        // here whether the helpers return new instances or the one they were given.
        Bitmap bmp = ToGray(image);
        bmp.Save("./img/灰度图.bmp", System.Drawing.Imaging.ImageFormat.Bmp);

        bmp = ConvertTo1Bpp1(bmp, 110); // presumably 110 is the binarisation threshold - confirm
        bmp.Save("./img/二值图.bmp", System.Drawing.Imaging.ImageFormat.Bmp);

        bmp = Zoom(bmp, 0.25);
        bmp.Save("./img/放大缩小后.bmp", System.Drawing.Imaging.ImageFormat.Bmp);

        processor.Clear();
        processor.ClearAdaptiveClassifier();
        return processor.Apply(bmp);
    }
}
/// <summary>
/// Builds the form and prepares the shared Tesseract engine with automatic page segmentation.
/// </summary>
public Form1()
{
    InitializeComponent();

    m_tesseract = new TesseractProcessor();
    m_tesseract.Init(m_path, m_lang, (int)TesseractEngineMode.DEFAULT);

    // The variable takes the numeric mode. Enum.ToString() would pass the member name
    // ("PSM_AUTO") instead, so convert the enum to int first.
    m_tesseract.SetVariable(
        "tessedit_pageseg_mode",
        ((int)TesseractPageSegMode.PSM_AUTO).ToString());
}
/// <summary>
/// Loads <c>1.jpg</c> from the application folder, shows it in the viewer, runs OCR on the
/// viewer's region A and displays the recognised text in a message box.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    Image mg = Image.FromFile(Application.StartupPath + "\\1.jpg");
    imageViewer1.Initialize(new OCRRenderingData(), new OCRAnalysisRender(imageViewer1));

    // The viewer keeps displaying the image, so it must not be disposed here.
    imageViewer1.Image = mg;

    string tessData = Application.StartupPath + "\\tessdata\\";
    const string lang = "eng";
    const int ocrEngineMode = 3;

    // The engine is local to this click; release it when done.
    using (TesseractProcessor ocrProcessor = new TesseractProcessor())
    {
        if (!ocrProcessor.Init(tessData, lang, ocrEngineMode))
        {
            // Do not run recognition on an engine that failed to start.
            MessageBox.Show("Failed to initialize Tesseract.");
            return;
        }

        ocrProcessor.ROI = imageViewer1.RectA;
        ocrProcessor.UseROI = true;

        string text = ocrProcessor.Recognize(mg);
        MessageBox.Show(text);
    }
}
/// <summary>
/// Console entry point: recognises the text of the image whose path is given as the first
/// command-line argument and prints it.
/// </summary>
/// <param name="args">Command-line arguments; <c>args[0]</c> is the image file path.</param>
public static void Main(string[] args)
{
    const string language = "eng";

    if (args == null || args.Length == 0)
    {
        // Previously this case crashed with an IndexOutOfRangeException.
        Console.WriteLine("Usage: pass the path of the image to recognize as the first argument.");
    }
    else
    {
        string imageFile = args[0];

        using (TesseractProcessor processor = new TesseractProcessor())
        using (Bitmap bmp = Bitmap.FromFile(imageFile) as Bitmap)
        {
            bool success = processor.Init(TessractData, language, (int)eOcrEngineMode.OEM_DEFAULT);
            if (!success)
            {
                Console.WriteLine("Failed to initialize tesseract.");
            }
            else
            {
                // use thresholder
                processor.UseThresholder();

                string text = processor.Recognize(bmp);

                Console.WriteLine("Text:");
                Console.WriteLine("*****************************");
                Console.WriteLine(text);
                Console.WriteLine("*****************************");
            }
        }
    }

    Console.WriteLine("Press any key to exit.");
    Console.ReadKey();
}
/// <summary>
/// Builds the form: ensures the <c>image</c> folder exists under the start-up path, creates the
/// helper objects and the worker thread object, and initialises the Chinese text OCR engine.
/// </summary>
/// <remarks>
/// Disabled code that used to live here configured <c>ocrNumber</c> as a
/// <c>TesseractProcessor</c> with a digit/punctuation whitelist; <c>ocrNumber</c> is now an
/// <c>OCR</c> instance.
/// </remarks>
public Form1()
{
    InitializeComponent();

    PSN = new Person();

    // Make sure the image folder exists, then keep the path with a trailing separator.
    imagePath = Application.StartupPath + "/image";
    if (!Directory.Exists(imagePath))
    {
        Directory.CreateDirectory(imagePath);
    }
    imagePath += "/";

    Init();

    F = new mPublic();
    CFG = new Config();
    MAPP = new Mapping();
    RMap = CFG.rect_map_now;

    // The thread object is created here but not started in this constructor.
    thKill = new Thread(new ThreadStart(AutoKill));
    stopKill = true;

    ocrNumber = new OCR();

    // Initialise the Chinese text engine.
    ocrText = new TesseractProcessor();
    bInitT = ocrText.Init(@".\", "chi_sim", 3);
    if (bInitT)
    {
        ocrText.SetVariable("tessedit_char_blacklist", "fi蜘fi'£郜QWERTYUIOPASDFGHJKLZXCVBNM1234567890μqwertyuiopasdfghjklzxcvbnm");
    }
    else
    {
        MessageBox.Show("智能识别引擎初始化失败!");
    }
}
/// <summary>
/// Runs OCR over every image in order and concatenates the recognised text.
/// </summary>
/// <param name="images">Images to recognise; their text is appended in list order.</param>
/// <param name="lang">Language code handed to the engine's <c>Init</c>.</param>
/// <returns>
/// The concatenated text with every "\n" replaced by <see cref="Environment.NewLine"/>, or
/// <see cref="String.Empty"/> as soon as the engine returns <c>null</c> for any image.
/// </returns>
public override string RecognizeText(IList<Image> images, string lang)
{
    string tessdata = Path.Combine(basedir, TESSDATA);

    // Dispose the engine when done; it was previously leaked on every call.
    using (TesseractProcessor processor = new TesseractProcessor())
    {
        processor.Init(tessdata, lang, oem);
        processor.SetPageSegMode((ePageSegMode)Enum.Parse(typeof(ePageSegMode), PSM));

        StringBuilder recognized = new StringBuilder();
        foreach (Image image in images)
        {
            // Region-of-interest recognition is currently disabled; whole images are processed.
            string text = processor.Recognize(image);
            if (text == null)
            {
                return String.Empty;
            }

            recognized.Append(text);
        }

        return recognized.ToString().Replace("\n", Environment.NewLine);
    }
}
public static void Recognize() { int n_images = Workspace.Images.Length; int i_image = n_images - 1; //i_image = 0; i_image = 2; string fileName = Workspace.Images[i_image]; string imageFile = Path.Combine(Workspace.InputFolder, fileName); string language = "eng"; int oem = (int)eOcrEngineMode.OEM_DEFAULT; string name = Path.GetFileNameWithoutExtension(imageFile); { using (Bitmap bmp = Bitmap.FromFile(imageFile) as Bitmap) { using (GreyImage greyImage = GreyImage.FromImage(bmp)) { ImageThresholder thresholder = new AdaptiveThresholder(); using (BinaryImage binImage = thresholder.Threshold(greyImage)) { DateTime started = DateTime.Now; DateTime ended = DateTime.Now; Rectangle[] rois = new Rectangle[] { Rectangle.FromLTRB(807, 43, 1351, 613), Rectangle.FromLTRB(4, 604, binImage.Width - 15, binImage.Height-35) }; int nROIs = rois.Length; string[] texts = new string[nROIs]; #if PARALLEL Parallel.For(0, nROIs, delegate(int iROI) #else using (TesseractProcessor processor = new TesseractProcessor()) for (int iROI = 0; iROI < nROIs; iROI++) #endif { #if PARALLEL using (TesseractProcessor processor = new TesseractProcessor()) #endif { Rectangle roi = rois[iROI]; { //oem = (int)eOcrEngineMode.OEM_TESSERACT_CUBE_COMBINED; processor.Init(Workspace.TessdataFolder, language, oem); processor.UseROI = true; processor.ROI = roi; unsafe { texts[iROI] = processor.RecognizeBinaryImage( binImage.BinaryData, binImage.Width, binImage.Height); } } } }
/// <summary>
/// Downloads the image at <paramref name="url"/> and recognises the digits (and '.') in it.
/// </summary>
/// <param name="url">Address of the image to download.</param>
/// <returns>
/// The recognised text without trailing line breaks, or <see cref="string.Empty"/> when the
/// response has no stream, the engine cannot be initialised, or recognition throws.
/// </returns>
public static string Recognize(string url)
{
    WebRequest request = WebRequest.Create(url);

    // Release the response and its stream; both were previously leaked.
    using (WebResponse response = request.GetResponse())
    using (Stream st = response.GetResponseStream())
    {
        if (st == null)
        {
            return string.Empty;
        }

        try
        {
            string tessdata = Environment.CurrentDirectory + "\\tessdata\\";
            // Name of the trained data file (the part before ".traineddata").
            // Training guide: http://www.lixin.me/blog/2012/05/26/29536
            const string language = "eng";
            const int oem = 3;

            // NOTE(review): the bitmaps produced by FromStream and toGray() are intermediate
            // and are not disposed; only the final bitmap is.
            using (Bitmap bitmap = ((Bitmap)Bitmap.FromStream(st)).toGray().biLinear(2))
            using (TesseractProcessor processor = new TesseractProcessor())
            {
                if (!processor.Init(tessdata, language, oem))
                {
                    return string.Empty;
                }

                // Region of interest: skip the leftmost 24 pixels of the image.
                processor.UseROI = true;
                processor.ROI = new Rectangle(24, 0, bitmap.Width - 24, bitmap.Height);

                // Recognition variables must be set after Init. With custom-trained data
                // the whitelist may be unnecessary.
                processor.SetVariable("tessedit_char_whitelist", "0123456789.");

                string text = processor.Recognize(bitmap);

                // Strip trailing line breaks. The previous trim set contained the characters
                // '\' and 'n' rather than the newline character, so it never removed them.
                return text.TrimEnd('\r', '\n');
            }
        }
        catch (Exception)
        {
            // Best effort: any failure while decoding or recognising yields an empty result.
            return string.Empty;
        }
    }
}
/// <summary>
/// Creates an engine wrapper for <paramref name="image"/> and configures Tesseract for
/// single-line recognition with a restricted character set.
/// </summary>
/// <remarks>
/// As a side effect the process-wide current directory is switched to the full path of
/// <c>m_path</c>.
/// </remarks>
/// <param name="image">Image this engine instance will work on.</param>
public TesseractOCREngine(Image image)
{
    this.image = image;

    // The page segmentation variable is set using the numeric value of the enum member.
    string singleLineMode = ((int)TesseractPageSegMode.PSM_SINGLE_LINE).ToString();

    m_tesseract = new TesseractProcessor();
    m_tesseract.Init(m_path, m_lang, (int)TesseractEngineMode.DEFAULT);
    m_tesseract.SetVariable("tessedit_char_whitelist", "0123456789-.eExXpP/\\");
    m_tesseract.SetVariable("tessedit_pageseg_mode", singleLineMode);

    System.Environment.CurrentDirectory = System.IO.Path.GetFullPath(m_path);
}
/// <summary>
/// Runs OCR on every PNG/JPG/BMP file in the folder entered in <c>textBox_Path</c> and copies
/// each file into a <c>new</c> sub-folder, then opens that folder.
/// </summary>
/// <remarks>
/// A file is renamed to the first amount found in its text (digits, a dot and two decimals).
/// When no amount is found, or processing fails, it is copied under its original name.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    string d = textBox_Path.Text;
    if (!Directory.Exists(d))
    {
        MessageBox.Show("目录" + d + "不存在");
        return;
    }

    if (!d.EndsWith("\\"))
    {
        d += "\\";
    }

    string d1 = d + "new\\";
    if (!Directory.Exists(d1))
    {
        Directory.CreateDirectory(d1);
    }

    Regex reg = new Regex(@"\d+\.\d\d", RegexOptions.Compiled);

    foreach (string f in Directory.GetFiles(textBox_Path.Text))
    {
        string extension = Path.GetExtension(f);
        bool isImage = extension.Equals(".png", StringComparison.OrdinalIgnoreCase)
            || extension.Equals(".jpg", StringComparison.OrdinalIgnoreCase)
            || extension.Equals(".bmp", StringComparison.OrdinalIgnoreCase);
        if (!isImage)
        {
            continue;
        }

        try
        {
            // Both the bitmap and the engine are per-file; dispose them even on failure.
            // Loading the bitmap inside the try lets one unreadable file follow the normal
            // error path instead of aborting the whole batch.
            using (Bitmap bmp = new Bitmap(f))
            using (TesseractProcessor process = new TesseractProcessor())
            {
                bool initialized = process.Init(
                    System.Environment.CurrentDirectory + "\\",
                    "chi_sim",
                    (int)eOcrEngineMode.OEM_DEFAULT);
                if (!initialized)
                {
                    throw new InvalidOperationException("Tesseract initialization failed.");
                }

                // Set engine options after Init so that Init does not reset them.
                process.SetPageSegMode(ePageSegMode.PSM_SINGLE_LINE);

                string result = process.Recognize(bmp);
                Match m = reg.Match(result);

                string targetName = m.Success
                    ? m.ToString() + extension
                    : Path.GetFileName(f);
                File.Copy(f, d1 + targetName, true);
            }
        }
        catch (Exception)
        {
            MessageBox.Show("识别文件出错:" + f);
            File.Copy(f, d1 + Path.GetFileName(f), true);
        }
    }

    System.Diagnostics.Process.Start(d1);
}
/// <summary>
/// Thresholds the last image in <c>Images</c> with <c>AdaptiveThresholder</c>, recognises the
/// resulting binary image with engine mode 3 and prints the duration and the text.
/// </summary>
static void Simple2_Recognize()
{
    string lastImageName = Images[Images.Length - 1];
    string imageFile = Path.Combine(InputFolder, lastImageName);

    const string language = "eng";
    int oem = 3;

    using (TesseractProcessor processor = new TesseractProcessor())
    using (Bitmap bmp = Bitmap.FromFile(imageFile) as Bitmap)
    using (GreyImage greyImage = GreyImage.FromImage(bmp))
    {
        ImageThresholder thresholder = new AdaptiveThresholder();
        using (BinaryImage binImage = thresholder.Threshold(greyImage))
        {
            processor.Init(TessdataFolder, language, oem);

            string text;
            unsafe
            {
                // Only the recognition call itself is timed.
                // The dimensions passed are those of the grey image.
                DateTime started = DateTime.Now;
                text = processor.RecognizeBinaryImage(
                    binImage.BinaryData, greyImage.Width, greyImage.Height);
                DateTime ended = DateTime.Now;

                Console.WriteLine(
                    "Duration recognition: {0} ms\n\n",
                    (ended - started).TotalMilliseconds);
            }

            string report = string.Format(
                "RecognizeMode: {1}\nRecognized Text:\n{0}\n++++++++++++++++++++++++++++++++\n",
                text,
                ((eOcrEngineMode)oem).ToString());
            Console.WriteLine(report);
        }
    }
}
/// <summary>
/// Thresholds the first workspace image, inverts the binary result, recognises it with the
/// combined Tesseract/Cube engine mode and prints the timings and the text.
/// </summary>
public static void Recognize()
{
    string fileName = Workspace.Images[0];
    string imageFile = Path.Combine(Workspace.InputFolder, fileName);
    const string language = "eng";

    // The engine and all image wrappers are disposed when done; they used to leak.
    using (TesseractProcessor processor = new TesseractProcessor())
    using (Bitmap bmp = Bitmap.FromFile(imageFile) as Bitmap)
    using (GreyImage greyImage = GreyImage.FromImage(bmp))
    {
        DateTime started = DateTime.Now;
        AdaptiveThresholder thresholder = new AdaptiveThresholder();
        using (BinaryImage binImage = thresholder.Threshold(greyImage))
        {
            DateTime ended = DateTime.Now;
            Console.WriteLine("Duration thresholding: {0} ms", (ended - started).TotalMilliseconds);

            binImage.Invert();

            // The original single-pass loop always overwrote the mode with this value.
            int oem = (int)eOcrEngineMode.OEM_TESSERACT_CUBE_COMBINED;
            processor.Init(Workspace.TessdataFolder, language, oem);

            string text = "";
            unsafe
            {
                started = DateTime.Now;
                text = processor.RecognizeBinaryImage(
                    binImage.BinaryData, binImage.Width, binImage.Height);
                ended = DateTime.Now;

                Console.WriteLine("Duration recognition: {0} ms\n\n", (ended - started).TotalMilliseconds);
            }

            Console.WriteLine(
                string.Format("RecognizeMode: {1}\nText:\n{0}\n++++++++++++++++++++++++++++++++\n",
                    text, ((eOcrEngineMode)oem).ToString()));
        }
    }
}
/// <summary>
/// Loads the general settings, then creates and initialises the OCR engine with monitoring
/// enabled. The initialisation result is written to the console.
/// </summary>
/// <param name="e">Event data forwarded to the base implementation.</param>
protected override void OnLoad(EventArgs e)
{
    base.OnLoad(e);
    LoadGeneralSettings();

    _ocrProcessor = new TesseractProcessor();
    _ocrProcessor.DoMonitor = true;

    bool initialized = _ocrProcessor.Init(_tessData, _lang, _ocrEngineMode);
    Console.WriteLine("[DEBUG] Init status: {0}", initialized);
}
/// <summary>
/// Loads <c>1.jpg</c> from the application folder, recognises the text inside the viewer's
/// five regions (A to E) and shows the combined text followed by the region record.
/// </summary>
/// <remarks>
/// Each region is appended to <c>rec</c> as <c>X;Y;Width;Height</c>, with regions separated
/// by '|'.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    Image mg = Image.FromFile(Application.StartupPath + "\\1.jpg");
    templateViewer1.Initialize(new OCRRenderingData(), new OCRAnalysisRender(templateViewer1));

    // The viewer keeps displaying the image, so it must not be disposed here.
    templateViewer1.Image = mg;

    string tessData = Application.StartupPath + "\\tessdata\\";
    const string lang = "eng";
    const int ocrEngineMode = 3;

    Rectangle[] regions =
    {
        templateViewer1.RectA,
        templateViewer1.RectB,
        templateViewer1.RectC,
        templateViewer1.RectD,
        templateViewer1.RectE
    };

    // The engine is local to this click; release it when done.
    using (TesseractProcessor ocrProcessor = new TesseractProcessor())
    {
        if (!ocrProcessor.Init(tessData, lang, ocrEngineMode))
        {
            // Do not run recognition on an engine that failed to start.
            MessageBox.Show("Failed to initialize Tesseract.");
            return;
        }

        string text = string.Empty;
        for (int i = 0; i < regions.Length; i++)
        {
            Rectangle region = regions[i];

            ocrProcessor.ROI = region;
            ocrProcessor.UseROI = true;

            rec += region.X + ";" + region.Y + ";" + region.Width + ";" + region.Height;
            if (i < regions.Length - 1)
            {
                rec += "|"; // separator between regions, none after the last one
            }

            text += ocrProcessor.Recognize(mg);
        }

        MessageBox.Show(text);
        MessageBox.Show(rec);
    }
}
/// <summary>
/// Recognises the English text in <paramref name="theBitmap"/> and appends it, followed by a
/// line break, to <c>richTextBox1</c>. Nothing is appended when the engine fails to
/// initialise.
/// </summary>
/// <param name="theBitmap">Bitmap to recognise.</param>
public void StartOCR(Bitmap theBitmap)
{
    string tessdataDirectory = Application.StartupPath + @"\tessdata\";

    using (TesseractProcessor engine = new TesseractProcessor())
    {
        bool ready = engine.Init(tessdataDirectory, "eng", (int)eOcrEngineMode.OEM_DEFAULT);
        if (!ready)
        {
            return;
        }

        string recognized = engine.Recognize(theBitmap);
        richTextBox1.AppendText(recognized + "\n");
    }
}
/// <summary>
/// Creates and initialises the OCR engine when the form loads, restricts it to the
/// characters listed in the whitelist and switches the current directory to <c>m_path</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void OCR_Load(object sender, EventArgs e)
{
    m_tesseract = new TesseractProcessor();
    bool succeed = m_tesseract.Init(m_path, m_lang, 3);
    if (!succeed)
    {
        MessageBox.Show("Tesseract initialization failed. The application will exit.");
        Application.Exit();

        // Application.Exit() does not stop this handler, so leave explicitly instead of
        // configuring an engine that failed to initialise.
        return;
    }

    // Training set: abcdefghijklmnopqrstuvxyzABCDEFGHIJKLMNOPQRSTUVXYZ-.1234567890
    m_tesseract.SetVariable("tessedit_char_whitelist", "ABCDEFHKLMNPQRSTUVXYZ-.1234567890");

    System.Environment.CurrentDirectory = System.IO.Path.GetFullPath(m_path);
}
/// <summary>
/// Initialises the main form: computes the remaining count, binds the grid, starts the
/// camera capture and timer, and creates three OCR engines (full, letters only, digits only).
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void MainForm_Load(object sender, EventArgs e)
{
    // Remaining = total entered in txt_tong minus the count reported by bss.
    int tong = int.Parse(txt_tong.Text);
    int soluong = int.Parse(bss.so_luong_xe());
    txt_conlai.Text = (tong - soluong).ToString();

    dataGridView1.DataSource = bss.Danhsachxe();

    capture = new Emgu.CV.Capture();
    timer1.Enabled = true;

    // NOTE(review): m_path is read by the Init calls below but only assigned at the end of
    // this method. Confirm it has a valid value before this handler runs.

    // Application.Exit() does not stop this handler, so each failure path returns explicitly
    // instead of configuring an engine that failed to initialise.
    full_tesseract = new TesseractProcessor();
    bool succeed = full_tesseract.Init(m_path, m_lang, 3);
    if (!succeed)
    {
        MessageBox.Show("Tesseract initialization failed. The application will exit.");
        Application.Exit();
        return;
    }
    full_tesseract.SetVariable("tessedit_char_whitelist", "ABCDEFHKLMNPRSTVXY1234567890");

    ch_tesseract = new TesseractProcessor();
    succeed = ch_tesseract.Init(m_path, m_lang, 3);
    if (!succeed)
    {
        MessageBox.Show("Tesseract initialization failed. The application will exit.");
        Application.Exit();
        return;
    }
    ch_tesseract.SetVariable("tessedit_char_whitelist", "ABCDEFHKLMNPRSTUVXY");

    num_tesseract = new TesseractProcessor();
    succeed = num_tesseract.Init(m_path, m_lang, 3);
    if (!succeed)
    {
        MessageBox.Show("Tesseract initialization failed. The application will exit.");
        Application.Exit();
        return;
    }
    num_tesseract.SetVariable("tessedit_char_whitelist", "1234567890");

    m_path = System.Environment.CurrentDirectory + "\\";

    for (int i = 0; i < box.Length; i++)
    {
        box[i] = new PictureBox();
    }
}
/// <summary>
/// Prepares the check-in page: creates the three OCR engines (full, letters only, digits
/// only), switches the current directory to <c>m_path</c>, fills <c>box</c>, collects the
/// BMP and JPG test images, and resolves the hosted WinForms controls.
/// </summary>
/// <remarks>
/// A failed engine initialisation only shows a message; the method carries on afterwards.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
void CheckInPage_Loaded(object sender, RoutedEventArgs e)
{
    const string initFailedMessage = "Lỗi thư viện Tesseract. Chương trình cần kết thúc.";

    full_tesseract = new TesseractProcessor();
    if (!full_tesseract.Init(m_path, m_lang, 3))
    {
        System.Windows.MessageBox.Show(initFailedMessage);
    }
    full_tesseract.SetVariable("tessedit_char_whitelist", "ACDFHKLMNPRSTVXY1234567890");

    ch_tesseract = new TesseractProcessor();
    if (!ch_tesseract.Init(m_path, m_lang, 3))
    {
        System.Windows.MessageBox.Show(initFailedMessage);
    }
    ch_tesseract.SetVariable("tessedit_char_whitelist", "ACDEFHKLMNPRSTUVXY");

    num_tesseract = new TesseractProcessor();
    if (!num_tesseract.Init(m_path, m_lang, 3))
    {
        System.Windows.MessageBox.Show(initFailedMessage);
    }
    num_tesseract.SetVariable("tessedit_char_whitelist", "1234567890");

    System.Environment.CurrentDirectory = System.IO.Path.GetFullPath(m_path);

    int boxCount = box.Length;
    for (int index = 0; index < boxCount; index++)
    {
        box[index] = new PictureBox();
    }

    // Collect the test images: all BMP files first, then all JPG files.
    string folder = System.AppDomain.CurrentDomain.BaseDirectory + "\\ImageTest";
    string[] patterns = new string[] { "*.bmp", "*.jpg" };
    foreach (string pattern in patterns)
    {
        string[] matches = Directory.GetFiles(folder, pattern, SearchOption.TopDirectoryOnly);
        foreach (string fileName in matches)
        {
            lstimages.Add(System.IO.Path.GetFullPath(fileName));
        }
    }

    // The WinForms controls are hosted inside the WPF host elements.
    pic1 = hostPic1.Child as System.Windows.Forms.PictureBox;
    pic2 = hostPic2.Child as System.Windows.Forms.PictureBox;
    img1 = hostImg1.Child as Emgu.CV.UI.ImageBox;
}
//private string _Caption = "http://www.laptrinhvb.net";
#endregion

/// <summary>
/// Prepares the main form: creates the three OCR engines (full, letters only, digits only),
/// switches the current directory to <c>m_path</c>, fills <c>box</c> and collects the BMP and
/// JPG files of the <c>ImageTest</c> folder into <c>lstimages</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void FrmMain_Load(object sender, EventArgs e)
{
    // Application.Exit() does not stop this handler, so each failure path returns explicitly
    // instead of configuring an engine that failed to initialise.
    full_tesseract = new TesseractProcessor();
    bool succeed = full_tesseract.Init(m_path, m_lang, 3);
    if (!succeed)
    {
        MessageBox.Show("Lỗi thư viện Tesseract. Chương trình cần kết thúc.");
        Application.Exit();
        return;
    }
    full_tesseract.SetVariable("tessedit_char_whitelist", "ACDFHKLMNPRSTVXY1234567890");

    ch_tesseract = new TesseractProcessor();
    succeed = ch_tesseract.Init(m_path, m_lang, 3);
    if (!succeed)
    {
        MessageBox.Show("Lỗi thư viện Tesseract. Chương trình cần kết thúc.");
        Application.Exit();
        return;
    }
    ch_tesseract.SetVariable("tessedit_char_whitelist", "ACDEFHKLMNPRSTUVXY");

    num_tesseract = new TesseractProcessor();
    succeed = num_tesseract.Init(m_path, m_lang, 3);
    if (!succeed)
    {
        MessageBox.Show("Lỗi thư viện Tesseract. Chương trình cần kết thúc.");
        Application.Exit();
        return;
    }
    num_tesseract.SetVariable("tessedit_char_whitelist", "1234567890");

    System.Environment.CurrentDirectory = System.IO.Path.GetFullPath(m_path);

    for (int i = 0; i < box.Length; i++)
    {
        box[i] = new PictureBox();
    }

    // Collect the test images: all BMP files first, then all JPG files.
    string folder = Application.StartupPath + "\\ImageTest";
    foreach (string fileName in Directory.GetFiles(folder, "*.bmp", SearchOption.TopDirectoryOnly))
    {
        lstimages.Add(Path.GetFullPath(fileName));
    }
    foreach (string fileName in Directory.GetFiles(folder, "*.jpg", SearchOption.TopDirectoryOnly))
    {
        lstimages.Add(Path.GetFullPath(fileName));
    }
}
// A disabled button3_Click handler used to be kept here as commented-out code. It saved the
// current camera frame to "aa.bmp", reloaded it and displayed it in pictureBox_XeRA and
// IF.pictureBox2, with the plate recognition call itself commented out.

/// <summary>
/// Initialises the main form: starts the camera capture and timers, creates the image form
/// and creates three OCR engines (full, letters only, digits only).
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void MainForm_Load_1(object sender, EventArgs e)
{
    capture = new Emgu.CV.Capture();
    timer2.Enabled = true;
    timer4.Start();
    IF = new ImageForm();

    // NOTE(review): m_path is read by the Init calls below but only assigned at the end of
    // this method. Confirm it has a valid value before this handler runs.

    // Application.Exit() does not stop this handler, so each failure path returns explicitly
    // instead of configuring an engine that failed to initialise.
    full_tesseract = new TesseractProcessor();
    bool succeed = full_tesseract.Init(m_path, m_lang, 3);
    if (!succeed)
    {
        MessageBox.Show("Tesseract initialization failed. The application will exit.");
        Application.Exit();
        return;
    }
    full_tesseract.SetVariable("tessedit_char_whitelist", "ABCDEFHKLMNPRSTVXY1234567890");

    ch_tesseract = new TesseractProcessor();
    succeed = ch_tesseract.Init(m_path, m_lang, 3);
    if (!succeed)
    {
        MessageBox.Show("Tesseract initialization failed. The application will exit.");
        Application.Exit();
        return;
    }
    ch_tesseract.SetVariable("tessedit_char_whitelist", "ABCDEFHKLMNPRSTUVXY");

    num_tesseract = new TesseractProcessor();
    succeed = num_tesseract.Init(m_path, m_lang, 3);
    if (!succeed)
    {
        MessageBox.Show("Tesseract initialization failed. The application will exit.");
        Application.Exit();
        return;
    }
    num_tesseract.SetVariable("tessedit_char_whitelist", "1234567890");

    m_path = System.Environment.CurrentDirectory + "\\";

    for (int i = 0; i < box.Length; i++)
    {
        box[i] = new PictureBox();
    }
}
/// <summary>
/// Recognises the first workspace image once per engine mode (0 to 3) and prints the
/// duration and the text for every mode.
/// </summary>
public static void Recognize()
{
    string fileName = Workspace.Images[0];
    string imageFile = Path.Combine(Workspace.InputFolder, fileName);
    const string language = "eng";

    using (TesseractProcessor processor = new TesseractProcessor())
    using (Bitmap bmp = Bitmap.FromFile(imageFile) as Bitmap)
    {
        for (int oem = 0; oem < 4; oem++)
        {
            // Skip any mode whose initialisation fails instead of recognising with an
            // engine in an unknown state.
            if (!processor.Init(Workspace.TessdataFolder, language, oem))
            {
                Console.WriteLine("FAIL " + oem.ToString());
                continue;
            }

            string text = "";
            unsafe
            {
                DateTime started = DateTime.Now;
                text = processor.Recognize(bmp);
                DateTime ended = DateTime.Now;

                Console.WriteLine("Duration recognition: {0} ms\n\n", (ended - started).TotalMilliseconds);
            }

            Console.WriteLine(
                string.Format("RecognizeMode: {1}\nRecognized Text:\n{0}\n++++++++++++++++++++++++++++++++\n",
                    text, ((eOcrEngineMode)oem).ToString()));
        }
    }
}
/// <summary>
/// Shows the options dialog. When it is confirmed, the data path, language and engine mode
/// are stored, the OCR engine is re-initialised and the outcome is reported.
/// </summary>
private void DoCommandShowOptions()
{
    // A form shown with ShowDialog is not disposed when it closes; dispose it explicitly.
    using (DlgOptions dlg = new DlgOptions())
    {
        dlg.DataPath = _tessData;

        if (dlg.ShowDialog(this) != DialogResult.OK)
        {
            return;
        }

        _tessData = dlg.DataPath;
        _lang = dlg.Language;
        _ocrEngineMode = (int)dlg.OcrEngineMode;

        bool status = _ocrProcessor.Init(_tessData, _lang, _ocrEngineMode);
        Console.WriteLine(string.Format("[DEBUG] Init status: {0}", status));

        string msg = string.Format(
            "{0} to initialize Tesseract Engine {1}.",
            (status ? "Succeed" : "Failed"),
            _ocrProcessor.GetTesseractEngineVersion());
        MessageBox.Show(msg);
    }
}
/// <summary>
/// Recognises a single line of text in a fixed image file and writes it to the response.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
/// <exception cref="InvalidOperationException">The OCR engine could not be initialised.</exception>
protected void Page_Load(object sender, EventArgs e)
{
    // Dispose the image when done so its resources are released.
    using (System.Drawing.Image image = System.Drawing.Image.FromFile(@"D:\Image\Capture1T.tif"))
    {
        m_tesseract = new TesseractProcessor();
        bool succeed = m_tesseract.Init(m_path, m_lang, (int)TesseractEngineMode.DEFAULT);
        if (!succeed)
        {
            // The failure branch used to be empty, so recognition ran on a failed engine.
            throw new InvalidOperationException("Tesseract initialization failed.");
        }

        m_tesseract.SetVariable(
            "tessedit_pageseg_mode",
            ((int)TesseractPageSegMode.PSM_SINGLE_LINE).ToString());

        m_tesseract.Clear();
        m_tesseract.ClearAdaptiveClassifier();

        string outValue = m_tesseract.Apply(image);
        Response.Write(outValue);
    }
}
/// <summary>
/// Recognises <c>phototest.tif</c> once per engine mode (0 to 3). For each mode it prints the
/// duration and the text, or <c>FAIL n</c> when that mode cannot be initialised.
/// </summary>
static void Simple1_Recognize()
{
    using (TesseractProcessor processor = new TesseractProcessor())
    using (Bitmap bmp = Bitmap.FromFile("phototest.tif") as Bitmap)
    {
        for (int mode = 0; mode < 4; mode++)
        {
            if (!processor.Init(TessdataFolder, "eng", mode))
            {
                Console.WriteLine("FAIL " + mode.ToString());
                continue;
            }

            string text;
            unsafe
            {
                // Only the recognition call itself is timed.
                DateTime started = DateTime.Now;
                text = processor.Recognize(bmp);
                DateTime ended = DateTime.Now;

                Console.WriteLine(
                    "Duration recognition: {0} ms\n\n",
                    (ended - started).TotalMilliseconds);
            }

            string report = string.Format(
                "RecognizeMode: {1}\nRecognized Text:\n{0}\n++++++++++++++++++++++++++++++++\n",
                text,
                ((eOcrEngineMode)mode).ToString());
            Console.WriteLine(report);
        }
    }
}
/// <summary>
/// Runs OCR with the combined Tesseract/Cube engine on a fixed test image and retrieves the
/// detected words.
/// </summary>
/// <remarks>
/// The results are not displayed anywhere; an older console-based variant of this test was
/// kept here as commented-out code and has been removed.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, RoutedEventArgs e)
{
    const string language = "eng";
    const string TessractData = @"C:\Users\Salman\Documents\GitHub\project-pinnacle\Research\Salman\OCRTest\tessdata\";
    const string imagePath = @"C:\Users\Salman\NUS\EE4001\Research\trial.tif";

    // Both the engine and the image are local to this click; release them when done.
    using (TesseractProcessor processor = new TesseractProcessor())
    using (System.Drawing.Image bmp = System.Drawing.Image.FromFile(imagePath))
    {
        processor.DoMonitor = true;

        bool initialized = processor.Init(TessractData, language, (int)eOcrEngineMode.TESSERACT_CUBE_COMBINED);
        if (!initialized)
        {
            // Do not run recognition on an engine that failed to start.
            System.Windows.MessageBox.Show("Failed to initialize tesseract.");
            return;
        }

        processor.Clear();
        processor.ClearAdaptiveClassifier();

        // NOTE(review): neither value is used afterwards; they look like debugger
        // inspection points.
        string result = processor.Apply(bmp);
        List<Word> detectedWords = processor.RetriveResultDetail();

        //this.UpdateImageViewer(detectedWords);
    }
}
/// <summary>
/// Runs OCR over the <c>rect</c> region of every image in order and concatenates the
/// recognised text.
/// </summary>
/// <param name="images">Images to recognise; their text is appended in list order.</param>
/// <param name="lang">Language code handed to the engine's <c>Init</c>.</param>
/// <returns>
/// The concatenated text with every "\n" replaced by <see cref="Environment.NewLine"/>, or
/// <see cref="String.Empty"/> as soon as the engine returns <c>null</c> for any image.
/// </returns>
public override string RecognizeText(IList<Image> images, string lang)
{
    string tessdata = Path.Combine(basedir, TESSDATA);

    // Dispose the engine when done; it was previously leaked on every call.
    using (TesseractProcessor processor = new TesseractProcessor())
    {
        processor.Init(tessdata, lang, oem);

        StringBuilder recognized = new StringBuilder();
        foreach (Image image in images)
        {
            string text = processor.Recognize(image, rect);
            if (text == null)
            {
                return String.Empty;
            }

            recognized.Append(text);
        }

        return recognized.ToString().Replace("\n", Environment.NewLine);
    }
}
/// <summary>
/// Recognises the English text of the <c>eurotext.tif</c> sample image and prints it.
/// </summary>
private static void Recognize()
{
    // Another sample in the same folder: phototest.tif
    string imageFile = @"D:\Self-Study\OpenSources\Tesseract\original\eurotext.tif";
    string tessdata = @"D:\Self-Study\OpenSources\Tesseract\original\tessdata\";
    const string language = "eng";
    const int oem = 3;

    // Dispose the engine when done; it was previously leaked.
    using (TesseractProcessor processor = new TesseractProcessor())
    {
        if (!processor.Init(tessdata, language, oem))
        {
            Console.WriteLine("Failed to initialize tesseract.");
            return;
        }

        using (Bitmap bmp = Bitmap.FromFile(imageFile) as Bitmap)
        {
            string text = processor.Recognize(bmp);
            Console.WriteLine(string.Format("Text:\n{0}\n", text));
        }
    }
}
/// <summary>
/// Recognises the English text of the first entry in <c>Images</c> with the default engine
/// mode and prints it, or reports that the engine could not be initialised.
/// </summary>
static void Simple_Recognize()
{
    string imageFile = Images[0];

    // Dispose the engine when done; it was previously leaked.
    using (TesseractProcessor processor = new TesseractProcessor())
    using (var bmp = Bitmap.FromFile(imageFile) as Bitmap)
    {
        var success = processor.Init(TessdataFolder, "eng", (int)eOcrEngineMode.OEM_DEFAULT);
        if (!success)
        {
            Console.WriteLine("Failed to initialize tesseract.");
            return;
        }

        string text = processor.Recognize(bmp);

        Console.WriteLine("Text:");
        Console.WriteLine("*****************************");
        Console.WriteLine(text);
        Console.WriteLine("*****************************");
    }
}
/// <summary>
/// Searches for a string in an image using OCR.
/// </summary>
/// <remarks>
/// <paramref name="inputString"/> is used as a regular-expression pattern. Because Tesseract
/// occasionally swaps look-alike characters (for example "m" and "rn", or "l" and "|"), the
/// pattern is tried against the recognised text and against variants of it in which one
/// such confusion has been substituted. With debug logging enabled, the recognised text is
/// logged with the prefix "text found: ".
/// The image is disposed before this method returns.
/// </remarks>
/// <param name="image">The image to search; disposed by this method.</param>
/// <param name="inputString">The string (regular-expression pattern) to search for.</param>
/// <returns>
/// <c>true</c> if the pattern matches the recognised text or one of its variants; otherwise
/// <c>false</c>, including when the engine fails to start or an exception is thrown.
/// </returns>
private static bool checkStringByOCR(Bitmap image, string inputString)
{
    // Look-alike substitutions tried on the recognised text: { replace this, with this }.
    string[,] confusions =
    {
        { "m", "rn" }, { "rn", "m" },
        { "l", "1" }, { "1", "l" },
        { "l", "i" }, { "i", "l" },
        { "t", "l" }, { "l", "t" },
        { "1", "i" }, { "i", "1" },
        { "I", "l" }, { "l", "I" },
        { "I", "1" }, { "1", "I" },
        { "I", "t" }, { "t", "I" },
        { "0", "o" }, { "o", "0" },
        { "0", "O" }, { "O", "0" },
        { "l", "|" }, { "|", "l" },
        { "I", "|" }, { "|", "I" },
        { "t", "|" }, { "|", "t" },
        { "i", "|" }, { "|", "i" },
        { "M", "II" }, { "II", "M" },
        { "ni", "m" }, { "m", "ni" }
    };

    using (var bmp = image)
    {
        try
        {
            // Tesseract is the OCR engine; dispose it when done (it was previously leaked).
            using (TesseractProcessor processor = new TesseractProcessor())
            {
                // Verify that Tesseract has been loaded successfully.
                var success = processor.Init(_ocrLangData, _ocrLanguageSelected, (int)eOcrEngineMode.OEM_DEFAULT);
                if (!success)
                {
                    LogUtils.Write(new StackFrame(0, true), LogUtils.ErrorLevel.Error, "Failed to start OCR engine");
                    Program.Finish(true);
                    return false;
                }

                // Extract the text from the image.
                string textInImage = processor.Recognize(bmp);

                if (_debugLogLevel)
                {
                    LogUtils.Write(new StackFrame(0, true), LogUtils.ErrorLevel.Debug, "text found: " + textInImage);
                }

                Regex pattern = new Regex(inputString);

                // Try the text exactly as recognised first.
                if (pattern.IsMatch(textInImage))
                {
                    return true;
                }

                // Then try each variant with one look-alike substitution applied.
                for (int i = 0; i < confusions.GetLength(0); i++)
                {
                    string variant = textInImage.Replace(confusions[i, 0], confusions[i, 1]);
                    if (pattern.IsMatch(variant))
                    {
                        return true;
                    }
                }

                return false;
            }
        }
        catch (Exception ex)
        {
            // Log the error and signal failure to the program.
            LogUtils.Write(ex);
            Program.Finish(true);
            return false;
        }
    }
}
/// <summary>
/// Stores the image and the calling form, then creates the OCR engine with monitoring
/// enabled and initialises it for German ("deu") with engine mode 0.
/// </summary>
/// <param name="img">Image kept in <c>baseIMG</c>.</param>
/// <param name="call">Form kept in <c>caller</c>.</param>
public parser(Image img, Form1 call)
{
    baseIMG = img;
    caller = call;

    // The language data is expected in the "tessdata" folder next to the executable.
    string tessdataFolder = Application.StartupPath + @"\tessdata";

    ocr = new TesseractProcessor();
    ocr.DoMonitor = true;
    ocr.Init(tessdataFolder, "deu", 0);
}
/// <summary>
/// Downloads the image at <paramref name="url"/> and recognises the digits (and '.') inside
/// <paramref name="rectangle"/>.
/// </summary>
/// <param name="url">Address of the image to download.</param>
/// <param name="rectangle">
/// Region of interest. Pass <see cref="Rectangle.Empty"/> to recognise the whole image.
/// </param>
/// <returns>
/// The recognised text without trailing line breaks, or <see cref="string.Empty"/> when the
/// response has no stream or the engine cannot be initialised.
/// </returns>
public static string Recognize(string url, Rectangle rectangle)
{
    WebRequest request = WebRequest.Create(url);

    // Release the response and its stream; both were previously leaked.
    using (WebResponse response = request.GetResponse())
    using (Stream st = response.GetResponseStream())
    {
        if (st == null)
        {
            return string.Empty;
        }

        string tessdata = Environment.CurrentDirectory + "\\tessdata\\";
        // Name of the trained data file (the part before ".traineddata").
        // Training guide: http://www.lixin.me/blog/2012/05/26/29536
        const string language = "eng";
        const int oem = 3;

        // NOTE(review): the bitmaps produced by FromStream and toGray() are intermediate
        // and are not disposed; only the final bitmap is.
        using (Bitmap bitmap = ((Bitmap)Bitmap.FromStream(st)).toGray().biLinear(2))
        using (TesseractProcessor processor = new TesseractProcessor())
        {
            if (!processor.Init(tessdata, language, oem))
            {
                return string.Empty;
            }

            // Rectangle is a struct, so the former "!= null" test was always true.
            // Compare against the empty rectangle to detect "no region given".
            if (rectangle != Rectangle.Empty)
            {
                processor.UseROI = true;
                processor.ROI = rectangle;
            }

            // Recognition variables must be set after Init. With custom-trained data the
            // whitelist may be unnecessary.
            processor.SetVariable("tessedit_char_whitelist", "0123456789.");

            string text = processor.Recognize(bitmap);

            // Strip trailing line breaks. The previous trim set contained the characters
            // '\' and 'n' rather than the newline character, so it never removed them.
            return text.TrimEnd('\r', '\n');
        }
    }
}
/// <summary>
/// Lets the user pick a template file, reads five regions from it, draws each
/// region on <c>templateViewer1</c>, runs OCR on "1.jpg" (from the start-up
/// folder) inside each region, and shows the concatenated text.
/// </summary>
/// <remarks>
/// The template file is expected to contain at least five regions separated
/// by '|', each written as "x;y;width;height".
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
/// <exception cref="FormatException">
/// The template file has fewer than five regions, or a region has fewer than
/// four fields or a non-numeric field.
/// </exception>
private void button3_Click(object sender, EventArgs e)
{
    string templateText;
    using (OpenFileDialog opn = new OpenFileDialog())
    {
        // A cancelled dialog leaves FileName empty, which StreamReader rejects.
        if (opn.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        using (StreamReader sr = new StreamReader(opn.FileName))
        {
            templateText = sr.ReadToEnd();
        }
    }

    // One setter per viewer slot, in the order the regions appear in the file.
    Action<Rectangle>[] regionSetters =
    {
        r => templateViewer1.RectA = r,
        r => templateViewer1.RectB = r,
        r => templateViewer1.RectC = r,
        r => templateViewer1.RectD = r,
        r => templateViewer1.RectE = r,
    };

    string[] regionSpecs = templateText.Split('|');
    if (regionSpecs.Length < regionSetters.Length)
    {
        throw new FormatException(
            "Template file must contain at least " + regionSetters.Length + " regions separated by '|'.");
    }

    // Both the image (which locks its file) and the native OCR engine are released when done.
    using (Image mg = Image.FromFile(Application.StartupPath + "\\1.jpg"))
    using (TesseractProcessor _ocrProcessor = new TesseractProcessor())
    {
        string _tessData = Application.StartupPath + "\\tessdata\\";
        string _lang = "eng";
        int _ocrEngineMode = 3;

        if (!_ocrProcessor.Init(_tessData, _lang, _ocrEngineMode))
        {
            MessageBox.Show("Failed to initialize tesseract.");
            return;
        }

        string text = string.Empty;
        for (int i = 0; i < regionSetters.Length; i++)
        {
            Rectangle region = ParseTemplateRegion(regionSpecs[i]);

            _ocrProcessor.ROI = region;
            _ocrProcessor.UseROI = true;

            templateViewer1.DrawRect(region);
            regionSetters[i](region);

            text += _ocrProcessor.Recognize(mg);
        }

        MessageBox.Show(text);
    }
}

/// <summary>
/// Parses one "x;y;width;height" template entry into a rectangle.
/// </summary>
/// <param name="spec">Semicolon-separated region description.</param>
/// <returns>The rectangle described by the first four fields.</returns>
/// <exception cref="FormatException">Fewer than four fields, or a field is not an integer.</exception>
private static Rectangle ParseTemplateRegion(string spec)
{
    string[] fields = spec.Split(';');
    if (fields.Length < 4)
    {
        throw new FormatException("Template region must have the form \"x;y;width;height\".");
    }

    return new Rectangle(
        Convert.ToInt32(fields[0]),
        Convert.ToInt32(fields[1]),
        Convert.ToInt32(fields[2]),
        Convert.ToInt32(fields[3]));
}
/// <summary>
/// Recognises the text in an image file using the "eng" language data and
/// the engine mode / page segmentation mode configured on this instance.
/// </summary>
/// <param name="imgFile">Path of the image file handed to the engine.</param>
/// <returns>
/// The recognised text after <c>CorrectOCRErrors</c> and
/// <c>CorrectLetterCases</c>, with "\n" replaced by
/// <see cref="Environment.NewLine"/>; an empty string if any exception
/// occurred (its details are then stored in <c>ErrMsg</c>).
/// </returns>
public string UseTesseract(string imgFile)
{
    this.ErrMsg = string.Empty;
    const string language = "eng";

    try
    {
        string recognized;
        using (TesseractProcessor engine = new TesseractProcessor())
        {
            engine.Init(this.TESSDATA, language, this.oem);

            // pageSegMode is stored as text and mapped onto the enum by name.
            ePageSegMode segMode = (ePageSegMode)Enum.Parse(typeof(ePageSegMode), this.pageSegMode);
            engine.SetPageSegMode(segMode);

#if DEBUG
            System.Diagnostics.Debug.WriteLine("processor:");
            System.Diagnostics.Debug.WriteLine(engine.GetTesseractEngineVersion());
#endif

            // The result is currently unused; the call is retained so that any
            // exception it raises for a malformed path is still reported via ErrMsg.
            string fileLabel = Path.GetFileNameWithoutExtension(imgFile);

            recognized = engine.Recognize(imgFile);
            if (!string.IsNullOrEmpty(recognized))
            {
                // First repair common OCR misreads, then normalise letter casing.
                recognized = this.CorrectLetterCases(this.CorrectOCRErrors(recognized));
            }
        }

        return recognized.Replace("\n", Environment.NewLine);
    }
    catch (Exception ex)
    {
        this.ErrMsg = ex.ToString();
        return string.Empty;
    }
}
/// <summary>
/// Loads "1.jpg" from the start-up folder into <c>imageViewer1</c>, runs
/// whole-image OCR on it, passes the detected words to
/// <c>UpdateImageViewer</c>, and then enables <c>button3</c> and <c>button4</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button2_Click(object sender, EventArgs e)
{
    Image source = Image.FromFile(Application.StartupPath + "\\1.jpg");
    imageViewer1.Initialize(new OCRRenderingData(), new OCRAnalysisRender(imageViewer1));
    imageViewer1.Image = source;

    // No region of interest: the engine works on the full image and the
    // viewer's first rectangle is reset to zero size.
    TesseractProcessor engine = new TesseractProcessor();
    engine.UseROI = false;
    imageViewer1.RectA = new Rectangle(0, 0, 0, 0);

    bool initialized = engine.Init(Application.StartupPath + "\\tessdata\\", "eng", 3);
    Console.WriteLine("[DEBUG] Init status: {0}", initialized);

    // Page segmentation mode 3: fully automatic page segmentation.
    const int fullyAutomaticPsm = 3;
    engine.SetVariable("tessedit_pageseg_mode", fullyAutomaticPsm.ToString());

    engine.Clear();
    engine.ClearAdaptiveClassifier();

    // The text returned by Apply is not needed here; only the per-word details are used.
    engine.Apply(source);
    List<Word> words = engine.RetriveResultDetail();
    this.UpdateImageViewer(words);

    // NOTE(review): the engine is never disposed. Confirm whether the Word
    // results remain valid after disposal before wrapping it in a using block.
    button3.Enabled = true;
    button4.Enabled = true;
}
// NOTE: a disabled GetNumber(Bitmap) variant was kept here as a block comment.
// It followed the same steps as GetChiSim but used UnCodebase.DealMap2, a
// separate ocrNumber engine, and a whitelist of digits, '.', ',' and space.

/// <summary>
/// Pre-processes <paramref name="image"/> with <c>UnCodebase.DealText</c>,
/// shows the pre-processor's text in <c>textBox3</c>, and recognises the
/// processed image with the shared "chi_sim" Tesseract engine.
/// </summary>
/// <param name="image">Bitmap to pre-process and recognise.</param>
/// <returns>
/// The recognised text, or an empty string when the engine could not be
/// initialised (a message box is shown in that case).
/// </returns>
private string GetChiSim(Bitmap image)
{
    UnCodebase preprocessor = new UnCodebase(image);
    preprocessor.DealText();
    Bitmap prepared = preprocessor.image;
    textBox3.Text = preprocessor.txt;

    // The engine is created on first use and reused afterwards; bInitT
    // records whether initialisation has already succeeded.
    if (!bInitT)
    {
        ocrText = new TesseractProcessor();
        bInitT = ocrText.Init(@".\", "chi_sim", 3);
        if (!bInitT)
        {
            MessageBox.Show("智能识别引擎初始化失败!");
            return "";
        }

        // Characters the engine must never emit (digits, Latin letters and a few symbols).
        ocrText.SetVariable("tessedit_char_blacklist", "1234567890μqwertyuiopasdfghjklzxcvbnm'I flU]M.H flO");
    }

    // Reset per-image state so earlier recognitions do not influence this one.
    ocrText.Clear();
    ocrText.ClearAdaptiveClassifier();

    return ocrText.Recognize(prepared);
}