/// <summary>
/// Loads "1.jpg" from the application start-up folder into <c>templateViewer1</c>, runs OCR on
/// the five regions (RectA..RectE) currently defined in the viewer, and shows the concatenated
/// text followed by the serialized region coordinates.
/// </summary>
/// <remarks>
/// Each region is appended to the <c>rec</c> member as "X;Y;Width;Height"; entries are separated
/// by '|' with no trailing separator.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    Image mg = Image.FromFile(Application.StartupPath + "\\1.jpg");
    templateViewer1.Initialize(new OCRRenderingData(), new OCRAnalysisRender(templateViewer1));
    // The image is handed to the viewer, so it is deliberately not disposed in this handler.
    templateViewer1.Image = mg;

    string tessData = Application.StartupPath + "\\tessdata\\";
    string lang = "eng";
    int ocrEngineMode = 3;

    // The processor is disposable; release it as soon as the handler is done with it.
    using (TesseractProcessor ocrProcessor = new TesseractProcessor())
    {
        if (!ocrProcessor.Init(tessData, lang, ocrEngineMode))
        {
            MessageBox.Show("Failed to initialize tesseract.");
            return;
        }

        Rectangle[] regions =
        {
            templateViewer1.RectA,
            templateViewer1.RectB,
            templateViewer1.RectC,
            templateViewer1.RectD,
            templateViewer1.RectE
        };

        string text = string.Empty;
        for (int i = 0; i < regions.Length; i++)
        {
            Rectangle region = regions[i];
            ocrProcessor.ROI = region;

            rec += region.X + ";" + region.Y + ";" + region.Width + ";" + region.Height;
            if (i < regions.Length - 1)
            {
                rec += "|";
            }

            ocrProcessor.UseROI = true;
            text += ocrProcessor.Recognize(mg);
        }

        MessageBox.Show(text);
        MessageBox.Show(rec);
    }
}
/// <summary>
/// Console entry point: recognizes the text in the image file named by the first command-line
/// argument and prints it between separator lines.
/// </summary>
/// <param name="args">Command-line arguments; <c>args[0]</c> is the path of the image file.</param>
public static void Main(string[] args)
{
    const string language = "eng";

    if (args == null || args.Length == 0)
    {
        // Previously this case crashed with IndexOutOfRangeException on args[0].
        Console.WriteLine("Usage: pass the path of the image file as the first argument.");
    }
    else
    {
        string imageFile = args[0];

        using (TesseractProcessor processor = new TesseractProcessor())
        using (Bitmap bmp = Bitmap.FromFile(imageFile) as Bitmap)
        {
            var success = processor.Init(TessractData, language, (int)eOcrEngineMode.OEM_DEFAULT);
            if (!success)
            {
                Console.WriteLine("Failed to initialize tesseract.");
            }
            else
            {
                // use thresholder
                processor.UseThresholder();

                string text = processor.Recognize(bmp);

                Console.WriteLine("Text:");
                Console.WriteLine("*****************************");
                Console.WriteLine(text);
                Console.WriteLine("*****************************");
            }
        }
    }

    Console.WriteLine("Press any key to exit.");
    Console.ReadKey();
}
/// <summary>
/// Recognizes the text of every image in <paramref name="images"/> using the configured
/// page segmentation mode and returns the concatenated result.
/// </summary>
/// <param name="images">Images to recognize, processed in order.</param>
/// <param name="lang">Language name passed to the processor's <c>Init</c>.</param>
/// <returns>
/// The concatenated text with "\n" replaced by <see cref="Environment.NewLine"/>, or an empty
/// string as soon as any image yields a null result.
/// </returns>
public override string RecognizeText(IList<Image> images, string lang)
{
    string tessdata = Path.Combine(basedir, TESSDATA);

    // Dispose the processor on every exit path, including the early return below.
    using (TesseractProcessor processor = new TesseractProcessor())
    {
        processor.Init(tessdata, lang, oem);
        processor.SetPageSegMode((ePageSegMode)Enum.Parse(typeof(ePageSegMode), PSM));

        StringBuilder strB = new StringBuilder();
        foreach (Image image in images)
        {
            string text = processor.Recognize(image);
            if (text == null)
            {
                // A null result discards everything recognized so far.
                return String.Empty;
            }

            strB.Append(text);
        }

        return strB.ToString().Replace("\n", Environment.NewLine);
    }
}
/// <summary>
/// Loads "1.jpg" from the application start-up folder into <c>imageViewer1</c>, runs OCR on the
/// viewer's RectA region and shows the recognized text.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    Image mg = Image.FromFile(Application.StartupPath + "\\1.jpg");
    imageViewer1.Initialize(new OCRRenderingData(), new OCRAnalysisRender(imageViewer1));
    // The image is handed to the viewer, so it is deliberately not disposed in this handler.
    imageViewer1.Image = mg;

    string tessData = Application.StartupPath + "\\tessdata\\";
    string lang = "eng";
    int ocrEngineMode = 3;

    // The processor is disposable; release it when the handler finishes.
    using (TesseractProcessor ocrProcessor = new TesseractProcessor())
    {
        if (!ocrProcessor.Init(tessData, lang, ocrEngineMode))
        {
            MessageBox.Show("Failed to initialize tesseract.");
            return;
        }

        ocrProcessor.ROI = imageViewer1.RectA;
        ocrProcessor.UseROI = true;

        string text = ocrProcessor.Recognize(mg);
        MessageBox.Show(text);
    }
}
/// <summary>
/// Downloads the image at <paramref name="url"/>, converts it with <c>toGray()</c> and
/// <c>biLinear(2)</c>, and recognizes digits and '.' in it, skipping the leftmost 24 pixels.
/// </summary>
/// <param name="url">Address of the image to download.</param>
/// <returns>
/// The recognized text without trailing line breaks, or an empty string when the response has
/// no stream, the engine fails to initialize, or recognition throws.
/// </returns>
/// <remarks>Exceptions from creating or sending the web request are not caught here.</remarks>
public static string Recognize(string url)
{
    WebRequest request = WebRequest.Create(url);

    // The response and its stream were previously never released.
    using (WebResponse response = request.GetResponse())
    using (Stream st = response.GetResponseStream())
    {
        if (st == null)
        {
            return string.Empty;
        }

        try
        {
            string tessdata = Environment.CurrentDirectory + "\\tessdata\\";
            string language = "eng"; // name of the trained-data file (the part before ".traineddata")
            int oem = 3;

            // Training guide: http://www.lixin.me/blog/2012/05/26/29536
            using (Bitmap bitmap = ((Bitmap)Bitmap.FromStream(st)).toGray().biLinear(2))
            using (TesseractProcessor processor = new TesseractProcessor())
            {
                // Initialize the engine; recognition cannot work without it.
                if (!processor.Init(tessdata, language, oem))
                {
                    return string.Empty;
                }

                // Restrict recognition to the region of interest (everything right of x = 24).
                processor.UseROI = true;
                processor.ROI = new Rectangle(24, 0, bitmap.Width - 24, bitmap.Height);

                // Recognition variables must be set after initialization. With custom-trained
                // data this whitelist may be unnecessary.
                processor.SetVariable("tessedit_char_whitelist", "0123456789.");

                string text = processor.Recognize(bitmap);

                // The original trimmed the characters '\\' and 'n' (a mistyped "\n\n"), which
                // left the trailing line breaks in place; trim real line breaks instead.
                return text.TrimEnd('\r', '\n');
            }
        }
        catch (Exception)
        {
            return string.Empty;
        }
    }
}
/// <summary>
/// Runs OCR on every .png/.jpg/.bmp file in the folder typed into <c>textBox_Path</c> and copies
/// each file into a "new" sub-folder, named after the first amount (digits, a dot, two digits)
/// found in the recognized text, or under its original name when no amount is found or
/// recognition fails. Finally opens the "new" folder.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    string d = textBox_Path.Text;
    if (!Directory.Exists(d))
    {
        MessageBox.Show("目录" + d + "不存在");
        return;
    }

    if (!d.EndsWith("\\"))
    {
        d += "\\";
    }

    string d1 = d + "new\\";
    if (!Directory.Exists(d1))
    {
        Directory.CreateDirectory(d1);
    }

    Regex reg = new Regex(@"\d+\.\d\d", RegexOptions.Compiled);

    foreach (string f in Directory.GetFiles(textBox_Path.Text))
    {
        if (!f.EndsWith(".png", StringComparison.OrdinalIgnoreCase) &&
            !f.EndsWith(".jpg", StringComparison.OrdinalIgnoreCase) &&
            !f.EndsWith(".bmp", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        // Both the bitmap and the processor are released even if the fallback copy throws.
        using (Bitmap bmp = new Bitmap(f))
        using (TesseractProcessor process = new TesseractProcessor())
        {
            process.Init(System.Environment.CurrentDirectory + "\\", "chi_sim", (int)eOcrEngineMode.OEM_DEFAULT);
            // Set the page segmentation mode after Init, matching the other callers in this file.
            process.SetPageSegMode(ePageSegMode.PSM_SINGLE_LINE);

            try
            {
                string result = process.Recognize(bmp);
                Match m = reg.Match(result);
                if (m.Success)
                {
                    // Files that yield the same amount overwrite each other (overwrite = true).
                    string amount = m.ToString();
                    File.Copy(f, d1 + amount + Path.GetExtension(f), true);
                }
                else
                {
                    File.Copy(f, d1 + Path.GetFileName(f), true);
                }
            }
            catch (Exception)
            {
                MessageBox.Show("识别文件出错:" + f);
                File.Copy(f, d1 + Path.GetFileName(f), true);
            }
        }
    }

    System.Diagnostics.Process.Start(d1);
}
/// <summary>
/// Recognizes English text in <paramref name="theBitmap"/> and appends it, followed by a line
/// feed, to <c>richTextBox1</c>. Nothing is appended when the engine fails to initialize.
/// </summary>
/// <param name="theBitmap">Bitmap to run OCR on.</param>
public void StartOCR(Bitmap theBitmap)
{
    const string lang = "eng";
    string tessdataDir = Application.StartupPath + @"\tessdata\";

    using (TesseractProcessor ocr = new TesseractProcessor())
    {
        bool ready = ocr.Init(tessdataDir, lang, (int)eOcrEngineMode.OEM_DEFAULT);
        if (!ready)
        {
            return;
        }

        string recognized = ocr.Recognize(theBitmap);
        richTextBox1.AppendText(recognized + "\n");
    }
}
/// <summary>
/// Recognizes the first workspace image once per OCR engine mode 0..3 and prints the duration
/// and the recognized text for each mode.
/// </summary>
public static void Recognize()
{
    if (Workspace.Images == null || Workspace.Images.Length == 0)
    {
        // Previously this case crashed when indexing the empty image list.
        Console.WriteLine("No images to recognize.");
        return;
    }

    string imageFile = Path.Combine(Workspace.InputFolder, Workspace.Images[0]);
    const string language = "eng";

    using (TesseractProcessor processor = new TesseractProcessor())
    using (Bitmap bmp = Bitmap.FromFile(imageFile) as Bitmap)
    {
        for (int oem = 0; oem < 4; oem++)
        {
            // Skip modes the engine cannot initialize instead of recognizing with a bad state.
            if (!processor.Init(Workspace.TessdataFolder, language, oem))
            {
                Console.WriteLine("FAIL " + oem.ToString());
                continue;
            }

            // Stopwatch gives a monotonic, higher-resolution measurement than DateTime.Now.
            System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
            string text = processor.Recognize(bmp);
            timer.Stop();

            Console.WriteLine("Duration recognition: {0} ms\n\n", timer.Elapsed.TotalMilliseconds);

            Console.WriteLine(
                string.Format("RecognizeMode: {1}\nRecognized Text:\n{0}\n++++++++++++++++++++++++++++++++\n",
                    text, ((eOcrEngineMode)oem).ToString()));
        }
    }
}
/// <summary>
/// Recognizes "phototest.tif" once per OCR engine mode 0..3, printing the elapsed time and the
/// recognized text for each mode, or "FAIL n" when the engine cannot be initialized for mode n.
/// </summary>
static void Simple1_Recognize()
{
    using (TesseractProcessor ocr = new TesseractProcessor())
    using (Bitmap picture = Bitmap.FromFile("phototest.tif") as Bitmap)
    {
        for (int mode = 0; mode < 4; mode++)
        {
            if (!ocr.Init(TessdataFolder, "eng", mode))
            {
                Console.WriteLine("FAIL " + mode.ToString());
                continue;
            }

            DateTime begin = DateTime.Now;
            string recognized = ocr.Recognize(picture);
            DateTime finish = DateTime.Now;

            TimeSpan elapsed = finish - begin;
            Console.WriteLine("Duration recognition: {0} ms\n\n", elapsed.TotalMilliseconds);

            string modeName = ((eOcrEngineMode)mode).ToString();
            Console.WriteLine(
                string.Format("RecognizeMode: {1}\nRecognized Text:\n{0}\n++++++++++++++++++++++++++++++++\n",
                    recognized, modeName));
        }
    }
}
/// <summary>
/// Recognizes the English text of the hard-coded sample image "eurotext.tif" and prints it.
/// </summary>
private static void Recognize()
{
    // The original first assigned the "phototest.tif" path and immediately overwrote it; only
    // the effective value is kept.
    string imageFile = @"D:\Self-Study\OpenSources\Tesseract\original\eurotext.tif";
    string tessdata = @"D:\Self-Study\OpenSources\Tesseract\original\tessdata\";
    string language = "eng";
    int oem = 3;

    // The processor is disposable; release it when done.
    using (TesseractProcessor processor = new TesseractProcessor())
    {
        if (!processor.Init(tessdata, language, oem))
        {
            Console.WriteLine("Failed to initialize tesseract.");
            return;
        }

        using (Bitmap bmp = Bitmap.FromFile(imageFile) as Bitmap)
        {
            string text = processor.Recognize(bmp);

            Console.WriteLine(
                string.Format("Text:\n{0}\n", text));
        }
    }
}
/// <summary>
/// Recognizes the text inside the <c>rect</c> region of every image in
/// <paramref name="images"/> and returns the concatenated result.
/// </summary>
/// <param name="images">Images to recognize, processed in order.</param>
/// <param name="lang">Language name passed to the processor's <c>Init</c>.</param>
/// <returns>
/// The concatenated text with "\n" replaced by <see cref="Environment.NewLine"/>, or an empty
/// string as soon as any image yields a null result.
/// </returns>
public override string RecognizeText(IList<Image> images, string lang)
{
    string tessdata = Path.Combine(basedir, TESSDATA);

    // Dispose the processor on every exit path, including the early return below.
    using (TesseractProcessor processor = new TesseractProcessor())
    {
        processor.Init(tessdata, lang, oem);

        StringBuilder strB = new StringBuilder();
        foreach (Image image in images)
        {
            string text = processor.Recognize(image, rect);
            if (text == null)
            {
                // A null result discards everything recognized so far.
                return String.Empty;
            }

            strB.Append(text);
        }

        return strB.ToString().Replace("\n", Environment.NewLine);
    }
}
/// <summary>
/// Recognizes the English text of the first entry in <c>Images</c> and prints it between
/// separator lines, or prints a failure message when the engine cannot be initialized.
/// </summary>
static void Simple_Recognize()
{
    string imageFile = Images[0];

    // The processor is disposable; release it together with the bitmap.
    using (TesseractProcessor processor = new TesseractProcessor())
    using (var bmp = Bitmap.FromFile(imageFile) as Bitmap)
    {
        var success = processor.Init(TessdataFolder, "eng", (int)eOcrEngineMode.OEM_DEFAULT);
        if (!success)
        {
            Console.WriteLine("Failed to initialize tesseract.");
            return;
        }

        string text = processor.Recognize(bmp);

        Console.WriteLine("Text:");
        Console.WriteLine("*****************************");
        Console.WriteLine(text);
        Console.WriteLine("*****************************");
    }
}
/// <summary>
/// Recognizes the English text of the image file at <paramref name="imgFile"/>, passes any
/// non-empty result through <c>CorrectOCRErrors</c> and <c>CorrectLetterCases</c>, and
/// converts "\n" to <see cref="Environment.NewLine"/>.
/// </summary>
/// <param name="imgFile">Path of the image file to recognize.</param>
/// <returns>
/// The processed text, or an empty string when any exception occurs; in that case
/// <c>ErrMsg</c> holds the exception text, otherwise it is empty.
/// </returns>
public string UseTesseract(string imgFile)
{
    const string language = "eng";

    this.ErrMsg = string.Empty;
    string recognized = string.Empty;

    try
    {
        using (TesseractProcessor ocr = new TesseractProcessor())
        {
            ocr.Init(this.TESSDATA, language, this.oem);

            ePageSegMode segMode = (ePageSegMode)Enum.Parse(typeof(ePageSegMode), this.pageSegMode);
            ocr.SetPageSegMode(segMode);

#if DEBUG
            System.Diagnostics.Debug.WriteLine("processor:");
            System.Diagnostics.Debug.WriteLine(ocr.GetTesseractEngineVersion());
#endif

            // Result is not used; the call is kept so that any exception it raises for the
            // given path still surfaces exactly as before.
            Path.GetFileNameWithoutExtension(imgFile);

            recognized = ocr.Recognize(imgFile);

            bool hasText = !string.IsNullOrEmpty(recognized);
            if (hasText)
            {
                // First fix common OCR mistakes, then normalize letter cases.
                recognized = this.CorrectOCRErrors(recognized);
                recognized = this.CorrectLetterCases(recognized);
            }
        }

        recognized = recognized.Replace("\n", Environment.NewLine);
    }
    catch (Exception ex)
    {
        this.ErrMsg = ex.ToString();
        recognized = string.Empty;
    }

    return recognized;
}
/// <summary>
/// Lets the user pick a region file, reads five '|'-separated "X;Y;Width;Height" entries from
/// it, draws each region in <c>templateViewer1</c>, stores it as RectA..RectE, runs OCR on
/// that region of "1.jpg" (from the application start-up folder) and shows the combined text.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button3_Click(object sender, EventArgs e)
{
    const int regionCount = 5;

    string line;
    using (OpenFileDialog opn = new OpenFileDialog())
    {
        // A cancelled dialog leaves FileName empty, which used to crash the StreamReader.
        if (opn.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        using (StreamReader sr = new StreamReader(opn.FileName))
        {
            line = sr.ReadToEnd();
        }
    }

    string[] str = line.Split('|');
    if (str.Length < regionCount)
    {
        MessageBox.Show("The selected file does not contain " + regionCount + " regions.");
        return;
    }

    // One setter per viewer property, in the same order as the entries in the file.
    Action<Rectangle>[] storeRegion =
    {
        delegate(Rectangle r) { templateViewer1.RectA = r; },
        delegate(Rectangle r) { templateViewer1.RectB = r; },
        delegate(Rectangle r) { templateViewer1.RectC = r; },
        delegate(Rectangle r) { templateViewer1.RectD = r; },
        delegate(Rectangle r) { templateViewer1.RectE = r; }
    };

    string tessData = Application.StartupPath + "\\tessdata\\";
    string lang = "eng";
    int ocrEngineMode = 3;

    using (Image mg = Image.FromFile(Application.StartupPath + "\\1.jpg"))
    using (TesseractProcessor ocrProcessor = new TesseractProcessor())
    {
        if (!ocrProcessor.Init(tessData, lang, ocrEngineMode))
        {
            MessageBox.Show("Failed to initialize tesseract.");
            return;
        }

        string text = string.Empty;
        for (int i = 0; i < regionCount; i++)
        {
            // The fifth region used to be built from its X value four times; every region is
            // now parsed the same way from its own four fields.
            Rectangle region = ParseRoiRectangle(str[i]);

            ocrProcessor.ROI = region;
            ocrProcessor.UseROI = true;
            templateViewer1.DrawRect(region);
            storeRegion[i](region);

            text += ocrProcessor.Recognize(mg);
        }

        MessageBox.Show(text);
    }
}

/// <summary>Parses one "X;Y;Width;Height" entry into a rectangle.</summary>
private static Rectangle ParseRoiRectangle(string entry)
{
    string[] fields = entry.Split(';');
    return new Rectangle(
        Convert.ToInt32(fields[0]),
        Convert.ToInt32(fields[1]),
        Convert.ToInt32(fields[2]),
        Convert.ToInt32(fields[3]));
}
/// <summary>
/// Downloads the image at <paramref name="url"/>, converts it with <c>toGray()</c> and
/// <c>biLinear(2)</c>, and recognizes digits and '.' in it, optionally restricted to
/// <paramref name="rectangle"/>.
/// </summary>
/// <param name="url">Address of the image to download.</param>
/// <param name="rectangle">
/// Region of interest; pass <see cref="Rectangle.Empty"/> to recognize the whole image.
/// </param>
/// <returns>
/// The recognized text without trailing line breaks, or an empty string when the response has
/// no stream or the engine fails to initialize.
/// </returns>
/// <remarks>Exceptions from the download or from recognition are not caught here.</remarks>
public static string Recognize(string url, Rectangle rectangle)
{
    WebRequest request = WebRequest.Create(url);

    // The response and its stream were previously never released.
    using (WebResponse response = request.GetResponse())
    using (Stream st = response.GetResponseStream())
    {
        if (st == null)
        {
            return string.Empty;
        }

        string tessdata = Environment.CurrentDirectory + "\\tessdata\\";
        string language = "eng"; // name of the trained-data file (the part before ".traineddata")
        int oem = 3;

        // Training guide: http://www.lixin.me/blog/2012/05/26/29536
        using (Bitmap bitmap = ((Bitmap)Bitmap.FromStream(st)).toGray().biLinear(2))
        using (TesseractProcessor processor = new TesseractProcessor())
        {
            // Initialize the engine; recognition cannot work without it.
            if (!processor.Init(tessdata, language, oem))
            {
                return string.Empty;
            }

            // Rectangle is a struct, so the former "!= null" test was always true; only a
            // non-empty rectangle is a meaningful region of interest.
            if (rectangle != Rectangle.Empty)
            {
                processor.UseROI = true;
                processor.ROI = rectangle;
            }

            // Recognition variables must be set after initialization. With custom-trained
            // data this whitelist may be unnecessary.
            processor.SetVariable("tessedit_char_whitelist", "0123456789.");

            string text = processor.Recognize(bitmap);

            // The original trimmed the characters '\\' and 'n' (a mistyped "\n\n"), which left
            // the trailing line breaks in place; trim real line breaks instead.
            return text.TrimEnd('\r', '\n');
        }
    }
}
/// <summary>
/// Searches for a string in an image using OCR.
/// </summary>
/// <param name="image">The image to search; it is disposed before this method returns.</param>
/// <param name="inputString">The regular-expression pattern to search for.</param>
/// <returns>
/// true if the pattern matches the recognized text, either as recognized or after one of the
/// common OCR character confusions has been substituted; otherwise false. Also returns false
/// (after logging and calling <c>Program.Finish(true)</c>) when the OCR engine cannot be
/// started or any exception occurs.
/// </returns>
private static bool checkStringByOCR(Bitmap image, string inputString)
{
    using (var bmp = image)
    {
        try
        {
            // Tesseract is the OCR engine; the processor is disposable, so release it when done.
            using (TesseractProcessor processor = new TesseractProcessor())
            {
                // Verify that Tesseract has been loaded successfully.
                var success = processor.Init(_ocrLangData, _ocrLanguageSelected, (int)eOcrEngineMode.OEM_DEFAULT);
                if (!success)
                {
                    LogUtils.Write(new StackFrame(0, true), LogUtils.ErrorLevel.Error, "Failed to start OCR engine");
                    Program.Finish(true);
                    return false;
                }

                // Extract the text from the image.
                string textInImage = processor.Recognize(bmp);

                // With debug logging enabled, search the log for "text found: " to see what
                // Tesseract actually recognized.
                if (_debugLogLevel)
                {
                    LogUtils.Write(new StackFrame(0, true), LogUtils.ErrorLevel.Debug, "text found: " + textInImage);
                }

                return MatchesAllowingOcrConfusions(textInImage, inputString);
            }
        }
        catch (Exception ex)
        {
            LogUtils.Write(ex);
            Program.Finish(true);
            return false;
        }
    }
}

/// <summary>
/// Checks whether the pattern matches the text as-is or after applying any single one of the
/// known OCR look-alike substitutions (for example "m" read as "rn", or "l" read as "|").
/// </summary>
private static bool MatchesAllowingOcrConfusions(string text, string pattern)
{
    if (Regex.IsMatch(text, pattern))
    {
        return true;
    }

    // Each row is { recognized, replacement }; one substitution is tried at a time.
    string[,] confusions =
    {
        { "m", "rn" }, { "rn", "m" },
        { "l", "1" }, { "1", "l" },
        { "l", "i" }, { "i", "l" },
        { "t", "l" }, { "l", "t" },
        { "1", "i" }, { "i", "1" },
        { "I", "l" }, { "l", "I" },
        { "I", "1" }, { "1", "I" },
        { "I", "t" }, { "t", "I" },
        { "0", "o" }, { "o", "0" },
        { "0", "O" }, { "O", "0" },
        { "l", "|" }, { "|", "l" },
        { "I", "|" }, { "|", "I" },
        { "t", "|" }, { "|", "t" },
        { "i", "|" }, { "|", "i" },
        { "M", "II" }, { "II", "M" },
        { "ni", "m" }, { "m", "ni" }
    };

    for (int i = 0; i < confusions.GetLength(0); i++)
    {
        string candidate = text.Replace(confusions[i, 0], confusions[i, 1]);
        if (Regex.IsMatch(candidate, pattern))
        {
            return true;
        }
    }

    return false;
}