/// <summary>
/// Console entry point: calls <c>Img()</c>, then runs OCR on "20.png" located in the
/// application base directory and prints every recognized word with its confidence.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    Img();

    string path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "20.png");
    // Folder holding the language data; see http://code.google.com/p/tesseract-ocr/downloads/list
    string language = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Language");

    // The bitmap and the OCR engine are both disposable; release them deterministically.
    // (The former try/catch only re-threw, so exceptions propagate exactly as before.)
    using (Bitmap image = new Bitmap(path))
    using (tessnet2.Tesseract ocr = new tessnet2.Tesseract())
    {
        // To restrict the recognized characters, set a whitelist before Init, e.g.:
        // ocr.SetVariable("tessedit_char_whitelist", "0123456789+-=");
        ocr.Init(language, "eng", false);

        List<tessnet2.Word> result = ocr.DoOCR(image, Rectangle.Empty);
        foreach (tessnet2.Word word in result)
        {
            Console.WriteLine("{0} : {1}", word.Confidence, word.Text);
        }
    }
}
/// <summary>
/// Do the actual OCR reading
/// </summary>
/// <remarks>For good results, make sure:
/// The text is large enough
/// The background and foreground of the text are in contrast, and each has its same color
/// Try to give a bitmap with less "noise" as possible
/// A word is kept only when its Confidence is below 255 * (120 - accuracy) / 100.
/// </remarks>
/// <returns>the text that was extracted from the image</returns>
public string ProcessImage()
{
    // The engine is disposable (it wraps native Tesseract); release it when done.
    using (tessnet2.Tesseract ocr = new tessnet2.Tesseract())
    {
        // Only restrict the character set when a whitelist was actually supplied
        // (a null whitelist is now treated the same as an empty one).
        if (!string.IsNullOrEmpty(allowedChars))
        {
            ocr.SetVariable("tessedit_char_whitelist", allowedChars);
        }

        ocr.Init(ocrPath + @"\tessdata", "eng", false);
        List<tessnet2.Word> result = ocr.DoOCR(imageToRead, Rectangle.Empty);

        StringBuilder builder = new StringBuilder();
        int lastLine = 1;
        double confidenceLimit = 255.0 * (120.0 - accuracy) / 100.0;

        foreach (tessnet2.Word word in result)
        {
            if (word.Confidence >= confidenceLimit)
            {
                continue;
            }

            // Start a new output line whenever the word belongs to a later line.
            if (lastLine < word.LineIndex)
            {
                builder.Append("\n");
                lastLine = word.LineIndex;
            }

            builder.Append(word.Text).Append(" ");
        }

        return builder.ToString();
    }
}
/// <summary>
/// Downloads a verification-code (captcha) image, cleans it up and recognizes its digits.
/// </summary>
/// <param name="url">Address of the captcha image.</param>
/// <param name="webCookie">Cookie container passed to the download helper.</param>
/// <param name="bmpImage">Receives the processed bitmap, or null when anything fails.</param>
/// <returns>The text of the first recognized word, or an empty string on failure.</returns>
public static string GetValidateCode(string url, CookieContainer webCookie, out Bitmap bmpImage)
{
    try
    {
        // Fetch the captcha; the downloaded image is only needed until it has been copied.
        UnCodebase ud;
        using (Image headImage = HttpHelps.GetPicture(url, webCookie))
        {
            ud = new UnCodebase(new Bitmap(headImage));
        }

        // Pre-process: grayscale conversion followed by noise removal.
        ud.GrayByPixels();
        ud.ClearNoise(128, 2);

        // Recognize; the engine is disposable, so release it as soon as the text is read.
        string validateCode;
        using (tessnet2.Tesseract ocr = new tessnet2.Tesseract())
        {
            ocr.SetVariable("tessedit_char_whitelist", "0123456789"); // digits only
            ocr.Init(Application.StartupPath + @"\\tmpe", "eng", true);
            List<tessnet2.Word> result = ocr.DoOCR(ud.bmpobj, Rectangle.Empty);

            // An empty result throws here and is reported by the catch block below.
            validateCode = result[0].Text;
        }

        bmpImage = ud.bmpobj;
        return validateCode;
    }
    catch (Exception ex)
    {
        function.log("验证码获取错误" + ex.Message);
        bmpImage = null;
        return "";
    }
}
/// <summary>
/// Runs OCR over one image or over all images and returns the recognized text,
/// one output line per recognized text line.
/// </summary>
/// <param name="images">Candidate images; each is cast to <see cref="Bitmap"/>.</param>
/// <param name="index">-1 to process every image, otherwise the index of the single image to process.</param>
/// <param name="lang">Language code handed to the engine's Init.</param>
/// <returns>The accumulated text, or an empty string as soon as any image yields a null result.</returns>
public string RecognizeText(IList<Image> images, int index, string lang)
{
    using (var engine = new tessnet2.Tesseract())
    {
        engine.Init(lang, false);

        // Pick the working set: everything, or just the requested image.
        IList<Image> selected = index == -1
            ? images
            : new List<Image> { images[index] };

        var text = new StringBuilder();
        foreach (Bitmap page in selected)
        {
            // The event is still created per image although the synchronous path never waits on it.
            m_event = new ManualResetEvent(false);

            List<tessnet2.Word> words = engine.DoOCR(page, rect);
            if (words == null)
            {
                return String.Empty;
            }

            int line = 0;
            while (line < tessnet2.Tesseract.LineCount(words))
            {
                text.AppendLine(tessnet2.Tesseract.GetLineText(words, line));
                line++;
            }
        }

        return text.ToString();
    }
}
/// <summary>
/// Runs a synchronous OCR pass over <paramref name="image"/> and dumps the result.
/// </summary>
/// <param name="image">Bitmap to recognize.</param>
/// <param name="lang">Language code handed to the engine's Init.</param>
/// <returns>The recognized words as returned by DoOCR.</returns>
public List<tessnet2.Word> DoOcrNormal(Bitmap image, string lang)
{
    // The engine is disposable; release it once recognition has produced its word list.
    using (var ocr = new tessnet2.Tesseract())
    {
        ocr.Init(_tessdataPath, lang, false);
        List<tessnet2.Word> result = ocr.DoOCR(image, Rectangle.Empty);
        DumpResult(result);
        return result;
    }
}
/// <summary>
/// Runs a synchronous OCR pass over <paramref name="image"/> and dumps the result.
/// The tessdata path is passed as null, leaving the location up to the engine.
/// </summary>
/// <param name="image">Bitmap to recognize.</param>
/// <param name="lang">Language code handed to the engine's Init.</param>
/// <returns>The recognized words as returned by DoOCR.</returns>
public List<tessnet2.Word> DoOCRNormal(Bitmap image, string lang)
{
    // The engine is disposable; release it once recognition has produced its word list.
    using (tessnet2.Tesseract ocr = new tessnet2.Tesseract())
    {
        ocr.Init(null, lang, false);
        List<tessnet2.Word> result = ocr.DoOCR(image, Rectangle.Empty);
        DumpResult(result);
        return result;
    }
}
/// <summary>
/// Runs a synchronous OCR pass over <paramref name="image"/>.
/// The tessdata path is passed as null, leaving the location up to the engine.
/// </summary>
/// <param name="image">Bitmap to recognize.</param>
/// <param name="lang">Language code handed to the engine's Init.</param>
/// <returns>The recognized words as returned by DoOCR.</returns>
public List<tessnet2.Word> DoOCRNormal(Bitmap image, string lang)
{
    // The engine is disposable; release it once recognition has produced its word list.
    using (tessnet2.Tesseract ocr = new tessnet2.Tesseract())
    {
        ocr.Init(null, lang, false);
        return ocr.DoOCR(image, Rectangle.Empty);
    }
}
/// <summary>
/// Runs a synchronous OCR pass over <paramref name="image"/> and dumps the result.
/// </summary>
/// <param name="image">Bitmap to recognize.</param>
/// <param name="lang">Language code handed to the engine's Init.</param>
/// <returns>The recognized words as returned by DoOCR.</returns>
public List<tessnet2.Word> DoOCRNormal(Bitmap image, string lang)
{
    // NOTE(review): machine-specific absolute path; this only works on the original
    // developer's machine and should come from configuration.
    const string tessdataPath =
        @"C:\Users\mkmak\Documents\Visual Studio 2013\Projects\ConsoleApplication1\tessdata";

    // The engine is disposable; release it once recognition has produced its word list.
    using (tessnet2.Tesseract ocr = new tessnet2.Tesseract())
    {
        ocr.Init(tessdataPath, lang, false);
        List<tessnet2.Word> result = ocr.DoOCR(image, Rectangle.Empty);
        DumpResult(result);
        return result;
    }
}
/// <summary>
/// OCRs <paramref name="image"/>, groups the bounding box of every recognized character by
/// its character value, then crops, scales and feeds each character image to
/// <c>FindkNN</c>. <c>UltimateResult</c> is cleared at the end.
/// </summary>
/// <param name="image">Source bitmap; character crops are cloned from it.</param>
private static void ProcessImg(Bitmap image)
{
    // Character value -> every location at which that character was recognized.
    Dictionary<string, List<Rectangle>> CharLocations = new Dictionary<string, List<Rectangle>>();

    // The engine is disposable; keep it alive only while the OCR result is being read.
    using (tessnet2.Tesseract ocr = new tessnet2.Tesseract())
    {
        ocr.SetVariable("tessedit_char_whitelist", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789");
        // NOTE(review): machine-specific absolute path; should come from configuration.
        ocr.Init(@"C:\Users\Emil-PC\Documents\Visual Studio 2015\Projects\kNN\kNN\bin\Debug\tessdata", "eng", false);

        List<tessnet2.Word> result = ocr.DoOCR(image, Rectangle.Empty);
        foreach (tessnet2.Word word in result)
        {
            foreach (tessnet2.Character character in word.CharList)
            {
                Rectangle charPosition = FindCharLocation(character.Left, character.Right, character.Top, character.Bottom);
                string key = character.Value.ToString();

                List<Rectangle> allCharBounds;
                if (!CharLocations.TryGetValue(key, out allCharBounds))
                {
                    allCharBounds = new List<Rectangle>();
                    CharLocations.Add(key, allCharBounds);
                }

                allCharBounds.Add(charPosition);
            }
        }
    }

    foreach (var charPositions in CharLocations)
    {
        foreach (var charLocation in charPositions.Value)
        {
            // Dispose the intermediate crop as well as the scaled copy (the crop used to leak).
            using (Bitmap cropped = image.Clone(charLocation, image.PixelFormat))
            using (Bitmap croppedImage = ScaleImage(cropped, scale, scale))
            {
                FindkNN(k, kNNGroups, croppedImage, charPositions.Key);
            }
        }
    }

    // The former reporting loop over UltimateResult had an empty body (all output was
    // commented out), so only the reset remains.
    UltimateResult.Clear();
}
/// <summary>
/// Runs a synchronous OCR pass over <paramref name="image"/> and dumps the result.
/// </summary>
/// <param name="image">Bitmap to recognize.</param>
/// <param name="lang">Language code handed to the engine's Init.</param>
/// <returns>The recognized words as returned by DoOCR.</returns>
public List<tessnet2.Word> DoOcrNormal(Bitmap image, string lang)
{
    // The engine is disposable; release it once recognition has produced its word list.
    using (var ocr = new tessnet2.Tesseract())
    {
        ocr.Init(_tessdataPath, lang, false);
        List<tessnet2.Word> result = ocr.DoOCR(image, Rectangle.Empty);
        DumpResult(result);
        return result;
    }
}
/// <summary>
/// OCRs this instance's bitmap (digits, comma and dollar sign only) and returns the first
/// recognized word with every "$" removed.
/// </summary>
/// <returns>The recognized text, or an empty string when nothing was recognized.</returns>
public override string ToString()
{
    // The engine is disposable; release it once the text has been extracted.
    using (var ocr = new tessnet2.Tesseract())
    {
        // The whitelist is set before Init, matching the documented tessnet2 usage order.
        ocr.SetVariable("tessedit_char_whitelist", "0123456789,$");
        ocr.Init(null, "eng", false);

        var result = ocr.DoOCR(this.bmp, Rectangle.Empty);

        // ToString should not throw: an empty recognition yields an empty string.
        if (result == null || result.Count == 0)
        {
            return string.Empty;
        }

        return result[0].Text.Replace("$", "");
    }
}
/// <summary>
/// Starts OCR in the engine's multithreaded mode and blocks until <c>_mEvent</c> is signalled.
/// </summary>
/// <param name="image">Bitmap to recognize.</param>
/// <param name="lang">Language code handed to the engine's Init.</param>
public void DoOcrMultiThred(Bitmap image, string lang)
{
    tessnet2.Tesseract engine = new tessnet2.Tesseract();
    engine.Init(_tessdataPath, lang, false);

    // A non-null OcrDone delegate switches DoOCR to its multithreaded version,
    // which is also what makes the progress event fire.
    engine.OcrDone = new tessnet2.Tesseract.OcrDoneHandler(Finished);
    engine.ProgressEvent += new tessnet2.Tesseract.ProgressHandler(OcrProgressEvent);

    // Presumably signalled from Finished once recognition completes.
    _mEvent = new ManualResetEvent(false);
    engine.DoOCR(image, Rectangle.Empty);

    // Block here until the OCR run has finished.
    _mEvent.WaitOne();
}
/// <summary>
/// Starts OCR in the engine's multithreaded mode and blocks until <c>m_event</c> is signalled.
/// The tessdata path is passed as null.
/// </summary>
/// <param name="image">Bitmap to recognize.</param>
/// <param name="lang">Language code handed to the engine's Init.</param>
public void DoOCRMultiThred(Bitmap image, string lang)
{
    var engine = new tessnet2.Tesseract();
    engine.Init(null, lang, false);

    // A non-null OcrDone delegate switches DoOCR to its multithreaded version,
    // which is also what makes the progress event fire.
    engine.OcrDone = Finished;
    engine.ProgressEvent += ocr_ProgressEvent;

    // Presumably signalled from Finished once recognition completes.
    m_event = new ManualResetEvent(false);
    engine.DoOCR(image, Rectangle.Empty);

    // Block here until the OCR run has finished.
    m_event.WaitOne();
}
/// <summary>
/// Scans <c>iDir</c> for TIFF files, runs OCR on each one, and then for every configured
/// output target either e-mails the output file (and deletes it) or moves it into the
/// target directory.
/// </summary>
/// <remarks>
/// Creating the PDF/DOC output is still a TODO in this method: only the output file name
/// is derived, so the mail/move steps operate on a file this method does not create.
/// </remarks>
public void Read()
{
    foreach (string file in Directory.GetFiles(iDir))
    {
        bool isTiff = file.EndsWith(".tif", true, null) || file.EndsWith(".tiff", true, null);
        if (!isTiff)
        {
            continue;
        }

        // "\name" without extension; the leading separator is kept because the name is
        // appended directly to the output directory below.
        string baseName = "\\" + Path.GetFileNameWithoutExtension(file);

        // Run OCR. The source bitmap, the thresholded bitmap and the engine are all
        // disposable and were previously leaked for every file.
        using (Bitmap image = new Bitmap(file))
        using (tessnet2.Tesseract ocr = new tessnet2.Tesseract())
        {
            ocr.Init(null, "ENG", false);
            ocr.DoOCR(image, Rectangle.Empty); // result not consumed yet (output creation is TODO)

            using (Bitmap thresholded = ocr.GetThresholdedImage(image, Rectangle.Empty))
            {
                // TODO: build the PDF/DOC output from the thresholded image.
            }
        }

        for (int i = 0; i < oDir.Length; i++)
        {
            // Derive the name per target. Previously the extension was appended to a shared
            // variable, so a second target produced names such as "\name.pdf.doc".
            string fileName = baseName;
            if (oType[i] == "PDF")
            {
                fileName += ".pdf";
                ////create pdf
            }
            else if (oType[i] == "DOC")
            {
                fileName += ".doc";
                ////create doc
            }

            if (oEmail[i])
            {
                try
                {
                    SendMail(
                        Properties.Settings.Default.SMTPServer,
                        Properties.Settings.Default.SMTPPort,
                        Properties.Settings.Default.SSL,
                        Properties.Settings.Default.Username,
                        Properties.Settings.Default.Password,
                        Properties.Settings.Default.FromAddress,
                        Properties.Settings.Default.FromName,
                        Properties.Settings.Default.Subject,
                        Properties.Settings.Default.Message,
                        Properties.Settings.Default.HTML,
                        new string[] { oDir[i] },
                        new string[] { fileName });
                    File.Delete(fileName);
                }
                catch (Exception ex)
                {
                    System.Windows.Forms.MessageBox.Show(ex.ToString(), "Error", System.Windows.Forms.MessageBoxButtons.OK, System.Windows.Forms.MessageBoxIcon.Error);
                }
            }
            else
            {
                File.Move(fileName, oDir[i] + fileName);
            }
        }
    }
}
/// <summary>
/// Starts OCR in the engine's multithreaded mode and blocks until <c>_mEvent</c> is signalled.
/// </summary>
/// <param name="image">Bitmap to recognize.</param>
/// <param name="lang">Language code handed to the engine's Init.</param>
public void DoOcrMultiThred(Bitmap image, string lang)
{
    tessnet2.Tesseract engine = new tessnet2.Tesseract();
    engine.Init(_tessdataPath, lang, false);

    // A non-null OcrDone delegate switches DoOCR to its multithreaded version,
    // which is also what makes the progress event fire.
    engine.OcrDone = new tessnet2.Tesseract.OcrDoneHandler(Finished);
    engine.ProgressEvent += new tessnet2.Tesseract.ProgressHandler(OcrProgressEvent);

    // Presumably signalled from Finished once recognition completes.
    _mEvent = new ManualResetEvent(false);
    engine.DoOCR(image, Rectangle.Empty);

    // Block here until the OCR run has finished.
    _mEvent.WaitOne();
}
/// <summary>
/// Sample run: OCRs "eurotext.tif" restricted to digits with the "fra" language data
/// from c:\temp and prints every word with its confidence.
/// </summary>
public void pdf()
{
    // Both the bitmap and the engine are disposable; release them deterministically.
    using (Bitmap image = new Bitmap("eurotext.tif"))
    using (tessnet2.Tesseract ocr = new tessnet2.Tesseract())
    {
        ocr.SetVariable("tessedit_char_whitelist", "0123456789"); // digits only
        ocr.Init(@"c:\temp", "fra", false);                       // folder containing the tessdata

        List<tessnet2.Word> result = ocr.DoOCR(image, Rectangle.Empty);
        foreach (tessnet2.Word word in result)
        {
            Console.WriteLine("{0} : {1}", word.Confidence, word.Text);
        }
    }
}
/// <summary>
/// Starts OCR (upper-case letters and digits only) in the engine's multithreaded mode,
/// using the "tessdata" folder under the application start-up path, and blocks until
/// <c>m_event</c> is signalled.
/// </summary>
/// <param name="image">Bitmap to recognize.</param>
/// <param name="lang">Language code handed to the engine's Init.</param>
public void DoOCRMultiThred(Bitmap image, string lang)
{
    var engine = new tessnet2.Tesseract();
    engine.SetVariable("tessedit_char_whitelist", "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789");

    string tessdataDir = string.Concat(Application.StartupPath, @"\tessdata");
    engine.Init(tessdataDir, lang, false);

    // A non-null OcrDone delegate switches DoOCR to its multithreaded version,
    // which is also what makes the progress event fire.
    engine.OcrDone = Finished;
    engine.ProgressEvent += ocr_ProgressEvent;

    // Presumably signalled from Finished once recognition completes.
    m_event = new ManualResetEvent(false);
    engine.DoOCR(image, Rectangle.Empty);

    // Block here until the OCR run has finished.
    m_event.WaitOne();
}
/// <summary>
/// Click handler: OCRs (digits only) the image whose path is in <c>textBox1</c> and shows
/// one message box per recognized word with its confidence and text.
/// </summary>
/// <param name="sender">Event source.</param>
/// <param name="e">Event data.</param>
private void button1_Click(object sender, EventArgs e)
{
    string strPath = textBox1.Text;

    // Dispose the bitmap (which otherwise keeps the file locked) and the OCR engine.
    using (Bitmap image = new Bitmap(strPath))
    using (tessnet2.Tesseract ocr = new tessnet2.Tesseract())
    {
        ocr.SetVariable("tessedit_char_whitelist", "0123456789"); // digits only
        // Language data folder; packs: http://code.google.com/p/tesseract-ocr/downloads/list
        ocr.Init(@"D:\tessdata", "eng", false);

        List<tessnet2.Word> result = ocr.DoOCR(image, Rectangle.Empty);
        foreach (tessnet2.Word word in result)
        {
            string strShow = string.Format("{0} : {1}", word.Confidence, word.Text);
            MessageBox.Show(strShow);
        }
    }
}
/// <summary>
/// Starts OCR (upper-case letters and digits only) in the engine's multithreaded mode,
/// using the "tessdata" folder under the application start-up path, and blocks until
/// <c>m_event</c> is signalled. No progress handler is attached in this variant.
/// </summary>
/// <param name="image">Bitmap to recognize.</param>
/// <param name="lang">Language code handed to the engine's Init.</param>
public void DoOCRMultiThred(Bitmap image, string lang)
{
    var engine = new tessnet2.Tesseract();
    engine.SetVariable("tessedit_char_whitelist", "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789");

    string tessdataDir = string.Concat(Application.StartupPath, @"\tessdata");
    engine.Init(tessdataDir, lang, false);

    // A non-null OcrDone delegate switches DoOCR to its multithreaded version.
    engine.OcrDone = Finished;

    // Presumably signalled from Finished once recognition completes.
    m_event = new ManualResetEvent(false);
    engine.DoOCR(image, Rectangle.Empty);

    // Block here until the OCR run has finished.
    m_event.WaitOne();
}
/// <summary>
/// OCRs a sprite restricted to digits and returns both the raw text and its integer value.
/// </summary>
/// <param name="bSprite">Bitmap to recognize.</param>
/// <returns>
/// Item1: all recognized word texts concatenated. Item2: the parsed integer, or -101 when
/// the text is not a valid integer.
/// </returns>
static Tuple<string, int> OCRBitmapV2(Bitmap bSprite)
{
    // Sentinel for "text could not be parsed".
    const int ParseFailed = -101;

    string resultText;
    // The engine is disposable; release it once the text has been extracted.
    using (tessnet2.Tesseract ocr = new tessnet2.Tesseract())
    {
        ocr.SetVariable("tessedit_char_whitelist", "0123456789");
        ocr.Init(@"", "eng", false);
        List<tessnet2.Word> result = ocr.DoOCR(bSprite, Rectangle.Empty);
        resultText = string.Join("", result.Select(i => i.Text));
    }

    // int.TryParse writes 0 into its out argument on failure, which used to wipe out the
    // -101 initial value; restore the sentinel explicitly so failure differs from "0".
    int resultValue;
    if (!int.TryParse(resultText, out resultValue))
    {
        resultValue = ParseFailed;
    }

    return new Tuple<string, int>(resultText, resultValue);
}
/// <summary>
/// Menu handler: OCRs a copy of <c>currentImage</c> (upper-case letters, digits and '-')
/// and shows the recognized words, space separated, in a message box.
/// </summary>
/// <param name="sender">Event source.</param>
/// <param name="e">Event data.</param>
private void processarToolStripMenuItem_Click(object sender, EventArgs e)
{
    StringBuilder sb = new StringBuilder();

    // The working copy and the engine are disposable; release both when done.
    using (var image = (Bitmap)currentImage.Clone())
    using (var ocr = new tessnet2.Tesseract())
    {
        // The variable is "tessedit_char_whitelist"; the previous "tesseract_char_whitelist"
        // is not the name used anywhere else for this setting. It is also set before Init,
        // matching the documented tessnet2 usage order.
        ocr.SetVariable("tessedit_char_whitelist", "ABCDEFGHIJKLMNOPQRSTUVXWYZ-1234567890");
        ocr.Init(@"tessdata", "eng", false);

        var result = ocr.DoOCR(image, Rectangle.Empty);
        foreach (tessnet2.Word word in result)
        {
            sb.Append(word.Text).Append(" ");
        }
    }

    // Show the text as-is: passing OCR output through String.Format as a format string
    // throws FormatException whenever the text contains '{' or '}'.
    MessageBox.Show(sb.ToString());
}
/// <summary>
/// Click handler: OCRs the image shown in <c>pictureBox1</c> (upper-case letters, digits
/// and '-') and writes the recognized words, space separated, to <c>textBox1</c>.
/// </summary>
/// <param name="sender">Event source.</param>
/// <param name="e">Event data.</param>
private void button2_Click(object sender, EventArgs e)
{
    // The picture box still owns this bitmap, so it must not be disposed here.
    var image = (Bitmap)pictureBox1.Image;
    StringBuilder sb = new StringBuilder();

    // The engine is disposable; release it once the text has been collected.
    using (var ocr = new tessnet2.Tesseract())
    {
        // The variable is "tessedit_char_whitelist"; the previous "tesseract_char_whitelist"
        // is not the name used anywhere else for this setting. It is also set before Init,
        // matching the documented tessnet2 usage order.
        ocr.SetVariable("tessedit_char_whitelist", "ABCDEFGHIJKLMNOPQRSTUVXWYZ-1234567890");
        ocr.Init(@"tessdata", "eng", true);

        var result = ocr.DoOCR(image, Rectangle.Empty);
        foreach (tessnet2.Word word in result)
        {
            sb.Append(word.Text).Append(" ");
        }
    }

    textBox1.Text = sb.ToString();
}
/// <summary>
/// Downloads the bytes at <paramref name="url"/>, turns them into an image, OCRs it and
/// prints every recognized word with its confidence.
/// </summary>
/// <param name="httpHelper">Helper used to perform the HTTP GET.</param>
/// <param name="url">Address of the image to download.</param>
private static void GetValue(HttpHelper httpHelper, string url)
{
    var bytes = httpHelper.HttpByteGet(url, string.Empty, false, false, 60 * 1000);
    string language = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Language");

    // The decoded image and the engine are disposable; release both when done.
    using (var image = Bitmap(bytes))
    using (tessnet2.Tesseract ocr = new tessnet2.Tesseract())
    {
        ocr.Init(language, "eng", false);

        List<tessnet2.Word> result = ocr.DoOCR(image, Rectangle.Empty);
        foreach (tessnet2.Word word in result)
        {
            Console.WriteLine("{0} : {1}", word.Confidence, word.Text);
        }
    }
}
/// <summary>
/// Gets a screenshot of the active window (already optimised for OCR),
/// runs the OCR pre-processing,
/// runs the OCR,
/// and returns the result as a context entry.
/// </summary>
/// <param name="ce">Context entry whose screenshot is recognized; its screenshot is disposed and set to null on success.</param>
/// <returns>
/// The same context entry with <c>OcrText</c> and <c>Confidence</c> filled in, or null when
/// no OCR engine is available. Exceptions are logged and the entry is returned as-is.
/// </returns>
public ContextEntry RunOcr(ContextEntry ce)
{
    if (_tEngine == null)
    {
        return null;
    }

    try
    {
        // run OCR preprocessing
        RunOcrPreProcessing(ce.Screenshot);

        // processed screenshot
        var processedScreenshot = ce.Screenshot.Image;

        // Subscribe exactly once: a bare "+=" on every call stacked another copy of the
        // handler on the shared engine, so it ran once more with each invocation.
        _tEngine.OcrDone -= OcrFinished;
        _tEngine.OcrDone += OcrFinished;

        // The wait handle must exist BEFORE the asynchronous OCR starts. Creating it
        // afterwards could lose the completion signal and block forever in WaitOne.
        // (Presumably OcrFinished signals _mEvent; verify against the handler.)
        _mEvent = new ManualResetEvent(false);

        _tEngine.DoOCR(processedScreenshot, Rectangle.Empty);
        _mEvent.WaitOne(); // wait here until it's finished

        // Only release the bitmap once the OCR run can no longer be reading it.
        processedScreenshot.Dispose();

        // add ocr'd text to context entry
        ce.OcrText = _temporaryOcrText;
        ce.Confidence = _tempConfidence;

        // reset temp values
        _temporaryOcrText = string.Empty;
        _tempConfidence = Settings.OcrConfidenceAcceptanceThreshold;

        // release sources
        ce.Screenshot.Dispose();
        ce.Screenshot = null;
    }
    catch (Exception e)
    {
        Logger.WriteToLogFile(e);
    }

    return ce;
}
/// <summary>
/// Click handler: OCRs (digits only) the image whose path is in <c>textBox1</c> and shows
/// one message box per recognized word with its confidence and text.
/// </summary>
/// <param name="sender">Event source.</param>
/// <param name="e">Event data.</param>
private void button1_Click(object sender, EventArgs e)
{
    string strPath = textBox1.Text;

    // Dispose the bitmap (which otherwise keeps the file locked) and the OCR engine.
    using (Bitmap image = new Bitmap(strPath))
    using (tessnet2.Tesseract ocr = new tessnet2.Tesseract())
    {
        ocr.SetVariable("tessedit_char_whitelist", "0123456789"); // digits only
        // Language data folder; packs: http://code.google.com/p/tesseract-ocr/downloads/list
        ocr.Init(@"D:\tessdata", "eng", false);

        List<tessnet2.Word> result = ocr.DoOCR(image, Rectangle.Empty);
        foreach (tessnet2.Word word in result)
        {
            string strShow = string.Format("{0} : {1}", word.Confidence, word.Text);
            MessageBox.Show(strShow);
        }
    }
}
/// <summary>
/// Binarizes the image shown in <c>pictureBox1</c>, finds blobs, OCRs a fixed 230x75 crop
/// at each blob position and adds every recognized text of at least 6 characters to
/// <c>placas</c>. The picture box ends up showing the binarized image.
/// </summary>
private void processo2()
{
    // Pre-processing chain: threshold -> erosion -> invert.
    var img = (Bitmap)pictureBox1.Image.Clone();
    img = new OtsuThreshold().Apply(img);
    img = new Erosion().Apply(img);
    img = new Invert().Apply(img);

    BlobCounter bc = new BlobCounter();
    bc.BackgroundThreshold = Color.Black;
    bc.ProcessImage(img);

    // The former second clone with its LockBits/UnlockBits pair was never read or written,
    // so it has been removed.
    foreach (Rectangle blob in bc.GetObjectsRectangles())
    {
        Crop filter = new Crop(new Rectangle(blob.X, blob.Y, 230, 75));

        // The crop is only an intermediate; the inverted copy is what gets recognized.
        Bitmap img2;
        using (Bitmap cropped = (Bitmap)filter.Apply(img))
        {
            img2 = new Invert().Apply(cropped);
        }

        StringBuilder sb = new StringBuilder();
        // The engine is disposable; a fresh one is still used per blob, as before.
        using (var ocr = new tessnet2.Tesseract())
        {
            // The variable is "tessedit_char_whitelist"; the previous
            // "tesseract_char_whitelist" is not the name used anywhere else for this setting.
            ocr.SetVariable("tessedit_char_whitelist", "ABCDEFGHIJKLMNOPQRSTUVXWYZ-1234567890");
            ocr.Init(@"tessdata", "eng", false);

            var result = ocr.DoOCR(img2, Rectangle.Empty);
            foreach (tessnet2.Word word in result)
            {
                sb.Append(word.Text).Append(" ");
            }
        }

        // Keep only candidates long enough to be a plate.
        string aux = sb.ToString();
        if (aux.Length >= 6)
        {
            placas.Add(aux);
        }

        pictureBox1.Image = img2;
    }

    pictureBox1.Image = img;
}
/// <summary>
/// Runs OCR over every image and returns the recognized text, one output line per
/// recognized text line.
/// </summary>
/// <param name="images">Images to process; each is cast to <see cref="Bitmap"/>.</param>
/// <param name="lang">Language code handed to the engine's Init.</param>
/// <returns>The accumulated text, or an empty string as soon as any image yields a null result.</returns>
public string RecognizeText(IList<Image> images, string lang)
{
    using (var engine = new tessnet2.Tesseract())
    {
        engine.Init(null, lang, false);

        var text = new StringBuilder();
        foreach (Bitmap page in images)
        {
            // The event is still created per image although the synchronous path never waits on it.
            m_event = new ManualResetEvent(false);

            List<tessnet2.Word> words = engine.DoOCR(page, rect);
            if (words == null)
            {
                return String.Empty;
            }

            int line = 0;
            while (line < tessnet2.Tesseract.LineCount(words))
            {
                text.AppendLine(tessnet2.Tesseract.GetLineText(words, line));
                line++;
            }
        }

        return text.ToString();
    }
}
/// <summary>
/// Accepts a multipart/form-data upload, stores it under <c>ServerUploadFolder</c>, runs
/// OCR on the first uploaded file and returns the recognized words.
/// </summary>
/// <returns>The stored file names plus one item (text and confidence) per recognized word.</returns>
/// <exception cref="HttpResponseException">
/// 415 when the request is not multipart/form-data; 400 when it contains no file.
/// </exception>
public async Task<OCRResult> UploadFile()
{
    // Verify that this is an HTML Form file upload request
    if (!Request.Content.IsMimeMultipartContent("form-data"))
    {
        throw new HttpResponseException(Request.CreateResponse(HttpStatusCode.UnsupportedMediaType));
    }

    // Create a stream provider for setting up output streams
    MultipartFormDataStreamProvider streamProvider = new MultipartFormDataStreamProvider(ServerUploadFolder);

    // Read the MIME multipart content asynchronously using the stream provider.
    await Request.Content.ReadAsMultipartAsync(streamProvider);

    // A request without any file part is a client error, not a server failure.
    string uploadedFile = streamProvider.FileData.Select(e => e.LocalFileName).FirstOrDefault();
    if (uploadedFile == null)
    {
        throw new HttpResponseException(Request.CreateResponse(HttpStatusCode.BadRequest));
    }

    string path = System.Web.HttpContext.Current.Server.MapPath("~/tessdata");

    // Dispose the bitmap (it keeps the uploaded file locked) and the engine. The former
    // "catch { throw e; }" only reset the stack trace, so errors now propagate untouched.
    using (Bitmap image = new Bitmap(uploadedFile))
    using (tessnet2.Tesseract ocr = new tessnet2.Tesseract())
    {
        ocr.Init(path, "eng", false); // To use correct tessdata
        List<tessnet2.Word> result = ocr.DoOCR(image, Rectangle.Empty);

        List<OCRItem> items = new List<OCRItem>();
        foreach (tessnet2.Word word in result)
        {
            items.Add(new OCRItem() { Text = word.Text, Confidence = word.Confidence });
        }

        OCRResult res = new OCRResult();
        res.FileNames = streamProvider.FileData.Select(entry => entry.LocalFileName);
        res.RecognizedTextItems = items;
        return res;
    }
}
/// <summary>
/// Accepts a multipart/form-data upload, stores it under <c>ServerUploadFolder</c>, runs
/// OCR on the first uploaded file and returns the recognized words.
/// </summary>
/// <returns>The stored file names plus one item (text and confidence) per recognized word.</returns>
/// <exception cref="HttpResponseException">
/// 415 when the request is not multipart/form-data; 400 when it contains no file.
/// </exception>
public async Task<OCRResult> UploadFile()
{
    // Verify that this is an HTML Form file upload request
    if (!Request.Content.IsMimeMultipartContent("form-data"))
    {
        throw new HttpResponseException(Request.CreateResponse(HttpStatusCode.UnsupportedMediaType));
    }

    // Create a stream provider for setting up output streams
    MultipartFormDataStreamProvider streamProvider = new MultipartFormDataStreamProvider(ServerUploadFolder);

    // Read the MIME multipart content asynchronously using the stream provider.
    await Request.Content.ReadAsMultipartAsync(streamProvider);

    // A request without any file part is a client error, not a server failure.
    string uploadedFile = streamProvider.FileData.Select(e => e.LocalFileName).FirstOrDefault();
    if (uploadedFile == null)
    {
        throw new HttpResponseException(Request.CreateResponse(HttpStatusCode.BadRequest));
    }

    string path = System.Web.HttpContext.Current.Server.MapPath("~/tessdata");

    // Dispose the bitmap (it keeps the uploaded file locked) and the engine. The former
    // "catch { throw e; }" only reset the stack trace, so errors now propagate untouched.
    using (Bitmap image = new Bitmap(uploadedFile))
    using (tessnet2.Tesseract ocr = new tessnet2.Tesseract())
    {
        ocr.Init(path, "eng", false); // To use correct tessdata
        List<tessnet2.Word> result = ocr.DoOCR(image, Rectangle.Empty);

        List<OCRItem> items = new List<OCRItem>();
        foreach (tessnet2.Word word in result)
        {
            items.Add(new OCRItem() { Text = word.Text, Confidence = word.Confidence });
        }

        OCRResult res = new OCRResult();
        res.FileNames = streamProvider.FileData.Select(entry => entry.LocalFileName);
        res.RecognizedTextItems = items;
        return res;
    }
}
/// <summary>
/// Click handler: obtains an image from <c>Run()</c>, converts it to grayscale and removes
/// noise, shows it in <c>pictureBox1</c>, then OCRs it (digits only) and writes the first
/// recognized word to <c>textBox1</c>.
/// </summary>
/// <param name="sender">Event source.</param>
/// <param name="e">Event data.</param>
private void button1_Click(object sender, EventArgs e)
{
    System.Drawing.Bitmap img = Run();
    UnCodebase ud = new UnCodebase(img);
    img = ud.GrayByPixels();
    ud.ClearNoise(128, 2);
    pictureBox1.Image = img;

    // The engine is disposable; release it once the text has been read.
    using (tessnet2.Tesseract ocr = new tessnet2.Tesseract())
    {
        ocr.SetVariable("tessedit_char_whitelist", "0123456789"); // digits only
        // Language data folder; packs: http://code.google.com/p/tesseract-ocr/downloads/list
        ocr.Init(Application.StartupPath + @"\\tmpe", "eng", true);

        List<tessnet2.Word> result = ocr.DoOCR(img, Rectangle.Empty);

        // Nothing recognized: show an empty code instead of crashing the UI handler on result[0].
        bool hasText = result != null && result.Count > 0;
        textBox1.Text = hasText ? result[0].Text : string.Empty;
    }
}
/// <summary>
/// Captures an image via <c>GetCapture()</c>, OCRs it with the "spa" language data and
/// looks for a word whose text equals <paramref name="texto"/>.
/// </summary>
/// <param name="texto">Exact word text to search for.</param>
/// <returns>The (Right, Bottom) corner of the first matching word, or null when none matches.</returns>
public static Point? Buscar(string texto)
{
    // Both the capture and the engine are disposable; release them deterministically.
    using (Bitmap bmp = GetCapture())
    using (var ocr = new tessnet2.Tesseract())
    {
        ocr.Init("tesseract", "spa", false);

        var result = ocr.DoOCR(bmp, Rectangle.Empty);
        foreach (tessnet2.Word word in result)
        {
            if (word.Text.Equals(texto))
            {
                return new Point(word.Right, word.Bottom);
            }
        }
    }

    return null;
}
/// <summary>
/// Crops <paramref name="cropRect"/> out of <paramref name="img"/>, enlarges the crop by a
/// factor of 10 in each dimension and returns the first word recognized in it.
/// </summary>
/// <param name="img">Source image.</param>
/// <param name="ocr">Initialized OCR engine; owned by the caller and not disposed here.</param>
/// <param name="cropRect">Region of <paramref name="img"/> to recognize.</param>
/// <returns>The text of the first recognized word, or an empty string when nothing was recognized.</returns>
private string GetText(Image img, tessnet2.Tesseract ocr, Rectangle cropRect)
{
    // All three bitmaps are temporaries and were previously leaked on every call.
    using (Bitmap src = new Bitmap(img))
    using (Bitmap cropped = new Bitmap(cropRect.Width, cropRect.Height))
    {
        using (Graphics g = Graphics.FromImage(cropped))
        {
            g.DrawImage(src, new Rectangle(0, 0, cropped.Width, cropped.Height), cropRect, GraphicsUnit.Pixel);
        }

        using (Bitmap enlarged = ResizeBitmap(cropped, cropped.Width * 10, cropped.Height * 10))
        {
            List<tessnet2.Word> result = ocr.DoOCR(enlarged, Rectangle.Empty);

            // Guard against an empty recognition instead of throwing on result[0].
            if (result == null || result.Count == 0)
            {
                return string.Empty;
            }

            return result[0].Text;
        }
    }
}
/// <summary>
/// Console entry point: OCRs a fixed sample image (digits only), prints every recognized
/// word with its confidence, and waits for a key press. Errors are printed to the console.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    try
    {
        // NOTE(review): both paths are machine-specific and should come from arguments or configuration.
        // The bitmap and the engine are disposable; release them deterministically.
        using (Bitmap image = new Bitmap(@"C:\Users\Nino\Documents\PbpHackthone\dotNet_OCR_server\images\IMAG0253_cropped.jpg"))
        using (tessnet2.Tesseract ocr = new tessnet2.Tesseract())
        {
            ocr.SetVariable("tessedit_char_whitelist", "0123456789"); // digits only
            ocr.Init(@"C:\Users\Nino\Desktop\tessdata", "eng", false); // folder containing the tessdata

            List<tessnet2.Word> result = ocr.DoOCR(image, Rectangle.Empty);
            foreach (tessnet2.Word word in result)
            {
                Console.WriteLine("{0} : {1}", word.Confidence, word.Text);
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
    }

    Console.Read();
}
/// <summary>
/// Click handler: OCRs <c>image</c>, shows the recognized text in <c>rich_status</c>,
/// writes it to "Results.txt" under <c>path</c>, fills the date / certificate-type fields
/// and shows the average word confidence.
/// </summary>
/// <param name="sender">Event source.</param>
/// <param name="e">Event data.</param>
private void btn_start_Click(object sender, EventArgs e)
{
    try
    {
        // The engine is disposable; release it once the word list has been processed.
        using (tessnet2.Tesseract ocr = new tessnet2.Tesseract())
        {
            ocr.SetVariable("tessedit_char_whitelist", "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.,$-/#&=()'");
            ocr.Init(path + @"\tessdata", "eng", false);
            List<tessnet2.Word> result = ocr.DoOCR(image, Rectangle.Empty);

            foreach (tessnet2.Word word in result)
            {
                count++;
                totalConfidence += word.Confidence;
            }

            // The joined text is identical for every word, so build, display and save it once
            // instead of once per word. As before, nothing is written when no word was found.
            if (result.Count > 0)
            {
                output = string.Join(" ", result.Select(x => x.Text).ToList());
                rich_status.Text = output;
                using (StreamWriter writetext = new StreamWriter(path + @"\Results.txt"))
                {
                    writetext.WriteLine(output);
                }
            }
        }

        rich_status.Text += recognize(output);
        txt_date.Text = getDate(output);
        txt_type.Text = getCertificateType(output);

        // Avoid dividing by zero when no word has been counted.
        if (count > 0)
        {
            totalConfidence = (totalConfidence / count);
        }

        txt_confidence.Text = totalConfidence.ToString();
    }
    catch (Exception ex)
    {
        // Surface the failure instead of swallowing it silently.
        rich_status.Text += "\nOCR failed: " + ex.Message;
    }
}
/// <summary>
/// Copies the image, converts it to grayscale and removes noise, then OCRs it (letters and
/// digits only), logging every recognized word with its confidence.
/// </summary>
/// <param name="bitmap">Image to recognize; it is copied, not modified or disposed.</param>
/// <returns>The text of the first recognized word, or an empty string when there is none.</returns>
public static string TryOcr(this Image bitmap)
{
    // The working copy, the cleaned bitmap and the engine are all disposable. If the
    // clean-up helpers return the same instance, the second Dispose is a harmless no-op.
    using (Bitmap source = new Bitmap(bitmap))
    using (Bitmap temp = source.GrayByPixels().ClearNoise(128, 2))
    using (tessnet2.Tesseract ocr = new tessnet2.Tesseract())
    {
        ocr.SetVariable("tessedit_char_whitelist", "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"); // letters and digits
        ocr.Init(@"lib\tessdata", "eng", false); // folder containing the tessdata

        List<tessnet2.Word> result = ocr.DoOCR(temp, Rectangle.Empty);
        foreach (tessnet2.Word word in result)
        {
            SimpleConsole.WriteLine(string.Format("{0} : {1}", word.Confidence, word.Text));
        }

        return result.Count > 0 ? result[0].Text : string.Empty;
    }
}
/// <summary>
/// Decodes a base64-encoded captcha image and recognizes its text (letters and digits).
/// </summary>
/// <param name="base64Image">
/// Base64 image data; every prefix listed in <c>base64ImageStart</c> and the escaped line
/// breaks "%0A"/"%0D" are stripped before decoding.
/// </param>
/// <returns>The text of the first recognized word, or null when decoding or recognition fails.</returns>
public string GetValidateCode(string base64Image)
{
    try
    {
        foreach (var item in base64ImageStart)
        {
            base64Image = base64Image.Replace(item, "");
        }

        // Strip the escaped line breaks.
        string dummyData = base64Image.Replace("%0A", "").Replace("%0D", "");
        byte[] arr = Convert.FromBase64String(dummyData);

        // A Bitmap created from a stream requires that stream to stay open for the bitmap's
        // lifetime, so the stream is now closed only after OCR instead of right after
        // construction. Bitmap and engine are released on the failure path as well.
        using (MemoryStream ms = new MemoryStream(arr))
        using (Bitmap bmp = new Bitmap(ms))
        using (tessnet2.Tesseract ocr = new tessnet2.Tesseract())
        {
            ocr.SetVariable("tessedit_char_whitelist", "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
            ocr.Init(Application.StartupPath + "\\tessdata", "eng", false);

            List<tessnet2.Word> result = ocr.DoOCR(bmp, Rectangle.Empty);
            if (result == null || result.Count == 0)
            {
                return null;
            }

            return result[0].Text;
        }
    }
    catch (Exception)
    {
        // Any failure is reported to the caller as "no code", as before.
        return null;
    }
}
/// <summary>
/// OCRs the image contained in <paramref name="input"/> and returns the recognized text,
/// one "\n"-terminated entry per recognized line, as the payload of a decoder item.
/// </summary>
/// <param name="input">Encoded image bytes; also stored unchanged as the result bitmap.</param>
/// <param name="settings">Supplies the OCR language and the numeric-mode flag.</param>
/// <returns>
/// The decoder item; its fields are left unset when the OCR library throws, because OCR
/// failures are deliberately ignored.
/// </returns>
public override DecoderItem Decode(byte[] input, VisualDecoderSettings settings)
{
    var decoded = new DecoderItem();

    // Fall back to English when the user does not know the language.
    string lang = settings.OCRLanguage.ToString();
    bool languageUnknown = lang.Equals(VisualDecoderSettings.OCRLanguages.unkown.ToString());
    if (languageUnknown)
    {
        lang = VisualDecoderSettings.OCRLanguages.eng.ToString();
    }

    using (var engine = new tessnet2.Tesseract())
    {
        try
        {
            engine.Init(TessractData, lang, settings.NumericMode);
            List<tessnet2.Word> words = engine.DoOCR(ByteArrayToImage(input), Rectangle.Empty);

            // Aggregate the recognized lines into a single payload string.
            string text = "";
            int lineCount = tessnet2.Tesseract.LineCount(words);
            int line = 0;
            while (line < lineCount)
            {
                text = text + tessnet2.Tesseract.GetLineText(words, line) + "\n";
                line++;
            }

            // Fill the result.
            decoded.CodePayload = text;
            decoded.CodeType = "none";
            decoded.BitmapWithMarkedCode = input;
            engine.Clear();
        }
        catch (Exception)
        {
            // Deliberate best-effort: the OCR library occasionally fails (memory problems),
            // and such failures are ignored.
        }
    }

    return decoded;
}
/// <summary>
/// OCRs the 9x9 cell images "{outDir}{row}{column}.png" and builds the sudoku grid.
/// </summary>
/// <returns>
/// A 9x9 grid. For each recognized word, the cell is set to the first run of digits in the
/// word when its confidence is below <c>CONFIDENCE</c>, and to 0 otherwise; cells without
/// any recognized word stay 0.
/// </returns>
private static int[,] RecognizeDigits()
{
    int[,] sudoku = new int[9, 9];

    // Loop-invariant: build the pattern once instead of once per word.
    Regex regex = new Regex(@"\d+");

    for (int i = 0; i < 9; ++i)
    {
        for (int j = 0; j < 9; ++j)
        {
            // Bitmap and engine are disposable; 81 of each used to be leaked per call.
            using (Bitmap image = new Bitmap(outDir + i.ToString() + j.ToString() + ".png"))
            using (tessnet2.Tesseract ocr = new tessnet2.Tesseract())
            {
                ocr.Init(tessData, "eng", false); // folder containing the tessdata

                List<tessnet2.Word> result = ocr.DoOCR(image, Rectangle.Empty);
                foreach (tessnet2.Word word in result)
                {
                    // Words at or above the confidence limit are treated as empty text.
                    Match match = regex.Match(word.Confidence < CONFIDENCE ? word.Text : "");
                    sudoku[i, j] = match.Value == "" ? 0 : Int32.Parse(match.Value);
                    Console.WriteLine("{0} : {1}", word.Confidence, word.Text);
                }
            }
        }
    }

    return sudoku;
}
/// <summary>
/// Background-worker variant: runs OCR over one image or over all images and returns the
/// recognized text, one output line per recognized text line.
/// </summary>
/// <param name="images">Candidate images; each is cast to <see cref="Bitmap"/>.</param>
/// <param name="index">-1 to process every image, otherwise the index of the single image to process.</param>
/// <param name="lang">Language code handed to the engine's Init.</param>
/// <param name="worker">Worker running this operation; stored in the <c>worker</c> field.</param>
/// <param name="e">DoWork arguments; <c>Cancel</c> is set when cancellation is already pending.</param>
/// <returns>
/// The accumulated text; an empty string when cancelled up front or as soon as any image
/// yields a null result.
/// </returns>
public string RecognizeText(IList<Image> images, int index, string lang, BackgroundWorker worker, DoWorkEventArgs e)
{
    this.worker = worker;

    // Cancellation is only honoured here, before any work starts. A CancelAsync call that
    // arrives after this check cannot set DoWorkEventArgs.Cancel, so
    // RunWorkerCompletedEventArgs.Cancelled stays false in that case (a known race).
    if (worker.CancellationPending)
    {
        e.Cancel = true;
        return String.Empty;
    }

    using (var engine = new tessnet2.Tesseract())
    {
        engine.Init(lang, false);

        // Pick the working set: everything, or just the requested image.
        IList<Image> selected = index == -1
            ? images
            : new List<Image> { images[index] };

        var text = new StringBuilder();
        foreach (Bitmap page in selected)
        {
            // The event is still created per image although the synchronous path never waits on it.
            m_event = new ManualResetEvent(false);

            List<tessnet2.Word> words = engine.DoOCR(page, rect);
            if (words == null)
            {
                return String.Empty;
            }

            int line = 0;
            while (line < tessnet2.Tesseract.LineCount(words))
            {
                text.AppendLine(tessnet2.Tesseract.GetLineText(words, line));
                line++;
            }
        }

        return text.ToString();
    }
}
/// <summary>
/// Click handler: fills the mail number into the page, OCRs the captcha image (digits
/// only), writes the recognized code into every "checkCode" element and submits the form
/// through an injected script.
/// </summary>
/// <param name="sender">Event source.</param>
/// <param name="e">Event data.</param>
private void BtnRun_Click(object sender, EventArgs e)
{
    this.webBrowser1.Document.GetElementById("mailNum").InnerText = AppInputParameters.Current.EMSCode;
    HtmlElementCollection ret = webBrowser1.Document.All.GetElementsByName("checkCode");

    string code;
    // The captured image is only needed until it has been copied into a bitmap.
    using (Image img = GetRegCodePic(this.webBrowser1, "checkCode", null, null))
    {
        if (img == null)
        {
            return;
        }

        // Pre-process the captcha: grayscale conversion followed by noise removal.
        System.Drawing.Bitmap bitmap = new Bitmap(img);
        UnCodebase ud = new UnCodebase(bitmap);
        Bitmap processedImg = ud.GrayByPixels();
        ud.ClearNoise(128, 2);

        // The engine is disposable; release it once the code has been read.
        using (tessnet2.Tesseract ocr = new tessnet2.Tesseract())
        {
            ocr.SetVariable("tessedit_char_whitelist", "0123456789");
            ocr.Init(Application.StartupPath + @"\\tmpe", "eng", true);
            List<tessnet2.Word> result = ocr.DoOCR(processedImg, Rectangle.Empty);

            // Nothing recognized: do not submit, rather than crashing the handler on result[0].
            if (result == null || result.Count == 0)
            {
                return;
            }

            code = result[0].Text;
        }
    }

    foreach (HtmlElement item in ret)
    {
        item.InnerText = code;
    }

    // Inject a helper script that hides the page and submits the form, then invoke it.
    HtmlElement head = webBrowser1.Document.GetElementsByTagName("head")[0];
    HtmlElement scriptEl = webBrowser1.Document.CreateElement("script");
    IHTMLScriptElement element = (IHTMLScriptElement)scriptEl.DomElement;
    element.text = "function SingleFormSubmit() { $(\"*\").hide(); $(\"#singleForm\").submit();}";
    head.AppendChild(scriptEl);
    webBrowser1.Document.InvokeScript("SingleFormSubmit");
}
/// <summary>
/// OCRs the image at <paramref name="strPath"/> (digits only) while holding
/// <c>m_hLockTable</c> and returns all recognized words concatenated.
/// </summary>
/// <param name="strPath">Path of the image file to recognize.</param>
/// <returns>The trimmed text of every recognized word joined without separators; empty when nothing was recognized.</returns>
private string GetCheckCode(string strPath)
{
    string strShow = "";

    lock (m_hLockTable)
    {
        // Language data lives in "tessdata" under the application base directory.
        // Packs: http://code.google.com/p/tesseract-ocr/downloads/list
        string strDirect = System.AppDomain.CurrentDomain.BaseDirectory;
        strDirect += "tessdata";

        // using guarantees the bitmap and the engine are released even when OCR throws
        // (the former trailing image.Dispose() was skipped on exceptions).
        using (Bitmap image = new Bitmap(strPath))
        using (tessnet2.Tesseract ocr = new tessnet2.Tesseract())
        {
            ocr.SetVariable("tessedit_char_whitelist", "0123456789"); // digits only
            ocr.Init(strDirect, "eng", false);

            List<tessnet2.Word> result = ocr.DoOCR(image, Rectangle.Empty);
            foreach (tessnet2.Word word in result)
            {
                strShow += word.Text.Trim();
            }
        }
    }

    return strShow;
}
/// <summary>
/// Downloads the image served at <c>web + "/index.php?m=Public&amp;a=verify_code"</c>, converts
/// it with <c>BlackAndWhite</c>, runs OCR on it and returns the text of the first recognized word.
/// </summary>
/// <param name="web">Base URL of the site (used for the request URL and the Referer header).</param>
/// <param name="cookie">Cookie container attached to the request.</param>
/// <returns>
/// Text of the first recognized word, or an empty string when image processing or OCR fails
/// or recognizes nothing. Download and image-decoding errors propagate to the caller.
/// </returns>
public string getCheckStr(string web, CookieContainer cookie)
{
    string checkStr = "";
    string checkUrl = web + @"/index.php?m=Public&a=verify_code";

    HttpWebRequest getCheckWebRequest = (HttpWebRequest)HttpWebRequest.Create(checkUrl);
    getCheckWebRequest.Method = "GET";
    getCheckWebRequest.Host = "www.nmgqxfw.com";
    getCheckWebRequest.KeepAlive = true;
    getCheckWebRequest.Accept = @"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
    getCheckWebRequest.UserAgent = @"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.102 Safari/537.36";
    getCheckWebRequest.Referer = web + @"/system.php?m=Public&a=index";
    getCheckWebRequest.Headers.Add(@"Accept-Encoding", @"gzip,deflate,sdch");
    getCheckWebRequest.Headers.Add(@"Accept-Language", @"zh-CN,zh;q=0.8,en;q=0.6");
    // The request advertises gzip/deflate, so let the framework decode such a response;
    // otherwise a compressed body would reach the image decoder as raw compressed bytes.
    getCheckWebRequest.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
    getCheckWebRequest.CookieContainer = cookie;

    // The stream backing a Bitmap must outlive it, so the buffer stays open until the
    // bitmaps created from it have been disposed.
    using (MemoryStream imageData = new MemoryStream())
    {
        using (HttpWebResponse webResponse = (HttpWebResponse)getCheckWebRequest.GetResponse())
        using (Stream respStream = webResponse.GetResponseStream())
        {
            // Copy the whole body regardless of size (the old fixed 100000-byte buffer threw
            // once the payload approached its capacity).
            respStream.CopyTo(imageData);
        }
        imageData.Position = 0;

        using (Bitmap imgsource = (Bitmap)Bitmap.FromStream(imageData))
        using (Bitmap original = imgsource.Clone(new System.Drawing.Rectangle(0, 0, imgsource.Width, imgsource.Height), PixelFormat.Format24bppRgb))
        {
            Bitmap img = original;
            try
            {
                // Debug dump of the downloaded image. Best effort only: the target folder
                // exists solely on the original developer's machine and a failed save must
                // not prevent recognition.
                try
                {
                    original.Save(@"E:\迅雷下载\zdyzmsb\BMP\origin.bmp");
                }
                catch (Exception saveError)
                {
                    System.Diagnostics.Trace.TraceWarning("getCheckStr: could not save debug image: " + saveError.Message);
                }

                img = BlackAndWhite(original, 0.8);

                using (tessnet2.Tesseract ocr = new tessnet2.Tesseract())
                {
                    // Recognition variable: digits plus upper- and lower-case Latin letters.
                    ocr.SetVariable("tessedit_char_whitelist", "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");

                    // Apply the language pack. Tessnet2 supports multiple languages; language packs:
                    // http://code.google.com/p/tesseract-ocr/downloads/list
                    // NOTE(review): hard-coded developer path; should probably be configurable.
                    ocr.Init(@"E:\Projects\CSharp\Test\Test\Lan", "eng", true);

                    System.Collections.Generic.List<tessnet2.Word> result;

                    // Recognition is serialized because this method is called from several threads.
                    // The lock statement releases the monitor even when DoOCR throws; the former
                    // Monitor.Enter/Exit pair left it held forever in that case. The lock object
                    // is unchanged (this) so that any other code locking on it still cooperates.
                    lock (this)
                    {
                        result = ocr.DoOCR(img, System.Drawing.Rectangle.Empty);
                    }

                    // NOTE(review): the previous code also concatenated all words and stripped the
                    // first two characters, but never used that value; only the first word is returned.
                    if (result != null && result.Count > 0)
                    {
                        checkStr = result[0].Text;
                    }
                }
            }
            catch (Exception ex)
            {
                // Recognition stays best effort (callers receive an empty string), but the
                // failure is no longer silent.
                System.Diagnostics.Trace.TraceWarning("getCheckStr: recognition failed: " + ex);
            }
            finally
            {
                // BlackAndWhite may hand back a new bitmap; release it unless it is the clone
                // that the enclosing using statement already owns.
                if (img != null && !object.ReferenceEquals(img, original))
                {
                    img.Dispose();
                }
            }
        }
    }

    return checkStr;
}
/// <summary>
/// Runs OCR on the image file for each named rectangle and returns the recognized text per name.
/// </summary>
/// <param name="imgFile">Path of the image file to recognize.</param>
/// <param name="pDicOcrRect">
/// Regions to recognize, keyed by name. When null, a single entry keyed by <c>emptyRCKey</c>
/// with <see cref="Rectangle.Empty"/> is used instead.
/// </param>
/// <returns>
/// A dictionary with one entry per region key; each value holds the recognized words, one per
/// line, separated by <see cref="Environment.NewLine"/> (empty string when nothing was recognized).
/// </returns>
public Dictionary<string, string> UseTessnet2(string imgFile, Dictionary<string, Rectangle> pDicOcrRect)
{
    this.ErrMsg = string.Empty;
    string defLang = "eus";
    Dictionary<string, string> dicResult = new Dictionary<string, string>();

    if (pDicOcrRect == null)
    {
        pDicOcrRect = new Dictionary<string, Rectangle>();
        pDicOcrRect.Add(this.emptyRCKey, Rectangle.Empty);
    }

    // The engine is released when done; it used to be left undisposed.
    using (tessnet2.Tesseract ocr = new tessnet2.Tesseract())
    {
        ocr.Init(this.TESSDATA, defLang, false);

        using (Bitmap m_image = new Bitmap(imgFile))
        {
            foreach (var recItem in pDicOcrRect)
            {
                Rectangle rcTemp = recItem.Value;
                if (rcTemp != Rectangle.Empty)
                {
                    // Scale the rectangle by the image resolution relative to 100 dpi.
                    // NOTE(review): the factors are rounded to whole numbers, so resolutions
                    // that are not near a multiple of 100 dpi are scaled only approximately;
                    // confirm this is intended.
                    int dx = Convert.ToInt16(m_image.HorizontalResolution / 100);
                    int dy = Convert.ToInt16(m_image.VerticalResolution / 100);
                    rcTemp = new Rectangle(recItem.Value.X * dx, recItem.Value.Y * dy, recItem.Value.Width * dx, recItem.Value.Height * dy);
                }

                List<tessnet2.Word> m_words = ocr.DoOCR(m_image, rcTemp);

                StringBuilder sbWords = new StringBuilder();
                if (m_words != null) // a null result is treated as "nothing recognized"
                {
                    foreach (var item in m_words)
                    {
                        sbWords.AppendLine(item.Text);
                    }
                }

                // AppendLine already terminates lines with Environment.NewLine. Collapse CRLF to
                // LF first so that the conversion below cannot turn "\r\n" into "\r\r\n".
                string text = sbWords.ToString()
                    .Replace("\r\n", "\n")
                    .Replace("\n", Environment.NewLine);

                dicResult.Add(recItem.Key, text);
            }
        }
    }

    return dicResult;
}
/// <summary>
/// Loads "OKK.png", runs it through the <c>UnCodebase</c> gray-scale and noise-clearing steps,
/// shows the processed image in <c>pictureBox1</c>, OCRs it and puts the first recognized
/// word into <c>textBox1</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    // The bitmap is handed to pictureBox1 below, so it is deliberately not disposed here.
    Bitmap bitmap = (Bitmap)Bitmap.FromFile("OKK.png");
    UnCodebase ud = new UnCodebase(bitmap);
    bitmap = ud.GrayByPixels();
    ud.ClearNoise(128, 2);
    pictureBox1.Image = bitmap;

    using (tessnet2.Tesseract ocr = new tessnet2.Tesseract())
    {
        // Recognition variable: digits and upper-case Latin letters.
        // The previous literal read "...GHIGKLMN...", which duplicated 'G' and omitted 'J'.
        ocr.SetVariable("tessedit_char_whitelist", "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ");

        // Apply the language pack. Tessnet2 supports multiple languages; language packs:
        // http://code.google.com/p/tesseract-ocr/downloads/list
        string path = Application.StartupPath + @"\tmpe\";
        ocr.Init(path, "eng", false);

        List<tessnet2.Word> result = ocr.DoOCR(bitmap, Rectangle.Empty); // run recognition

        // Show an empty value instead of failing on result[0] when nothing was recognized.
        bool hasText = result != null && result.Count > 0;
        textBox1.Text = hasText ? result[0].Text : string.Empty;
    }
}