/// <summary>
/// Applies the default configuration to the shared <c>engine</c>: a digits-only
/// whitelist, both dictionary DAWG flags set to false, and single-word segmentation.
/// </summary>
private void DefaultSettings()
{
    const string digitsOnly = "0123456789";
    const bool useDictionary = false;

    // Restrict recognized characters to digits.
    engine.SetVariable("tessedit_char_whitelist", digitsOnly);

    // Turn off the system and frequent-word dictionaries.
    engine.SetVariable("load_system_dawg", useDictionary);
    engine.SetVariable("load_freq_dawg", useDictionary);

    engine.DefaultPageSegMode = PageSegMode.SingleWord;
}
/// <summary>
/// Recognizes a printed ticket number from pre-segmented character images.
/// </summary>
/// <param name="imgs">
/// Character images in reading order. The first image is read with an upper-case
/// letter whitelist, all following images with a digit whitelist. The list is not modified.
/// </param>
/// <returns>
/// The first recognized character of every image, concatenated. Images for which
/// nothing was recognized contribute no character, so the result may be shorter than
/// <c>imgs.Count</c>. Returns an empty string when <paramref name="imgs"/> is null or empty.
/// </returns>
private string OCRTicketNo(IList<Bitmap> imgs)
{
    if (imgs == null || imgs.Count == 0)
    {
        return "";
    }

    string res = "";

    // Leading character: letters only.
    using (var engineLetter = new TesseractEngine(@"tessdata", "eng", EngineMode.TesseractOnly))
    {
        engineLetter.SetVariable("tessedit_char_whitelist", "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
        engineLetter.SetVariable("tessedit_unrej_any_wd", true);
        engineLetter.DefaultPageSegMode = PageSegMode.SingleChar;
        using (var page = engineLetter.Process(imgs[0], PageSegMode.SingleChar))
        {
            string text = page.GetText();
            // Guard against an empty result; Substring(0, 1) would throw.
            if (!string.IsNullOrEmpty(text))
            {
                res += text.Substring(0, 1);
            }
        }
    }

    // Remaining characters: digits only. Iterate from index 1 instead of
    // removing the first element from the caller's list.
    if (imgs.Count > 1)
    {
        using (var engine = new TesseractEngine(@"tessdata", "eng", EngineMode.TesseractOnly))
        {
            engine.SetVariable("tessedit_char_whitelist", "1234567890");
            engine.SetVariable("tessedit_unrej_any_wd", true);
            engine.DefaultPageSegMode = PageSegMode.SingleChar;
            for (int i = 1; i < imgs.Count; i++)
            {
                using (var page = engine.Process(imgs[i], PageSegMode.SingleChar))
                {
                    string text = page.GetText();
                    if (!string.IsNullOrEmpty(text))
                    {
                        res += text.Substring(0, 1);
                    }
                }
            }
        }
    }

    Console.WriteLine("OCR Result = " + res);
    return res;
}
/// <summary>
/// Runs single-line OCR on a 24bpp RGB copy of <paramref name="b"/> with an
/// alphanumeric whitelist.
/// </summary>
/// <param name="b">Image to recognize.</param>
/// <returns>
/// The recognized text with spaces removed and trimmed, or <c>null</c> when any
/// exception occurs (best-effort contract kept from the original).
/// </returns>
public static string OCR(Bitmap b)
{
    try
    {
        string path = $@"{Environment.CurrentDirectory}\tessdata\";
        using (var engine = new TesseractEngine(path, "eng"))
        {
            const string letters = "abcdefghijklmnopqrstuvwxyz";
            const string numbers = "0123456789";

            // ToUpperInvariant: the whitelist must not depend on the current
            // culture (e.g. Turkish maps 'i' to a dotted capital I).
            engine.SetVariable("tessedit_char_whitelist", $"{numbers}{letters}{letters.ToUpperInvariant()}");
            engine.SetVariable("tessedit_unrej_any_wd", true);
            engine.SetVariable("tessedit_adapt_to_char_fragments", true);
            engine.SetVariable("tessedit_redo_xheight", true);
            engine.SetVariable("chop_enable", true);

            // The clone is owned by this method and must be disposed here.
            using (Bitmap x = b.Clone(new Rectangle(0, 0, b.Width, b.Height), System.Drawing.Imaging.PixelFormat.Format24bppRgb))
            using (var page = engine.Process(x, PageSegMode.SingleLine))
            {
                return page.GetText().Replace(" ", "").Trim();
            }
        }
    }
    catch (Exception)
    {
        // Callers receive null on any failure.
        return null;
    }
}
// 464/19516
// NOTE(review): presumably the hit/total printed by an earlier run — confirm.

/// <summary>
/// Runs OCR over every file in the dataset folder and prints how many recognized
/// texts equal the file name (without extension), then waits for a key press.
/// </summary>
static void Main(string[] args)
{
    int hit = 0;
    int all = 0;

    using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
    {
        // Engine configuration does not depend on the file, so set it once
        // instead of on every loop iteration.
        engine.DefaultPageSegMode = PageSegMode.SingleLine;
        engine.SetVariable("load_system_dawg", "false");
        engine.SetVariable("load_freq_dawg", "false");
        engine.SetVariable("tessedit_char_whitelist", "-2346789BCDFGHJKMPQRTVWXY");

        foreach (var file in Directory.EnumerateFiles(@"D:\dataset\easy\first try\read_and_segmented"))
        {
            // The expected text is encoded in the file name.
            string code = Path.GetFileNameWithoutExtension(file);

            using (var img = Pix.LoadFromFile(file))
            using (var page = engine.Process(img))
            {
                var text = page.GetText().Trim();
                if (text == code)
                {
                    ++hit;
                }
            }

            ++all;
        }
    }

    Console.WriteLine($"{hit}/{all}");
    Console.ReadKey();
}
/// <summary>
/// Reads the <c>CONFIGVARS_FILE</c> under <c>tessdata/configs/</c> (relative to
/// <c>Datapath</c>) and applies each entry to the engine via SetVariable.
/// This only works for non-init parameters (@see <a href="https://code.google.com/p/tesseract-ocr/wiki/ControlParams">ControlParams</a>).
/// </summary>
/// <remarks>
/// Lines starting with '#' are skipped. Each remaining line is split on whitespace;
/// the first token is the variable name and the second its value. Values "T" and "F"
/// are passed as booleans, anything else as a string. Lines with fewer than two
/// tokens are ignored. Does nothing when the file does not exist.
/// </remarks>
/// <param name="engine">Engine that receives the variables.</param>
void ControlParameters(TesseractEngine engine)
{
    string configsFilePath = Path.Combine(Datapath, "tessdata/configs/" + CONFIGVARS_FILE);
    if (!File.Exists(configsFilePath))
    {
        return;
    }

    foreach (string line in File.ReadAllLines(configsFilePath))
    {
        string trimmed = line.Trim();
        if (trimmed.StartsWith("#", StringComparison.Ordinal))
        {
            continue;
        }

        string[] keyValuePair = trimmed.Split(new char[0], StringSplitOptions.RemoveEmptyEntries);
        if (keyValuePair.Length < 2)
        {
            // Blank or malformed line: skip explicitly instead of relying on
            // an IndexOutOfRangeException being swallowed.
            continue;
        }

        string name = keyValuePair[0];
        string value = keyValuePair[1];

        try
        {
            if (value == "T" || value == "F")
            {
                engine.SetVariable(name, value == "T");
            }
            else
            {
                engine.SetVariable(name, value);
            }
        }
        catch (Exception ex)
        {
            // Best effort: one bad entry must not stop the remaining ones,
            // but leave a trace instead of failing silently.
            System.Diagnostics.Debug.WriteLine("Failed to set Tesseract variable '" + name + "': " + ex.Message);
        }
    }
}
/// <summary>
/// Creates the four OCR engines used by this class (letters, letters with
/// punctuation, digits, scaled numbers). All use the "eng" data from the
/// <c>tessdata</c> folder next to the executing assembly and differ only in their
/// character whitelist.
/// </summary>
private void InitTesseract()
{
    var tesseractData = Path.Combine(Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location), "tessdata");

    _englishOcr = CreateSingleLineEngine(tesseractData, "qwertyuiopasdfghjklzxcvbnm QWERTYUIOPASDFGHJKLZXCVBNM");
    _englishPunctOcr = CreateSingleLineEngine(tesseractData, "qwertyuiopasdfghjklzxcvbnm QWERTYUIOPASDFGHJKLZXCVBNM0123456789/*-+:,.%");
    _numbersOcr = CreateSingleLineEngine(tesseractData, "0123456789");
    _numbersScaledOcr = CreateSingleLineEngine(tesseractData, "0123456789.,KMB");

    Logger.Info("Tessract engines initialized");
}

/// <summary>
/// Builds one single-line "eng" engine with the given whitelist, zero rejection
/// enabled, and both dictionary DAWG flags set to false.
/// </summary>
/// <param name="dataPath">Directory passed to the engine as its data path.</param>
/// <param name="whitelist">Value for <c>tessedit_char_whitelist</c>.</param>
/// <returns>The configured engine; the caller owns it.</returns>
private static TesseractEngine CreateSingleLineEngine(string dataPath, string whitelist)
{
    var engine = new TesseractEngine(dataPath, "eng", EngineMode.TesseractAndLstm);
    try
    {
        engine.DefaultPageSegMode = PageSegMode.SingleLine;
        engine.SetVariable("tessedit_char_whitelist", whitelist);
        engine.SetVariable("tessedit_zero_rejection", true);
        engine.SetVariable("load_freq_dawg", false);
        engine.SetVariable("load_system_dawg", false);
        return engine;
    }
    catch
    {
        // Do not leak a half-configured native engine.
        engine.Dispose();
        throw;
    }
}
/// <summary>
/// Debug driver: loads a cropped rune image, scales it to twice its size and,
/// unless <c>analyzer.ShouldGetRune</c> returns true, prints the OCR text and
/// saves the scaled image.
/// </summary>
public static void Run()
{
    // All three disposables are released on every exit path, including the early return.
    using (var bitmap = new Bitmap("E:\\dev\\venomsw\\images\\cropped_rune.png"))
    using (var scaled = ScaleBitmap(bitmap, bitmap.Width * 2, bitmap.Height * 2))
    {
        //TODO debug only
        if (analyzer.ShouldGetRune(scaled))
        {
            return;
        }

        // NOTE(review): the verbatim string below contains doubled backslashes;
        // left unchanged because it is runtime text — confirm whether single
        // separators were intended.
        using (TesseractEngine engine = new TesseractEngine(@"E:\\dev\\venomsw\\venomsw\\tessdata", "eng", EngineMode.Default, "venom"))
        {
            engine.SetVariable("language_model_penalty_non_freq_dict_word", "1");
            engine.SetVariable("language_model_penalty_non_dict_word", "1");

            using (Page page = engine.Process(scaled, PageSegMode.SingleBlock))
            {
                Console.WriteLine(page.GetText());
            }
        }

        scaled.Save("E:\\dev\\venomsw\\images\\cropped_rune3_nobg.png");
    }
}
/// <summary>
/// Creates the OCR engine and trains a multiclass SVM on every <c>*.png</c> file
/// found in <paramref name="TrainedDataInputFile"/>.
/// </summary>
/// <remarks>
/// Each image is read as a 28x28 grid and flattened to 784 values: 0 where the
/// pixel's colour name is "ffffffff", 1 otherwise. The class label is the part of
/// the file name before the first '.', parsed as an integer.
/// </remarks>
/// <param name="TrainedDataInputFile">Directory that contains the training images.</param>
public SVM(string TrainedDataInputFile)
{
    _engine = new TesseractEngine(@"./tessdata3", "eng", EngineMode.TesseractAndCube);
    _engine.SetVariable("tessedit_char_whitelist", "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
    _engine.SetVariable("tessedit_char_blacklist", "¢§+~»~`!@#$%^&*()_+-={}[]|\\:\";\'<>?,./");

    string[] TrainedData = Directory.GetFiles(TrainedDataInputFile, "*.png");
    double[][] inputs = new double[TrainedData.Length][];
    int[] Outputs = new int[TrainedData.Length];

    for (int i = 0; i < TrainedData.Length; i++)
    {
        string filename = Path.GetFileNameWithoutExtension(TrainedData[i]);
        string[] split = filename.Split('.');
        double[] InputArray = new double[784];

        // Dispose each bitmap as soon as its pixels are read; previously every
        // training image stayed alive until finalization.
        using (Bitmap TrainingImage = new Bitmap(TrainedData[i]))
        {
            for (int j = 0; j < 28; j++)
            {
                for (int k = 0; k < 28; k++)
                {
                    bool isWhite = TrainingImage.GetPixel(j, k).Name.Equals("ffffffff");
                    InputArray[j * 28 + k] = isWhite ? 0 : 1;
                }
            }
        }

        inputs[i] = InputArray;
        Outputs[i] = Convert.ToInt32(split[0]);
    }

    IKernel kernel = new Polynomial(2, 0);
    ksvm = new MulticlassSupportVectorMachine(784, kernel, 2);
    MulticlassSupportVectorLearning ml = new MulticlassSupportVectorLearning(ksvm, inputs, Outputs);

    // TODO: tune these three parameters carefully later.
    double complexity = 1;
    double epsilon = 0.001;
    double tolerance = 0.2;

    ml.Algorithm = (svm, classInputs, classOutputs, i, j) =>
    {
        var smo = new SequentialMinimalOptimization(svm, classInputs, classOutputs);
        smo.Complexity = complexity; // cost parameter for the SVM
        smo.Epsilon = epsilon;
        smo.Tolerance = tolerance;
        return smo;
    };

    // Train the machines. It should take a while. The returned error is not used.
    ml.Run();
}
/// <summary>
/// Recognizes a single-line image symbol by symbol and assembles the result from
/// fixed-width horizontal slots.
/// </summary>
/// <remarks>
/// Symbols whose bounding box is at most 13 wide or 15 high, or whose text is
/// blank, are ignored. Accepted symbols accumulate in a buffer; whenever a symbol
/// starts more than 14 pixels past the current slot origin the buffer is passed to
/// <c>Resolver</c>, its result appended, and the origin advanced by 28. A "Q"
/// taller than 30 is recorded as "O". If fewer than 6 characters were produced,
/// the pending buffer is resolved and the text is padded with
/// <c>LetraAleatoria()</c> until it has 6 characters.
/// </remarks>
/// <param name="imagem">Image to recognize.</param>
/// <param name="linguagem">Tesseract language passed to the engine.</param>
/// <returns>The assembled text.</returns>
public static string OCR(Bitmap imagem, string linguagem)
{
    string texto = "";

    using (TesseractEngine engine = new TesseractEngine(@"C:\GitHub\operacao-politica-supervisionada\OPS\temp\", linguagem, EngineMode.Default))
    {
        engine.SetVariable("tessedit_char_whitelist", "1234567890ABCDEFGHIJKLMNOPQRSTUVWXYZ");
        engine.SetVariable("tessedit_unrej_any_wd", true);
        engine.SetVariable("applybox_learn_chars_and_char_frags_mode", true);
        engine.SetVariable("save_blob_choices", true);

        string sobreposto = "";   // symbols collected for the current slot
        int ultimo = 12;          // x origin of the current slot

        using (Page page = engine.Process(imagem, PageSegMode.SingleLine))
        using (ResultIterator ri = page.GetIterator())
        {
            do
            {
                string symbol = ri.GetText(PageIteratorLevel.Symbol);

                Tesseract.Rect box;
                if (!ri.TryGetBoundingBox(PageIteratorLevel.Symbol, out box))
                {
                    continue;
                }

                bool largeEnough = box.Width > 13 && box.Height > 15;
                if (!largeEnough || symbol.Trim() == "")
                {
                    continue;
                }

                // Flush the buffer once per slot boundary the symbol has crossed.
                while (box.X1 > ultimo + 14)
                {
                    texto += Resolver(sobreposto);
                    sobreposto = "";
                    ultimo += 28;
                }

                // A tall "Q" is recorded as "O".
                sobreposto += (symbol == "Q" && box.Height > 30) ? "O" : symbol;
            }
            while (ri.Next(PageIteratorLevel.Symbol));

            if (texto.Length < 6)
            {
                texto += Resolver(sobreposto);
                while (texto.Length < 6)
                {
                    texto += LetraAleatoria();
                }
            }
        }
    }

    return texto;
}
/// <summary>
/// Single-line OCR restricted to digits, '+' and '-'.
/// </summary>
/// <param name="b">Image to recognize.</param>
/// <returns>The raw text returned by the engine.</returns>
private static string OCRItalien(Bitmap b)
{
    using (var ocr = new TesseractEngine(null, "eng", EngineMode.Default))
    {
        ocr.SetVariable("tessedit_char_whitelist", "1234567890+-");
        ocr.SetVariable("tessedit_unrej_any_wd", true);

        using (var recognized = ocr.Process(b, PageSegMode.SingleLine))
        {
            return recognized.GetText();
        }
    }
}
/// <summary>
/// Called for each strip: writes a cleaned copy of the image to a temporary file,
/// runs OCR on that copy, then deletes it.
/// </summary>
/// <param name="fileName">Path of the strip image.</param>
/// <param name="allowedChars">Characters placed in the engine's whitelist.</param>
/// <returns>The value returned by <c>PerformOCR</c>.</returns>
private string recognizeStripName(string fileName, string allowedChars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ\'\"")
{
    string noiseFreeImage = Path.Combine(Configuration.Instance.TempFolder, "temp.png");

    try
    {
        // Make image white text on black background:
        cleanImageAndSave(fileName, noiseFreeImage, isNotInWhiteRange);

        // Use only specific characters:
        m_ocrEngine.SetVariable("tessedit_char_whitelist", allowedChars);

        return PerformOCR(noiseFreeImage, false);
    }
    finally
    {
        // Remove the temp file even when cleaning or OCR throws.
        File.Delete(noiseFreeImage);
    }
}
/// <summary>
/// Returns the shared engine stored in <c>m_ocr</c>, creating and configuring it
/// on the first call.
/// </summary>
/// <returns>The shared <see cref="TesseractEngine"/> instance.</returns>
public static TesseractEngine GetTesseractEngine()
{
    if (m_ocr != null)
    {
        return m_ocr;
    }

    // First use: create the engine, then restrict it to digits and blacklist 'l'.
    m_ocr = new TesseractEngine("./tessdata", "eng", EngineMode.TesseractAndCube);
    m_ocr.SetVariable("tessedit_char_whitelist", "01234567890");
    m_ocr.SetVariable("tessedit_char_blacklist", "l");
    return m_ocr;
}
/// <summary>
/// Loads the image at <paramref name="path"/> and returns its OCR text using the
/// "eng+por" language data and an alphanumeric whitelist that includes some
/// Portuguese accented letters.
/// </summary>
/// <param name="path">Path of the image file.</param>
/// <returns>The text returned by the engine.</returns>
public static string GetTextFromImage(string path)
{
    // Dispose the bitmap so the image is released when the method returns.
    using (Bitmap image = new Bitmap(path))
    using (var engine = new TesseractEngine("tessdata", "eng+por", EngineMode.TesseractAndCube))
    {
        engine.SetVariable("tessedit_char_whitelist", "1234567890ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzçÇãÃáàõéúí");
        engine.SetVariable("tessedit_unrej_any_wd", true);

        using (var page = engine.Process(image))
        {
            return page.GetText();
        }
    }
}
/// <summary>
/// Single-block OCR with the "spa" language data, restricted to digits and
/// lower-case letters.
/// </summary>
/// <param name="b">Image to recognize.</param>
/// <returns>The raw text returned by the engine.</returns>
private string OCR(Bitmap b)
{
    using (var ocr = new TesseractEngine(@"tessdata", "spa", EngineMode.Default))
    {
        ocr.SetVariable("tessedit_char_whitelist", "1234567890abcdefghijklmnopqrstuvwxyz");
        ocr.SetVariable("tessedit_unrej_any_wd", true);

        using (var recognized = ocr.Process(b, PageSegMode.SingleBlock))
        {
            return recognized.GetText();
        }
    }
}
/// <summary>
/// Runs OCR on the image produced by <c>ExtractMissedObscureWordsInImage</c> and
/// returns the recognized text split into non-empty lines.
/// </summary>
/// <param name="imageFile">Source image passed to the extraction helper.</param>
/// <returns>One entry per non-empty line of recognized text.</returns>
private List<String> GetNewObscureWords(Bitmap imageFile)
{
    // NOTE(review): ownership of the bitmap returned by the helper is not visible
    // here, so it is deliberately not disposed — confirm against the helper.
    Bitmap extractMissedObscureWordsInImage = ExtractMissedObscureWordsInImage(imageFile);

    // Engine and page wrap native resources and were previously never released.
    using (TesseractEngine engine = new TesseractEngine(Program.TessDataDir, "eng", EngineMode.Default))
    {
        engine.SetVariable("tessedit_char_whitelist", "abcdefghijklmnopqrstuvwxyz");
        engine.SetVariable("tessedit_char_blacklist", "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^&*()-_=\\|/?"); // no digit

        using (Page page = engine.Process(extractMissedObscureWordsInImage, PageSegMode.SingleBlock))
        {
            String text = page.GetText();
            return text.Split(new[] { "\n" }, StringSplitOptions.RemoveEmptyEntries).ToList();
        }
    }
}
/// <summary>
/// Single-line OCR with the legacy Tesseract engine and an alphanumeric whitelist.
/// </summary>
/// <param name="b">Image to recognize.</param>
/// <returns>The raw text returned by the engine.</returns>
public static string OCR(Bitmap b)
{
    // NOTE(review): the lower-case part of the whitelist omits 'w' and 'y' while
    // the upper-case part has them — confirm whether that is intentional.
    const string allowedCharacters = "1234567890ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvxz";

    using (var ocr = new TesseractEngine(@"C:\RODRIGO\PROJETOS\ALTRAN\AeC - Framewors\AeC.Automacao.ImageProcess\AeC.ImageProcess\tessdata", "eng", EngineMode.TesseractOnly))
    {
        ocr.SetVariable("tessedit_char_whitelist", allowedCharacters);
        ocr.SetVariable("tessedit_unrej_any_wd", true);

        using (var recognized = ocr.Process(b, PageSegMode.SingleLine))
        {
            return recognized.GetText();
        }
    }
}
/// <summary>
/// Single-line OCR with the "eng" language data, restricted to digits and
/// upper-case letters.
/// </summary>
/// <param name="b">Image to recognize.</param>
/// <returns>The raw text returned by the engine.</returns>
private string OCR(Bitmap b)
{
    using (var ocr = new TesseractEngine(@"tessdata", "eng", EngineMode.Default))
    {
        ocr.SetVariable("tessedit_char_whitelist", "1234567890ABCDEFGHIJKLMNOPQRSTUVWXYZ");
        ocr.SetVariable("tessedit_unrej_any_wd", true);

        using (var recognized = ocr.Process(b, PageSegMode.SingleLine))
        {
            return recognized.GetText();
        }
    }
}
/// <summary>
/// Creates the static <c>Tesseract</c> engine (single-block segmentation) and sets
/// its whitelist, user pattern and word files, and dictionary flags.
/// </summary>
/// <param name="_logger">Logger that receives the initialization message.</param>
private static void Initialize(ILogger _logger)
{
    _logger.LogInformation("Initializing static Tesseract");

    var ocr = new TesseractEngine(@"./tessdata", "eng", EngineMode.TesseractAndLstm);
    ocr.DefaultPageSegMode = PageSegMode.SingleBlock;
    Tesseract = ocr;

    ocr.SetVariable("tessedit_char_whitelist", ALLOWED_CHARACTERS);

    // Pattern syntax: \n (char or digit), \c (char), \d (digit), \p (punct),
    // \a (lower), \A (upper), \* any number (\A\d and \d\A)
    ocr.SetVariable("user_patterns_file", @"./tessdata/cyber_patterns");

    // Words file entries: BD, 1C, E9, 55, 7A, 1F
    ocr.SetVariable("user_words_file", @"./tessdata/cyber_words");

    ocr.SetVariable("load_system_dawg", false); // don't load the system dictionary
    ocr.SetVariable("load_freq_dawg", false);   // don't load the word-frequency dictionary
}
/// <summary>
/// Recognizes a single word restricted to upper-case letters and returns its
/// characters in lower case with newlines removed.
/// </summary>
/// <param name="data">Image that contains the word.</param>
/// <param name="tessDataDir">Directory passed to the engine as its data path.</param>
/// <returns>The recognized characters, lower-cased.</returns>
private char[] RetrieveCharsByOcr(Bitmap data, string tessDataDir)
{
    // Engine and page wrap native resources and were previously never released.
    using (TesseractEngine engine = new TesseractEngine(tessDataDir, "eng", EngineMode.Default))
    {
        engine.SetVariable("tessedit_char_whitelist", "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
        engine.SetVariable("tessedit_char_blacklist", "abcdefghijklmnopqrstuvwxyz0123456789!@#$%^&*()-_=\\|/?"); // no digit

        using (Page page = engine.Process(data, PageSegMode.SingleWord))
        {
            // ToLowerInvariant keeps the mapping culture-independent
            // (e.g. 'I' must become 'i' under a Turkish locale too).
            return page.GetText().Replace("\n", "").ToLowerInvariant().ToCharArray();
        }
    }
}
/// <summary>
/// Form load handler: creates the shared OCR engine and configures it.
/// </summary>
/// <param name="sender">Event source.</param>
/// <param name="e">Event data.</param>
private void Form1_Load(object sender, EventArgs e)
{
    // The Tesseract data files are loaded only once, when the program opens.
    var loadedEngine = new TesseractEngine(@".\tessdata", "ita", EngineMode.Default);
    ocrengine = loadedEngine;

    loadedEngine.DefaultPageSegMode = PageSegMode.SingleBlock;
    loadedEngine.SetVariable("tessedit_char_whitelist", "0123456789-_abcdfghmnpqrtuvzxyk");
}
/// <summary>
/// Runs digits-only OCR on the uploaded image, using the request's destination
/// language for the engine.
/// </summary>
/// <param name="request">Form model carrying the uploaded image and the language.</param>
/// <returns>
/// The recognized text, or "Ocr is finished. Return empty" when the text is null
/// or whitespace.
/// </returns>
public String DoOCR([FromForm] OcrModel request)
{
    var image = request.Image;
    string tessPath = Path.Combine(trainedDataFolderName, "");
    string result = "";

    using (var imageStream = new MemoryStream())
    {
        if (image.Length > 0)
        {
            image.CopyTo(imageStream);
        }

        // TODO: Create one instance of engine and inject into app
        using (var engine = new TesseractEngine(tessPath, request.DestinationLanguage, EngineMode.Default))
        {
            // whitelist numbers only
            engine.SetVariable("tessedit_char_whitelist", "0123456789");

            // ToArray returns exactly the bytes written; GetBuffer exposes the
            // whole internal buffer including unused trailing capacity.
            using (var img = Pix.LoadFromMemory(imageStream.ToArray()))
            using (var page = engine.Process(img))
            {
                result = page.GetText();
            }

            Console.WriteLine(result);
        }
    }

    return String.IsNullOrWhiteSpace(result) ? "Ocr is finished. Return empty" : result;
}
/// <summary>
/// Creates the engine from <c>LangPath</c> and <c>LngStr</c>, blacklists Latin
/// letters and a set of punctuation and CJK symbols, and selects single-block
/// segmentation.
/// </summary>
public OcrManager()
{
    const string blockedCharacters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz〈ヽ〉'〝<〟‥“\\ゐ=`”_.丿″\"";

    _tesseract = new TesseractEngine(LangPath, LngStr);

    // Characters the engine must never output.
    _tesseract.SetVariable("tessedit_char_blacklist", blockedCharacters);

    _tesseract.DefaultPageSegMode = PageSegMode.SingleBlock;
}
/// <summary>
/// Intended to OCR <paramref name="fullBmp"/> and return the plain text, a
/// block/paragraph/line/word dump, and any exception text.
/// </summary>
/// <remarks>
/// NOTE(review): the unconditional <c>return</c> right after the local
/// declarations makes everything below it unreachable, so as written this method
/// always returns a tuple of three nulls. Confirm whether the OCR path was
/// disabled on purpose before removing either the return or the dead code.
/// </remarks>
/// <param name="fullBmp">Image to recognize (unused while the early return is in place).</param>
/// <returns>Tuple of (text result, blocks result, exception string).</returns>
static Tuple <string, string, string> OCRBitmapV3(Bitmap fullBmp)
{
    string textResult = null;
    string blocksResult = null;
    string exceptionString = null;
    // Early exit: the code below never runs.
    return(new Tuple <string, string, string>(textResult, blocksResult, exceptionString));
    try
    {
        using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
        {
            // Restrict recognition to digits and the letter 'B'.
            engine.SetVariable("tessedit_char_whitelist", "0123456789B");
            using (var page = engine.Process(fullBmp))
            {
                textResult = page.GetText();
                blocksResult = "";
                using (var iter = page.GetIterator())
                {
                    iter.Begin();
                    // Nested walk: block -> paragraph -> text line -> word.
                    // ConsoleWrite/ConsoleWriteLine are defined elsewhere; their return
                    // value is appended to blocksResult (presumably the echoed text — confirm).
                    do
                    {
                        do
                        {
                            do
                            {
                                do
                                {
                                    // Marker at the start of each block.
                                    if (iter.IsAtBeginningOf(PageIteratorLevel.Block)) { blocksResult += ConsoleWriteLine("<BLOCK>"); }
                                    // Each word is followed by "_".
                                    blocksResult += ConsoleWrite(iter.GetText(PageIteratorLevel.Word));
                                    blocksResult += ConsoleWrite("_");
                                    // "%" after the last word of a text line.
                                    if (iter.IsAtFinalOf(PageIteratorLevel.TextLine, PageIteratorLevel.Word)) { blocksResult += ConsoleWriteLine("%"); }
                                } while (iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word));
                                // "§" after the last text line of a paragraph.
                                if (iter.IsAtFinalOf(PageIteratorLevel.Para, PageIteratorLevel.TextLine)) { blocksResult += ConsoleWriteLine("§"); }
                            } while (iter.Next(PageIteratorLevel.Para, PageIteratorLevel.TextLine));
                        } while (iter.Next(PageIteratorLevel.Block, PageIteratorLevel.Para));
                    } while (iter.Next(PageIteratorLevel.Block));
                }
            }
        }
    }
    // Any failure is reported through the third tuple element instead of being thrown.
    catch (Exception e) { exceptionString = e.ToString(); }
    return(new Tuple <string, string, string>(textResult, blocksResult, exceptionString));
}
/// <summary>
/// Downloads the verification-code image, saves it, recognizes its digits and
/// stores the session cookie in Redis.
/// </summary>
/// <param name="path">
/// Prefix used for both the saved image (<c>path + "code.bmp"</c>) and the
/// tessdata directory (<c>path + "tessdata"</c>).
/// </param>
/// <returns>The recognized text with newlines and spaces removed.</returns>
public string GetVerficar(string path)
{
    httpItem.URL = $"{urlbase}/verficar.do";
    httpItem.ResultType = ResultType.Byte;
    httpItem.Cookie = cookie;
    httpResult = httpHelper.GetHtml(httpItem);

    string result;

    // The decoded image is owned by this method; dispose it when done.
    using (var image = byteArrayToImage(httpResult.ResultByte))
    {
        image.Save(path + "code.bmp");
        cookie = httpResult?.Cookie.Replace("; Path=/erp", "").Trim();

        using (var engine = new TesseractEngine(path + "tessdata", "eng", EngineMode.Default))
        {
            engine.SetVariable("tessedit_char_whitelist", "0123456789");
            using (var pix = PixConverter.ToPix((Bitmap)image))
            using (var page = engine.Process(pix))
            {
                result = page.GetText();
            }
        }
    }

    result = result.Replace("\n", "").Replace(" ", "").Trim();

    // Build the TTL directly; subtracting two separate DateTime.Now reads gives a
    // value that drifts by the time elapsed between them.
    var timeSpan = TimeSpan.FromMilliseconds(expire * 1000);
    RedisService.Instance.StringSet($"{RedisPrimaryKey.WebCrawlingCookie}/{uuid}", cookie, timeSpan);

    return result;
}
/// <summary>
/// Recognizes every image in order and concatenates the results, as hOCR markup
/// when <c>Hocr</c> is set and as plain text otherwise.
/// </summary>
/// <param name="images">Pages to recognize, in order.</param>
/// <param name="lang">Tesseract language passed to the engine.</param>
/// <returns>
/// The combined text with "\n" replaced by <see cref="Environment.NewLine"/>, or
/// an empty string as soon as any page yields a null text.
/// </returns>
public override string RecognizeText(IList<Image> images, string lang)
{
    string dataPath = Path.Combine(basedir, TESSDATA);

    using (TesseractEngine engine = new TesseractEngine(dataPath, lang, EngineMode.Default))
    {
        engine.SetVariable("tessedit_create_hocr", Hocr ? "1" : "0");

        // The PageSegMode member holds the mode name; parse it into the enum.
        Tesseract.PageSegMode psm = (PageSegMode)Enum.Parse(typeof(PageSegMode), PageSegMode);

        StringBuilder output = new StringBuilder();
        int pageIndex = 0; // zero-based page number passed to GetHOCRText

        foreach (Image image in images)
        {
            using (Pix pix = ConvertBitmapToPix(image))
            using (Page page = engine.Process(pix, psm))
            {
                string pageText = Hocr ? page.GetHOCRText(pageIndex) : page.GetText();
                if (pageText == null)
                {
                    return String.Empty;
                }

                output.Append(pageText);
            }

            pageIndex++;
        }

        return output.ToString().Replace("\n", Environment.NewLine);
    }
}
/// <summary>
/// Loads a captcha image, converts it to grayscale, binarizes it (tiled Sauvola)
/// and returns the recognized upper-case letters and digits with all whitespace removed.
/// </summary>
/// <param name="captchaFilePath">Path of the captcha image file.</param>
/// <returns>The recognized text, or <c>null</c> when the file could not be loaded.</returns>
private static string GetCaptchaText(string captchaFilePath)
{
    Pix captcha = null;
    try
    {
        captcha = Pix.LoadFromFile(captchaFilePath);
    }
    catch (Exception e)
    {
        Log.Error(e, "Error loading captcha file");
    }

    if (captcha == null)
    {
        return null;
    }

    // Every Pix, the engine and the page wrap native memory; release them all.
    using (captcha)
    using (var grayCaptcha = captcha.ConvertRGBToGray())
    using (var binarizedCaptcha = grayCaptcha.BinarizeSauvolaTiled(10, 0.75f, 1, 2))
    using (var engine = new TesseractEngine(Path.GetFullPath("tessdata"), "eng", EngineMode.Default))
    {
        engine.SetVariable("tessedit_char_whitelist", "ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890");

        using (var page = engine.Process(binarizedCaptcha, PageSegMode.SparseText))
        {
            return Regex.Replace(page.GetText(), @"\s+", string.Empty);
        }
    }
}
/// <summary>
/// Single-line, digits-only OCR.
/// </summary>
/// <param name="img">Image to recognize.</param>
/// <returns>
/// The recognized text with spaces removed, or an empty string when any exception
/// occurs (best-effort contract kept from the original).
/// </returns>
public static string OCR(Bitmap img)
{
    try
    {
        // Initialization: the language data must be under the tessdata folder.
        using (var ocr = new TesseractEngine("./tessdata", "eng"))
        {
            // Forcing the character list makes recognition more accurate.
            ocr.SetVariable("tessedit_char_whitelist", "0123456789");

            // using guarantees the page is released even if GetText throws.
            using (Page page = ocr.Process(img, PageSegMode.SingleLine))
            {
                return page.GetText().Replace(" ", "");
            }
        }
    }
    catch (Exception)
    {
        // Callers receive an empty string on any failure.
        return "";
    }
}
// TessNet2 is based on Tesseract v2.04 and has not been updated since September 2009.
// Tesseract 3 .NET wrapper is available here: https://github.com/charlesw/tesseract

/// <summary>
/// Scales the image up, then recognizes it as a single word made of digits and colons.
/// </summary>
/// <param name="srcImg">Source image; it is disposed by this method after scaling.</param>
/// <returns>
/// The trimmed recognized text, or "Tesseract error: " followed by the exception
/// text when recognition fails.
/// </returns>
private string executeOCR_By_tesseract(Image srcImg)
{
    try
    {
        // Scale up and extend the canvas to get a better result.
        using (var img = scaleImage(srcImg, 2.3, 2))
        {
            srcImg.Dispose();

            tessEngine.SetVariable("tessedit_char_whitelist", "0123456789:"); // Digits & colons only

            string text;
            // Without SingleWord the text may not be recognized at all (because of
            // the narrow page margin); with SingleBlock the result may contain spaces.
            // using releases the page and scaled image even when an exception is thrown.
            using (var page = tessEngine.Process(img, PageSegMode.SingleWord))
            {
                text = page.GetText().Trim();
            }

            Console.WriteLine(text);
            return text;
        }
    }
    catch (Exception e)
    {
        Console.WriteLine("Tesseract error: " + e.ToString());
        return "Tesseract error: " + e.ToString();
    }
}
/// <summary>
/// Recognizes a single character from <paramref name="input"/>.
/// </summary>
/// <remarks>
/// The matrix is encoded as TIFF in memory and processed in single-character mode
/// with the whitelist "0123456789ABEKMHOPCTYXDI".
/// </remarks>
/// <param name="input">Image that contains one character.</param>
/// <returns>
/// <c>Result.Ok</c> with the first character of the recognized text, or
/// <c>Result.Fail</c> when nothing is recognized or an exception occurs.
/// </returns>
public Result<char> Process(Mat input)
{
    try
    {
        // The byte vector is disposed once recognition is finished.
        using (var buff = new VectorOfByte())
        {
            CvInvoke.Imencode(".tiff", input, buff);

            using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
            {
                engine.DefaultPageSegMode = PageSegMode.SingleChar;
                engine.SetVariable("tessedit_char_whitelist", "0123456789ABEKMHOPCTYXDI");

                using (var img = Pix.LoadTiffFromMemory(buff.ToArray()))
                using (var page = engine.Process(img))
                {
                    var recognized = page.GetText();

                    // Fail explicitly on empty output instead of relying on an
                    // IndexOutOfRangeException from recognized[0].
                    if (string.IsNullOrEmpty(recognized))
                    {
                        return Result.Fail<char>("No character was recognized");
                    }

                    var text = recognized[0];
                    _debugLogger.Log(debugLogBuilder => debugLogBuilder.AddMessage("Letter").AddImage(input).AddMessage($"has been recognized as: {text}"));
                    return Result.Ok(text);
                }
            }
        }
    }
    catch (Exception e)
    {
        return Result.Fail<char>(e.Message);
    }
}
/// <summary>
/// Reads one line of digits from a rectangular region of <paramref name="source"/>
/// and strips whitespace from the result. Handy for dosage amounts.
/// </summary>
/// <param name="source">Image that contains the region.</param>
/// <param name="x">Left edge of the region of interest.</param>
/// <param name="y">Top edge of the region of interest.</param>
/// <param name="width">Width of the region of interest.</param>
/// <param name="height">Height of the region of interest.</param>
/// <returns>The recognized text after <c>TextProcess.RemoveWhiteSpace</c>.</returns>
public static string ocrDigitLine(Bitmap source, int x, int y, int width, int height)
{
    using (var ocr = new TesseractEngine(@"C:\Program Files\Tesseract-OCR\tessdata\", "kor", EngineMode.Default))
    {
        // Recognize only digits, '-' and '.'.
        ocr.SetVariable("tessedit_char_whitelist", "0123456789-.");

        // Region of interest inside the source image.
        var region = new Rect(x, y, width, height);

        // Single-line page segmentation.
        using (var recognized = ocr.Process(source, region, PageSegMode.SingleLine))
        {
            string rawText = recognized.GetText();
            return TextProcess.RemoveWhiteSpace(rawText);
        }
    }
}
/// <summary>
/// Verifies that a double-valued engine variable can be set and read back with
/// the same value.
/// </summary>
/// <param name="variableName">Name of the engine variable.</param>
/// <param name="variableValue">Value to set and expect on read-back.</param>
public void CanSetDoubleVariable(string variableName, double variableValue)
{
    using (var ocr = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
    {
        var setSucceeded = ocr.SetVariable(variableName, variableValue);
        Assert.That(setSucceeded, Is.True, "Failed to set variable '{0}'.", variableName);

        double readBack;
        var readSucceeded = ocr.TryGetDoubleVariable(variableName, out readBack);
        if (!readSucceeded)
        {
            Assert.Fail("Failed to retrieve value for '{0}'.", variableName);
        }
        else
        {
            Assert.That(readBack, Is.EqualTo(variableValue));
        }
    }
}
/// <summary>
/// Sets the engine's character whitelist according to the zone's textual data
/// filter type. Filter types other than Alpha, AlphaOnly, Digits and Number leave
/// the engine unchanged.
/// </summary>
/// <param name="engine">Engine to configure.</param>
/// <param name="barcodeConfig">Zone configuration that supplies the filter type.</param>
private static void SetVariablesAccordingToConfig(TesseractEngine engine, ZoneConfiguration barcodeConfig)
{
    var selectedFilter = barcodeConfig.TextualDataFilter.FilterType;
    string whitelist = null;

    if (selectedFilter == FilterType.Alpha)
    {
        whitelist = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ-";
    }
    else if (selectedFilter == FilterType.AlphaOnly)
    {
        whitelist = "ABCDEFGHIJKLMNOPQRSTUVWXYZ-";
    }
    else if (selectedFilter == FilterType.Digits)
    {
        whitelist = "0123456789";
    }
    else if (selectedFilter == FilterType.Number)
    {
        whitelist = "0123456789,.";
    }

    if (whitelist != null)
    {
        engine.SetVariable("tessedit_char_whitelist", whitelist);
    }
}
/// <summary>
/// Verifies that the boolean variable <c>classify_enable_learning</c> can be set
/// and read back with the same value.
/// </summary>
/// <param name="variableValue">Value to set and expect on read-back.</param>
public void CanSetBooleanVariable(bool variableValue)
{
    const string VariableName = "classify_enable_learning";

    using (var ocr = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
    {
        var setSucceeded = ocr.SetVariable(VariableName, variableValue);
        Assert.That(setSucceeded, Is.True, "Failed to set variable '{0}'.", VariableName);

        bool readBack;
        var readSucceeded = ocr.TryGetBoolVariable(VariableName, out readBack);
        if (!readSucceeded)
        {
            Assert.Fail("Failed to retrieve value for '{0}'.", VariableName);
        }
        else
        {
            Assert.That(readBack, Is.EqualTo(variableValue));
        }
    }
}
/// <summary>
/// Verifies that with <c>tessedit_write_images</c> enabled, recognizing an image
/// leaves a <c>tessinput.tif</c> file in the current directory.
/// </summary>
public void WritesOutThresholdedImageWhenOCRing()
{
    string thresholdedImagePath = Path.Combine(Environment.CurrentDirectory, "tessinput.tif");

    // Start clean so a leftover file from an earlier run cannot satisfy the assertion.
    if (File.Exists(thresholdedImagePath))
    {
        File.Delete(thresholdedImagePath);
    }

    using (var ocr = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
    {
        ocr.SetVariable("tessedit_write_images", true);

        using (var sourceImage = Pix.LoadFromFile("./phototest.tif"))
        using (var page = ocr.Process(sourceImage))
        {
            // The text is not inspected; the call runs before the file check.
            page.GetText();

            Assert.That(File.Exists(thresholdedImagePath));
        }
    }
}
/// <summary>
/// Verifies that with <c>classify_bln_numeric_mode</c> set to 1 the numbers test
/// image is recognized as "1234567890" followed by two newlines.
/// </summary>
public void CanSetClassifyBlnNumericModeVariable()
{
    const string expectedText = "1234567890\n\n";

    using (var ocr = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
    {
        ocr.SetVariable("classify_bln_numeric_mode", 1);

        using (var numbersImage = Pix.LoadFromFile("./Data/processing/numbers.png"))
        using (var page = ocr.Process(numbersImage))
        {
            var recognizedText = page.GetText();
            Assert.That(recognizedText, Is.EqualTo(expectedText));
        }
    }
}