Пример #1
0
 /// <summary>
 /// Applies the default recognition settings to the shared <c>engine</c>:
 /// single-word page segmentation, a digits-only whitelist, and both
 /// dictionary flags switched off.
 /// </summary>
 private void DefaultSettings()
 {
     // Treat each processed image as one word.
     engine.DefaultPageSegMode = PageSegMode.SingleWord;

     // Only the ten decimal digits may appear in the output.
     engine.SetVariable("tessedit_char_whitelist", "0123456789");

     // Switch off the frequent-word and system dictionary flags.
     engine.SetVariable("load_freq_dawg", false);
     engine.SetVariable("load_system_dawg", false);
 }
Пример #2
0
        /// <summary>
        /// Recognizes a printed ticket number from a sequence of single-character images.
        /// The first image is read with an uppercase-letter whitelist, every following
        /// image with a digit whitelist; one character is taken from each image.
        /// </summary>
        /// <param name="imgs">Character images in reading order. The list is not modified.</param>
        /// <returns>
        /// The concatenated characters. An image for which the engine returns no text
        /// contributes nothing; a null or empty list yields an empty string.
        /// </returns>
        private string OCRTicketNo(IList <Bitmap> imgs)
        {
            string res = "";

            if (imgs != null && imgs.Count > 0)
            {
                using (var engineLetter = new TesseractEngine(@"tessdata", "eng", EngineMode.TesseractOnly))
                {
                    engineLetter.SetVariable("tessedit_char_whitelist", "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
                    engineLetter.SetVariable("tessedit_unrej_any_wd", true);
                    engineLetter.DefaultPageSegMode = PageSegMode.SingleChar;
                    using (var page = engineLetter.Process(imgs[0], PageSegMode.SingleChar))
                    {
                        string text = page.GetText();

                        // Substring(0, 1) throws on an empty result, so guard it.
                        if (!string.IsNullOrEmpty(text))
                        {
                            res += text.Substring(0, 1);
                        }
                    }
                }

                // Only build the digit engine when there is something left to read.
                if (imgs.Count > 1)
                {
                    using (var engine = new TesseractEngine(@"tessdata", "eng", EngineMode.TesseractOnly))
                    {
                        engine.SetVariable("tessedit_char_whitelist", "1234567890");
                        engine.SetVariable("tessedit_unrej_any_wd", true);
                        engine.DefaultPageSegMode = PageSegMode.SingleChar;

                        // Start at index 1 instead of removing the first element, so the
                        // caller's list is left intact.
                        for (int i = 1; i < imgs.Count; i++)
                        {
                            using (var page = engine.Process(imgs[i], PageSegMode.SingleChar))
                            {
                                string text = page.GetText();
                                if (!string.IsNullOrEmpty(text))
                                {
                                    res += text.Substring(0, 1);
                                }
                            }
                        }
                    }
                }
            }

            Console.WriteLine("OCR Result = " + res);
            return res;
        }
        /// <summary>
        /// Runs single-line OCR on a bitmap, restricted to ASCII digits and letters.
        /// </summary>
        /// <param name="b">Image to read. It is copied into a 24bpp RGB bitmap before processing.</param>
        /// <returns>
        /// The recognized text with spaces removed and trimmed, or <c>null</c> when any step throws.
        /// </returns>
        public static string OCR(Bitmap b)
        {
            try
            {
                string res  = string.Empty;
                string path = $@"{Environment.CurrentDirectory}\tessdata\";

                using (var engine = new TesseractEngine(path, "eng"))
                {
                    string letters = "abcdefghijklmnopqrstuvwxyz";
                    string numbers = "0123456789";

                    // Invariant casing: a culture-sensitive ToUpper() maps 'i' to a dotted
                    // capital under Turkish cultures, dropping 'I' from the whitelist.
                    engine.SetVariable("tessedit_char_whitelist", $"{numbers}{letters}{letters.ToUpperInvariant()}");
                    engine.SetVariable("tessedit_unrej_any_wd", true);
                    engine.SetVariable("tessedit_adapt_to_char_fragments", true);
                    engine.SetVariable("tessedit_redo_xheight", true);
                    engine.SetVariable("chop_enable", true);

                    // The clone is owned by this method and must be released here.
                    using (Bitmap x = b.Clone(new Rectangle(0, 0, b.Width, b.Height), System.Drawing.Imaging.PixelFormat.Format24bppRgb))
                    using (var page = engine.Process(x, PageSegMode.SingleLine))
                    {
                        res = page.GetText().Replace(" ", "").Trim();
                    }
                }

                return res;
            }
            catch (Exception)
            {
                // Best effort by design: any failure is reported to the caller as null.
                return null;
            }
        }
Пример #4
0
        /// <summary>
        /// Measures OCR accuracy over a folder of images: each file name (without
        /// extension) is the expected text. Prints "hit/all" and waits for a key press.
        /// </summary>
        // 464/19516 — presumably the hit/all result of an earlier run (TODO confirm)
        static void Main(string[] args)
        {
            int hit = 0;
            int all = 0;

            using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
            {
                // Engine configuration does not depend on the file, so it is applied once
                // instead of on every loop iteration.
                engine.DefaultPageSegMode = PageSegMode.SingleLine;
                engine.SetVariable("load_system_dawg", "false");
                engine.SetVariable("load_freq_dawg", "false");
                engine.SetVariable("tessedit_char_whitelist", "-2346789BCDFGHJKMPQRTVWXY");

                foreach (var file in Directory.EnumerateFiles(@"D:\dataset\easy\first try\read_and_segmented"))
                {
                    string code = Path.GetFileNameWithoutExtension(file);

                    using (var img = Pix.LoadFromFile(file))
                    using (var page = engine.Process(img))
                    {
                        var text = page.GetText().Trim();
                        if (text == code)
                        {
                            ++hit;
                        }
                    }

                    ++all;
                }
            }

            Console.WriteLine($"{hit}/{all}");
            Console.ReadKey();
        }
Пример #5
0
        /// <summary>
        /// Reads tessdata/configs/tess_configvars and calls SetVariable on the Tesseract engine
        /// for every "name value" line found there.
        /// This only works for non-init parameters (@see <a href="https://code.google.com/p/tesseract-ocr/wiki/ControlParams">ControlParams</a>).
        /// </summary>
        /// <param name="engine">Engine that receives the variables.</param>
        /// <remarks>
        /// Lines starting with '#', blank lines and lines with fewer than two
        /// whitespace-separated tokens are skipped. A value of "T" or "F" is passed as a
        /// boolean; anything else is passed as a string. Does nothing when the file is missing.
        /// </remarks>
        void ControlParameters(TesseractEngine engine)
        {
            string configsFilePath = Path.Combine(Datapath, "tessdata/configs/" + CONFIGVARS_FILE);

            if (!File.Exists(configsFilePath))
            {
                return;
            }

            foreach (string rawLine in File.ReadAllLines(configsFilePath))
            {
                string line = rawLine.Trim();
                if (line.StartsWith("#", StringComparison.Ordinal))
                {
                    continue;
                }

                // An empty separator array makes Split break on whitespace.
                string[] keyValuePair = line.Split(new char[0], StringSplitOptions.RemoveEmptyEntries);
                if (keyValuePair.Length < 2)
                {
                    // Blank or malformed line: skip it explicitly instead of relying on an
                    // IndexOutOfRangeException being swallowed.
                    continue;
                }

                string name  = keyValuePair[0];
                string value = keyValuePair[1];

                try
                {
                    if (value == "T" || value == "F")
                    {
                        engine.SetVariable(name, value == "T");
                    }
                    else
                    {
                        engine.SetVariable(name, value);
                    }
                }
                catch (Exception)
                {
                    // Deliberate best effort: one variable the engine rejects must not stop
                    // the remaining ones from being applied.
                }
            }
        }
Пример #6
0
        /// <summary>
        /// Creates the four OCR engines used by this class. They share the same data
        /// folder, language, engine mode and flags and differ only in their whitelist.
        /// </summary>
        private void InitTesseract()
        {
            var tesseractData = Path.Combine(Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location), "tessdata");

            _englishOcr       = CreateSingleLineEngine(tesseractData, "qwertyuiopasdfghjklzxcvbnm QWERTYUIOPASDFGHJKLZXCVBNM");
            _englishPunctOcr  = CreateSingleLineEngine(tesseractData, "qwertyuiopasdfghjklzxcvbnm QWERTYUIOPASDFGHJKLZXCVBNM0123456789/*-+:,.%");
            _numbersOcr       = CreateSingleLineEngine(tesseractData, "0123456789");
            _numbersScaledOcr = CreateSingleLineEngine(tesseractData, "0123456789.,KMB");

            Logger.Info("Tessract engines initialized");
        }

        /// <summary>
        /// Builds one English single-line engine restricted to <paramref name="whitelist"/>.
        /// </summary>
        /// <param name="dataPath">Folder containing the tessdata files.</param>
        /// <param name="whitelist">Characters the engine is allowed to output.</param>
        /// <returns>The configured engine; the caller owns it.</returns>
        private static TesseractEngine CreateSingleLineEngine(string dataPath, string whitelist)
        {
            var engine = new TesseractEngine(dataPath, "eng", EngineMode.TesseractAndLstm);
            try
            {
                engine.DefaultPageSegMode = PageSegMode.SingleLine;
                engine.SetVariable("tessedit_char_whitelist", whitelist);
                engine.SetVariable("tessedit_zero_rejection", true);
                engine.SetVariable("load_freq_dawg", false);
                engine.SetVariable("load_system_dawg", false);
                return engine;
            }
            catch
            {
                // Do not leak the native engine when configuration fails.
                engine.Dispose();
                throw;
            }
        }
Пример #7
0
        /// <summary>
        /// Debug driver: loads a fixed image, doubles its size, and unless
        /// <c>analyzer.ShouldGetRune</c> accepts it, prints the OCR text of the scaled
        /// image and saves that image to disk.
        /// </summary>
        public static void Run()
        {
            // Both bitmaps and the engine are released on every exit path, including the
            // early return below.
            using (var bitmap = new Bitmap("E:\\dev\\venomsw\\images\\cropped_rune.png"))
            using (var scaled = ScaleBitmap(bitmap, bitmap.Width * 2, bitmap.Height * 2))
            {
                //TODO debug only
                if (analyzer.ShouldGetRune(scaled))
                {
                    return;
                }

                // NOTE(review): this is a verbatim string, so the path really contains
                // doubled backslashes — confirm that is intended.
                using (TesseractEngine engine = new TesseractEngine(@"E:\\dev\\venomsw\\venomsw\\tessdata", "eng",
                                                                    EngineMode.Default, "venom"))
                {
                    engine.SetVariable("language_model_penalty_non_freq_dict_word", "1");
                    engine.SetVariable("language_model_penalty_non_dict_word", "1");

                    using (Page page = engine.Process(scaled, PageSegMode.SingleBlock))
                    {
                        Console.WriteLine(page.GetText());
                    }
                }

                scaled.Save("E:\\dev\\venomsw\\images\\cropped_rune3_nobg.png");
            }
        }
        /// <summary>
        /// Creates the OCR engine and trains a multiclass SVM on every "*.png" sample
        /// found in the given directory.
        /// </summary>
        /// <param name="TrainedDataInputFile">
        /// Directory that holds the training images. The top-left 28x28 pixels of each image are read, and
        /// the part of its file name before the first '.' is parsed as the integer class label.
        /// </param>
        public SVM(string TrainedDataInputFile)  //// Do training for all existing trained Data
        {
            _engine = new TesseractEngine(@"./tessdata3", "eng", EngineMode.TesseractAndCube);
            _engine.SetVariable("tessedit_char_whitelist", "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
            _engine.SetVariable("tessedit_char_blacklist", "¢§+~»~`!@#$%^&*()_+-={}[]|\\:\";\'<>?,./");

            string[]   TrainedData = Directory.GetFiles(TrainedDataInputFile, "*.png");
            double[][] inputs      = new double[TrainedData.Length][];
            int[]      Outputs     = new int[TrainedData.Length];

            for (int i = 0; i < TrainedData.Length; i++)
            {
                string   filename = Path.GetFileNameWithoutExtension(TrainedData[i]);
                string[] split    = filename.Split('.');
                double[] features = new double[784];

                // Dispose each sample as soon as its pixels are read; otherwise every
                // bitmap (and its file handle) stays alive until finalization.
                using (Bitmap TrainingImage = new Bitmap(TrainedData[i]))
                {
                    for (int j = 0; j < 28; j++)
                    {
                        for (int k = 0; k < 28; k++)
                        {
                            // Any pixel that is not opaque white counts as "ink".
                            bool isWhite = TrainingImage.GetPixel(j, k).Name.Equals("ffffffff");
                            features[j * 28 + k] = isWhite ? 0 : 1;
                        }
                    }
                }

                inputs[i]  = features;
                Outputs[i] = Convert.ToInt32(split[0]);
            }

            IKernel kernel;

            kernel = new Polynomial(2, 0);
            ksvm   = new MulticlassSupportVectorMachine(784, kernel, 2);
            MulticlassSupportVectorLearning ml = new MulticlassSupportVectorLearning(ksvm, inputs, Outputs);

            double complexity = 1;   ///// set these three parameters Carefuly later
            double epsilon    = 0.001;
            double tolerance  = 0.2;

            ml.Algorithm = (svm, classInputs, classOutputs, i, j) =>
            {
                var smo = new SequentialMinimalOptimization(svm, classInputs, classOutputs);
                smo.Complexity = complexity;  /// Cost parameter for SVM
                smo.Epsilon    = epsilon;
                smo.Tolerance  = tolerance;
                return smo;
            };

            // Train the machines. It should take a while. The returned error is not used.
            ml.Run();
        }
Пример #9
0
    /// <summary>
    /// Reads one line of text from <paramref name="imagem"/> symbol by symbol and
    /// assembles a result that is padded to at least six characters.
    /// </summary>
    /// <param name="imagem">Image to recognize.</param>
    /// <param name="linguagem">Language name passed to the Tesseract engine.</param>
    /// <returns>
    /// The text built from <c>Resolver</c> results, padded with <c>LetraAleatoria()</c>
    /// values while it is shorter than six characters.
    /// </returns>
    public static string OCR(Bitmap imagem, string linguagem)
    {
        string texto = "";

        // NOTE(review): the data path is a hard-coded absolute directory.
        using (TesseractEngine engine = new TesseractEngine(@"C:\GitHub\operacao-politica-supervisionada\OPS\temp\", linguagem, EngineMode.Default)) {
            engine.SetVariable("tessedit_char_whitelist", "1234567890ABCDEFGHIJKLMNOPQRSTUVWXYZ");
            engine.SetVariable("tessedit_unrej_any_wd", true);
            engine.SetVariable("applybox_learn_chars_and_char_frags_mode", true);
            engine.SetVariable("save_blob_choices", true);

            // sobreposto collects the symbols seen since the last flush; ultimo is the
            // horizontal reference position, advanced by 28 on every flush.
            // NOTE(review): 12/14/28 look like pixel geometry of fixed-width character
            // slots — confirm against the images this is used on.
            string sobreposto = "";
            int    ultimo     = 12;
            using (Page page = engine.Process(imagem, PageSegMode.SingleLine)) {
                using (ResultIterator ri = page.GetIterator()) {
                    do
                    {
                        string         word = ri.GetText(PageIteratorLevel.Symbol);
                        Tesseract.Rect bb;
                        if (ri.TryGetBoundingBox(PageIteratorLevel.Symbol, out bb))
                        {
                            // Ignore symbols that are too small or blank.
                            if ((bb.Width > 13) && (bb.Height > 15) && (word.Trim() != ""))
                            {
                                // The symbol starts beyond the current reference position:
                                // flush what was collected (possibly nothing) through
                                // Resolver and advance, repeating until it is in range.
                                while (bb.X1 > ultimo + 14)
                                {
                                    texto     += Resolver(sobreposto);
                                    sobreposto = "";
                                    ultimo    += 28;
                                }
                                //System.Web.HttpContext.Current.Response.Write(word + ": " + bb.X1 + "<br />\n");
                                // A "Q" taller than 30 is recorded as "O" instead.
                                if ((word != "Q") || (bb.Height <= 30))
                                {
                                    sobreposto += word;
                                }
                                else
                                {
                                    sobreposto += "O";
                                }
                            }
                        }
                    } while((ri.Next(PageIteratorLevel.Symbol)));
                    // The symbols still pending are resolved only when fewer than six
                    // characters were produced; otherwise they are dropped.
                    if (texto.Length < 6)
                    {
                        texto += Resolver(sobreposto);
                        // Pad up to six characters with LetraAleatoria() values.
                        while (texto.Length < 6)
                        {
                            texto += LetraAleatoria();
                        }
                    }
                }
            }
        }
        return(texto);
    }
Пример #10
0
        /// <summary>
        /// Reads a single line from <paramref name="b"/> using the English data with
        /// output limited to digits, '+' and '-'.
        /// </summary>
        /// <param name="b">Image to recognize.</param>
        /// <returns>The raw text reported by the engine.</returns>
        private static string OCRItalien(Bitmap b)
        {
            using (var ocr = new TesseractEngine(null, "eng", EngineMode.Default))
            {
                ocr.SetVariable("tessedit_char_whitelist", "1234567890+-");
                ocr.SetVariable("tessedit_unrej_any_wd", true);

                using (var recognized = ocr.Process(b, PageSegMode.SingleLine))
                {
                    return recognized.GetText();
                }
            }
        }
Пример #11
0
        /// <summary>
        /// Called for each strip: first cleans the image of noise, then performs OCR on
        /// the cleaned copy.
        /// </summary>
        /// <param name="fileName">Path of the strip image to read.</param>
        /// <param name="allowedChars">Characters the OCR engine may output.</param>
        /// <returns>The value returned by <c>PerformOCR</c> for the cleaned image.</returns>
        private string recognizeStripName(string fileName, string allowedChars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ\'\"")
        {
            string noiseFreeImage = Path.Combine(Configuration.Instance.TempFolder, "temp.png");

            try
            {
                // Make image white text on black background:
                cleanImageAndSave(fileName, noiseFreeImage, isNotInWhiteRange);

                // Use only specific characters:
                m_ocrEngine.SetVariable("tessedit_char_whitelist", allowedChars);
                return PerformOCR(noiseFreeImage, false);
            }
            finally
            {
                // Remove the temporary file even when cleaning or OCR throws.
                if (File.Exists(noiseFreeImage))
                {
                    File.Delete(noiseFreeImage);
                }
            }
        }
Пример #12
0
 /// <summary>
 /// Returns the shared OCR engine, creating and configuring it on the first call.
 /// </summary>
 /// <returns>The engine stored in <c>m_ocr</c>.</returns>
 public static TesseractEngine GetTesseractEngine()
 {
     // Lazily build the engine the first time it is requested.
     if (m_ocr == null)
     {
         m_ocr = new TesseractEngine("./tessdata", "eng", EngineMode.TesseractAndCube);
         m_ocr.SetVariable("tessedit_char_whitelist", "01234567890");
         m_ocr.SetVariable("tessedit_char_blacklist", "l");
     }

     return m_ocr;
 }
Пример #13
0
        /// <summary>
        /// Loads the image at <paramref name="path"/> and returns its OCR text using the
        /// combined English and Portuguese data.
        /// </summary>
        /// <param name="path">Path of the image file to read.</param>
        /// <returns>The raw text reported by the engine.</returns>
        public static string GetTextFromImage(string path)
        {
            var ocrtext = string.Empty;

            // The bitmap is owned by this method; dispose it so the native image is
            // released as soon as recognition finishes.
            using (Bitmap image = new Bitmap(path))
            using (var engine = new TesseractEngine("tessdata", "eng+por", EngineMode.TesseractAndCube))
            {
                engine.SetVariable("tessedit_char_whitelist", "1234567890ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzçÇãÃáàõéúí");
                engine.SetVariable("tessedit_unrej_any_wd", true);
                using (var page = engine.Process(image))
                {
                    ocrtext = page.GetText();
                }
            }

            return ocrtext;
        }
Пример #14
0
        /// <summary>
        /// Recognizes <paramref name="b"/> as a single block with the Spanish data,
        /// limited to digits and lowercase ASCII letters.
        /// </summary>
        /// <param name="b">Image to recognize.</param>
        /// <returns>The raw text reported by the engine.</returns>
        private string OCR(Bitmap b)
        {
            using (var ocr = new TesseractEngine(@"tessdata", "spa", EngineMode.Default))
            {
                ocr.SetVariable("tessedit_char_whitelist", "1234567890abcdefghijklmnopqrstuvwxyz");
                ocr.SetVariable("tessedit_unrej_any_wd", true);

                using (var block = ocr.Process(b, PageSegMode.SingleBlock))
                {
                    string recognized = block.GetText();
                    return recognized;
                }
            }
        }
Пример #15
0
        /// <summary>
        /// Runs OCR on the image produced by <c>ExtractMissedObscureWordsInImage</c> and
        /// returns the recognized text split into non-empty lines.
        /// </summary>
        /// <param name="imageFile">Source image handed to the extraction step.</param>
        /// <returns>One entry per non-empty line of recognized text.</returns>
        private List <String> GetNewObscureWords(Bitmap imageFile)
        {
            Bitmap extractMissedObscureWordsInImage = ExtractMissedObscureWordsInImage(imageFile);

            // Engine and page hold native resources, so both are disposed here.
            using (TesseractEngine engine = new TesseractEngine(Program.TessDataDir, "eng", EngineMode.Default))
            {
                engine.SetVariable("tessedit_char_whitelist", "abcdefghijklmnopqrstuvwxyz");
                engine.SetVariable("tessedit_char_blacklist", "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^&*()-_=\\|/?"); // no digit

                using (Page page = engine.Process(extractMissedObscureWordsInImage, PageSegMode.SingleBlock))
                {
                    String text = page.GetText();

                    return text.Split(new [] { "\n" }, StringSplitOptions.RemoveEmptyEntries).ToList();
                }
            }
        }
Пример #16
0
        /// <summary>
        /// Reads a single line from <paramref name="b"/> with the legacy engine and a
        /// whitelist of digits and ASCII letters.
        /// </summary>
        /// <param name="b">Image to recognize.</param>
        /// <returns>The raw text reported by the engine.</returns>
        public static string OCR(Bitmap b)
        {
            // NOTE(review): the data path is an absolute, machine-specific directory.
            var ocr = new TesseractEngine(@"C:\RODRIGO\PROJETOS\ALTRAN\AeC - Framewors\AeC.Automacao.ImageProcess\AeC.ImageProcess\tessdata", "eng", EngineMode.TesseractOnly);
            try
            {
                ocr.SetVariable("tessedit_char_whitelist", "1234567890ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvxz");
                ocr.SetVariable("tessedit_unrej_any_wd", true);

                using (var line = ocr.Process(b, PageSegMode.SingleLine))
                {
                    return line.GetText();
                }
            }
            finally
            {
                ocr.Dispose();
            }
        }
Пример #17
0
        /// <summary>
        /// Reads a single line from <paramref name="b"/> with the English data, limited
        /// to digits and uppercase ASCII letters.
        /// </summary>
        /// <param name="b">Image to recognize.</param>
        /// <returns>The raw text reported by the engine.</returns>
        private string OCR(Bitmap b)
        {
            string recognized;

            using (var ocr = new TesseractEngine(@"tessdata", "eng", EngineMode.Default))
            {
                ocr.SetVariable("tessedit_char_whitelist", "1234567890ABCDEFGHIJKLMNOPQRSTUVWXYZ");
                ocr.SetVariable("tessedit_unrej_any_wd", true);

                using (var line = ocr.Process(b, PageSegMode.SingleLine))
                {
                    recognized = line.GetText();
                }
            }

            return recognized;
        }
Пример #18
0
        /// <summary>
        /// Creates the static engine with single-block segmentation and applies the
        /// whitelist, user pattern/word files and dictionary flags.
        /// </summary>
        /// <param name="_logger">Logger that receives the start-up message.</param>
        private static void Initialize(ILogger _logger)
        {
            _logger.LogInformation("Initializing static Tesseract");

            var created = new TesseractEngine(@"./tessdata", "eng", EngineMode.TesseractAndLstm);
            created.DefaultPageSegMode = PageSegMode.SingleBlock;
            Tesseract = created;

            Tesseract.SetVariable("tessedit_char_whitelist", ALLOWED_CHARACTERS);

            // Pattern syntax: \n (char or digit), \c (char), \d (digit), \p (punct),
            // \a (lower), \A (upper), \* any number (\A\d and \d\A)
            Tesseract.SetVariable("user_patterns_file", @"./tessdata/cyber_patterns");

            // Word list entries such as: BD, 1C, E9, 55, 7A, 1F
            Tesseract.SetVariable("user_words_file", @"./tessdata/cyber_words");

            // Do not load the system dictionary or the word-frequency dictionary.
            Tesseract.SetVariable("load_system_dawg", false);
            Tesseract.SetVariable("load_freq_dawg", false);
        }
Пример #19
0
        /// <summary>
        /// Recognizes <paramref name="data"/> as a single word of uppercase letters and
        /// returns the characters in lower case.
        /// </summary>
        /// <param name="data">Image to recognize.</param>
        /// <param name="tessDataDir">Folder containing the tessdata files.</param>
        /// <returns>The recognized text, line feeds removed, lower-cased, as a char array.</returns>
        private char[] RetrieveCharsByOcr(Bitmap data, string tessDataDir)
        {
            // Engine and page wrap native resources; release both before returning.
            using (TesseractEngine engine = new TesseractEngine(tessDataDir, "eng", EngineMode.Default))
            {
                engine.SetVariable("tessedit_char_whitelist", "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
                engine.SetVariable("tessedit_char_blacklist", "abcdefghijklmnopqrstuvwxyz0123456789!@#$%^&*()-_=\\|/?"); // no digit

                using (Page page = engine.Process(data, PageSegMode.SingleWord))
                {
                    // Invariant casing keeps 'I' -> 'i' regardless of the current culture
                    // (a Turkish culture would otherwise yield a dotless 'ı').
                    return page.GetText().Replace("\n", "").ToLowerInvariant().ToCharArray();
                }
            }
        }
Пример #20
0
 /// <summary>
 /// Form load handler: creates the OCR engine once, when the program opens, so the
 /// Tesseract data files are only loaded a single time.
 /// </summary>
 /// <param name="sender">Event source (unused).</param>
 /// <param name="e">Event data (unused).</param>
 private void Form1_Load(object sender, EventArgs e)
 {
     ocrengine = new TesseractEngine(@".\tessdata", "ita", EngineMode.Default);

     // Limit the output to digits, '-', '_' and the listed lowercase letters.
     ocrengine.SetVariable("tessedit_char_whitelist", "0123456789-_abcdfghmnpqrtuvzxyk");

     // Treat each image as one uniform block of text.
     ocrengine.DefaultPageSegMode = PageSegMode.SingleBlock;
 }
Пример #21
0
        /// <summary>
        /// Runs digits-only OCR on the uploaded image.
        /// </summary>
        /// <param name="request">Form data carrying the image and the language to load.</param>
        /// <returns>
        /// The recognized text, or "Ocr is finished. Return empty" when the upload is
        /// empty or nothing was recognized.
        /// </returns>
        public String DoOCR([FromForm] OcrModel request)
        {
            var image = request.Image;

            // Nothing to recognize: answer like an empty OCR result instead of handing
            // an empty buffer to the image loader.
            if (image.Length <= 0)
            {
                return "Ocr is finished. Return empty";
            }

            byte[] imageBytes;
            using (var imageStream = new MemoryStream())
            {
                image.CopyTo(imageStream);

                // ToArray copies exactly the bytes written. GetBuffer would expose the
                // whole internal buffer, including unused trailing capacity.
                imageBytes = imageStream.ToArray();
            }

            string tessPath = Path.Combine(trainedDataFolderName, "");
            string result   = "";

            // TODO: Create one instance of engine and inject into app
            using (var engine = new TesseractEngine(tessPath, request.DestinationLanguage, EngineMode.Default))
            {
                // whitelist numbers only
                engine.SetVariable("tessedit_char_whitelist", "0123456789");

                using (var img = Pix.LoadFromMemory(imageBytes))
                using (var page = engine.Process(img))
                {
                    result = page.GetText();
                }

                Console.WriteLine(result);
            }

            return String.IsNullOrWhiteSpace(result) ? "Ocr is finished. Return empty" : result;
        }
Пример #22
0
 /// <summary>
 /// Creates the engine from <c>LangPath</c> / <c>LngStr</c>, selects single-block
 /// segmentation and blacklists ASCII letters plus a set of punctuation-like symbols.
 /// </summary>
 public OcrManager()
 {
     _tesseract = new TesseractEngine(LangPath, LngStr);

     // Treat each image as one uniform block of text.
     _tesseract.DefaultPageSegMode = PageSegMode.SingleBlock;

     // Characters the engine must never output.
     _tesseract.SetVariable(
         "tessedit_char_blacklist",
         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz〈ヽ〉'〝<〟‥“\\ゐ=`”_.丿″\"");
 }
Пример #23
0
        /// <summary>
        /// Intended to OCR <paramref name="fullBmp"/> and return the plain text, a
        /// block/paragraph/line/word dump and any exception text.
        /// </summary>
        /// <param name="fullBmp">Image to recognize.</param>
        /// <returns>
        /// A tuple of (text, blocks dump, exception string). As written, all three items
        /// are always <c>null</c> because of the unconditional early return below.
        /// </returns>
        static Tuple <string, string, string> OCRBitmapV3(Bitmap fullBmp)
        {
            string textResult      = null;
            string blocksResult    = null;
            string exceptionString = null;

            // NOTE(review): this unconditional return makes everything below unreachable,
            // so no OCR is ever performed. It looks like the method was switched off on
            // purpose — confirm, then either remove this line or delete the dead code.
            return(new Tuple <string, string, string>(textResult, blocksResult, exceptionString));

            try
            {
                using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
                {
                    // Only digits and 'B' may appear in the output.
                    engine.SetVariable("tessedit_char_whitelist", "0123456789B");
                    using (var page = engine.Process(fullBmp))
                    {
                        textResult = page.GetText();

                        // Walk the layout block -> paragraph -> line -> word and build a
                        // marked-up dump: "<BLOCK>" at block starts, "_" after each word,
                        // "%" at line ends and "§" at paragraph ends.
                        blocksResult = "";
                        using (var iter = page.GetIterator())
                        {
                            iter.Begin();
                            do
                            {
                                do
                                {
                                    do
                                    {
                                        do
                                        {
                                            if (iter.IsAtBeginningOf(PageIteratorLevel.Block))
                                            {
                                                blocksResult += ConsoleWriteLine("<BLOCK>");
                                            }

                                            blocksResult += ConsoleWrite(iter.GetText(PageIteratorLevel.Word));
                                            blocksResult += ConsoleWrite("_");

                                            if (iter.IsAtFinalOf(PageIteratorLevel.TextLine, PageIteratorLevel.Word))
                                            {
                                                blocksResult += ConsoleWriteLine("%");
                                            }
                                        } while (iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word));

                                        if (iter.IsAtFinalOf(PageIteratorLevel.Para, PageIteratorLevel.TextLine))
                                        {
                                            blocksResult += ConsoleWriteLine("§");
                                        }
                                    } while (iter.Next(PageIteratorLevel.Para, PageIteratorLevel.TextLine));
                                } while (iter.Next(PageIteratorLevel.Block, PageIteratorLevel.Para));
                            } while (iter.Next(PageIteratorLevel.Block));
                        }
                    }
                }
            }
            catch (Exception e)
            {
                // Failures are reported through the third tuple item instead of thrown.
                exceptionString = e.ToString();
            }
            return(new Tuple <string, string, string>(textResult, blocksResult, exceptionString));
        }
Пример #24
0
        /// <summary>
        /// Downloads the verification-code image, saves it as "code.bmp", recognizes its
        /// digits and stores the session cookie in Redis.
        /// </summary>
        /// <param name="path">
        /// Prefix used both for the saved "code.bmp" file and for the "tessdata" folder.
        /// </param>
        /// <returns>The recognized text with line feeds and spaces removed.</returns>
        public string GetVerficar(string path)
        {
            var result = "";

            httpItem.URL        = $"{urlbase}/verficar.do";
            httpItem.ResultType = ResultType.Byte;
            httpItem.Cookie     = cookie;
            httpResult          = httpHelper.GetHtml(httpItem);

            // The decoded image is owned by this method, so release it once OCR is done.
            using (var image = byteArrayToImage(httpResult.ResultByte))
            {
                image.Save(path + "code.bmp");

                // httpResult was already dereferenced above, so a null-conditional
                // access here would add no protection.
                cookie = httpResult.Cookie.Replace("; Path=/erp", "").Trim();

                using (var engine = new TesseractEngine(path + "tessdata", "eng", EngineMode.Default))
                {
                    engine.SetVariable("tessedit_char_whitelist", "0123456789");
                    using (var pix = PixConverter.ToPix((Bitmap)image))
                    using (var page = engine.Process(pix))
                    {
                        result = page.GetText();
                    }
                }
            }

            result = result.Replace("\n", "").Replace(" ", "").Trim();

            // Build the lifetime directly; subtracting two separate DateTime.Now reads
            // yields a value that is slightly off from the intended duration.
            var timeSpan = TimeSpan.FromMilliseconds(expire * 1000);

            RedisService.Instance.StringSet($"{RedisPrimaryKey.WebCrawlingCookie}/{uuid}", cookie, timeSpan);

            return result;
        }
Пример #25
0
        /// <summary>
        /// Recognizes every image in <paramref name="images"/> and concatenates the
        /// results, as hOCR markup when <c>Hocr</c> is set and as plain text otherwise.
        /// </summary>
        /// <param name="images">Pages to recognize, in order.</param>
        /// <param name="lang">Language name passed to the engine.</param>
        /// <returns>
        /// The combined text with "\n" replaced by the platform newline, or an empty
        /// string as soon as any page yields a null result.
        /// </returns>
        public override string RecognizeText(IList <Image> images, string lang)
        {
            string tessdata = Path.Combine(basedir, TESSDATA);

            using (TesseractEngine engine = new TesseractEngine(tessdata, lang, EngineMode.Default))
            {
                engine.SetVariable("tessedit_create_hocr", Hocr ? "1" : "0");

                // The PageSegMode member holds the mode name; parse it into the enum.
                Tesseract.PageSegMode psm = (PageSegMode)Enum.Parse(typeof(PageSegMode), PageSegMode);

                var output    = new StringBuilder();
                int pageIndex = 0; // zero-based page number handed to GetHOCRText

                foreach (Image image in images)
                {
                    using (Pix pix = ConvertBitmapToPix(image))
                    using (Page page = engine.Process(pix, psm))
                    {
                        string text;
                        if (Hocr)
                        {
                            text = page.GetHOCRText(pageIndex);
                        }
                        else
                        {
                            text = page.GetText();
                        }

                        // A null result aborts the whole run with an empty string.
                        if (text == null)
                        {
                            return String.Empty;
                        }

                        output.Append(text);
                    }

                    pageIndex++;
                }

                return output.ToString().Replace("\n", Environment.NewLine);
            }
        }
Пример #26
0
        /// <summary>
        /// Loads a captcha image, converts it to grayscale, binarizes it and returns the
        /// recognized uppercase letters and digits with all whitespace removed.
        /// </summary>
        /// <param name="captchaFilePath">Path of the captcha image file.</param>
        /// <returns>The recognized text, or <c>null</c> when the file could not be loaded.</returns>
        private static string GetCaptchaText(string captchaFilePath)
        {
            Pix captcha = null;

            try
            {
                captcha = Pix.LoadFromFile(captchaFilePath);
            }
            catch (Exception e)
            {
                Log.Error(e, "Error loading captcha file");
            }

            if (captcha == null)
            {
                return null;
            }

            // Every Pix, the engine and the page wrap native memory; dispose them all.
            using (captcha)
            using (var grayCaptcha = captcha.ConvertRGBToGray())
            using (var binarizedCaptcha = grayCaptcha.BinarizeSauvolaTiled(10, 0.75f, 1, 2))
            using (var engine = new TesseractEngine(Path.GetFullPath("tessdata"), "eng", EngineMode.Default))
            {
                engine.SetVariable("tessedit_char_whitelist", "ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890");

                using (var page = engine.Process(binarizedCaptcha, PageSegMode.SparseText))
                {
                    return Regex.Replace(page.GetText(), @"\s+", string.Empty);
                }
            }
        }
Пример #27
0
        /// <summary>
        /// Reads a single line of digits from <paramref name="img"/>.
        /// </summary>
        /// <param name="img">Image to recognize.</param>
        /// <returns>
        /// The recognized text with spaces removed, or an empty string when any step throws.
        /// </returns>
        public static string OCR(Bitmap img)
        {
            string sResult;

            try
            {
                // Initialize the engine (the data must be located under the tessdata folder).
                using (var ocr = new TesseractEngine("./tessdata", "eng"))
                {
                    // Forcing the character list gives more accurate results.
                    ocr.SetVariable("tessedit_char_whitelist", "0123456789");

                    // using guarantees the page is released even if GetText throws.
                    using (Page page = ocr.Process(img, PageSegMode.SingleLine))
                    {
                        sResult = page.GetText();
                    }
                }
            }
            catch (Exception)
            {
                // Best effort by design: failures are reported as an empty result.
                sResult = "";
            }

            return sResult.Replace(" ", "");
        }
Пример #28
0
        // TessNet2 is based on Tesseract v2.04 and has not been updated since September 2009.
        // Tesseract 3 .NET wrapper is available here: https://github.com/charlesw/tesseract
        /// <summary>
        /// Scales <paramref name="srcImg"/> up and recognizes it as a single word made
        /// of digits and colons.
        /// </summary>
        /// <param name="srcImg">Image to read. It is disposed by this method after scaling.</param>
        /// <returns>
        /// The trimmed text, or a string starting with "Tesseract error: " when an
        /// exception occurs.
        /// </returns>
        private string executeOCR_By_tesseract(Image srcImg)
        {
            try
            {
                // Scale up and extend the canvas to get a better result
                using (var img = scaleImage(srcImg, 2.3, 2))
                {
                    srcImg.Dispose();

                    tessEngine.SetVariable("tessedit_char_whitelist", "0123456789:");   // Digits & colons only

                    string text;

                    // With SingleBlock the result may contain spaces, hence SingleWord.
                    // using releases the page and the scaled image even on failure.
                    using (var page = tessEngine.Process(img, PageSegMode.SingleWord))
                    {
                        text = page.GetText().Trim();
                    }

                    Console.WriteLine(text);
                    return text;
                }
            }
            catch (Exception e)
            {
                Console.WriteLine("Tesseract error: " + e.ToString());
                return "Tesseract error: " + e.ToString();
            }
        }
        /// <summary>
        /// Recognizes a single character in <paramref name="input"/>.
        /// </summary>
        /// <param name="input">Image of one character; it is encoded as TIFF for the engine.</param>
        /// <returns>
        /// <c>Result.Ok</c> with the first recognized character, or <c>Result.Fail</c>
        /// carrying the exception message when anything throws (including an empty
        /// recognition result, which fails on the index access).
        /// </returns>
        public Result <char> Process(Mat input)
        {
            try
            {
                // The encode buffer is disposed deterministically once recognition ends.
                using (var buff = new VectorOfByte())
                {
                    CvInvoke.Imencode(".tiff", input, buff);

                    using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
                    {
                        engine.DefaultPageSegMode = PageSegMode.SingleChar;
                        engine.SetVariable("tessedit_char_whitelist", "0123456789ABEKMHOPCTYXDI");

                        using (var img = Pix.LoadTiffFromMemory(buff.ToArray()))
                        using (var page = engine.Process(img))
                        {
                            var text = page.GetText()[0];

                            _debugLogger.Log(debugLogBuilder => debugLogBuilder.AddMessage("Letter").AddImage(input).AddMessage($"has been recognized as: {text}"));

                            return Result.Ok(text);
                        }
                    }
                }
            }
            catch (Exception e)
            {
                return Result.Fail <char>(e.Message);
            }
        }
Пример #30
0
        /// <summary>
        /// Reads a single line of text from a rectangular region of <paramref name="source"/>,
        /// recognizing only the characters 0-9, '-' and '.', and returns it after passing it
        /// through <c>TextProcess.RemoveWhiteSpace</c>.
        /// </summary>
        /// <param name="source">Bitmap to run OCR on.</param>
        /// <param name="x">X coordinate of the region of interest.</param>
        /// <param name="y">Y coordinate of the region of interest.</param>
        /// <param name="width">Width of the region of interest.</param>
        /// <param name="height">Height of the region of interest.</param>
        /// <returns>The recognized text as returned by <c>TextProcess.RemoveWhiteSpace</c>.</returns>
        public static string ocrDigitLine(Bitmap source, int x, int y, int width, int height)
        {
            using (var ocrEngine = new TesseractEngine(@"C:\Program Files\Tesseract-OCR\tessdata\", "kor", EngineMode.Default))
            {
                // Restrict recognition to digits, '-' and '.'.
                ocrEngine.SetVariable("tessedit_char_whitelist", "0123456789-.");

                // Only the requested region is processed, as a single text line.
                var regionOfInterest = new Rect(x, y, width, height);
                using (var linePage = ocrEngine.Process(source, regionOfInterest, PageSegMode.SingleLine))
                {
                    string rawText = linePage.GetText();
                    return(TextProcess.RemoveWhiteSpace(rawText));
                }
            }
        }
Пример #31
0
 /// <summary>
 /// Verifies that a double-valued engine variable can be set and then read back unchanged.
 /// </summary>
 /// <param name="variableName">Name of the engine variable to set.</param>
 /// <param name="variableValue">Value to assign and expect on read-back.</param>
 public void CanSetDoubleVariable(string variableName, double variableValue)
 {
     using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
     {
         // Setting the variable must be reported as successful.
         bool assigned = engine.SetVariable(variableName, variableValue);
         Assert.That(assigned, Is.True, "Failed to set variable '{0}'.", variableName);

         // The value read back must match what was written.
         double readBack;
         bool retrieved = engine.TryGetDoubleVariable(variableName, out readBack);
         if (!retrieved)
         {
             Assert.Fail("Failed to retrieve value for '{0}'.", variableName);
         }
         else
         {
             Assert.That(readBack, Is.EqualTo(variableValue));
         }
     }
 }
Пример #32
0
        /// <summary>
        /// Applies the character whitelist matching the zone's textual data filter to the engine.
        /// Filter types other than Alpha, AlphaOnly, Digits and Number leave the engine untouched.
        /// </summary>
        /// <param name="engine">Engine whose <c>tessedit_char_whitelist</c> variable is set.</param>
        /// <param name="barcodeConfig">Zone configuration providing the filter type.</param>
        private static void SetVariablesAccordingToConfig(TesseractEngine engine, ZoneConfiguration barcodeConfig)
        {
            string whitelist;

            switch (barcodeConfig.TextualDataFilter.FilterType)
            {
                case FilterType.Alpha:
                    // Digits, upper-case letters and the dash.
                    whitelist = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ-";
                    break;

                case FilterType.AlphaOnly:
                    // Upper-case letters and the dash.
                    whitelist = "ABCDEFGHIJKLMNOPQRSTUVWXYZ-";
                    break;

                case FilterType.Digits:
                    whitelist = "0123456789";
                    break;

                case FilterType.Number:
                    // Digits plus comma and dot.
                    whitelist = "0123456789,.";
                    break;

                default:
                    // No whitelist is configured for any other filter type.
                    return;
            }

            engine.SetVariable("tessedit_char_whitelist", whitelist);
        }
Пример #33
0
 /// <summary>
 /// Verifies that the boolean engine variable "classify_enable_learning" can be set
 /// and then read back unchanged.
 /// </summary>
 /// <param name="variableValue">Value to assign and expect on read-back.</param>
 public void CanSetBooleanVariable(bool variableValue)
 {
     const string VariableName = "classify_enable_learning";

     using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
     {
         // Setting the variable must be reported as successful.
         bool assigned = engine.SetVariable(VariableName, variableValue);
         Assert.That(assigned, Is.True, "Failed to set variable '{0}'.", VariableName);

         // The value read back must match what was written.
         bool readBack;
         bool retrieved = engine.TryGetBoolVariable(VariableName, out readBack);
         if (!retrieved)
         {
             Assert.Fail("Failed to retrieve value for '{0}'.", VariableName);
         }
         else
         {
             Assert.That(readBack, Is.EqualTo(variableValue));
         }
     }
 }
Пример #34
0
        /// <summary>
        /// Verifies that, with "tessedit_write_images" enabled, requesting the page text
        /// leaves a "tessinput.tif" file in the current directory.
        /// </summary>
        public void WritesOutThresholdedImageWhenOCRing()
        {
            string thresholdedImagePath = Path.Combine(Environment.CurrentDirectory, "tessinput.tif");

            // Start from a clean slate so a stale file cannot satisfy the assertion.
            if (File.Exists(thresholdedImagePath))
            {
                File.Delete(thresholdedImagePath);
            }

            using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
            {
                engine.SetVariable("tessedit_write_images", true);

                using (var img = Pix.LoadFromFile("./phototest.tif"))
                using (var page = engine.Process(img))
                {
                    // The text itself is irrelevant; it is requested before checking for the file.
                    page.GetText();

                    Assert.That(File.Exists(thresholdedImagePath));
                }
            }
        }
Пример #35
0
        /// <summary>
        /// Verifies that, with "classify_bln_numeric_mode" set to 1, the numbers sample image
        /// is recognized as exactly "1234567890" followed by two newlines.
        /// </summary>
        public void CanSetClassifyBlnNumericModeVariable()
        {
            const string expectedText = "1234567890\n\n";

            using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
            {
                engine.SetVariable("classify_bln_numeric_mode", 1);

                using (var img = Pix.LoadFromFile("./Data/processing/numbers.png"))
                using (var page = engine.Process(img))
                {
                    string recognized = page.GetText();

                    Assert.That(recognized, Is.EqualTo(expectedText));
                }
            }
        }