Ejemplo n.º 1
0
        /// <summary>
        /// Runs the external Tesseract executable on <paramref name="bmp"/> and returns the text read
        /// from the output file it produces.
        /// </summary>
        /// <param name="bmp">Image to recognize; yellow is replaced with white before it is saved for Tesseract.</param>
        /// <param name="language">Value passed to Tesseract's <c>-l</c> switch ("+music" is appended when the music check box is checked).</param>
        /// <param name="psmMode">Optional extra command line argument; trimmed and appended when not null/empty.</param>
        /// <returns>
        /// The recognized text (hOCR output run through <c>ParseHocr</c>), or an empty string when no
        /// output file was produced or it could not be read/parsed.
        /// </returns>
        /// <remarks>
        /// Exceptions from starting the process are rethrown after a message box is shown.
        /// Temporary files are removed on every exit path.
        /// </remarks>
        private string Tesseract3DoOcrViaExe(Bitmap bmp, string language, string psmMode)
        {
            // change yellow color to white - easier for Tesseract
            var nbmp = new NikseBitmap(bmp);
            nbmp.ReplaceYellowWithWhite(); // optimized replace
            bool useHocr = true;

            string tempImageFileName = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString() + ".png");
            string tempTextFileName = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString());
            // Tesseract appends the extension itself: ".html" for hOCR output, ".txt" for plain text.
            string outputFileName = tempTextFileName + (useHocr ? ".html" : ".txt");

            string result = string.Empty;
            try
            {
                // Dispose the bitmap even if saving fails.
                using (var b = nbmp.GetBitmap())
                {
                    b.Save(tempImageFileName, System.Drawing.Imaging.ImageFormat.Png);
                }

                using (var process = new Process())
                {
                    process.StartInfo = new ProcessStartInfo(Configuration.TesseractFolder + "tesseract.exe");
                    process.StartInfo.UseShellExecute = true;
                    process.StartInfo.Arguments = "\"" + tempImageFileName + "\" \"" + tempTextFileName + "\" -l " + language;

                    if (checkBoxTesseractMusicOn.Checked)
                    {
                        process.StartInfo.Arguments += "+music";
                    }

                    if (!string.IsNullOrEmpty(psmMode))
                    {
                        process.StartInfo.Arguments += " " + psmMode.Trim();
                    }

                    if (useHocr)
                    {
                        process.StartInfo.Arguments += " hocr";
                    }
                    process.StartInfo.WindowStyle = ProcessWindowStyle.Hidden;

                    if (Utilities.IsRunningOnLinux() || Utilities.IsRunningOnMac())
                    {
                        process.StartInfo.UseShellExecute = false;
                        process.StartInfo.RedirectStandardError = true;
                        process.StartInfo.FileName = "tesseract";
                    }
                    else
                    {
                        process.StartInfo.WorkingDirectory = (Configuration.TesseractFolder);
                    }

                    try
                    {
                        process.Start();
                    }
                    catch
                    {
                        MessageBox.Show("Unable to start 'tesseract' - make sure tesseract-ocr 3.x is installed!");
                        throw;
                    }

                    if (process.StartInfo.RedirectStandardError)
                    {
                        // Drain the redirected stream so the child cannot block on a full stderr pipe.
                        process.BeginErrorReadLine();
                    }

                    if (!process.WaitForExit(5000))
                    {
                        // Do not leave a hung tesseract process behind (it may also keep the temp files locked).
                        // Whatever output exists at this point is still read below, as before.
                        TryKillTesseractProcess(process);
                    }
                }

                if (File.Exists(outputFileName))
                {
                    try
                    {
                        string text = File.ReadAllText(outputFileName);
                        // Assign only after parsing succeeded, so raw hOCR markup is never returned as OCR text.
                        result = useHocr ? ParseHocr(text) : text;
                    }
                    catch (Exception)
                    {
                        // Best effort: an unreadable or unparsable output file yields an empty result.
                    }
                }
            }
            finally
            {
                TryDeleteTesseractTempFile(outputFileName);
                TryDeleteTesseractTempFile(tempImageFileName);
            }
            return result;
        }

        /// <summary>
        /// Kills <paramref name="process"/> and briefly waits for it to go away; failures are ignored.
        /// </summary>
        private static void TryKillTesseractProcess(Process process)
        {
            try
            {
                process.Kill();
                process.WaitForExit(1000);
            }
            catch (InvalidOperationException)
            {
                // Process exited between the timeout and the kill - nothing left to do.
            }
            catch (System.ComponentModel.Win32Exception)
            {
                // Process could not be terminated - nothing more can be done here.
            }
        }

        /// <summary>
        /// Deletes a temporary file, ignoring I/O and access errors (cleanup must never fail the OCR call).
        /// </summary>
        private static void TryDeleteTesseractTempFile(string fileName)
        {
            try
            {
                File.Delete(fileName);
            }
            catch (IOException)
            {
                // File still in use - leave it in the temp folder.
            }
            catch (UnauthorizedAccessException)
            {
                // No permission to delete - leave it in the temp folder.
            }
        }