Esempio n. 1
0
        /// <summary>
        /// Runs OCR over two fixed regions of every *.jpg file in a folder and shows each
        /// recognised text in a message box.
        /// </summary>
        /// <param name="targetFolderName">Folder whose *.jpg files are processed.</param>
        /// <param name="resultFileName">Not used by this method; kept so existing callers still compile.</param>
        private void doScan(string targetFolderName, string resultFileName)
        {
            var bitmapConverter = new Tesseract.BitmapToPixConverter();

            // Regions (in source-image pixels) that are cropped and recognised for each file.
            var targetRects = new[]
            {
                new System.Drawing.Rectangle(2980, 140, 3700 - 2980, 330 - 140),
                new System.Drawing.Rectangle(1390, 1370, 2390 - 1390, 1530 - 1370)
            };

            // The engine is disposable, so it is released once all files are done.
            using (var tesseract = new Tesseract.TesseractEngine(@"C:\Program Files (x86)\Tesseract-OCR\tessdata", "jpn", @"digits"))
            {
                tesseract.DefaultPageSegMode = Tesseract.PageSegMode.SingleChar;
                MessageBox.Show(tesseract.Version);

                foreach (var fileName in Directory.GetFiles(targetFolderName, "*.jpg"))
                {
                    using (var originalBitmap = new System.Drawing.Bitmap(fileName))
                    {
                        foreach (var targetRect in targetRects)
                        {
                            // Crop, convert and recognise; every intermediate object is released
                            // before the next region is processed.
                            using (var targetBitmap = originalBitmap.Clone(targetRect, System.Drawing.Imaging.PixelFormat.DontCare))
                            using (var pix = bitmapConverter.Convert(targetBitmap))
                            using (var page = tesseract.Process(pix))
                            {
                                MessageBox.Show(page.GetText());
                            }
                        }
                    }
                }
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Recognises English text in a bitmap using Tesseract's LSTM engine with automatic
        /// page segmentation.
        /// </summary>
        /// <param name="theBmp">Image to recognise.</param>
        /// <returns>
        /// The recognised text with surrounding whitespace trimmed, or an empty string when an
        /// <see cref="AccessViolationException"/> or <see cref="InvalidOperationException"/> was
        /// logged instead of propagated.
        /// </returns>
        public static string OCRImageWithTesseract(Bitmap theBmp)
        {
            var ocrText = string.Empty;

            try
            {
                // A single recognition pass is enough: the earlier extra pass produced a result
                // that was overwritten immediately. The converted Pix is now disposed as well.
                using (var engine = new Tesseract.TesseractEngine(@".\tessdata\", "eng", Tesseract.EngineMode.LstmOnly))
                using (var pix = Tesseract.PixConverter.ToPix(theBmp))
                using (var tessPage = engine.Process(pix, Tesseract.PageSegMode.Auto))
                {
                    ocrText = tessPage.GetText().Trim();
                }
            }
            catch (Exception ex)
            {
                // Only these two exception types are logged and swallowed; anything else is rethrown.
                if (ex is AccessViolationException || ex is InvalidOperationException)
                {
                    Trace.TraceError("Tesseract Error: " + ex.Message);
                }
                else
                {
                    throw;
                }
            }
            return(ocrText);
        }
Esempio n. 3
0
        /// <summary>
        /// Reads two numeric fields from fixed regions of the current screen capture, adds them
        /// to <c>Stats</c> when both are non-zero, and shows the capture in
        /// <c>PictureBoxOutput</c> while the form is active.
        /// </summary>
        /// <remarks>
        /// Exceptions are reported through <c>AddOutput</c> rather than propagated.
        /// </remarks>
        private void DoOCR()
        {
            string s_people = "",
                   s_mail   = "";

            try
            {
                var bitmap = screen_rgb.ToBitmap();

                using (var engine = new Tesseract.TesseractEngine(@"./tessdata", "eng", Tesseract.EngineMode.Default))
                {
                    engine.SetVariable("tessedit_char_whitelist", "0123456789");

                    // Each cropped region is a separate bitmap and is released after recognition.
                    using (var people = bitmap.Clone(new Rectangle(230, 250, 130, 25), System.Drawing.Imaging.PixelFormat.Format32bppArgb))
                    using (var page = engine.Process(people))
                    {
                        s_people = Regex.Replace(page.GetText(), "[^0-9]", "");
                    }

                    using (var mail = bitmap.Clone(new Rectangle(395, 250, 182, 25), System.Drawing.Imaging.PixelFormat.Format32bppArgb))
                    using (var page = engine.Process(mail))
                    {
                        s_mail = Regex.Replace(page.GetText(), "[^0-9]", "");
                    }
                }

                // No digits recognised counts as zero.
                if (string.IsNullOrEmpty(s_people))
                {
                    s_people = "0";
                }
                if (string.IsNullOrEmpty(s_mail))
                {
                    s_mail = "0";
                }

                if (s_mail != "0" && s_people != "0")
                {
                    Stats.Add(DateTime.Now, new Tuple <int, int>(int.Parse(s_people), int.Parse(s_mail)));
                }

                if (!IsActive)
                {
                    // The capture is not handed to the picture box on this path, so release it here.
                    bitmap.Dispose();
                    return;
                }
                Invoke(new Action(() => { PictureBoxOutput.Image = bitmap; }));
            }
            catch (Exception e)
            {
                AddOutput("OCR Error [$:'" + s_people + "', M:'" + s_mail + "'] " + e.Message + e.StackTrace);
            }
        }
Esempio n. 4
0
 /// <summary>
 /// Reads text from an image using optical character recognition.
 /// </summary>
 /// <param name="bmp">Source image.</param>
 /// <param name="targetLanguageISO639_2">Language code to extract; passed to the engine as its language name.</param>
 /// <returns>The extracted text with leading and trailing whitespace removed.</returns>
 public static string ToOcrString(this Bitmap bmp, string targetLanguageISO639_2 = "eng")
 {
     using (var tesseract = new Tesseract.TesseractEngine(DataProperty.TessDataFolder, targetLanguageISO639_2))
     using (var page = tesseract.Process(bmp))
     {
         // The page is disposed before the engine that produced it.
         return page.GetText().Trim();
     }
 }
Esempio n. 5
0
        /// <summary>
        /// Recognises English text in a bitmap and returns the plain text, the hOCR markup and
        /// the mean confidence reported by Tesseract.
        /// </summary>
        /// <param name="theBmp">Image to recognise.</param>
        /// <returns>
        /// The populated result. Fields keep their initial empty/zero values when an
        /// <see cref="AccessViolationException"/> or <see cref="InvalidOperationException"/> is
        /// logged before they are assigned.
        /// </returns>
        public static OCRResult OCRImageWithTesseract(Bitmap theBmp)
        {
            var outcome = new OCRResult()
            {
                OcrText        = string.Empty,
                HocrText       = string.Empty,
                MeanConfidence = 0.0f
            };

            try
            {
                using (var ocrEngine = new Tesseract.TesseractEngine(@".\tessdata\", "eng", Tesseract.EngineMode.LstmOnly))
                using (var sourcePix = Tesseract.PixConverter.ToPix(theBmp))
                using (var recognised = ocrEngine.Process(sourcePix, Tesseract.PageSegMode.Auto))
                {
                    outcome.OcrText        = recognised.GetText();
                    outcome.HocrText       = recognised.GetHOCRText(1);
                    outcome.MeanConfidence = recognised.GetMeanConfidence();
                }
            }
            catch (Exception error)
            {
                // Anything other than these two exception types is passed on to the caller.
                bool isLoggedOnly = error is AccessViolationException || error is InvalidOperationException;
                if (!isLoggedOnly)
                {
                    throw;
                }

                Trace.TraceError("Tesseract Error: " + error.Message);
            }

            return outcome;
        }
        /***********************************************************************************************************************
         *
         * Tesseract-OCR
         *
         ************************************************************************************************************************/
        /// <summary>
        /// Runs Tesseract OCR on the image currently held by <c>picBoxDst</c> and shows the
        /// recognised text in a message box.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        /// <remarks>
        /// References:
        /// http://www.kanenote.org/blog/?p=1228
        /// https://qiita.com/nobi1234/items/c8d7b748c1aa31d771a1
        /// http://whoopsidaisies.hatenablog.com/entry/2013/12/16/174819
        /// https://tesseract-ocr.github.io/tessdoc/Data-Files.html
        /// https://kakusuke98.hatenablog.com/entry/2019/11/14/004609
        /// http://blog.qes.co.jp/2017/06/
        /// </remarks>
        private void btn_tesseract_ocr_Click(object sender, EventArgs e)
        {
            // Folder that holds the language data files.
            string langPath = strFolderPath + @"\tessdata";
            // Language to recognise (use "jpn" for Japanese).
            string lngStr   = "eng";

            // Image to recognise; there is nothing to do when the picture box is empty.
            Bitmap img = (Bitmap)picBoxDst.Image;
            if (img == null)
            {
                return;
            }

            // Run OCR and display the result; the page is disposed before the engine.
            using (var tesseract = new Tesseract.TesseractEngine(langPath, lngStr))
            using (Tesseract.Page page = tesseract.Process(img))
            {
                MessageBox.Show(page.GetText());
            }
        }
Esempio n. 7
0
        /// <summary>
        /// Recognises one page image, appends its hOCR output to a per-page ".xhtml" file next
        /// to the source image, and writes the confidence, full text and a word-by-word walk of
        /// the layout to the console.
        /// </summary>
        /// <param name="engine">Engine used for recognition.</param>
        /// <param name="img">Page image to recognise.</param>
        /// <param name="testImagePath">Path of the source image; used only to derive the output file name.</param>
        /// <param name="pageNum">Page number used in the output file name ("_p{pageNum}") and passed on nowhere else.</param>
        public static void processImage(Tesseract.TesseractEngine engine, Tesseract.Pix img, string testImagePath, int pageNum)
        {
            System.IO.FileInfo mfile    = new System.IO.FileInfo(testImagePath);
            string             hocrName = System.IO.Path.GetFileNameWithoutExtension(testImagePath) + "_p" + pageNum.ToString() + ".xhtml";
            string             hocrPath = System.IO.Path.Combine(mfile.DirectoryName, hocrName);

            using (var page = engine.Process(img, PageSegMode.AutoOsd))
            {
                // AnalyseLayout returns an iterator that this method does not use; dispose it
                // straight away instead of leaving it for the finalizer.
                using (page.AnalyseLayout())
                {
                }

                string hocr = page.GetHOCRText(0, true);
                System.IO.File.AppendAllText(hocrPath, hocr);
                var text = page.GetText();
                Console.WriteLine("Mean confidence: {0}", page.GetMeanConfidence());
                Console.WriteLine("Text (GetText): \r\n{0}", text);
                Console.WriteLine("Text (iterator):");
                using (var iter = page.GetIterator())
                {
                    iter.Begin();
                    // Nested walk: block -> paragraph -> text line -> word.
                    do
                    {
                        do
                        {
                            do
                            {
                                do
                                {
                                    // Print a header with type and bounding box at the start of each block.
                                    if (iter.IsAtBeginningOf(PageIteratorLevel.Block))
                                    {
                                        Console.WriteLine("<BLOCK>");
                                        Rect currentBlock;
                                        iter.TryGetBoundingBox(PageIteratorLevel.Block, out currentBlock);
                                        Console.WriteLine(iter.BlockType.ToString());
                                        Console.WriteLine("(" + currentBlock.X1.ToString() + "," + currentBlock.Y1.ToString() + ")  (" + currentBlock.X2.ToString() + "," + currentBlock.Y2.ToString() + ")");
                                        Console.WriteLine("");
                                    }

                                    Console.Write(iter.GetText(PageIteratorLevel.Word));
                                    Console.Write(" ");

                                    // The last word of a line is followed by the block type and a line break.
                                    if (iter.IsAtFinalOf(PageIteratorLevel.TextLine, PageIteratorLevel.Word))
                                    {
                                        Console.WriteLine(iter.BlockType.ToString());
                                    }
                                } while (iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word));

                                // Blank line after the last line of a paragraph.
                                if (iter.IsAtFinalOf(PageIteratorLevel.Para, PageIteratorLevel.TextLine))
                                {
                                    Console.WriteLine();
                                }
                            } while (iter.Next(PageIteratorLevel.Para, PageIteratorLevel.TextLine));
                        } while (iter.Next(PageIteratorLevel.Block, PageIteratorLevel.Para));
                    } while (iter.Next(PageIteratorLevel.Block));
                }
            }
        }
Esempio n. 8
0
        /// <summary>
        /// Repeatedly activates the "Overwatch" window, takes a screenshot through the
        /// clipboard and OCRs it. Once two consecutive screenshots contain none of the watched
        /// words, opens the IFTTT webhook URL with the current time and exits the process.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void Button_Click(object sender, RoutedEventArgs e)
        {
            // A screenshot containing any of these words resets the miss counter.
            string[] watchedWords = { "SEARCH", "TIME", "ELAPSED", "LEAGUE", "HIGHLIGHTS", "SOCIAL", "EXIT" };
            int      err_count    = 0;

            while (err_count < 2)
            {
                Microsoft.VisualBasic.Interaction.AppActivate("Overwatch");
                SendKeys.SendWait("{PRTSC}");
                if (System.Windows.Clipboard.ContainsImage())
                {
                    using (var img = (Bitmap)System.Windows.Forms.Clipboard.GetImage())
                    {
                        // The clipboard can change between the check and the read; a missing
                        // image is treated like "no screenshot" and leaves the counter alone.
                        if (img != null)
                        {
                            err_count = ScreenshotContainsAny(img, watchedWords) ? 0 : err_count + 1;
                        }
                    }
                }
                Thread.Sleep(50000);
            }

            DateTime dt     = DateTime.Now;
            string   result = dt.ToString("yyyy/MM/dd HH:mm:ss");
            // The timestamp contains spaces, slashes and colons, so it is escaped for use in a query string.
            string   lnk    = "https://maker.ifttt.com/trigger/match/with/key/xxxxxxxx/?value1=" + Uri.EscapeDataString(result);

            System.Diagnostics.Process.Start(lnk);

            Environment.Exit(0);
        }

        /// <summary>
        /// OCRs a screenshot (after copying it to a 24bpp bitmap) and reports whether the
        /// recognised text contains at least one of the given words.
        /// </summary>
        private static bool ScreenshotContainsAny(Bitmap screenshot, string[] words)
        {
            string dataPath = System.IO.Path.Combine(System.IO.Directory.GetCurrentDirectory(), @"tesseract-ocr\\");
            string language = "eng";

            using (var bitmap24 = new Bitmap(screenshot.Width, screenshot.Height, System.Drawing.Imaging.PixelFormat.Format24bppRgb))
            {
                using (var gr = Graphics.FromImage(bitmap24))
                {
                    gr.DrawImage(screenshot, new Rectangle(0, 0, bitmap24.Width, bitmap24.Height));
                }

                using (var tesseract = new Tesseract.TesseractEngine(dataPath, language))
                {
                    // NOTE(review): "SEARCH" does not look like a Tesseract parameter name -
                    // confirm whether this call has any effect.
                    tesseract.SetVariable("SEARCH", "1");
                    using (Tesseract.Page page = tesseract.Process(bitmap24))
                    {
                        // Fetch the text once and test every word against it.
                        string text = page.GetText();
                        return Array.Exists(words, word => text.Contains(word));
                    }
                }
            }
        }
Esempio n. 9
0
        }//311行

        /// <summary>
        /// Runs character recognition on an image.
        /// </summary>
        /// <param name="img">Image to run character recognition on.</param>
        /// <param name="lang">
        /// Language to recognise, e.g. "eng" for English (the "xx" part of "xx.traineddata").
        /// </param>
        /// <returns>The recognised text.</returns>
        private string chara_recog(Bitmap img, string lang)
        {
            // The engine takes the language-data folder and the language name. Both the engine
            // and the page are disposable, so they are released before returning.
            using (var tesseract = new Tesseract.TesseractEngine(@"..\..\..\tessdata", lang))
            using (var page = tesseract.Process(img))
            {
                return page.GetText();
            }
        }
Esempio n. 10
0
 /// <summary>
 /// Captures a screen snippet via <c>Snip()</c>, recognises the digits in it and places them
 /// on the clipboard.
 /// </summary>
 /// <param name="sender">Event source (not used).</param>
 /// <param name="e">Event data (not used).</param>
 private void Button1_Click(object sender, EventArgs e)
 {
     using (var bmp = Snip())
     {
         if (bmp == null)
         {
             return;
         }

         using (var image = new Bitmap(bmp))
         using (var ocr = new Tesseract.TesseractEngine(@"./tessdata", "eng", Tesseract.EngineMode.Default))
         {
             ocr.SetVariable("tessedit_char_whitelist", "0123456789");
             // NOTE(review): Rect.Empty is passed as the region - confirm the wrapper treats
             // this as "whole image" rather than a zero-sized area.
             using (var result = ocr.Process(image, Tesseract.Rect.Empty))
             {
                 string digits = result.GetText().Trim();

                 // Clipboard.SetText rejects an empty string, so leave the clipboard untouched
                 // when nothing was recognised.
                 if (!string.IsNullOrEmpty(digits))
                 {
                     Clipboard.SetText(digits);
                 }
             }
         }
     }
 }
        /// <summary>
        /// Recognises English text in a bitmap and returns the text and mean confidence, plus
        /// optional hOCR, XHTML and JSON renderings of the same page.
        /// </summary>
        /// <param name="theBmp">Image to recognise.</param>
        /// <param name="getHOCR">When true, fills <c>HOCR</c> from <c>GetHOCRText(1)</c>.</param>
        /// <param name="getXHTML">When true, fills <c>XHTML</c> from <c>GetHOCRText(1, true)</c>.</param>
        /// <param name="getJSON">When true, fills <c>JSON</c> from <c>GetJSON</c>.</param>
        /// <returns>
        /// The result object; fields not yet assigned stay at their defaults when an
        /// <see cref="AccessViolationException"/> or <see cref="InvalidOperationException"/> is logged.
        /// </returns>
        public static T4Result OCRImageResult(Bitmap theBmp, bool getHOCR = false, bool getXHTML = false, bool getJSON = false)
        {
            var result = new T4Result();

            try
            {
                // The converted Pix is disposable too, so it is released together with the page and engine.
                using (var engine = new Tesseract.TesseractEngine(@".\tessdata\", "eng", Tesseract.EngineMode.LstmOnly))
                using (var pix = Tesseract.PixConverter.ToPix(theBmp))
                using (var tessPage = engine.Process(pix, Tesseract.PageSegMode.Auto))
                {
                    result.MeanConfidence = tessPage.GetMeanConfidence();
                    result.Text           = tessPage.GetText();
                    if (getHOCR)
                    {
                        result.HOCR = tessPage.GetHOCRText(1);
                    }
                    if (getXHTML)
                    {
                        result.XHTML = tessPage.GetHOCRText(1, true);
                    }
                    if (getJSON)
                    {
                        result.JSON = GetJSON(tessPage);
                    }
                }
            }
            catch (Exception ex)
            {
                // Only these two exception types are logged and swallowed; anything else is rethrown.
                if (ex is AccessViolationException || ex is InvalidOperationException)
                {
                    Trace.TraceError("Tesseract Error: " + ex.Message);
                }
                else
                {
                    throw;
                }
            }
            return(result);
        }
Esempio n. 12
0
        /// <summary>
        /// Scales <c>toOcr</c> up by <c>MULTI</c> with bicubic interpolation, converts it with
        /// <c>Create1bppImage</c>, recognises it and stores the outcome in <c>result</c>.
        /// <c>toOcr</c> is disposed after a successful run.
        /// </summary>
        public void doOcr()
        {
            using (Bitmap resizer = new Bitmap(toOcr.Width * MULTI, toOcr.Height * MULTI))
            {
                // The drawing surface is only needed for the resize itself.
                using (Graphics g = Graphics.FromImage(resizer))
                {
                    g.InterpolationMode = System.Drawing.Drawing2D.InterpolationMode.Bicubic;
                    g.DrawImage(toOcr, 0, 0, resizer.Width, resizer.Height);
                }

                // using blocks release the intermediate images and the page even when
                // recognition throws.
                using (Bitmap bw = Create1bppImage(resizer))
                using (Tesseract.Page p = tesseract.Process(bw))
                {
                    // NOTE(review): bw is disposed when this block ends, as before - confirm
                    // OcrResults does not keep using the bitmap afterwards.
                    result = new OcrResults(p.GetText(), bw);
                }
            }

            toOcr.Dispose();
        }
Esempio n. 13
0
        /// <summary>
        /// OCRs every page (frame) of an image with Tesseract, renders the result to a temporary
        /// PDF, reloads that PDF and returns it re-saved as a byte array.
        /// </summary>
        /// <param name="image">Encoded image bytes; every frame in the page dimension is processed.</param>
        /// <returns>The bytes of the resulting PDF document.</returns>
        /// <exception cref="Exception">
        /// Any failure is rethrown as "Error Tesseract: ..." with the original exception attached
        /// as <see cref="Exception.InnerException"/>.
        /// </exception>
        public static byte[] PerformOCRTesseract(byte[] image)
        {
            // Tesseract uses three languages: Russian, English and Vietnamese.
            string tesseractLanguages = "rus+eng+vie";

            // Folder containing the language data files and the font file "pdf.ttf".
            // Language data files can be found here:
            // Good and fast: https://github.com/tesseract-ocr/tessdata_fast
            // or
            // Best and slow: https://github.com/tesseract-ocr/tessdata_best
            // The temporary PDF is written into this folder, so it must be writable.
            string tesseractData = Path.GetFullPath(@"..\..\tessdata\");

            // Base name of the temporary PDF (the renderer's output is read back from tempFile + ".pdf").
            string tempFile = Path.Combine(tesseractData, Path.GetRandomFileName());

            try
            {
                using (Tesseract.IResultRenderer renderer = Tesseract.PdfResultRenderer.CreatePdfRenderer(tempFile, tesseractData, true))
                using (renderer.BeginDocument("Serachablepdf"))
                using (Tesseract.TesseractEngine engine = new Tesseract.TesseractEngine(tesseractData, tesseractLanguages, Tesseract.EngineMode.Default))
                {
                    engine.DefaultPageSegMode = Tesseract.PageSegMode.Auto;
                    using (MemoryStream msImg = new MemoryStream(image))
                    using (System.Drawing.Image imgWithText = System.Drawing.Image.FromStream(msImg))
                    {
                        int frameCount = imgWithText.GetFrameCount(System.Drawing.Imaging.FrameDimension.Page);
                        for (int i = 0; i < frameCount; i++)
                        {
                            // Each frame is re-encoded as PNG so it can be loaded as a Pix from memory.
                            imgWithText.SelectActiveFrame(System.Drawing.Imaging.FrameDimension.Page, i);
                            using (MemoryStream ms = new MemoryStream())
                            {
                                imgWithText.Save(ms, System.Drawing.Imaging.ImageFormat.Png);
                                byte[] imgBytes = ms.ToArray();
                                using (Tesseract.Pix img = Tesseract.Pix.LoadFromMemory(imgBytes))
                                using (var page = engine.Process(img, "Serachablepdf"))
                                {
                                    renderer.AddPage(page);
                                }
                            }
                        }
                    }
                }

                PdfLoadOptions pl = new PdfLoadOptions();
                pl.ShowInvisibleText     = true;
                pl.PreserveEmbeddedFonts = false;
                pl.ConversionMode        = PdfConversionMode.Continuous;

                // The file stream is closed before the finally block runs, so the temporary PDF
                // is no longer open when it is deleted.
                using (FileStream pdfStream = File.OpenRead(tempFile + @".pdf"))
                {
                    DocumentCore dc = DocumentCore.Load(pdfStream, pl);

                    using (MemoryStream ms = new MemoryStream())
                    {
                        PdfSaveOptions ps = new PdfSaveOptions();
                        dc.Save(ms, ps);
                        return ms.ToArray();
                    }
                }
            }
            catch (Exception e)
            {
                Console.WriteLine();
                Console.WriteLine("Please be sure that you have Language data files (*.traineddata) in your folder \"tessdata\"");
                Console.WriteLine("The Language data files can be download from here: https://github.com/tesseract-ocr/tessdata_fast");
                Console.ReadKey();
                // Keep the original exception attached so its stack trace is not lost.
                throw new Exception("Error Tesseract: " + e.Message, e);
            }
            finally
            {
                if (File.Exists(tempFile + ".pdf"))
                {
                    File.Delete(tempFile + ".pdf");
                }
            }
        }
Esempio n. 14
0
        /// <summary>
        /// Captures frames from camera 0 until an empty frame arrives. Each frame is shown in a
        /// "Camera" window; when the plate cascade finds a region, the first hit is outlined in
        /// red, the Canny edge image of that region is OCRed, and a plausible reading is printed
        /// and its crop saved to "myfile2.png".
        /// </summary>
        public static void Start()
        {
            capture = new VideoCapture(0);

            // Everything created here lives for the whole capture loop and is released when it ends.
            using (Window window = new Window("Camera"))
            using (var haarCascade = new CascadeClassifier(@"haarcascade_russian_plate_number.xml"))
            using (var ocr = new Tesseract.TesseractEngine("./tessdata", "eng", Tesseract.EngineMode.Default))
            using (Mat fullPictureMat = new Mat())   // reused buffer for the Canny output
            {
                while (true)
                {
                    using (Mat frame = GetFrame(true))
                    {
                        if (frame.Empty())
                        {
                            break;
                        }
                        window.ShowImage(frame);

                        // Detect number plates.
                        Rect[] rects = haarCascade.DetectMultiScale(frame, 1.1, 4, HaarDetectionType.DoRoughSearch | HaarDetectionType.DoCannyPruning, new OpenCvSharp.Size(100, 100));

                        if (rects.Length > 0)
                        {
                            ProcessDetection(frame, rects[0], fullPictureMat, ocr, window);
                        }
                    }

                    Cv2.WaitKey(30);
                }
            }
        }

        /// <summary>
        /// Outlines one detected region on a copy of the frame, OCRs the edge image of that
        /// region and shows the annotated copy in the window.
        /// </summary>
        private static void ProcessDetection(Mat frame, Rect hit, Mat fullPictureMat, Tesseract.TesseractEngine ocr, Window window)
        {
            using (MemoryStream stream = new MemoryStream())
            using (Bitmap tmpBitmap = OpenCvSharp.Extensions.BitmapConverter.ToBitmap(frame))
            {
                tmpBitmap.Save(stream, System.Drawing.Imaging.ImageFormat.Jpeg);

                // The stream stays open for as long as the image created from it is in use.
                using (var pic = Image.FromStream(stream))
                {
                    using (Graphics g = Graphics.FromImage(pic))
                    using (Pen p = new Pen(Color.Red, 5))
                    {
                        g.DrawImage(tmpBitmap, 0, 0);
                        g.DrawRectangle(p, hit.X, hit.Y, hit.Width, hit.Height);
                    }

                    // OCR runs on the edge image of the frame, cropped to the detected region.
                    Cv2.Canny(frame, fullPictureMat, 150, 300);
                    Rectangle rectangle = new Rectangle(hit.X, hit.Y, hit.Width, hit.Height);

                    using (Bitmap edgeBitmap = OpenCvSharp.Extensions.BitmapConverter.ToBitmap(fullPictureMat))
                    using (Bitmap cut_numberBitmap = edgeBitmap.Clone(rectangle, System.Drawing.Imaging.PixelFormat.Format8bppIndexed))
                    using (Tesseract.Page page = ocr.Process(cut_numberBitmap))
                    {
                        string numberStr = page.GetText();

                        // Accept only 5-9 character readings whose 2nd to 4th characters are digits.
                        if (numberStr.Length > 4 && numberStr.Length < 10)
                        {
                            if (Char.IsDigit(numberStr, 1) == true && Char.IsDigit(numberStr, 2) == true && Char.IsDigit(numberStr, 3) == true)
                            {
                                cut_numberBitmap.Save("myfile2.png", System.Drawing.Imaging.ImageFormat.Png);
                                Console.WriteLine(numberStr);
                            }
                        }
                    }

                    // Show the frame copy with the red outline.
                    using (Mat annotated = OpenCvSharp.Extensions.BitmapConverter.ToMat((Bitmap)pic))
                    {
                        window.ShowImage(annotated);
                    }
                }
            }
        }