Ejemplo n.º 1
3
        /// <summary>
        /// Console entry point: runs English OCR over one image and logs the recognised text,
        /// its mean confidence and a word-by-word dump of every second text line visited.
        /// </summary>
        /// <param name="args">Command-line arguments; <c>args[0]</c>, when present, replaces the default image path.</param>
        public static void Main(string[] args)
        {
            var testImagePath = "./phototest.tif";
            if (args.Length > 0) {
                testImagePath = args[0];
            }

            try {
                var logger = new FormattedConsoleLogger();
                using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
                using (var img = Pix.LoadFromFile(testImagePath))
                using (logger.Begin("Process image"))
                using (var page = engine.Process(img)) {
                    var text = page.GetText();
                    logger.Log("Text: {0}", text);
                    logger.Log("Mean confidence: {0}", page.GetMeanConfidence());

                    using (var iter = page.GetIterator()) {
                        // 1-based count of the text lines visited by the outer loop.
                        var lineNumber = 1;
                        iter.Begin();
                        do {
                            // Only even-numbered lines are dumped; odd ones are just stepped over.
                            if (lineNumber % 2 == 0) {
                                using (logger.Begin("Line {0}", lineNumber)) {
                                    do {
                                        using (logger.Begin("Word Iteration")) {
                                            if (iter.IsAtBeginningOf(PageIteratorLevel.Block)) {
                                                logger.Log("New block");
                                            }
                                            if (iter.IsAtBeginningOf(PageIteratorLevel.Para)) {
                                                logger.Log("New paragraph");
                                            }
                                            if (iter.IsAtBeginningOf(PageIteratorLevel.TextLine)) {
                                                logger.Log("New line");
                                            }
                                            logger.Log("word: " + iter.GetText(PageIteratorLevel.Word));

                                            // The clone is a second disposable iterator, so it is released
                                            // per word instead of being left for the finalizer.
                                            using (ResultIterator testiter = iter.Clone()) {
                                                logger.Log("from clone: " + testiter.GetText(PageIteratorLevel.Word));
                                            }
                                        }
                                    } while (iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word));
                                }
                            }
                            lineNumber++;
                        } while (iter.Next(PageIteratorLevel.Para, PageIteratorLevel.TextLine));
                    }
                }
            } catch (Exception e) {
                Trace.TraceError(e.ToString());
                Console.WriteLine("Unexpected Error: " + e.Message);
                Console.WriteLine("Details: ");
                Console.WriteLine(e.ToString());
            }
            Console.Write("Press any key to continue . . . ");
            Console.ReadKey(true);
        }
        /// <summary>
        /// Runs OCR over an in-memory image, recognising only the characters '\', '/', '|' and '#'.
        /// </summary>
        /// <param name="fileBytes">Byte array holding the encoded image file.</param>
        /// <returns>The text recognised on the image.</returns>
        public static string getText(byte[] fileBytes)
        {
            string rootPath = HostingEnvironment.ApplicationPhysicalPath;
            BitmapToPixConverter converter = new BitmapToPixConverter();

            using (Stream memStream = new MemoryStream(fileBytes))
            using (Bitmap image = (Bitmap)Image.FromStream(memStream))
            using (TesseractEngine ocr = new TesseractEngine(rootPath, "eng", EngineMode.TesseractOnly))
            {
                image.SetResolution(300, 300);
                ocr.SetVariable("tessedit_char_whitelist", "\\/|#");

                // The colour and grayscale images are two separate Pix objects; each gets its own
                // using so neither is leaked, even when recognition throws.
                using (Pix colorPix = converter.Convert(image))
                using (Pix grayPix = colorPix.ConvertRGBToGray())
                using (Page page = ocr.Process(grayPix, PageSegMode.Auto))
                {
                    return page.GetText();
                }
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Runs English OCR over the image at <c>TiffPath</c> and writes the recognised text
        /// to a ".txt" file with the same base name.
        /// </summary>
        /// <param name="path">Not read by this method (see the review note in the body).</param>
        /// <returns>
        /// "Sucess!" when the text file was written; otherwise "Error: " followed by the exception message.
        /// </returns>
        public string OCRTiff(string path)
        {
            // NOTE(review): 'path' is never used; the image location comes from TiffPath, which is
            // declared outside this method. Confirm which of the two is intended.
            string result;
            try
            {
                string pageText;
                using (TesseractEngine engine = new TesseractEngine(TessDataPath, "eng", EngineMode.Default))
                using (Pix image = Pix.LoadFromFile(TiffPath))
                using (var page = engine.Process(image))
                {
                    pageText = page.GetText();
                }

                // ChangeExtension copes with extensions of any length (".tif" and ".tiff" alike),
                // unlike trimming a fixed four characters from the end of the path.
                string textFilePath = Path.ChangeExtension(TiffPath, ".txt");
                File.WriteAllText(textFilePath, pageText);

                // The spelling is kept as-is because callers may compare against this exact value.
                result = "Sucess!";
            }
            catch (Exception ex)
            {
                result = String.Format("Error: {0}", ex.Message);
            }

            return result;
        }
Ejemplo n.º 4
0
 /// <summary>
 /// Creates the English Tesseract+Cube engine from the configured data path, reads the
 /// JSON cards file path from configuration and starts with an empty card list.
 /// </summary>
 private OCREngine()
 {
     var tessDataPath = Configuration.Instance.OCR.TesseractDataPath;

     m_ocrEngine = new TesseractEngine(tessDataPath, "eng", EngineMode.TesseractAndCube);
     m_jsonCardsFilePath = Configuration.Instance.JSONCardsFilePath;
     m_cards = new List<string>();
 }
 /// <summary>
 /// Shows the given file dialog and, when the user confirms, runs English OCR over the chosen
 /// image, displays the text in a message box and passes it to <c>getGenreOfSong</c>.
 /// </summary>
 /// <param name="openImageDialog">Dialog used to pick the image file.</param>
 public void imageOCRTest(FileDialog openImageDialog)
 {
     if (openImageDialog.ShowDialog() != DialogResult.OK)
     {
         return;
     }

     // NOTE(review): the tessdata location is a machine-specific absolute path.
     const string tessDataPath = "C:\\Users\\yeghiakoronian\\Documents\\visual studio 2013\\Projects\\NLP Genre Recogition\\NLP Genre Recogition\\tessdata";
     string selectedImagePath = openImageDialog.FileName;

     try
     {
         using (var tEngine = new TesseractEngine(tessDataPath, "eng", EngineMode.Default))
         using (var img = Pix.LoadFromFile(selectedImagePath))
         using (var page = tEngine.Process(img))
         {
             // Plain-text content of the processed image.
             string recognised = page.GetText();
             MessageBox.Show(recognised);
             getGenreOfSong(recognised);
         }
     }
     catch (IOException)
     {
         MessageBox.Show("Woops Cant Open The File", "COMP 6781: NLP", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
     }
 }
Ejemplo n.º 6
0
        /// <summary>
        /// Lets the user mark text regions on an image and returns the OCR text of every region.
        /// When no region is selected the whole image is processed as a single block.
        /// </summary>
        /// <param name="inputImagePath">Path of the image to load.</param>
        /// <returns>The concatenated "[Block i]: text" entries, numbered from 1.</returns>
        public static string ReadText(string inputImagePath)
        {
            const string windowTitle = "Select Text Boxes. Enter for confirm, Esc for finish";
            List<Bitmap> bitmaps;

            // The source matrix is only needed while the bitmaps are being produced.
            using (var srcImage = new Mat(inputImagePath))
            {
                var bbox = Cv2.SelectROIs(windowTitle, srcImage);
                Cv2.DestroyWindow(windowTitle);

                if (bbox == null || bbox.Length == 0)
                {
                    bitmaps = new List<Bitmap>
                    {
                        OpenCvSharp.Extensions.BitmapConverter.ToBitmap(srcImage)
                    };
                }
                else
                {
                    bitmaps = bbox.Select(x =>
                    {
                        // Each sub-matrix is a temporary used only to build the bitmap.
                        using (var region = srcImage.SubMat(x))
                        {
                            return OpenCvSharp.Extensions.BitmapConverter.ToBitmap(region);
                        }
                    }).ToList();
                }
            }

            try
            {
                using (var ocr = new Tesseract.TesseractEngine(@"../../../../OpenCVHavrylov/Data/tessdata", "eng"))
                {
                    var res = "";
                    int i   = 1;
                    foreach (var img in bitmaps)
                    {
                        using (var page = ocr.Process(img))
                        {
                            res += $"[Block {i}]: {page.GetText()}";
                            i++;
                        }
                    }
                    return res;
                }
            }
            finally
            {
                // The bitmaps were created here, so they are released here as well.
                foreach (var img in bitmaps)
                {
                    img.Dispose();
                }
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Runs OCR over the image at the given path.
        /// </summary>
        /// <param name="pathToImage">Path of the image file to recognise.</param>
        /// <returns>
        /// The recognised text; "Image not found" when the file does not exist; <c>null</c> when
        /// no text was recognised or an exception occurred (the exception is traced and its
        /// stack trace shown in a message box).
        /// </returns>
        private string BeginRecognize(string pathToImage)
        {
            try
            {
                if (File.Exists(pathToImage) == false)
                {
                    return "Image not found";
                }

                using (var engine = new TesseractEngine(_pathToTestData, Language, EngineMode.Default))
                using (var img = Pix.LoadFromFile(pathToImage))
                using (var page = engine.Process(img))
                {
                    var recognised = page.GetText();
                    return String.IsNullOrEmpty(recognised) ? null : recognised;
                }
            }
            catch (Exception e)
            {
                Trace.TraceError(e.ToString());
                MessageBox.Show(e.StackTrace);
            }

            return null;
        }
        /// <summary>
        /// Console entry point: asks for an image path, then reports how long engine creation
        /// and recognition took, followed by the recognised text.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        private static void Main(string[] args)
        {
            Console.Write("Input image path:");

            string input = Console.ReadLine();

            Stopwatch stopwatch = new Stopwatch();

            stopwatch.Start();

            using (T.TesseractEngine engine = new T.TesseractEngine("tessdata", "tur", T.EngineMode.TesseractAndLstm))
            {
                stopwatch.Stop();

                Console.WriteLine("Engine creation :" + stopwatch.ElapsedMilliseconds.ToString() + " ms");

                stopwatch.Restart();

                string text;
                using (T.Pix image = T.Pix.LoadFromFile(input))
                using (T.Page page = engine.Process(image))
                {
                    text = page.GetText();

                    // Stop before the usings unwind so disposal time is not counted as processing time.
                    stopwatch.Stop();
                }

                Console.WriteLine("Process time :" + stopwatch.ElapsedMilliseconds.ToString() + " ms");

                Console.Write("Result: " + text);
            }

            Console.Read();
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Builds the common filter sequence, loads the template images from the application
        /// resources and creates the Russian-language OCR engine.
        /// </summary>
        public CardRecognizer()
        {
            // Common preprocessing chain: grayscale, local thresholding, then edge detection.
            commonSeq = new FiltersSequence
            {
                Grayscale.CommonAlgorithms.BT709,
                new BradleyLocalThresholding(),
                new DifferenceEdgeDetector()
            };

            // Rank templates used for template matching.
            j = miranda.ui.Properties.Resources.J;
            k = miranda.ui.Properties.Resources.K;
            q = miranda.ui.Properties.Resources.Q;

            // Suit templates used for template matching.
            clubs = miranda.ui.Properties.Resources.Clubs;
            diamonds = miranda.ui.Properties.Resources.Diamonds;
            spades = miranda.ui.Properties.Resources.Spades;
            hearts = miranda.ui.Properties.Resources.Hearts;

            try
            {
                _engine = new TesseractEngine(@"./tessdata", "rus", EngineMode.Default);
                // A whitelist of "$.,0123456789" was previously tried here and is currently disabled.
            }
            catch (Exception engineFailure)
            {
                // Construction continues without an engine; the failure is traced and reported.
                Trace.TraceError(engineFailure.ToString());
                Ex.Report(engineFailure);
            }
        }
        /// <summary>
        /// Creates the Tesseract engine for the requested language and logs the engine version.
        /// </summary>
        /// <param name="path">Not read by this constructor; the data directory is hard-coded in the body.</param>
        /// <param name="lng">
        /// Language code handed to the engine. "eng" selects the Tesseract+Cube mode, any other
        /// value the default mode.
        /// </param>
        public WrapperTesseract(string path, string lng)
        {
            // Original guidance: the path should be the TESSDATA folder and always end with "/" or "\",
            // e.g. TESSDATA_PREFIX="/usr/share/tesseract-ocr/".
            //
            // An earlier Emgu-based engine (Tesseract 3.1) and letter-only whitelist / symbol
            // blacklist variables were used here at some point; all of that is disabled.

            Log.WriteLine("Setting Tesseract traindata and language");

            bool isEnglish = lng == "eng";
            string dataDirectory = isEnglish ? @"./tessdata3/" : @"./tessdata3";
            EngineMode mode = isEnglish ? EngineMode.TesseractAndCube : EngineMode.Default;

            _engine = new TesseractEngine(dataDirectory, lng, mode);

            Log.WriteLine("Tesseract Version: " + _engine.Version);
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Text recognition: crops <c>SrcImg</c> to a fixed rectangle, shows the crop in the
        /// preview image control and writes the OCR result to the console.
        /// </summary>
        private void AnalyzeString()
        {
            // Crop the bitmap first; a reasonably tight region is easier for OCR to handle.
            System.Drawing.Rectangle rect = new System.Drawing.Rectangle(20, 90, 450, 100);
            SrcImg = SrcImg.Clone(rect, SrcImg.PixelFormat);

            // Show the cropped image in the preview control.
            this.Dispatcher.Invoke(() => {
                this._Image_crop.Source = ConvertBitmapToIS(SrcImg);
            });

            // OCR from here on.
            string langPath = @"C:\tessdata";
            string lngStr   = "eng";

            // To test with an image file instead, load it from a path:
            //var img = new Bitmap(@"C:\test.jpg");
            var img = SrcImg;

            using (var tesseract = new Tesseract.TesseractEngine(langPath, lngStr))
            // The Pix and the Page are disposable too; release them together with the engine.
            using (Pix pix = PixConverter.ToPix(img))
            using (Tesseract.Page page = tesseract.Process(pix))
            {
                // Print the result.
                Console.WriteLine(page.GetText());
                Console.ReadLine(); // err? (original author's doubt; kept so behaviour is unchanged)
            }
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Recognises a single line of digits, '-' and '.' inside a region of the given bitmap
        /// and also writes the recognised text to a fixed file on disk.
        /// </summary>
        /// <param name="CropedPrescription">Bitmap to read from.</param>
        /// <param name="x">X coordinate passed to the region of interest.</param>
        /// <param name="y">Y coordinate passed to the region of interest.</param>
        /// <param name="width">Width of the region of interest.</param>
        /// <param name="height">Height of the region of interest.</param>
        /// <returns>The text recognised inside the region.</returns>
        public static string ocr(Bitmap CropedPrescription, int x, int y, int width, int height)
        {
            using (var engine = new TesseractEngine(@"C:\Program Files\Tesseract-OCR\tessdata\", "kor", EngineMode.Default))
            {
                // Restrict recognition to digits, '.' and '-'.
                engine.SetVariable("tessedit_char_whitelist", "0123456789-.");

                // Build the region-of-interest rectangle.
                var regionOfInterest = new Rect(x, y, width, height);

                using (var page = engine.Process(CropedPrescription, regionOfInterest, PageSegMode.SingleLine))
                {
                    string recognised = page.GetText();

                    // Save the recognised characters (the file carries an .html name).
                    System.IO.File.WriteAllText(@"C:\Program Files\PLOCR\textrecognition.html", recognised);

                    return recognised;
                }
            }
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Runs English OCR over the image held in <c>Buffer</c> and returns the recognised text.
        /// </summary>
        /// <param name="pageNumber">Not read by this implementation.</param>
        /// <returns>The text recognised on the image.</returns>
        /// <exception cref="ArgumentException">The configured tessdata directory does not exist.</exception>
        public override string ExtractContent(int? pageNumber)
        {
            var dataPath = Config.TessDataPath;
            if (Directory.Exists(dataPath) == false)
            {
                throw new ArgumentException("Path does not exist or access is denied.", nameof(dataPath));
            }

            // Pix cannot be loaded from a stream, so the buffer is decoded as a Bitmap first.
            using (var engine = new TesseractEngine(dataPath, "eng", EngineMode.Default))
            using (var memoryStream = new MemoryStream(Buffer))
            using (var image = new Bitmap(memoryStream))
            using (var pix = PixConverter.ToPix(image))
            using (var page = engine.Process(pix))
            {
                return page.GetText();
            }
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Character recognition: runs OCR over an image file, restricted to ASCII letters,
        /// digits, '.' and ','.
        /// </summary>
        /// <param name="imgPath">Path of the image file to recognise.</param>
        /// <param name="dataDirPath">Directory that holds the "&lt;lang&gt;.traineddata" file.</param>
        /// <param name="lang">Language name; also the base name of the trained-data file.</param>
        /// <returns>
        /// The recognised text, or an error message (in Japanese) when the image file or the
        /// trained-data file does not exist.
        /// </returns>
        private static string Character_recognition_Process(string imgPath, string dataDirPath, string lang)
        {
            if (!System.IO.File.Exists(imgPath))
            {
                // Message: "no image was found at the image path".
                return("画像のパスに画像が見つかりませんでした");
            }

            // Locate the language training data.
            string traindedDataPath = System.IO.Path.Combine(dataDirPath, lang + ".traineddata");

            if (!System.IO.File.Exists(traindedDataPath))
            {
                // Message: "<lang>.traineddata was not found".
                return(lang + ".traineddataがみつかりませんでした");
            }

            // Character recognition. The bitmap and the page are disposable as well, so they
            // are released together with the engine.
            using (TesseractEngine tesseract = new Tesseract.TesseractEngine(dataDirPath, lang))
            using (var img = new System.Drawing.Bitmap(imgPath))
            {
                // Restrict the recognisable characters.
                tesseract.SetVariable("tessedit_char_whitelist", "1234567890ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.,");

                // Perform OCR.
                using (Tesseract.Page page = tesseract.Process(img))
                {
                    return page.GetText();
                }
            }
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Runs English OCR over an image and returns the trimmed text.
        /// </summary>
        /// <param name="engpath">Data path handed to the Tesseract engine.</param>
        /// <param name="imgdata">Image to recognise.</param>
        /// <returns>The trimmed recognised text, or an empty string when anything fails.</returns>
        public static string GetOCRText(string engpath, Image imgdata)
        {
            string rst = "";

            try
            {
                // Side effect: the process-wide current directory is switched to the directory of
                // the first command-line argument (normally the executable).
                Directory.SetCurrentDirectory(System.IO.Path.GetDirectoryName(Environment.GetCommandLineArgs()[0]));

                using (var engine = new TesseractEngine(engpath, "eng", EngineMode.Default))
                using (Bitmap image = new Bitmap(imgdata))
                using (var pix = PixConverter.ToPix(image))
                using (var page = engine.Process(pix))
                {
                    rst = page.GetText().Trim();
                }
            }
            catch (System.Exception ex)
            {
                // Still best-effort (an empty result on failure), but the cause is now recorded
                // instead of being dropped.
                System.Diagnostics.Trace.TraceError(ex.ToString());
                ScratchModel.WriteLogFile("----Error----", "------ GetOCRText1 -------", "");
            }

            return rst;
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Click handler: runs English OCR over the file named in the text box and writes the
        /// text, the mean confidence and a word-level dump of every second line visited to the
        /// debug output. Failures are written to the console.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void btnOCR_Click(object sender, EventArgs e)
        {
            try
            {
                string testImagePath = this.txtFile.Text.Trim();
                using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
                {
                    using (var img = Pix.LoadFromFile(testImagePath))
                    {
                        System.Diagnostics.Debug.WriteLine("Process image");

                        var i = 1;
                        using (var page = engine.Process(img))
                        {
                            var text = page.GetText();

                            // A (string, string) call binds to Debug.WriteLine(message, category), which
                            // would treat the text as a category, so the message is formatted explicitly.
                            System.Diagnostics.Debug.WriteLine(string.Format("Text: {0}", text));
                            System.Diagnostics.Debug.WriteLine("Mean confidence: {0}", page.GetMeanConfidence());

                            using (var iter = page.GetIterator())
                            {
                                iter.Begin();
                                do
                                {
                                    // Only even-numbered lines are dumped; odd ones are stepped over.
                                    if (i % 2 == 0)
                                    {
                                        System.Diagnostics.Debug.WriteLine("Line {0}", i);
                                        do
                                        {
                                            System.Diagnostics.Debug.WriteLine("Word Iteration");

                                            if (iter.IsAtBeginningOf(PageIteratorLevel.Block))
                                            {
                                                System.Diagnostics.Debug.WriteLine("New block");
                                            }
                                            if (iter.IsAtBeginningOf(PageIteratorLevel.Para))
                                            {
                                                System.Diagnostics.Debug.WriteLine("New paragraph");
                                            }
                                            if (iter.IsAtBeginningOf(PageIteratorLevel.TextLine))
                                            {
                                                System.Diagnostics.Debug.WriteLine("New line");
                                            }
                                            System.Diagnostics.Debug.WriteLine("word: " + iter.GetText(PageIteratorLevel.Word));
                                        } while (iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word));
                                    }
                                    i++;
                                } while (iter.Next(PageIteratorLevel.Para, PageIteratorLevel.TextLine));
                            }
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine("Unexpected Error: " + ex.Message);
                Console.WriteLine("Details: ");
                Console.WriteLine(ex.ToString());
            }
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Recognises the CAPTCHA text in an image using the "yzufont" trained data, restricted
        /// to digits and upper-case letters.
        /// </summary>
        /// <param name="image">CAPTCHA image.</param>
        /// <returns>The recognised text with all whitespace removed.</returns>
        private string parseCaptchaStr(Image image)
        {
            // Engine, bitmap copy and page are all disposable; without the usings every call
            // would leave them for the finalizer.
            using (var ocr = new Tesseract.TesseractEngine(@"tessdata\", "yzufont"))
            {
                ocr.SetVariable("tessedit_char_whitelist", "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ");

                using (var bitmap = new Bitmap(image))
                using (Page tmpPage = ocr.Process(bitmap, pageSegMode: ocr.DefaultPageSegMode))
                {
                    return Regex.Replace(tmpPage.GetText(), @"\s+", String.Empty);
                }
            }
        }
Ejemplo n.º 18
0
 /// <summary>
 /// Creates a page that records the engine, image, image name, region of interest and
 /// segmentation mode it was created with.
 /// </summary>
 /// <param name="engine">Engine stored in <c>Engine</c>.</param>
 /// <param name="image">Image stored in <c>Image</c>.</param>
 /// <param name="imageName">Name stored in <c>ImageName</c>.</param>
 /// <param name="regionOfInterest">Region stored in <c>RegionOfInterest</c>.</param>
 /// <param name="pageSegmentMode">Mode stored in <c>PageSegmentMode</c>.</param>
 internal Page(TesseractEngine engine, Pix image, string imageName, Rect regionOfInterest, PageSegMode pageSegmentMode)
 {
     this.Engine = engine;
     this.Image = image;
     this.ImageName = imageName;
     this.RegionOfInterest = regionOfInterest;
     this.PageSegmentMode = pageSegmentMode;
 }
Ejemplo n.º 19
0
 /// <summary>
 /// Creates an OCR engine for the two configured recognition languages and, when
 /// <c>SearchForDigitsOnly</c> is set, restricts it to the digits 0-9.
 /// </summary>
 /// <returns>The newly created engine; the caller is responsible for disposing it.</returns>
 private Tesseract.TesseractEngine CreateEngine()
 {
     var languages = GetLanguageNamesString(RecognitionLanguage1, RecognitionLanguage2);
     var engine = new Tesseract.TesseractEngine("tessdata", languages, EngineMode.ToEngineMode());

     if (!SearchForDigitsOnly)
     {
         return engine;
     }

     engine.SetVariable("tessedit_char_whitelist", "0123456789");
     return engine;
 }
Ejemplo n.º 20
0
 /// <summary>
 /// Runs OCR over the given bitmap with a freshly created engine.
 /// </summary>
 /// <param name="bitmap">Bitmap to recognise.</param>
 /// <returns>The text recognised on the bitmap.</returns>
 public string Process(Bitmap bitmap)
 {
     using (Tesseract.TesseractEngine ocrEngine = CreateEngine())
     {
         ocrEngine.DefaultPageSegMode = (PageSegMode)DefaultSegmentationMode;

         using (Page page = ocrEngine.Process(bitmap))
         {
             string recognised = page.GetText();
             return recognised;
         }
     }
 }
Ejemplo n.º 21
0
        /// <summary>
        /// Creates the Japanese and English OCR engines used by the tests.
        /// </summary>
        public void TestInitialize()
        {
            // The engine takes the language-data directory and a language name; the name is the
            // "xxx" part of "xxx.traineddata" (for example "eng" for English).
            var tessDataDirectory = testPath + @"\tessdata";

            tesseractJpn = new TesseractEngine(tessDataDirectory, "jpn");
            tesseractEng = new TesseractEngine(tessDataDirectory, "eng");
        }
        /// <summary>
        /// Creates the OCR engine and trains the multi-class SVM on every PNG found in a directory.
        /// </summary>
        /// <param name="TrainedDataInputFile">
        /// Directory searched for "*.png" training images. The part of each file name before the
        /// first '.' is parsed as the integer class label.
        /// </param>
        public SVM(string TrainedDataInputFile)
        {
            _engine = new TesseractEngine(@"./tessdata3", "eng", EngineMode.TesseractAndCube);
            _engine.SetVariable("tessedit_char_whitelist", "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
            _engine.SetVariable("tessedit_char_blacklist", "¢§+~»~`!@#$%^&*()_+-={}[]|\\:\";\'<>?,./");

            // Each image is sampled as a 28 x 28 grid, giving 784 binary features.
            const int side = 28;
            const int featureCount = side * side;

            string[] TrainedData = Directory.GetFiles(TrainedDataInputFile, "*.png");
            double[][] inputs = new double[TrainedData.Length][];
            int[] Outputs = new int[TrainedData.Length];

            for (int i = 0; i < TrainedData.Length; i++)
            {
                string filename = Path.GetFileNameWithoutExtension(TrainedData[i]);
                string[] split = filename.Split('.');
                double[] features = new double[featureCount];

                // Dispose each bitmap as soon as it has been sampled; otherwise every training
                // image stays loaded until the finalizer runs.
                using (Bitmap TrainingImage = new Bitmap(TrainedData[i]))
                {
                    for (int j = 0; j < side; j++)
                    {
                        for (int k = 0; k < side; k++)
                        {
                            // A pixel whose colour name is "ffffffff" counts as background (0),
                            // everything else as ink (1).
                            bool isBackground = TrainingImage.GetPixel(j, k).Name.Equals("ffffffff");
                            features[j * side + k] = isBackground ? 0 : 1;
                        }
                    }
                }

                inputs[i] = features;
                Outputs[i] = Convert.ToInt32(split[0]);
            }

            IKernel kernel = new Polynomial(2, 0);
            ksvm = new MulticlassSupportVectorMachine(featureCount, kernel, 2);
            MulticlassSupportVectorLearning ml = new MulticlassSupportVectorLearning(ksvm, inputs, Outputs);

            // The original author flagged these three parameters for later tuning.
            double complexity = 1;
            double epsilon = 0.001;
            double tolerance = 0.2;

            ml.Algorithm = (svm, classInputs, classOutputs, i, j) =>
            {
                var smo = new SequentialMinimalOptimization(svm, classInputs, classOutputs);
                smo.Complexity = complexity;  // cost parameter for the SVM
                smo.Epsilon = epsilon;
                smo.Tolerance = tolerance;
                return smo;
            };

            // Train the machines. It should take a while. The returned error value is not used.
            ml.Run();
        }
Ejemplo n.º 23
0
        /// <summary>
        /// Recognises a single line of digits and upper-case letters on the given bitmap.
        /// </summary>
        /// <param name="b">Bitmap to recognise.</param>
        /// <returns>The recognised text.</returns>
        private string OCR(Bitmap b)
        {
            using (var engine = new TesseractEngine(@"tessdata", "eng", EngineMode.Default))
            {
                engine.SetVariable("tessedit_char_whitelist", "1234567890ABCDEFGHIJKLMNOPQRSTUVWXYZ");
                engine.SetVariable("tessedit_unrej_any_wd", true);

                using (var page = engine.Process(b, PageSegMode.SingleLine))
                {
                    return page.GetText();
                }
            }
        }
Ejemplo n.º 24
0
 /// <summary>
 /// Creates the shared English Tesseract+Cube engine from the "tessdata" folder next to the
 /// entry assembly. Failures are traced and written to the console, not rethrown.
 /// </summary>
 /// <returns>A task representing the (synchronously executed) initialisation.</returns>
 public static async Task init()
 {
     // NOTE(review): the method is marked async but contains no await, so it runs synchronously.
     try {
         var entryAssemblyFile = new FileInfo(System.Reflection.Assembly.GetEntryAssembly().Location);
         string tessdata_dir = Path.Combine(entryAssemblyFile.Directory.FullName, "tessdata");

         engine = new TesseractEngine(tessdata_dir, "eng", EngineMode.TesseractAndCube);
     } catch (Exception e) {
         Trace.TraceError(e.ToString());
         Console.WriteLine("Unexpected Error: " + e.Message);
         Console.WriteLine("Details: ");
         Console.WriteLine(e.ToString());
     }
 }
        /// <summary>
        /// Click handler: lets the user pick an image file, runs English OCR over it and shows
        /// the recognised text in the text block.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void LoadButton_OnClick(object sender, RoutedEventArgs e)
        {
            var fileDialog = new OpenFileDialog();

            // ShowDialog returns a nullable bool; anything other than true means no file was chosen.
            if (fileDialog.ShowDialog() != true)
            {
                return;
            }

            // Engine, image and page are all disposable; release them once the text has been read.
            using (var tessEngine = new Tesseract.TesseractEngine("tessdata", "eng", EngineMode.Default))
            using (var image = Pix.LoadFromFile(fileDialog.FileName))
            using (var page = tessEngine.Process(image, PageSegMode.Auto))
            {
                TextBlock.Text = page.GetText();
            }
        }
Ejemplo n.º 26
0
        /// <summary>
        /// Converts the image to a monochrome bitmap, runs English OCR over it and traces the
        /// mean confidence of the result.
        /// </summary>
        /// <param name="image">Image to recognise.</param>
        /// <returns>The recognised text.</returns>
        private static string RecognizeTextFromImage(Image image)
        {
            using (var ocrEngine = new TesseractEngine(@"./tessdata", "eng"))
            {
                using (Bitmap bitmap = new Bitmap(image))
                {
                    using (Bitmap monochromeBitmap = ImageUtils.Convert24BitToMonochrome(bitmap))
                    {
                        using (var page = ocrEngine.Process(monochromeBitmap))
                        {
                            string recognizedText = page.GetText();
                            Trace.TraceInformation("Text recognized with mean confidence: {0:N3}", page.GetMeanConfidence());
                            return recognizedText;
                        }
                    }
                }
            }
        }
Ejemplo n.º 27
0
        /// <summary>
        /// Runs French OCR over an image file and returns the result as hOCR text.
        /// </summary>
        /// <param name="p">Directory that contains the "tessdata" folder.</param>
        /// <param name="fileName">Path of the image file to recognise.</param>
        /// <returns>The hOCR text produced for page number 0.</returns>
        public static string AnalyzeFileHOCR(string p, string fileName)
        {
            string tessDataDirectory = System.IO.Path.Combine(p, "tessdata");

            using (var engine = new TesseractEngine(tessDataDirectory, "fra", EngineMode.Default))
            using (var img = Pix.LoadFromFile(fileName))
            using (var page = engine.Process(img))
            {
                string hocr = page.GetHOCRText(0);
                return hocr;
            }
        }
Ejemplo n.º 28
0
        /// <summary>
        /// Stores the configuration and user hash and creates the English OCR engine from the
        /// configured tessdata path. A missing tessdata directory is only logged.
        /// </summary>
        /// <param name="cfg">Configuration that supplies the tessdata path and the logger.</param>
        /// <param name="uHash">Value stored in the static <c>uHash</c> member.</param>
        public vhConsole(IConfig cfg, String uHash)
        {
            config          = cfg;
            vhConsole.uHash = uHash;

            // An alternative that read the hash from MyInfo() was tried earlier and is disabled.

            bool tessdataMissing = !Directory.Exists(cfg.tessdata);
            if (tessdataMissing)
            {
                // Logged only; throwing here was considered by the original author and left disabled.
                cfg.logger.Log("Cannot find tessdata path: {0}", Path.GetFullPath(cfg.tessdata));
            }

            engine = new Tesseract.TesseractEngine(cfg.tessdata, "eng");
        }
Ejemplo n.º 29
0
        /// <summary>
        /// Console entry point: runs English OCR over a fixed file and prints the hOCR output.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // NOTE(review): both paths below point at the same .pdf file. The first is opened as a
            // bitmap and the second is handed to the engine as its data path; presumably an image
            // file and a tessdata directory were intended. Confirm and replace.
            // NOTE(review): the first literal is a verbatim string, so its backslashes are doubled
            // in the actual path.
            using (var image = new Bitmap(@"C:\\Users\\E566281\\Documents\\2007_AEB_E-pdf-1.pdf"))
            using (var ocr = new Tesseract.TesseractEngine("C:\\Users\\E566281\\Documents\\2007_AEB_E-pdf-1.pdf", "eng"))
            using (var result = ocr.Process(image, null))
            {
                // GetHOCRText returns one string; enumerating it would print a character per line,
                // so the whole document is written in one call.
                Console.WriteLine(result.GetHOCRText(1, false));
            }
        }
Ejemplo n.º 30
0
 /// <summary>
 /// Runs English OCR over an image and returns every whitespace-separated token that parses
 /// as an integer after clean-up.
 /// </summary>
 /// <param name="imagePath">The image path.</param>
 /// <returns>
 /// The numbers found, in reading order. The list is empty when the path is blank, the file
 /// does not exist, no text is recognised or an error occurs.
 /// </returns>
 public static List<int> GetAllNumbersFromImage(string imagePath)
 {
     var numbersList = new List<int>();
     if (string.IsNullOrWhiteSpace(imagePath) || !System.IO.File.Exists(imagePath))
     {
         return numbersList;
     }

     try
     {
         using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
         using (var img = Pix.LoadFromFile(imagePath))
         using (var page = engine.Process(img))
         {
             // Read the text once and reuse it for both the emptiness check and the split.
             var pageText = page.GetText();
             if (string.IsNullOrWhiteSpace(pageText))
             {
                 return numbersList;
             }

             // A null separator array splits on any whitespace, so numbers on adjacent lines are
             // no longer glued together the way a split on ' ' alone would leave them.
             var tokens = pageText.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
             foreach (var token in tokens)
             {
                 // 'regex' is declared outside this method; per the original comment it removes
                 // non-alphanumeric characters.
                 var cleaned = regex.Replace(token, string.Empty);

                 // Undo common misreadings: 'i' -> '1' and 'o' -> '0'.
                 cleaned = cleaned.ToLowerInvariant().Replace('i', '1').Replace('o', '0');

                 int numberValue;
                 if (int.TryParse(cleaned, out numberValue))
                 {
                     numbersList.Add(numberValue);
                 }
                 else
                 {
                     Console.WriteLine("Unable to process the file : " + imagePath);
                 }
             }
         }
     }
     catch (Exception ex)
     {
         Console.WriteLine("Unexpected Error: " + ex.Message);
     }

     return numbersList;
 }
Ejemplo n.º 31
0
        /// <summary>
        /// Runs English OCR on the image at <paramref name="tiffPath"/> and returns the recognized text.
        /// </summary>
        /// <param name="tiffPath">Path of the image file to load.</param>
        /// <returns>
        /// The full page text followed by the word texts gathered on every even-numbered pass of the
        /// outer iterator loop. If an exception is raised, the text collected so far is returned.
        /// </returns>
        public static string imageToText(string tiffPath)
        {
            // Built incrementally so that a failure half-way still returns the partial result.
            var collected = new System.Text.StringBuilder();
            try
            {
                using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
                using (var img = Pix.LoadFromFile(tiffPath))
                using (var page = engine.Process(img))
                {
                    collected.Append(page.GetText());
                    Debug.WriteLine("Mean confidence: {0}", page.GetMeanConfidence());

                    // The using statement disposes the iterator; no explicit Dispose call is needed.
                    using (var iter = page.GetIterator())
                    {
                        iter.Begin();
                        var i = 1;
                        do
                        {
                            if (i % 2 == 0)
                            {
                                do
                                {
                                    collected.Append(iter.GetText(PageIteratorLevel.Word));
                                } while (iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word));
                            }
                            i++;
                        } while (iter.Next(PageIteratorLevel.Para, PageIteratorLevel.TextLine));
                    }
                }
            }
            catch (Exception e)
            {
                Debug.WriteLine("Unexpected Error: " + e.Message);
                Debug.WriteLine("Details: ");
                Debug.WriteLine(e.ToString());
            }

            return collected.ToString();
        }
Ejemplo n.º 32
0
        /// <summary>
        /// Click handler: captures an image, runs OCR on it, adds the recognized text to
        /// <c>listBox1</c> and deletes the captured file.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void BTN_Save_Click_1(object sender, EventArgs e)
        {
            string filename = path + @"\img_ocr" + count.ToString();
            // NOTE(review): assumes camera.Capture writes "<filename>.jpg" - confirm against the camera API.
            string imageFile = filename + ".jpg";

            camera.Capture(filename);

            // The bitmap must be released before the file can be deleted below.
            using (Bitmap img = (Bitmap)Bitmap.FromFile(imageFile))
            {
                img.RotateFlip(RotateFlipType.Rotate180FlipY);

                // Engine and page are disposable; release them on every click instead of leaking them.
                using (var tesseract = new Tesseract.TesseractEngine("./dataset", "eng", Tesseract.EngineMode.TesseractOnly))
                using (var page = tesseract.Process(img))
                {
                    listBox1.Items.Add(page.GetText());
                }
            }

            File.Delete(imageFile);
            count++;
        }
Ejemplo n.º 33
0
        /// <summary>
        /// Endless polling loop: runs the first video input device for one second, reads a
        /// temperature from "test3.jpg" through OCR and sends it as a <c>SensorEvent</c>.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            while (true)
            {
                videoSource = new VideoCaptureDevice(new FilterInfoCollection(FilterCategory.VideoInputDevice)[0].MonikerString);
                videoSource.NewFrame += CaptureFrame;

                // Let the device deliver frames for one second.
                // NOTE(review): presumably CaptureFrame stores the frame as "test3.jpg" - confirm.
                videoSource.Start();
                Thread.Sleep(1000);
                videoSource.Stop();

                // A fresh engine is created on every pass, so it has to be released on every pass;
                // otherwise this endless loop leaks one engine per iteration.
                using (var engine = new TesseractEngine(@"tessdata", "letsgodigital", EngineMode.Default))
                {
                    var image = new ElitechStc1000Image("test3.jpg", engine);

                    var temp = image.GetTemperature();

                    Console.WriteLine(temp);

                    try
                    {
                        var sensorEvent = new SensorEvent
                        {
                            Temperature = temp
                        };

                        Console.WriteLine("{0} > Sending temperature: {1}", DateTime.Now, temp);

                        var sink = new AzuresStreamAnalytics()
                            .WithConnectionString(ConfigurationManager.AppSettings["EventHubSendConnection"]);

                        sink.Send(sensorEvent);
                    }
                    catch (Exception exception)
                    {
                        // A failed send must not stop the polling loop; report it and carry on.
                        Console.ForegroundColor = ConsoleColor.Red;
                        Console.WriteLine("{0} > Exception: {1}", DateTime.Now, exception.Message);
                        Console.ResetColor();
                    }
                }

                Thread.Sleep(2000);
            }
        }
Ejemplo n.º 34
0
        /// <summary>
        /// Recognizes English text in an in-memory image.
        /// </summary>
        /// <param name="engpath">Data path handed to the <c>TesseractEngine</c> constructor.</param>
        /// <param name="imgdata">Image to recognize; it is copied into a temporary bitmap.</param>
        /// <returns>The recognized text with leading and trailing whitespace trimmed.</returns>
        public string GetOCRText(string engpath, Image imgdata)
        {
            using (var ocr = new TesseractEngine(engpath, "eng", EngineMode.Default))
            using (var bitmap = new Bitmap(imgdata))
            using (var pix = PixConverter.ToPix(bitmap))
            using (var page = ocr.Process(pix))
            {
                return page.GetText().Trim();
            }
        }
Ejemplo n.º 35
0
        /// <summary>
        /// Smoke test: runs OCR on the bundled sample image and prints the words visited on every
        /// even-numbered pass of the outer iterator loop. Any exception is written to the console.
        /// </summary>
        public void Test()
        {
            const string samplePath = "../../phototest.tif";
            try
            {
                using (var ocr = new TesseractEngine(@"../../tessdata", "eng", EngineMode.Default))
                using (var picture = Pix.LoadFromFile(samplePath))
                using (var page = ocr.Process(picture))
                {
                    // The text itself is not used here; the call is retained from the original.
                    page.GetText();

                    using (var cursor = page.GetIterator())
                    {
                        cursor.Begin();

                        var pass = 1;
                        bool hasNextLine;
                        do
                        {
                            var isEvenPass = pass % 2 == 0;
                            if (isEvenPass)
                            {
                                bool hasNextWord;
                                do
                                {
                                    Console.WriteLine("word: " + cursor.GetText(PageIteratorLevel.Word));
                                    hasNextWord = cursor.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word);
                                } while (hasNextWord);
                            }

                            pass++;
                            hasNextLine = cursor.Next(PageIteratorLevel.Para, PageIteratorLevel.TextLine);
                        } while (hasNextLine);
                    }
                }
            }
            catch (Exception e)
            {
                Console.WriteLine("Unexpected Error: " + e.Message);
                Console.WriteLine("Details: ");
                Console.WriteLine(e.ToString());
            }
        }
Ejemplo n.º 36
0
        /// <summary>
        /// Runs OCR on <paramref name="image"/> and returns the text after it has been passed
        /// through <c>RemoveSpecialCharacters</c>.
        /// </summary>
        /// <param name="image">Bitmap to recognize.</param>
        /// <returns>The cleaned-up recognized text.</returns>
        public string GetText(Bitmap image)
        {
            using (var ocr = new TesseractEngine(_dataPath, EngineLang, EngineMode.Default))
            {
                ocr.SetVariable("tessedit_write_images", true);

                using (var pix = PixConverter.ToPix(image))
                using (var page = ocr.Process(pix))
                {
                    var cleaned = RemoveSpecialCharacters(page.GetText());

#if DEBUG
                    // Debug builds also trace the mean confidence and the resulting text.
                    System.Diagnostics.Debug.WriteLine("Confibialidade: " + page.GetMeanConfidence());
                    System.Diagnostics.Debug.WriteLine("Placa: " + cleaned);
#endif
                    return cleaned;
                }
            }
        }
Ejemplo n.º 37
0
        /// <summary>
        /// Downloads an image and runs Hungarian ("hun") OCR on a sub-region of it.
        /// </summary>
        /// <param name="uri">Address the image is downloaded from.</param>
        /// <param name="regionPercentage">
        /// Region to recognize; its Left/Top/Width/Height are multiplied by the bitmap's
        /// width/height to obtain pixel coordinates.
        /// </param>
        /// <returns>The recognized text, or an empty string when the download fails.</returns>
        public string DownloadAndProcessImage(Uri uri, Rect regionPercentage)
        {
            byte[] imageData;
            try
            {
                using (var client = new WebClient())
                {
                    imageData = client.DownloadData(uri);
                }
                StatisticsCollector.GetInstance().IncrementOutgoingWebRequestCount();

                _log.Debug("Image downloaded from `" + uri + "`. Size: " + imageData.Length + " Byte(s).");
            }
            catch (Exception e)
            {
                // Include the actual failure reason; the message used to end in the literal text "e".
                _log.Error("Failed to download image from `" + uri + "`. Error: " + e.Message);
                return "";
            }

            // The engine is created only after a successful download and, like the stream, bitmap
            // and page, is released when the method exits. The stream stays open while the bitmap
            // that was built from it is in use.
            using (var engine = new TesseractEngine(_tessdataPath, "hun"))
            using (var imageStream = new MemoryStream(imageData))
            using (var bitmap = new Bitmap(imageStream))
            {
                _log.Debug("Image size: " + bitmap.Size);

                // Calculate image region
                var region = new Tesseract.Rect(
                    (int) (regionPercentage.Left*bitmap.Width),
                    (int) (regionPercentage.Top*bitmap.Height),
                    (int) (regionPercentage.Width*bitmap.Width),
                    (int) (regionPercentage.Height*bitmap.Height));

                _log.Debug("OCR region: " + region);

                using (Page page = engine.Process(bitmap, region, PageSegMode.Auto))
                {
                    string text = page.GetText();

                    _log.Debug("Processed text length: " + text.Length);

                    StatisticsCollector.GetInstance().IncrementOcredPageCount();

                    return text;
                }
            }
        }
Ejemplo n.º 38
0
        /// <summary>
        /// Takes a coordinate region and recognizes only that part of the bitmap (Korean, "kor"),
        /// returning the result as hOCR markup. The markup is also saved to a fixed HTML file.
        /// </summary>
        /// <param name="source">Bitmap to read from.</param>
        /// <param name="x">First argument of the region-of-interest <c>Rect</c>.</param>
        /// <param name="y">Second argument of the region-of-interest <c>Rect</c>.</param>
        /// <param name="width">Third argument of the region-of-interest <c>Rect</c>.</param>
        /// <param name="height">Fourth argument of the region-of-interest <c>Rect</c>.</param>
        /// <returns>The hOCR text produced for the region.</returns>
        public static string hocr(Bitmap source, int x, int y, int width, int height)
        {
            using (var ocr = new TesseractEngine(@"C:\Program Files\Tesseract-OCR\tessdata\", "kor", EngineMode.Default))
            using (var page = ocr.Process(source, new Rect(x, y, width, height), PageSegMode.Auto))
            {
                var markup = page.GetHOCRText(3);

                // Save the recognized text in HTML form.
                System.IO.File.WriteAllText(@"C:\Program Files\PLOCR\textrecognition.html", markup);

                return markup;
            }
        }
        /// <summary>
        /// Method responsible for breaking (solving) the captcha.
        /// </summary>
        /// <param name="image">Byte array of the captcha image</param>
        /// <param name="config">Object holding the information relevant to solving the captcha</param>
        /// <returns>String with the captcha solution, with all carriage returns and line feeds removed</returns>
        public string Break(byte[] image, CaptchaConfig config)
        {
            // The image is loaded from a file, so the bytes are first written to a temporary file.
            var tempfilename = Path.GetTempFileName();

            try
            {
                File.WriteAllBytes(tempfilename, image);

                var stbResult = new StringBuilder();

                using (var engine = new TesseractEngine(ConfigManager.GetInstance().CAMINHO_ARQUIVO_IDIOMA, IDIOMA_PORTUGUES, EngineMode.Default))
                using (var img = Pix.LoadFromFile(tempfilename))
                {
                    var i = 1;
                    using (var page = engine.Process(img, null))
                    {
                        stbResult.AppendLine(page.GetText());

                        // When more than one word is expected, the words visited on every
                        // even-numbered pass of the outer loop are appended after the page text.
                        if (config.NumeroDePalavras > 1)
                        {
                            using (var iter = page.GetIterator())
                            {
                                iter.Begin();
                                do
                                {
                                    if (i % 2 == 0)
                                    {
                                        do
                                        {
                                            stbResult.AppendLine(iter.GetText(PageIteratorLevel.Word));
                                        } while (iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word));
                                    }
                                    i++;
                                } while (iter.Next(PageIteratorLevel.Para, PageIteratorLevel.TextLine));
                            }
                        }
                    }
                }

                return stbResult.ToString().Replace("\r", string.Empty).Replace("\n", string.Empty);
            }
            finally
            {
                // Path.GetTempFileName creates the file on disk; remove it so temp files do not pile up.
                File.Delete(tempfilename);
            }
        }
Ejemplo n.º 40
0
        /// <summary>
        /// Runs OCR on the image at <paramref name="path"/> and refills <c>_words</c> with one
        /// <c>Line</c> (bounding box, text and source path) per text line the iterator visits.
        /// </summary>
        /// <param name="path">Path of the image file to load.</param>
        private void OcrRecognize(string path)
        {
            var engine = new TesseractEngine(tessData, Lang, EngineMode.Default);
            // The field is still assigned as before; the engine is disposed when this method exits.
            _tesseractEngine = engine;
            _words = new List<Line>();

            // using blocks release page, image and engine (in that order) even when OCR throws.
            using (engine)
            using (var img = Pix.LoadFromFile(path))
            using (var page = engine.Process(img))
            {
                // Retained from the original, which stored the result in an unused local.
                // NOTE(review): likely removable - confirm the iterator does not depend on it.
                page.GetText();

                using (var iter = page.GetIterator())
                {
                    iter.Begin();
                    do
                    {
                        Rect rect;
                        // The success flag is ignored, as in the original.
                        iter.TryGetBoundingBox(PageIteratorLevel.TextLine, out rect);
                        _words.Add(new Line(rect.X1, rect.Y1, rect.Width, rect.Height, iter.GetText(PageIteratorLevel.TextLine), path));
                    } while (iter.Next(PageIteratorLevel.Block, PageIteratorLevel.TextLine));
                }
            }
        }
Ejemplo n.º 41
0
        /// <summary>
        /// Decrypts an image file, raises its contrast, runs French ("fra") OCR on it and returns the text.
        /// </summary>
        /// <param name="key">Key passed to <c>Utility.Security.AES.DecryptFile</c>.</param>
        /// <param name="p">Directory that contains the "tessdata" folder; the temporary image is written here too.</param>
        /// <param name="fileName">Encrypted file to analyze; its extension is reused for the temporary image.</param>
        /// <returns>The recognized text with leading and trailing whitespace trimmed.</returns>
        public static string AnalyzeFile(string key, string p, string fileName)
        {
            var data = System.IO.Path.Combine(p, "tessdata");

            using (var engine = new TesseractEngine(data, "fra", EngineMode.Default))
            {
                string bitmapPath = System.IO.Path.Combine(p, Guid.NewGuid().ToString() + System.IO.Path.GetExtension(fileName));
                try
                {
                    // Stream and both bitmaps are released as soon as the temporary image is saved.
                    // Disposing `adjusted` is harmless even if SetContrast hands back the same instance.
                    using (var ms = new System.IO.MemoryStream(Utility.Security.AES.DecryptFile(key, fileName)))
                    using (var decoded = new System.Drawing.Bitmap(ms))
                    using (var adjusted = Treatment.SetContrast(decoded, 20))
                    {
                        adjusted.Save(bitmapPath);
                    }

                    using (var img = Pix.LoadFromFile(bitmapPath))
                    using (var page = engine.Process(img))
                    {
                        return page.GetText().Trim();
                    }
                }
                finally
                {
                    // The temporary file holds decrypted content; remove it even when OCR fails.
                    System.IO.File.Delete(bitmapPath);
                }
            }
        }