Ejemplo n.º 1
0
        /// <summary>Preprocess given image if it is needed.</summary>
        /// <remarks>
        /// A temporary file name is generated for every call. When preprocessing
        /// is enabled in the engine properties, the preprocessed image is written
        /// to that temporary file; otherwise (or when nothing was written) the
        /// original file is copied there. An <see cref="System.IO.IOException"/>
        /// raised along the way is logged and not rethrown.
        /// </remarks>
        /// <param name="inputImage">
        /// original input image
        /// <see cref="System.IO.FileInfo"/>
        /// </param>
        /// <param name="pageNumber">number of page to be OCRed</param>
        /// <returns>
        /// path to the temporary image if it exists once processing is done,
        /// otherwise the full path of the original input image, as
        /// <see cref="System.String"/>
        /// </returns>
        private String PreprocessImage(FileInfo inputImage, int pageNumber)
        {
            String tmpFileName = TesseractOcrUtil.GetTempFilePath(Guid.NewGuid().ToString(), GetExtension(inputImage));
            String path        = inputImage.FullName;

            try {
                // Read the flag once so both decisions below use the same value.
                bool preprocessingEnabled = GetTesseract4OcrEngineProperties().IsPreprocessingImages();

                if (preprocessingEnabled)
                {
                    // NOTE(review): ownership of the Pix is not visible from this method;
                    // confirm whether it must be released here after saving.
                    Pix pix = ImagePreprocessingUtil.PreprocessImage(inputImage, pageNumber);
                    TesseractOcrUtil.SavePixToTempPngFile(tmpFileName, pix);
                    if (!File.Exists(tmpFileName))
                    {
                        // Saving the Pix produced no file: fall back to saving it as a bitmap.
                        // The using block releases the bitmap as soon as it has been written
                        // (a null resource is allowed and is simply not disposed).
                        using (System.Drawing.Bitmap img = TesseractOcrUtil.ConvertPixToImage(pix))
                        {
                            if (img != null)
                            {
                                TesseractOcrUtil.SaveImageToTempPngFile(tmpFileName, img);
                            }
                        }
                    }
                }
                if (!preprocessingEnabled || !File.Exists(tmpFileName))
                {
                    // No preprocessed output available: work on a copy of the original image.
                    TesseractOcrUtil.CreateTempFileCopy(path, tmpFileName);
                }
                if (File.Exists(tmpFileName))
                {
                    path = tmpFileName;
                }
            }
            catch (System.IO.IOException e) {
                // Best effort: log the failure and return whatever path is current.
                LogManager.GetLogger(GetType()).Error(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_READ_INPUT_IMAGE
                                                                               , e.Message));
            }
            return path;
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Checks that preprocessing page 1 of the "numbers_02" path in the test
 /// images directory raises a <see cref="Tesseract4OcrException"/>
 /// (or a type derived from it).
 /// </summary>
 public virtual void TestReadingInvalidImagePath()
 {
     // The path is built and used inside the delegate so that anything thrown
     // while doing so is evaluated by the constraint below.
     NUnit.Framework.TestDelegate preprocessInvalidImage = () =>
     {
         FileInfo invalidImage = new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_02");
         ImagePreprocessingUtil.PreprocessImage(invalidImage, 1);
     };

     NUnit.Framework.Assert.That(
         preprocessInvalidImage,
         NUnit.Framework.Throws.InstanceOf<Tesseract4OcrException>());
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Gets OCR result from provided single page image and preprocesses it if
        /// it is needed.
        /// </summary>
        /// <remarks>
        /// Attempts are made in this order, stopping at the first non-null result:
        /// the preprocessed image (only when preprocessing is enabled in the engine
        /// properties), the image read into a bitmap, and finally the original
        /// input file itself.
        /// </remarks>
        /// <param name="inputImage">
        /// input image
        /// <see cref="System.IO.FileInfo"/>
        /// </param>
        /// <param name="outputFormat">
        /// selected
        /// <see cref="OutputFormat"/>
        /// for tesseract
        /// </param>
        /// <param name="pageNumber">number of page to be OCRed</param>
        /// <returns>result as string that will be written to a temporary file later</returns>
        /// <exception cref="Tesseract4OcrException">
        /// thrown when any step other than the bitmap-based attempt fails;
        /// the original error message is logged before rethrowing
        /// </exception>
        private String GetOcrResultForSinglePage(FileInfo inputImage, OutputFormat outputFormat, int pageNumber)
        {
            String result = null;

            try {
                // preprocess if required
                if (GetTesseract4OcrEngineProperties().IsPreprocessingImages())
                {
                    // preprocess and try to ocr
                    result = new TesseractOcrUtil().GetOcrResultAsString(GetTesseractInstance(), ImagePreprocessingUtil.PreprocessImage
                                                                             (inputImage, pageNumber), outputFormat);
                }
                if (result == null)
                {
                    // The bitmap is only needed for this one attempt; the using block
                    // releases it afterwards (a null resource is allowed and is simply
                    // not disposed).
                    using (System.Drawing.Bitmap bufferedImage = ImagePreprocessingUtil.ReadImage(inputImage))
                    {
                        if (bufferedImage != null)
                        {
                            try {
                                result = new TesseractOcrUtil().GetOcrResultAsString(GetTesseractInstance(), bufferedImage, outputFormat);
                            }
                            catch (Exception e) {
                                // NOSONAR
                                // A failure here is not fatal: it is logged at info level and
                                // the original input file is tried next.
                                LogManager.GetLogger(GetType()).Info(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_PROCESS_IMAGE
                                                                                              , e.Message));
                            }
                        }
                    }
                    if (result == null)
                    {
                        // perform ocr using original input image
                        result = new TesseractOcrUtil().GetOcrResultAsString(GetTesseractInstance(), inputImage, outputFormat);
                    }
                }
            }
            catch (Exception e) {
                // NOSONAR
                LogManager.GetLogger(GetType()).Error(MessageFormatUtil.Format(Tesseract4LogMessageConstant.TESSERACT_FAILED
                                                                               , e.Message));
                throw new Tesseract4OcrException(Tesseract4OcrException.TESSERACT_FAILED);
            }
            return result;
        }