/// <summary>
/// Preprocess given image if it is needed and place the result in a temporary file.
/// </summary>
/// <remarks>
/// When preprocessing is enabled in the engine properties, the preprocessed
/// <c>Pix</c> is saved to the temporary file; if no file exists afterwards, the
/// <c>Pix</c> is converted to a bitmap and saved that way instead. When
/// preprocessing is disabled, or no temporary file exists after those attempts,
/// the original image is copied to the temporary location. An
/// <see cref="System.IO.IOException"/> raised along the way is logged and not
/// rethrown.
/// </remarks>
/// <param name="inputImage">
/// original input image
/// <see cref="System.IO.FileInfo"/>
/// </param>
/// <param name="pageNumber">number of page to be OCRed</param>
/// <returns>
/// path to output image as
/// <see cref="System.String"/>
/// : the temporary file when it exists, otherwise the full name of the original image
/// </returns>
private String PreprocessImage(FileInfo inputImage, int pageNumber) {
    String tmpFileName = TesseractOcrUtil.GetTempFilePath(Guid.NewGuid().ToString(), GetExtension(inputImage));
    String path = inputImage.FullName;
    try {
        bool preprocessingEnabled = GetTesseract4OcrEngineProperties().IsPreprocessingImages();
        if (preprocessingEnabled) {
            // NOTE(review): pix is not released here; if Pix is disposable it should be
            // freed once the temporary file has been written - confirm against the Pix type.
            Pix pix = ImagePreprocessingUtil.PreprocessImage(inputImage, pageNumber);
            TesseractOcrUtil.SavePixToTempPngFile(tmpFileName, pix);
            if (!File.Exists(tmpFileName)) {
                // Saving the Pix directly produced no file: fall back to a bitmap conversion.
                // The bitmap is only needed for the save call, so dispose it right after;
                // a using statement tolerates a null resource.
                using (System.Drawing.Bitmap img = TesseractOcrUtil.ConvertPixToImage(pix)) {
                    if (img != null) {
                        TesseractOcrUtil.SaveImageToTempPngFile(tmpFileName, img);
                    }
                }
            }
        }
        if (!preprocessingEnabled || !File.Exists(tmpFileName)) {
            // Nothing was written (or preprocessing is off): work on a copy of the original.
            TesseractOcrUtil.CreateTempFileCopy(path, tmpFileName);
        }
        if (File.Exists(tmpFileName)) {
            path = tmpFileName;
        }
    }
    catch (System.IO.IOException e) {
        // Best effort: log and return whatever path is currently selected.
        LogManager.GetLogger(GetType()).Error(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_READ_INPUT_IMAGE, e.Message));
    }
    return path;
}
/// <summary>
/// Verifies that preprocessing the file named "numbers_02" under the test images
/// directory throws an exception that is an instance of
/// <see cref="Tesseract4OcrException"/>.
/// </summary>
public virtual void TestReadingInvalidImagePath() {
    // The FileInfo is created inside the delegate so that any failure while building it
    // is also observed by the assertion.
    NUnit.Framework.TestDelegate preprocessInvalidImage = () =>
        ImagePreprocessingUtil.PreprocessImage(new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_02"), 1);

    NUnit.Framework.Assert.That(preprocessInvalidImage, NUnit.Framework.Throws.InstanceOf<Tesseract4OcrException>());
}
/// <summary>
/// Gets OCR result from provided single page image and preprocesses it if
/// it is needed.
/// </summary>
/// <remarks>
/// Attempts are made in this order, each one only if the previous left the result
/// <c>null</c>: OCR of the preprocessed image (only when preprocessing is enabled),
/// OCR of the image read into a bitmap, and finally OCR of the original input file.
/// A failure of the bitmap attempt is logged at info level and does not abort the
/// method.
/// </remarks>
/// <param name="inputImage">
/// input image
/// <see cref="System.IO.FileInfo"/>
/// </param>
/// <param name="outputFormat">
/// selected
/// <see cref="OutputFormat"/>
/// for tesseract
/// </param>
/// <param name="pageNumber">number of page to be OCRed</param>
/// <returns>result as string that will be written to a temporary file later</returns>
/// <exception cref="Tesseract4OcrException">
/// thrown with <c>TESSERACT_FAILED</c> when any step other than the bitmap attempt throws
/// </exception>
private String GetOcrResultForSinglePage(FileInfo inputImage, OutputFormat outputFormat, int pageNumber) {
    String result = null;
    try {
        // preprocess if required
        if (GetTesseract4OcrEngineProperties().IsPreprocessingImages()) {
            // preprocess and try to ocr
            result = new TesseractOcrUtil().GetOcrResultAsString(GetTesseractInstance(), ImagePreprocessingUtil.PreprocessImage(inputImage, pageNumber), outputFormat);
        }
        if (result == null) {
            // The bitmap is only needed for this one attempt; dispose it before falling
            // back to the original file. A using statement tolerates a null resource.
            using (System.Drawing.Bitmap bufferedImage = ImagePreprocessingUtil.ReadImage(inputImage)) {
                if (bufferedImage != null) {
                    try {
                        result = new TesseractOcrUtil().GetOcrResultAsString(GetTesseractInstance(), bufferedImage, outputFormat);
                    }
                    catch (Exception e) {
                        // NOSONAR
                        LogManager.GetLogger(GetType()).Info(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_PROCESS_IMAGE, e.Message));
                    }
                }
            }
            if (result == null) {
                // perform ocr using original input image
                result = new TesseractOcrUtil().GetOcrResultAsString(GetTesseractInstance(), inputImage, outputFormat);
            }
        }
    }
    catch (Exception e) {
        // NOSONAR
        LogManager.GetLogger(GetType()).Error(MessageFormatUtil.Format(Tesseract4LogMessageConstant.TESSERACT_FAILED, e.Message));
        throw new Tesseract4OcrException(Tesseract4OcrException.TESSERACT_FAILED);
    }
    return result;
}