/// <summary>
/// Runs OCR over every page of a multi-page image and collects the
/// recognized text, one list entry per page.
/// </summary>
/// <remarks>
/// Intended for tiff images for which no preprocessing is needed.
/// The pages are read through a fresh
/// <see cref="TesseractOcrUtil"/>
/// and processed in list order.
/// <c>TesseractOcrUtil.DisposeTesseractInstance</c> is invoked on the current
/// tesseract instance in every case, whether the method succeeds or fails.
/// </remarks>
/// <param name="inputImage">
/// multi-page input image as a
/// <see cref="System.IO.FileInfo"/>
/// </param>
/// <param name="outputFormat">
/// <see cref="OutputFormat"/>
/// used both to initialize tesseract and to produce each page result
/// </param>
/// <returns>
/// per-page OCR results in page order; these strings are written to
/// temporary files later
/// </returns>
/// <exception cref="Tesseract4OcrException">
/// with <c>Tesseract4OcrException.TESSERACT_FAILED</c> when a
/// <c>TesseractException</c> is caught; the original message is logged first
/// </exception>
private IList<String> GetOcrResultForMultiPage(FileInfo inputImage, OutputFormat outputFormat)
{
    IList<String> pageTexts = new List<String>();
    try
    {
        InitializeTesseract(outputFormat);

        TesseractOcrUtil ocrUtil = new TesseractOcrUtil();
        ocrUtil.InitializeImagesListFromTiff(inputImage);

        // The page count is captured once, before any page is processed.
        int pageCount = ocrUtil.GetListOfPages().Count;
        int pageIndex = 0;
        while (pageIndex < pageCount)
        {
            pageTexts.Add(ocrUtil.GetOcrResultAsString(
                GetTesseractInstance(),
                ocrUtil.GetListOfPages()[pageIndex],
                outputFormat));
            pageIndex++;
        }
    }
    catch (TesseractException e)
    {
        // Log the underlying tesseract message, then surface the generic failure.
        String failureMessage = MessageFormatUtil.Format(
            Tesseract4LogMessageConstant.TESSERACT_FAILED,
            e.Message);
        LogManager.GetLogger(GetType()).Error(failureMessage);
        throw new Tesseract4OcrException(Tesseract4OcrException.TESSERACT_FAILED);
    }
    finally
    {
        TesseractOcrUtil.DisposeTesseractInstance(GetTesseractInstance());
    }

    return pageTexts;
}
/// <summary>
/// Checks that initializing the page list from "example_03.tiff" leaves
/// the list of pages empty.
/// </summary>
/// <remarks>
/// NOTE(review): judging by the test name, "example_03.tiff" is expected to
/// be an invalid tiff — confirm against the test resources.
/// </remarks>
public virtual void TestInitializeImagesListFromInvalidTiff()
{
    // Arrange
    FileInfo tiffFile = new FileInfo(TEST_IMAGES_DIRECTORY + "example_03.tiff");
    TesseractOcrUtil ocrUtil = new TesseractOcrUtil();

    // Act
    ocrUtil.InitializeImagesListFromTiff(tiffFile);

    // Assert
    int pageCount = ocrUtil.GetListOfPages().Count;
    NUnit.Framework.Assert.AreEqual(0, pageCount);
}