示例#1
0
        /// <summary>
        /// Gets OCR result from provided multi-page image and returns result as
        /// list of strings for each page.
        /// </summary>
        /// <remarks>
        /// Gets OCR result from provided multi-page image and returns result as
        /// list of strings for each page. This method is used for tiff images
        /// when preprocessing is not needed.
        /// </remarks>
        /// <param name="inputImage">
        /// input image
        /// <see cref="System.IO.FileInfo"/>
        /// </param>
        /// <param name="outputFormat">
        /// selected
        /// <see cref="OutputFormat"/>
        /// for tesseract
        /// </param>
        /// <returns>
        /// list of result string that will be written to a temporary files
        /// later
        /// </returns>
        private IList <String> GetOcrResultForMultiPage(FileInfo inputImage, OutputFormat outputFormat)
        {
            IList <String> resultList = new List <String>();

            try {
                InitializeTesseract(outputFormat);
                TesseractOcrUtil util = new TesseractOcrUtil();
                util.InitializeImagesListFromTiff(inputImage);
                int numOfPages = util.GetListOfPages().Count;
                for (int i = 0; i < numOfPages; i++)
                {
                    String result = util.GetOcrResultAsString(GetTesseractInstance(), util.GetListOfPages()[i], outputFormat);
                    resultList.Add(result);
                }
            }
            catch (TesseractException e) {
                String msg = MessageFormatUtil.Format(Tesseract4LogMessageConstant.TESSERACT_FAILED, e.Message);
                LogManager.GetLogger(GetType()).Error(msg);
                throw new Tesseract4OcrException(Tesseract4OcrException.TESSERACT_FAILED);
            }
            finally {
                TesseractOcrUtil.DisposeTesseractInstance(GetTesseractInstance());
            }
            return(resultList);
        }