C# (CSharp) iText.Pdfocr.Tesseract4 TesseractOcrUtil.DisposeTesseractInstance Examples

Programming Language: C# (CSharp)

Namespace/Package Name: iText.Pdfocr.Tesseract4

Class/Type: TesseractOcrUtil

Method/Function: DisposeTesseractInstance

Examples at hotexamples.com: 2

C# (CSharp) iText.Pdfocr.Tesseract4 TesseractOcrUtil.DisposeTesseractInstance - 2 examples found. These are the top-rated real-world C# (CSharp) examples of iText.Pdfocr.Tesseract4.TesseractOcrUtil.DisposeTesseractInstance extracted from open-source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

GetTempFilePath(5)

SaveImageToTempPngFile(4)

ConvertPixToImage(3)

SavePixToTempPngFile(3)

GetParentDirectory(2)

ReadPixPageFromTiff(2)

InitializeTesseractInstance(2)

InitializeImagesListFromTiff(2)

Contains(2)

GetListOfPages(2)

DisposeTesseractInstance(2)

GetOcrResultAsString(1)

ConvertImageToPix(1)

IsTesseractInstanceDisposed(1)

OtsuImageThresholding(1)

PreprocessPix(1)

DestroyPix(1)

CreateTempFileCopy(1)

ConvertToGrayscale(1)

SetTesseractProperties(1)

Example #1

Show file

        /// <summary>
        /// Gets OCR result from provided multi-page image and returns result as
        /// list of strings, one for each page.
        /// </summary>
        /// <remarks>
        /// This method is used for tiff images when preprocessing is not needed.
        /// The tesseract instance returned by <c>GetTesseractInstance()</c> is passed to
        /// <c>TesseractOcrUtil.DisposeTesseractInstance</c> before this method exits,
        /// both on success and on failure.
        /// </remarks>
        /// <param name="inputImage">
        /// input image
        /// <see cref="System.IO.FileInfo"/>
        /// </param>
        /// <param name="outputFormat">
        /// selected
        /// <see cref="OutputFormat"/>
        /// for tesseract
        /// </param>
        /// <returns>
        /// list of result strings (one per page, in page order) that will be
        /// written to files later
        /// </returns>
        /// <exception cref="Tesseract4OcrException">
        /// if a <c>TesseractException</c> is caught while initializing tesseract or
        /// processing a page; the caught exception is attached as the inner exception
        /// </exception>
        private IList <String> GetOcrResultForMultiPage(FileInfo inputImage, OutputFormat outputFormat)
        {
            IList <String> resultList = new List <String>();

            try {
                InitializeTesseract(outputFormat);
                TesseractOcrUtil util = new TesseractOcrUtil();
                util.InitializeImagesListFromTiff(inputImage);
                // Fetch the page list once rather than on every loop iteration.
                var pages = util.GetListOfPages();
                int numOfPages = pages.Count;
                for (int i = 0; i < numOfPages; i++)
                {
                    String result = util.GetOcrResultAsString(GetTesseractInstance(), pages[i], outputFormat);
                    resultList.Add(result);
                }
            }
            catch (TesseractException e) {
                String msg = MessageFormatUtil.Format(Tesseract4LogMessageConstant.TESSERACT_FAILED, e.Message);
                LogManager.GetLogger(GetType()).Error(msg);
                // Keep the original exception as the inner exception so the root cause
                // and its stack trace remain available to callers.
                throw new Tesseract4OcrException(Tesseract4OcrException.TESSERACT_FAILED, e);
            }
            finally {
                // Always hand the instance to the disposal helper, even when OCR failed.
                TesseractOcrUtil.DisposeTesseractInstance(GetTesseractInstance());
            }
            return(resultList);
        }

Example #2

Show file

 /// <summary>
 /// Performs tesseract OCR using wrapper for Tesseract OCR API for the selected page
 /// of input image (by default 1st).
 /// </summary>
 /// <remarks>
 /// Please note that list of output files is accepted instead of a single file because
 /// page number parameter is not respected in case of TIFF images not requiring preprocessing.
 /// In other words, if the passed image is the TIFF image and according to the
 /// <see cref="Tesseract4OcrEngineProperties"/>
 /// no preprocessing is needed, each page of the TIFF image is OCRed and the number of output files in the list
 /// is expected to be same as number of pages in the image, otherwise, only one file is expected.
 /// Results that have no corresponding entry in the output files list (or whose entry is null)
 /// are skipped, as are null results.
 /// </remarks>
 /// <param name="inputImage">
 /// input image
 /// <see cref="System.IO.FileInfo"/>
 /// </param>
 /// <param name="outputFiles">
 ///
 /// <see cref="System.Collections.IList{E}"/>
 /// of output files
 /// (one per each page)
 /// </param>
 /// <param name="outputFormat">
 /// selected
 /// <see cref="OutputFormat"/>
 /// for tesseract
 /// </param>
 /// <param name="pageNumber">number of page to be processed</param>
 /// <exception cref="Tesseract4OcrException">
 /// if OCR fails or a result cannot be written to its output file; when the failure is an
 /// <see cref="System.IO.IOException"/>, it is preserved in the inner-exception chain
 /// </exception>
 internal override void DoTesseractOcr(FileInfo inputImage, IList <FileInfo> outputFiles, OutputFormat outputFormat
                                       , int pageNumber)
 {
     ScheduledCheck();
     try {
         ValidateLanguages(GetTesseract4OcrEngineProperties().GetLanguages());
         InitializeTesseract(outputFormat);
         OnEvent();
         // if preprocessing is not needed and provided image is tiff,
         // the image will be paginated and separate pages will be OCRed
         IList <String> resultList = new List <String>();
         if (!GetTesseract4OcrEngineProperties().IsPreprocessingImages() && ImagePreprocessingUtil.IsTiffImage(inputImage
                                                                                                               ))
         {
             resultList = GetOcrResultForMultiPage(inputImage, outputFormat);
         }
         else
         {
             resultList.Add(GetOcrResultForSinglePage(inputImage, outputFormat, pageNumber));
         }
         // list of result strings is written to separate files
         // (one for each page)
         for (int i = 0; i < resultList.Count; i++)
         {
             String   result     = resultList[i];
             // There may be fewer output files than results; such results are not written.
             FileInfo outputFile = i >= outputFiles.Count ? null : outputFiles[i];
             if (result != null && outputFile != null)
             {
                 try {
                     using (TextWriter writer = new StreamWriter(new FileStream(outputFile.FullName, FileMode.Create), System.Text.Encoding
                                                                 .UTF8)) {
                         writer.Write(result);
                     }
                 }
                 catch (System.IO.IOException e) {
                     LogManager.GetLogger(GetType()).Error(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_WRITE_TO_FILE
                                                                                    , e.Message));
                     // Attach the I/O failure as the inner exception so the root cause
                     // is not reduced to a log line only.
                     throw new Tesseract4OcrException(Tesseract4OcrException.TESSERACT_FAILED, e);
                 }
             }
         }
     }
     catch (Tesseract4OcrException e) {
         // Log and rethrow wrapped, keeping the caught exception as the inner exception.
         LogManager.GetLogger(GetType()).Error(e.Message);
         throw new Tesseract4OcrException(e.Message, e);
     }
     finally {
         // Release the tesseract instance if one was created.
         if (tesseractInstance != null)
         {
             TesseractOcrUtil.DisposeTesseractInstance(tesseractInstance);
         }
         // Remove the user words file when the properties mark it as temporary.
         if (GetTesseract4OcrEngineProperties().GetPathToUserWordsFile() != null && GetTesseract4OcrEngineProperties
                 ().IsUserWordsFileTemporary())
         {
             TesseractHelper.DeleteFile(GetTesseract4OcrEngineProperties().GetPathToUserWordsFile());
         }
     }
 }