Ejemplo n.º 1
0
        /// <summary>
        /// Loads the given image file as a <see cref="Tesseract.Pix"/>.
        /// </summary>
        /// <remarks>
        /// The file is first read as a <see cref="System.Drawing.Bitmap"/> and converted to
        /// <see cref="Tesseract.Pix"/>. If that produces nothing (or throws), the file is loaded
        /// directly through <c>Tesseract.Pix.LoadFromFile</c>. A failure of either attempt is
        /// logged at info level and is not rethrown.
        /// </remarks>
        /// <param name="inputFile">
        /// input image <see cref="System.IO.FileInfo"/>
        /// </param>
        /// <returns>
        /// the resulting <see cref="Tesseract.Pix"/>, or <c>null</c> when neither attempt produced one
        /// </returns>
        internal static Pix ReadPix(FileInfo inputFile)
        {
            Pix result = null;

            // First attempt: read as a bitmap and convert it.
            try {
                System.Drawing.Bitmap bitmap = iText.Pdfocr.Tesseract4.ImagePreprocessingUtil.ReadImageFromFile(inputFile);
                result = bitmap == null ? null : TesseractOcrUtil.ConvertImageToPix(bitmap);
            }
            catch (Exception ex) {
                // NOSONAR
                String message = MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_CONVERT_IMAGE_TO_PIX,
                                                          inputFile.FullName, ex.Message);
                LogManager.GetLogger(typeof(iText.Pdfocr.Tesseract4.ImagePreprocessingUtil)).Info(message);
            }
            if (result != null)
            {
                return result;
            }

            // Fallback: load the file directly as a Pix.
            try {
                result = Tesseract.Pix.LoadFromFile(inputFile.FullName);
            }
            catch (Exception ex) {
                // NOSONAR
                String message = MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_CONVERT_IMAGE_TO_PIX,
                                                          inputFile.FullName, ex.Message);
                LogManager.GetLogger(typeof(iText.Pdfocr.Tesseract4.ImagePreprocessingUtil)).Info(message);
            }
            return result;
        }
Ejemplo n.º 2
0
        /// <summary>Tells whether the two given file paths have equal parent directories.</summary>
        /// <param name="firstPath">path to the first file</param>
        /// <param name="secondPath">path to the second file</param>
        /// <returns>
        /// <c>true</c> when the parent directory of <paramref name="firstPath"/> is not <c>null</c>
        /// and equals the parent directory of <paramref name="secondPath"/>; otherwise <c>false</c>
        /// </returns>
        private bool AreEqualParentDirectories(String firstPath, String secondPath)
        {
            String parentOfFirst  = TesseractOcrUtil.GetParentDirectory(firstPath);
            String parentOfSecond = TesseractOcrUtil.GetParentDirectory(secondPath);

            if (parentOfFirst == null)
            {
                return false;
            }
            return parentOfFirst.Equals(parentOfSecond);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Gets OCR result from provided multi-page image and returns result as
        /// list of strings for each page.
        /// </summary>
        /// <remarks>
        /// This method is used for tiff images when preprocessing is not needed.
        /// Tesseract is (re)initialized for the requested output format, the pages of the
        /// image are loaded into a fresh <see cref="TesseractOcrUtil"/> and every page is OCRed in order.
        /// The Tesseract instance is disposed when the method exits, whether or not it succeeded.
        /// </remarks>
        /// <param name="inputImage">
        /// input image
        /// <see cref="System.IO.FileInfo"/>
        /// </param>
        /// <param name="outputFormat">
        /// selected
        /// <see cref="OutputFormat"/>
        /// for tesseract
        /// </param>
        /// <returns>
        /// list of result strings (one per page) that will be written to temporary files
        /// later
        /// </returns>
        /// <exception cref="Tesseract4OcrException">
        /// when a <c>TesseractException</c> is raised while processing; the original exception is
        /// attached as the cause
        /// </exception>
        private IList <String> GetOcrResultForMultiPage(FileInfo inputImage, OutputFormat outputFormat)
        {
            IList <String> resultList = new List <String>();

            try {
                InitializeTesseract(outputFormat);
                TesseractOcrUtil util = new TesseractOcrUtil();
                util.InitializeImagesListFromTiff(inputImage);
                int numOfPages = util.GetListOfPages().Count;
                for (int i = 0; i < numOfPages; i++)
                {
                    String result = util.GetOcrResultAsString(GetTesseractInstance(), util.GetListOfPages()[i], outputFormat);
                    resultList.Add(result);
                }
            }
            catch (TesseractException e) {
                String msg = MessageFormatUtil.Format(Tesseract4LogMessageConstant.TESSERACT_FAILED, e.Message);
                LogManager.GetLogger(GetType()).Error(msg);
                // Keep the underlying failure as the cause so its stack trace is not lost.
                throw new Tesseract4OcrException(Tesseract4OcrException.TESSERACT_FAILED, e);
            }
            finally {
                TesseractOcrUtil.DisposeTesseractInstance(GetTesseractInstance());
            }
            return(resultList);
        }
Ejemplo n.º 4
0
        /// <summary>Produces the image file that should be handed to OCR, preprocessing it if configured.</summary>
        /// <remarks>
        /// When preprocessing is enabled the preprocessed image is saved to a temporary PNG path
        /// (directly from the <see cref="Tesseract.Pix"/>, or via a bitmap if that produced no file).
        /// When preprocessing is disabled, or no temporary file exists afterwards, the original
        /// file is copied to the temporary path instead. An <see cref="System.IO.IOException"/> is
        /// logged and not rethrown.
        /// </remarks>
        /// <param name="inputImage">
        /// original input image <see cref="System.IO.FileInfo"/>
        /// </param>
        /// <param name="pageNumber">number of page to be OCRed</param>
        /// <returns>
        /// path of the temporary file if it exists, otherwise the path of the original image
        /// </returns>
        private String PreprocessImage(FileInfo inputImage, int pageNumber)
        {
            String tempPath   = TesseractOcrUtil.GetTempFilePath(Guid.NewGuid().ToString(), GetExtension(inputImage));
            String resultPath = inputImage.FullName;

            try {
                if (GetTesseract4OcrEngineProperties().IsPreprocessingImages())
                {
                    Pix preprocessed = ImagePreprocessingUtil.PreprocessImage(inputImage, pageNumber);
                    TesseractOcrUtil.SavePixToTempPngFile(tempPath, preprocessed);
                    if (!File.Exists(System.IO.Path.Combine(tempPath)))
                    {
                        // Saving the Pix left no file behind: retry through a bitmap.
                        System.Drawing.Bitmap bitmap = TesseractOcrUtil.ConvertPixToImage(preprocessed);
                        if (bitmap != null)
                        {
                            TesseractOcrUtil.SaveImageToTempPngFile(tempPath, bitmap);
                        }
                    }
                }

                // Fall back to a plain copy unless preprocessing is on AND it produced a file.
                bool preprocessedFileReady = GetTesseract4OcrEngineProperties().IsPreprocessingImages()
                                             && File.Exists(System.IO.Path.Combine(tempPath));
                if (!preprocessedFileReady)
                {
                    TesseractOcrUtil.CreateTempFileCopy(resultPath, tempPath);
                }

                if (File.Exists(System.IO.Path.Combine(tempPath)))
                {
                    resultPath = tempPath;
                }
            }
            catch (System.IO.IOException ex) {
                String message = MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_READ_INPUT_IMAGE, ex.Message);
                LogManager.GetLogger(GetType()).Error(message);
            }
            return resultPath;
        }
Ejemplo n.º 5
0
        // Reading page index 0 from "example_03.tiff" (the fixture this test treats as an
        // invalid TIFF, per its name) is expected to give back null.
        public virtual void TestReadingPageFromInvalidTiff()
        {
            FileInfo invalidTiff = new FileInfo(TEST_IMAGES_DIRECTORY + "example_03.tiff");
            Pix      firstPage   = TesseractOcrUtil.ReadPixPageFromTiff(invalidTiff, 0);

            NUnit.Framework.Assert.IsNull(firstPage);
        }
Ejemplo n.º 6
0
        // Calls SaveImageToTempPngFile with a null target path and a valid bitmap.
        // No assertion is made in the body visible here; any expectation about logging or
        // exceptions would have to come from attributes outside this method.
        public virtual void TestImageSavingAsPngWithError()
        {
            String path = TEST_IMAGES_DIRECTORY + "numbers_01.jpg";

            // Image.FromStream requires the stream to stay open for the lifetime of the image,
            // so the stream and the bitmap are scoped together and both released on exit.
            using (FileStream source = new FileStream(path, FileMode.Open, FileAccess.Read))
            using (System.Drawing.Bitmap bi = (System.Drawing.Bitmap)System.Drawing.Image.FromStream(source))
            {
                TesseractOcrUtil.SaveImageToTempPngFile(null, bi);
            }
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Performs tesseract OCR using wrapper for Tesseract OCR API for the selected page
 /// of input image (by default 1st) and writes the result to the given output files.
 /// </summary>
 /// <remarks>
 /// Please note that list of output files is accepted instead of a single file because
 /// page number parameter is not respected in case of TIFF images not requiring preprocessing.
 /// In other words, if the passed image is the TIFF image and according to the
 /// <see cref="Tesseract4OcrEngineProperties"/>
 /// no preprocessing is needed, each page of the TIFF image is OCRed and the number of output files in the list
 /// is expected to be same as number of pages in the image, otherwise, only one file is expected.
 /// A result that is <c>null</c>, or that has no corresponding non-null output file, is skipped.
 /// On exit (success or failure) the tesseract instance is disposed and a temporary
 /// user words file, if one is configured, is deleted.
 /// </remarks>
 /// <param name="inputImage">
 /// input image
 /// <see cref="System.IO.FileInfo"/>
 /// </param>
 /// <param name="outputFiles">
 /// <see cref="System.Collections.Generic.IList{T}"/>
 /// of output files
 /// (one per each page)
 /// </param>
 /// <param name="outputFormat">
 /// selected
 /// <see cref="OutputFormat"/>
 /// for tesseract
 /// </param>
 /// <param name="pageNumber">number of page to be processed</param>
 /// <exception cref="Tesseract4OcrException">
 /// when a result cannot be written to its output file, or when any
 /// <see cref="Tesseract4OcrException"/> is raised while processing (it is logged and rethrown
 /// wrapped in a new instance carrying the same message)
 /// </exception>
 internal override void DoTesseractOcr(FileInfo inputImage, IList <FileInfo> outputFiles, OutputFormat outputFormat
                                       , int pageNumber)
 {
     ScheduledCheck();
     try {
         ValidateLanguages(GetTesseract4OcrEngineProperties().GetLanguages());
         InitializeTesseract(outputFormat);
         OnEvent();
         // if preprocessing is not needed and provided image is tiff,
         // the image will be paginated and separate pages will be OCRed
         IList <String> resultList = new List <String>();
         if (!GetTesseract4OcrEngineProperties().IsPreprocessingImages() && ImagePreprocessingUtil.IsTiffImage(inputImage
                                                                                                               ))
         {
             resultList = GetOcrResultForMultiPage(inputImage, outputFormat);
         }
         else
         {
             resultList.Add(GetOcrResultForSinglePage(inputImage, outputFormat, pageNumber));
         }
         // list of result strings is written to separate files
         // (one for each page)
         for (int i = 0; i < resultList.Count; i++)
         {
             String   result     = resultList[i];
             // results beyond the number of supplied output files have nowhere to go
             FileInfo outputFile = i >= outputFiles.Count ? null : outputFiles[i];
             if (result != null && outputFile != null)
             {
                 try {
                     using (TextWriter writer = new StreamWriter(new FileStream(outputFile.FullName, FileMode.Create), System.Text.Encoding
                                                                 .UTF8)) {
                         writer.Write(result);
                     }
                 }
                 catch (System.IO.IOException e) {
                     LogManager.GetLogger(GetType()).Error(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_WRITE_TO_FILE
                                                                                    , e.Message));
                     // Attach the I/O failure as the cause so it survives the rethrow below.
                     throw new Tesseract4OcrException(Tesseract4OcrException.TESSERACT_FAILED, e);
                 }
             }
         }
     }
     catch (Tesseract4OcrException e) {
         LogManager.GetLogger(GetType()).Error(e.Message);
         throw new Tesseract4OcrException(e.Message, e);
     }
     finally {
         if (tesseractInstance != null)
         {
             TesseractOcrUtil.DisposeTesseractInstance(tesseractInstance);
         }
         if (GetTesseract4OcrEngineProperties().GetPathToUserWordsFile() != null && GetTesseract4OcrEngineProperties
                 ().IsUserWordsFileTemporary())
         {
             TesseractHelper.DeleteFile(GetTesseract4OcrEngineProperties().GetPathToUserWordsFile());
         }
     }
 }
Ejemplo n.º 8
0
        // Initializing the page list from "example_03.tiff" (the fixture this test treats as
        // an invalid TIFF, per its name) is expected to leave the list of pages empty.
        public virtual void TestInitializeImagesListFromInvalidTiff()
        {
            FileInfo         invalidTiff = new FileInfo(TEST_IMAGES_DIRECTORY + "example_03.tiff");
            TesseractOcrUtil util        = new TesseractOcrUtil();

            util.InitializeImagesListFromTiff(invalidTiff);
            int pageCount = util.GetListOfPages().Count;

            NUnit.Framework.Assert.AreEqual(0, pageCount);
        }
Ejemplo n.º 9
0
        // Feeds a null Pix to the preprocessing helpers: each conversion is expected to
        // return null, and DestroyPix is then called with the same null reference.
        public virtual void TestPreprocessingConditions()
        {
            // Typed local (rather than a bare null literal) so the Pix overloads are selected.
            Pix missingPix = null;

            NUnit.Framework.Assert.IsNull(TesseractOcrUtil.ConvertToGrayscale(missingPix));
            NUnit.Framework.Assert.IsNull(TesseractOcrUtil.OtsuImageThresholding(missingPix));
            NUnit.Framework.Assert.IsNull(TesseractOcrUtil.ConvertPixToImage(missingPix));

            TesseractOcrUtil.DestroyPix(missingPix);
        }
Ejemplo n.º 10
0
        // Saving a null image and a null Pix must not create the target PNG file.
        public virtual void TestNullSavingAsPng()
        {
            String baseName = GetTargetDirectory() + "/testNullSavingAsPng";
            String pngPath  = TesseractOcrUtil.GetTempFilePath(baseName, ".png");

            // null bitmap
            TesseractOcrUtil.SaveImageToTempPngFile(pngPath, null);
            NUnit.Framework.Assert.IsFalse(File.Exists(System.IO.Path.Combine(pngPath)));

            // null Pix
            TesseractOcrUtil.SavePixToTempPngFile(pngPath, null);
            NUnit.Framework.Assert.IsFalse(File.Exists(System.IO.Path.Combine(pngPath)));
        }
Ejemplo n.º 11
0
        // Saves a bitmap loaded from "numbers_01.jpg" as a PNG in the target directory,
        // checks that the file appears, then deletes it and checks that it is gone.
        public virtual void TestImageSavingAsPng()
        {
            String path        = TEST_IMAGES_DIRECTORY + "numbers_01.jpg";
            String tmpFileName = GetTargetDirectory() + "testImageSavingAsPng.png";

            NUnit.Framework.Assert.IsFalse(File.Exists(System.IO.Path.Combine(tmpFileName)));

            // Image.FromStream requires the stream to stay open for the lifetime of the image,
            // so the stream and the bitmap are scoped together and both released once saved.
            using (FileStream source = new FileStream(path, FileMode.Open, FileAccess.Read))
            using (System.Drawing.Bitmap bi = (System.Drawing.Bitmap)System.Drawing.Image.FromStream(source))
            {
                TesseractOcrUtil.SaveImageToTempPngFile(tmpFileName, bi);
            }

            NUnit.Framework.Assert.IsTrue(File.Exists(System.IO.Path.Combine(tmpFileName)));
            TesseractHelper.DeleteFile(tmpFileName);
            NUnit.Framework.Assert.IsFalse(File.Exists(System.IO.Path.Combine(tmpFileName)));
        }
Ejemplo n.º 12
0
        // Saves a Pix read from "numbers_01.jpg" as a PNG in the target directory,
        // checks that the file appears, then deletes it and checks that it is gone.
        public virtual void TestPixSavingAsPng()
        {
            String sourcePath = TEST_IMAGES_DIRECTORY + "numbers_01.jpg";
            String pngPath    = GetTargetDirectory() + "testPixSavingAsPng.png";

            NUnit.Framework.Assert.IsFalse(File.Exists(System.IO.Path.Combine(pngPath)));

            Pix sourcePix = ImagePreprocessingUtil.ReadPix(new FileInfo(sourcePath));
            TesseractOcrUtil.SavePixToTempPngFile(pngPath, sourcePix);
            NUnit.Framework.Assert.IsTrue(File.Exists(System.IO.Path.Combine(pngPath)));

            TesseractHelper.DeleteFile(pngPath);
            NUnit.Framework.Assert.IsFalse(File.Exists(System.IO.Path.Combine(pngPath)));
        }
Ejemplo n.º 13
0
        // OCRs "numbers_01.jpg" straight from the file as plain text and expects the
        // recognized text to contain "619121".
        public virtual void TestGetOcrResultAsStringForFile()
        {
            FileInfo image = new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
            Tesseract4LibOcrEngine engine = GetTesseract4LibOcrEngine();

            engine.SetTesseract4OcrEngineProperties(
                new Tesseract4OcrEngineProperties().SetPathToTessData(GetTessDataDirectory()));
            engine.InitializeTesseract(OutputFormat.TXT);

            TesseractOcrUtil util = new TesseractOcrUtil();
            String recognized = util.GetOcrResultAsString(engine.GetTesseractInstance(), image, OutputFormat.TXT);

            NUnit.Framework.Assert.IsTrue(recognized.Contains("619121"));
        }
Ejemplo n.º 14
0
        // OCRs "numbers_02.jpg" from a Pix as plain text and expects the recognized text
        // to contain "0123456789".
        public virtual void TestTesseract4OcrForPix()
        {
            FileInfo image    = new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_02.jpg");
            Pix      imagePix = ImagePreprocessingUtil.ReadPix(image);
            Tesseract4LibOcrEngine engine = GetTesseract4LibOcrEngine();

            engine.SetTesseract4OcrEngineProperties(
                new Tesseract4OcrEngineProperties().SetPathToTessData(GetTessDataDirectory()));
            engine.InitializeTesseract(OutputFormat.TXT);

            TesseractOcrUtil util = new TesseractOcrUtil();
            String recognized = util.GetOcrResultAsString(engine.GetTesseractInstance(), imagePix, OutputFormat.TXT);

            NUnit.Framework.Assert.IsTrue(recognized.Contains("0123456789"));
        }
Ejemplo n.º 15
0
        // Runs the executable-based engine on an image located under a directory with a
        // non-ASCII name ("tèst"), with preprocessing disabled, and checks that the hOCR
        // output file is created; the file is then deleted and checked to be gone.
        public virtual void TestDoTesseractOcrForNonAsciiPathForExecutable()
        {
            FileInfo image      = new FileInfo(TEST_IMAGES_DIRECTORY + "tèst/noisy_01.png");
            FileInfo hocrOutput = new FileInfo(TesseractOcrUtil.GetTempFilePath("test", ".hocr"));

            Tesseract4OcrEngineProperties engineProperties = new Tesseract4OcrEngineProperties();
            engineProperties.SetPathToTessData(GetTessDataDirectory());
            engineProperties.SetPreprocessingImages(false);

            Tesseract4ExecutableOcrEngine executableEngine =
                new Tesseract4ExecutableOcrEngine(GetTesseractDirectory(), engineProperties);
            executableEngine.DoTesseractOcr(image, hocrOutput, OutputFormat.HOCR);

            NUnit.Framework.Assert.IsTrue(File.Exists(System.IO.Path.Combine(hocrOutput.FullName)));
            TesseractHelper.DeleteFile(hocrOutput.FullName);
            NUnit.Framework.Assert.IsFalse(File.Exists(System.IO.Path.Combine(hocrOutput.FullName)));
        }
Ejemplo n.º 16
0
        /// <summary>Reads the requested page of the input image and runs basic preprocessing on it.</summary>
        /// <remarks>
        /// TIFF images are read page-wise (the reader is given <c>pageNumber - 1</c>); any other
        /// image is read through <c>ReadPix</c>. The loaded <see cref="Tesseract.Pix"/> is then
        /// passed to <c>TesseractOcrUtil.PreprocessPix</c>.
        /// </remarks>
        /// <param name="inputFile">
        /// input image <see cref="System.IO.FileInfo"/>
        /// </param>
        /// <param name="pageNumber">number of page to be preprocessed</param>
        /// <returns>
        /// the preprocessed image as <see cref="Tesseract.Pix"/>
        /// </returns>
        /// <exception cref="Tesseract4OcrException">when the image could not be read</exception>
        internal static Pix PreprocessImage(FileInfo inputFile, int pageNumber)
        {
            Pix source = IsTiffImage(inputFile)
                         ? TesseractOcrUtil.ReadPixPageFromTiff(inputFile, pageNumber - 1)
                         : ReadPix(inputFile);

            if (source != null)
            {
                return TesseractOcrUtil.PreprocessPix(source);
            }
            // Nothing could be loaded from the file.
            throw new Tesseract4OcrException(Tesseract4OcrException.CANNOT_READ_PROVIDED_IMAGE)
                  .SetMessageParams(inputFile.FullName);
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Gets OCR result from provided single page image and preprocesses it if
        /// it is needed.
        /// </summary>
        /// <remarks>
        /// Up to three attempts are made, each only if the previous one produced no result:
        /// OCR of the preprocessed image (when preprocessing is enabled), OCR of the image read
        /// as a <see cref="System.Drawing.Bitmap"/> (a failure here is logged at info level and ignored),
        /// and finally OCR of the original input file.
        /// </remarks>
        /// <param name="inputImage">
        /// input image
        /// <see cref="System.IO.FileInfo"/>
        /// </param>
        /// <param name="outputFormat">
        /// selected
        /// <see cref="OutputFormat"/>
        /// for tesseract
        /// </param>
        /// <param name="pageNumber">number of page to be OCRed</param>
        /// <returns>result as string that will be written to a temporary file later</returns>
        /// <exception cref="Tesseract4OcrException">
        /// when any other exception is raised while processing; the original exception is attached as the cause
        /// </exception>
        private String GetOcrResultForSinglePage(FileInfo inputImage, OutputFormat outputFormat, int pageNumber)
        {
            String result = null;

            try {
                // preprocess if required
                if (GetTesseract4OcrEngineProperties().IsPreprocessingImages())
                {
                    // preprocess and try to ocr
                    result = new TesseractOcrUtil().GetOcrResultAsString(GetTesseractInstance(), ImagePreprocessingUtil.PreprocessImage
                                                                             (inputImage, pageNumber), outputFormat);
                }
                if (result == null)
                {
                    System.Drawing.Bitmap bufferedImage = ImagePreprocessingUtil.ReadImage(inputImage);
                    if (bufferedImage != null)
                    {
                        try {
                            result = new TesseractOcrUtil().GetOcrResultAsString(GetTesseractInstance(), bufferedImage, outputFormat);
                        }
                        catch (Exception e) {
                            // NOSONAR
                            // best effort: fall through to OCR of the original file below
                            LogManager.GetLogger(GetType()).Info(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_PROCESS_IMAGE
                                                                                          , e.Message));
                        }
                    }
                    if (result == null)
                    {
                        // perform ocr using original input image
                        result = new TesseractOcrUtil().GetOcrResultAsString(GetTesseractInstance(), inputImage, outputFormat);
                    }
                }
            }
            catch (Exception e) {
                // NOSONAR
                LogManager.GetLogger(GetType()).Error(MessageFormatUtil.Format(Tesseract4LogMessageConstant.TESSERACT_FAILED
                                                                               , e.Message));
                // Keep the underlying failure as the cause so its type and stack trace are not lost.
                throw new Tesseract4OcrException(Tesseract4OcrException.TESSERACT_FAILED, e);
            }
            return(result);
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Builds the parameter list for a command that changes into the parent
        /// directory of the given image.
        /// </summary>
        /// <remarks>
        /// Any <c>file:///</c> or <c>file:/</c> prefix in the parent directory is replaced by an
        /// empty string on Windows and by <c>/</c> elsewhere, and the directory is passed through
        /// <c>AddQuotes</c>.
        /// </remarks>
        /// <param name="imagePath">path to input image</param>
        /// <returns>command list: the <c>cd</c> command followed by the directory</returns>
        private IList <String> MoveToImageDirectory(String imagePath)
        {
            String directory         = TesseractOcrUtil.GetParentDirectory(imagePath);
            String prefixReplacement = IsWindows() ? "" : "/";

            directory = directory.Replace("file:///", prefixReplacement);
            directory = directory.Replace("file:/", prefixReplacement);

            // "/d" lets "cd" on Windows also switch drives when the target directory
            // is located on a different drive than the current one.
            String changeDirCommand = IsWindows() ? "cd /d" : "cd";

            IList <String> command = new List <String>();
            command.Add(changeDirCommand);
            command.Add(AddQuotes(directory));
            return command;
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Using provided input stream there will be created
        /// temporary file (with name 'language.user-words')
        /// containing words (one per line) which ends with
        /// a new line character.
        /// </summary>
        /// <remarks>
        /// Train data for provided language
        /// should exist in specified tess data directory.
        /// The previously configured user words file path is cleared first. If the language is not
        /// in the configured list it is added when it equals the default language (compared in
        /// lower case); otherwise an exception is thrown. The stream is then copied character by
        /// character into the temporary file, followed by a new line, and the file is registered
        /// as a temporary user words file. An <see cref="System.IO.IOException"/> while writing is
        /// logged as a warning and leaves the user words file path cleared.
        /// NOTE:
        /// User words dictionary doesn't work properly in tesseract4
        /// and hidden for public usage until fix is available
        /// </remarks>
        /// <param name="language">
        /// language as
        /// <see cref="System.String"/>
        /// , tessdata for
        /// this languages has to exist in tess data directory
        /// </param>
        /// <param name="inputStream">
        /// custom user words as
        /// <see cref="System.IO.Stream"/>
        /// , decoded as UTF-8
        /// </param>
        /// <returns>
        /// the
        /// <see cref="Tesseract4OcrEngineProperties"/>
        /// instance
        /// </returns>
        /// <exception cref="Tesseract4OcrException">
        /// when the language is neither in the configured list nor the default language
        /// </exception>
        internal virtual iText.Pdfocr.Tesseract4.Tesseract4OcrEngineProperties SetUserWords(String language, Stream
                                                                                            inputStream)
        {
            SetPathToUserWordsFile(null);
            if (!GetLanguages().Contains(language))
            {
                if (DEFAULT_LANGUAGE.Equals(language.ToLowerInvariant()))
                {
                    IList <String> languagesList = GetLanguages();
                    languagesList.Add(language);
                    SetLanguages(languagesList);
                }
                else
                {
                    throw new Tesseract4OcrException(Tesseract4OcrException.LANGUAGE_IS_NOT_IN_THE_LIST).SetMessageParams(language
                                                                                                                          );
                }
            }
            String userWordsFileName = TesseractOcrUtil.GetTempFilePath(language, "." + DEFAULT_USER_WORDS_SUFFIX);

            try {
                using (StreamWriter writer = new StreamWriter(userWordsFileName)) {
                    // The reader is left undisposed: disposing a StreamReader would also close
                    // inputStream, which this method did not open.
                    TextReader reader = new StreamReader(inputStream, System.Text.Encoding.UTF8);
                    int        data;
                    while ((data = reader.Read()) != -1)
                    {
                        // Read() returns the character as an int. It must be cast back to char:
                        // Write(int) would write the number's decimal digits instead of the character.
                        writer.Write((char)data);
                    }
                    writer.Write(Environment.NewLine);
                    SetPathToUserWordsFile(userWordsFileName, true);
                }
            }
            catch (System.IO.IOException e) {
                SetPathToUserWordsFile(null);
                LogManager.GetLogger(GetType()).Warn(MessageFormatUtil.Format(Tesseract4LogMessageConstant.CANNOT_USE_USER_WORDS
                                                                              , e.Message));
            }
            return(this);
        }
Ejemplo n.º 20
0
 /// <summary>
 /// Initializes the tesseract instance if it has not been initialized yet
 /// or has been disposed, and sets all the required properties on it.
 /// </summary>
 /// <remarks>
 /// hOCR creation is switched on or off according to the output format and the DPI
 /// variable is set to 300. When a user words file is configured, the system and frequency
 /// dictionaries are switched off and the user words suffix and file are passed to tesseract.
 /// </remarks>
 /// <param name="outputFormat">
 /// selected <see cref="OutputFormat"/> for tesseract
 /// </param>
 public virtual void InitializeTesseract(OutputFormat outputFormat)
 {
     bool needsNewInstance = GetTesseractInstance() == null
                             || TesseractOcrUtil.IsTesseractInstanceDisposed(GetTesseractInstance());
     if (needsNewInstance)
     {
         tesseractInstance = TesseractOcrUtil.InitializeTesseractInstance(
             IsWindows(),
             GetTessData(),
             GetLanguagesAsString(),
             GetTesseract4OcrEngineProperties().GetPathToUserWordsFile());
     }

     GetTesseractInstance().SetVariable("tessedit_create_hocr", outputFormat.Equals(OutputFormat.HOCR) ? "1" : "0");
     GetTesseractInstance().SetVariable("user_defined_dpi", "300");

     if (GetTesseract4OcrEngineProperties().GetPathToUserWordsFile() != null)
     {
         GetTesseractInstance().SetVariable("load_system_dawg", "0");
         GetTesseractInstance().SetVariable("load_freq_dawg", "0");
         GetTesseractInstance().SetVariable(
             "user_words_suffix",
             GetTesseract4OcrEngineProperties().GetDefaultUserWordsSuffix());
         GetTesseractInstance().SetVariable(
             "user_words_file",
             GetTesseract4OcrEngineProperties().GetPathToUserWordsFile());
     }

     TesseractOcrUtil.SetTesseractProperties(
         GetTesseractInstance(),
         GetTessData(),
         GetLanguagesAsString(),
         GetTesseract4OcrEngineProperties().GetPageSegMode(),
         GetTesseract4OcrEngineProperties().GetPathToUserWordsFile());
 }
Ejemplo n.º 21
0
 /// <summary>
 /// Creates a new <see cref="Tesseract4LibOcrEngine"/> instance.
 /// </summary>
 /// <remarks>
 /// A tesseract instance is created right away with no tess data path, languages or
 /// user words file supplied.
 /// </remarks>
 /// <param name="tesseract4OcrEngineProperties">set of properties</param>
 public Tesseract4LibOcrEngine(Tesseract4OcrEngineProperties tesseract4OcrEngineProperties)
     : base(tesseract4OcrEngineProperties)
 {
     bool runningOnWindows = IsWindows();

     tesseractInstance = TesseractOcrUtil.InitializeTesseractInstance(runningOnWindows, null, null, null);
 }
Ejemplo n.º 22
0
        /// <summary>
        /// Loads the input file as a Leptonica <see cref="Tesseract.Pix"/> and converts it to a
        /// <see cref="System.Drawing.Bitmap"/>.
        /// </summary>
        /// <param name="inputImage">
        /// input image <see cref="System.IO.FileInfo"/>
        /// </param>
        /// <returns>
        /// the <see cref="System.Drawing.Bitmap"/> produced by <c>TesseractOcrUtil.ConvertPixToImage</c>
        /// </returns>
        internal static System.Drawing.Bitmap ReadAsPixAndConvertToBufferedImage(FileInfo inputImage)
        {
            // NOTE(review): the loaded Pix is not released here; confirm whether
            // ConvertPixToImage takes ownership of it or whether it should be destroyed afterwards.
            return TesseractOcrUtil.ConvertPixToImage(Tesseract.Pix.LoadFromFile(inputImage.FullName));
        }
Ejemplo n.º 23
0
        /// <summary>
        /// Builds a <see cref="System.IO.FileInfo"/> for a temporary file path with a freshly
        /// generated GUID as its name and the given extension.
        /// </summary>
        /// <param name="extension">
        /// file extension for the new file, as <see cref="System.String"/>
        /// </param>
        /// <returns>
        /// a new <see cref="System.IO.FileInfo"/> instance pointing at the temporary path
        /// </returns>
        private FileInfo CreateTempFile(String extension)
        {
            String uniqueName = Guid.NewGuid().ToString();

            return new FileInfo(TesseractOcrUtil.GetTempFilePath(uniqueName, extension));
        }