Ejemplo n.º 1
0
        /// <summary>
        /// Creates a new <see cref="Tesseract4OcrEngineProperties"/> instance, points it at the
        /// directory returned by <c>GetTessDataDirectory()</c> and installs it on
        /// <c>tesseractReader</c>.
        /// </summary>
        public virtual void InitTesseractProperties()
        {
            Tesseract4OcrEngineProperties freshProperties = new Tesseract4OcrEngineProperties();
            freshProperties.SetPathToTessData(GetTessDataDirectory());

            // Hand the newly configured properties over to the reader under test.
            tesseractReader.SetTesseract4OcrEngineProperties(freshProperties);
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Checks that supplying user words (as a list) for the language "eng1" raises a
 /// <see cref="Tesseract4OcrException"/> whose message is
 /// <c>LANGUAGE_IS_NOT_IN_THE_LIST</c> formatted with "eng1".
 /// </summary>
 public virtual void TestIncorrectLanguageForUserWordsAsList()
 {
     NUnit.Framework.Assert.That(() => {
         Tesseract4OcrEngineProperties engineProperties = tesseractReader.GetTesseract4OcrEngineProperties();
         IList<String> customWords = JavaUtil.ArraysAsList("word1", "word2");
         engineProperties.SetUserWords("eng1", customWords);
         engineProperties.SetLanguages(new List<String>());
     },
     NUnit.Framework.Throws.InstanceOf<Tesseract4OcrException>()
         .With.Message.EqualTo(
             MessageFormatUtil.Format(Tesseract4OcrException.LANGUAGE_IS_NOT_IN_THE_LIST, "eng1")));
 }
Ejemplo n.º 3
0
 /// <summary>
 /// Checks that supplying user words (as a stream) for the language "spa" raises a
 /// <see cref="Tesseract4OcrException"/> whose message is
 /// <c>LANGUAGE_IS_NOT_IN_THE_LIST</c> formatted with "spa".
 /// </summary>
 public virtual void TestUserWordsWithLanguageNotInList()
 {
     NUnit.Framework.Assert.That(() => {
         String userWords = TEST_DOCUMENTS_DIRECTORY + "userwords.txt";
         Tesseract4OcrEngineProperties properties = tesseractReader.GetTesseract4OcrEngineProperties();
         // The call below is expected to throw, so the stream must be disposed via
         // "using"; otherwise the handle on userwords.txt stays open until finalization.
         using (FileStream userWordsStream = new FileStream(userWords, FileMode.Open, FileAccess.Read)) {
             properties.SetUserWords("spa", userWordsStream);
         }
         properties.SetLanguages(new List<String>());
     }
                                 , NUnit.Framework.Throws.InstanceOf <Tesseract4OcrException>().With.Message.EqualTo(MessageFormatUtil.Format(Tesseract4OcrException.LANGUAGE_IS_NOT_IN_THE_LIST, "spa")))
     ;
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Configures the reader with an explicit path to a user-words file, runs OCR on
        /// "numbers_01.jpg" and asserts that the user-words file still exists afterwards.
        /// </summary>
        public virtual void TestUserWordsFileNotDeleted()
        {
            String wordsFilePath = TEST_DOCUMENTS_DIRECTORY + "userwords.txt";

            Tesseract4OcrEngineProperties engineProperties = tesseractReader.GetTesseract4OcrEngineProperties();
            engineProperties.SetPathToUserWordsFile(wordsFilePath);
            engineProperties.SetLanguages(JavaUtil.ArraysAsList("eng"));
            tesseractReader.SetTesseract4OcrEngineProperties(engineProperties);

            FileInfo image = new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
            tesseractReader.DoImageOcr(image);

            // Inspect the file only after OCR has finished.
            FileInfo wordsFile = new FileInfo(wordsFilePath);
            NUnit.Framework.Assert.IsTrue(wordsFile.Exists);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Supplies user words (as a stream) for the language "eng", then sets an empty
        /// language list, and asserts that OCR of "numbers_01.jpg" yields text starting
        /// with "619121".
        /// </summary>
        public virtual void TestUserWordsWithDefaultLanguageNotInList()
        {
            String userWords = TEST_DOCUMENTS_DIRECTORY + "userwords.txt";
            Tesseract4OcrEngineProperties properties = tesseractReader.GetTesseract4OcrEngineProperties();

            // Dispose the stream deterministically so the handle on userwords.txt is
            // released as soon as the words have been handed to the properties object.
            using (FileStream userWordsStream = new FileStream(userWords, FileMode.Open, FileAccess.Read)) {
                properties.SetUserWords("eng", userWordsStream);
            }
            properties.SetLanguages(new List<String>());
            tesseractReader.SetTesseract4OcrEngineProperties(properties);
            String imgPath        = TEST_IMAGES_DIRECTORY + "numbers_01.jpg";
            String expectedOutput = "619121";
            String result         = GetRecognizedTextFromTextFile(tesseractReader, imgPath);

            NUnit.Framework.Assert.IsTrue(result.StartsWith(expectedOutput));
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Registers two custom words for the language "fra", runs OCR on "wierdwords.png"
        /// and asserts that the recognized text contains at least one of the words and that
        /// the configured user-words file path ends with ".user-words".
        /// </summary>
        public virtual void TestCustomUserWords()
        {
            String imagePath = TEST_IMAGES_DIRECTORY + "wierdwords.png";
            IList<String> customWords = JavaUtil.ArraysAsList("he23llo", "qwetyrtyqpwe-rty");

            Tesseract4OcrEngineProperties engineProperties = tesseractReader.GetTesseract4OcrEngineProperties();
            engineProperties.SetLanguages(JavaUtil.ArraysAsList("fra"));
            engineProperties.SetUserWords("fra", customWords);
            tesseractReader.SetTesseract4OcrEngineProperties(engineProperties);

            String recognized = GetRecognizedTextFromTextFile(tesseractReader, imagePath);
            bool containsCustomWord = recognized.Contains(customWords[0]) || recognized.Contains(customWords[1]);
            NUnit.Framework.Assert.IsTrue(containsCustomWord);

            String userWordsPath = tesseractReader.GetTesseract4OcrEngineProperties().GetPathToUserWordsFile();
            NUnit.Framework.Assert.IsTrue(userWordsPath.EndsWith(".user-words"));
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Runs the executable-based engine on an image located under a directory with a
        /// non-ASCII name ("tèst"), asserts that the HOCR output file was created, deletes
        /// it and asserts that it no longer exists.
        /// </summary>
        public virtual void TestDoTesseractOcrForNonAsciiPathForExecutable()
        {
            FileInfo sourceImage = new FileInfo(TEST_IMAGES_DIRECTORY + "tèst/noisy_01.png");
            FileInfo hocrFile = new FileInfo(TesseractOcrUtil.GetTempFilePath("test", ".hocr"));

            Tesseract4OcrEngineProperties engineProperties = new Tesseract4OcrEngineProperties();
            engineProperties.SetPathToTessData(GetTessDataDirectory());
            engineProperties.SetPreprocessingImages(false);

            Tesseract4ExecutableOcrEngine executableEngine =
                new Tesseract4ExecutableOcrEngine(GetTesseractDirectory(), engineProperties);
            executableEngine.DoTesseractOcr(sourceImage, hocrFile, OutputFormat.HOCR);

            // The output path is the same for both existence checks.
            String hocrPath = System.IO.Path.Combine(hocrFile.FullName);
            NUnit.Framework.Assert.IsTrue(File.Exists(hocrPath));

            TesseractHelper.DeleteFile(hocrFile.FullName);
            NUnit.Framework.Assert.IsFalse(File.Exists(hocrPath));
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Sets the languages "fra" and "eng", registers one custom word for "eng", runs OCR
        /// on "bogusText.jpg" and asserts that the cleaned-up result starts with
        /// "B1adeb1ab1a" and that the configured user-words file path ends with ".user-words".
        /// </summary>
        public virtual void TestCustomUserWordsWithListOfLanguages()
        {
            String imagePath = TEST_IMAGES_DIRECTORY + "bogusText.jpg";

            Tesseract4OcrEngineProperties engineProperties = tesseractReader.GetTesseract4OcrEngineProperties();
            engineProperties.SetLanguages(JavaUtil.ArraysAsList("fra", "eng"));
            engineProperties.SetUserWords("eng", JavaUtil.ArraysAsList("b1adeb1ab1a"));
            tesseractReader.SetTesseract4OcrEngineProperties(engineProperties);

            String recognized = GetRecognizedTextFromTextFile(tesseractReader, imagePath);
            // Drop line feeds and form feeds, then every character matched by the regex below.
            recognized = recognized.Replace("\n", "");
            recognized = recognized.Replace("\f", "");
            recognized = iText.IO.Util.StringUtil.ReplaceAll(recognized, "[^\\u0009\\u000A\\u000D\\u0020-\\u007E]", "");
            NUnit.Framework.Assert.IsTrue(recognized.StartsWith("B1adeb1ab1a"));

            String userWordsPath = tesseractReader.GetTesseract4OcrEngineProperties().GetPathToUserWordsFile();
            NUnit.Framework.Assert.IsTrue(userWordsPath.EndsWith(".user-words"));
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Temporarily overrides the OS-name environment variable with
        /// <paramref name="osName"/>, runs the executable-based engine on "numbers_01.jpg"
        /// with a null output file, and restores the previous value afterwards.
        /// </summary>
        /// <param name="osName">value to store in the environment variable during the OCR call</param>
        private void TestSettingOsName(String osName)
        {
            FileInfo image = new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
            String executableDirectory = GetTesseractDirectory();

            // Use "os.name" when it is defined, otherwise fall back to "OS".
            String envKey = "os.name";
            if (Environment.GetEnvironmentVariable(envKey) == null) {
                envKey = "OS";
            }
            String savedValue = Environment.GetEnvironmentVariable(envKey);

            Environment.SetEnvironmentVariable(envKey, osName);
            try {
                Tesseract4OcrEngineProperties engineProperties = new Tesseract4OcrEngineProperties();
                engineProperties.SetPathToTessData(GetTessDataDirectory());
                Tesseract4ExecutableOcrEngine executableEngine =
                    new Tesseract4ExecutableOcrEngine(executableDirectory, engineProperties);
                executableEngine.DoTesseractOcr(image, null, OutputFormat.HOCR);
            }
            finally {
                // Always put the original value back, even if OCR throws.
                Environment.SetEnvironmentVariable(envKey, savedValue);
            }
        }
Ejemplo n.º 10
0
 /// <summary>
 /// Creates a new <see cref="Tesseract4LibOcrEngine"/> instance and initializes its
 /// <c>tesseractInstance</c> via <c>TesseractOcrUtil.InitializeTesseractInstance</c>.
 /// </summary>
 /// <param name="tesseract4OcrEngineProperties">set of properties, forwarded to the base constructor</param>
 public Tesseract4LibOcrEngine(Tesseract4OcrEngineProperties tesseract4OcrEngineProperties)
     : base(tesseract4OcrEngineProperties)
 {
     bool runningOnWindows = IsWindows();

     // Only the platform flag is supplied here; the remaining three arguments are passed as null.
     tesseractInstance = TesseractOcrUtil.InitializeTesseractInstance(runningOnWindows, null, null, null);
 }
Ejemplo n.º 11
0
 /// <summary>
 /// Creates a new
 /// <see cref="AbstractTesseract4OcrEngine"/>
 /// instance that stores the given properties.
 /// </summary>
 /// <param name="tesseract4OcrEngineProperties">
 /// set of properties
 /// <see cref="Tesseract4OcrEngineProperties"/>
 /// kept by the engine; the reference is stored as-is (no copy, no null check)
 /// </param>
 public AbstractTesseract4OcrEngine(Tesseract4OcrEngineProperties tesseract4OcrEngineProperties)
 {
     this.tesseract4OcrEngineProperties = tesseract4OcrEngineProperties;
 }
Ejemplo n.º 12
0
 /// <summary>
 /// Replaces the properties held by this
 /// <see cref="AbstractTesseract4OcrEngine"/>.
 /// </summary>
 /// <param name="tesseract4OcrEngineProperties">
 /// new set of properties
 /// <see cref="Tesseract4OcrEngineProperties"/>
 /// for
 /// <see cref="AbstractTesseract4OcrEngine"/>;
 /// the reference is stored as-is (no copy, no null check)
 /// </param>
 public void SetTesseract4OcrEngineProperties(Tesseract4OcrEngineProperties tesseract4OcrEngineProperties)
 {
     this.tesseract4OcrEngineProperties = tesseract4OcrEngineProperties;
 }
Ejemplo n.º 13
0
 /// <summary>
 /// Creates a new
 /// <see cref="Tesseract4ExecutableOcrEngine"/>
 /// instance that uses an explicitly specified tesseract executable.
 /// </summary>
 /// <param name="executablePath">
 /// path to tesseract executable, passed unchanged to <c>SetPathToExecutable</c>
 /// </param>
 /// <param name="tesseract4OcrEngineProperties">set of properties, forwarded to the base constructor</param>
 public Tesseract4ExecutableOcrEngine(String executablePath, Tesseract4OcrEngineProperties tesseract4OcrEngineProperties
                                      )
     : base(tesseract4OcrEngineProperties)
 {
     SetPathToExecutable(executablePath);
 }
Ejemplo n.º 14
0
 /// <summary>
 /// Creates a new <see cref="Tesseract4ExecutableOcrEngine"/> instance whose path to
 /// the executable is set to "tesseract".
 /// </summary>
 /// <param name="tesseract4OcrEngineProperties">set of properties</param>
 public Tesseract4ExecutableOcrEngine(Tesseract4OcrEngineProperties tesseract4OcrEngineProperties)
     : this("tesseract", tesseract4OcrEngineProperties)
 {
     // All initialization is done by the two-argument constructor.
 }