/// <summary>
/// Creates a new <see cref="Tesseract4OcrEngineProperties"/> instance, points it at the
/// directory returned by <c>GetTessDataDirectory()</c> and installs it on <c>tesseractReader</c>.
/// </summary>
public virtual void InitTesseractProperties() {
    Tesseract4OcrEngineProperties freshProperties = new Tesseract4OcrEngineProperties();
    freshProperties.SetPathToTessData(GetTessDataDirectory());
    tesseractReader.SetTesseract4OcrEngineProperties(freshProperties);
}
/// <summary>
/// Expects a <see cref="Tesseract4OcrException"/> carrying the
/// <c>LANGUAGE_IS_NOT_IN_THE_LIST</c> message for "eng1" when user words are supplied
/// as a list for the language "eng1".
/// </summary>
public virtual void TestIncorrectLanguageForUserWordsAsList() {
    // Built up front; the original also evaluated this before the delegate was executed.
    String expectedMessage = MessageFormatUtil.Format(
        Tesseract4OcrException.LANGUAGE_IS_NOT_IN_THE_LIST, "eng1");

    NUnit.Framework.Assert.That(
        () => {
            Tesseract4OcrEngineProperties engineProperties =
                tesseractReader.GetTesseract4OcrEngineProperties();
            engineProperties.SetUserWords("eng1", JavaUtil.ArraysAsList("word1", "word2"));
            engineProperties.SetLanguages(new List<String>());
        },
        NUnit.Framework.Throws.InstanceOf<Tesseract4OcrException>()
            .With.Message.EqualTo(expectedMessage));
}
/// <summary>
/// Expects a <see cref="Tesseract4OcrException"/> carrying the
/// <c>LANGUAGE_IS_NOT_IN_THE_LIST</c> message for "spa" when user words are supplied
/// as a stream for the language "spa".
/// </summary>
public virtual void TestUserWordsWithLanguageNotInList() {
    NUnit.Framework.Assert.That(() => {
        String userWords = TEST_DOCUMENTS_DIRECTORY + "userwords.txt";
        Tesseract4OcrEngineProperties properties = tesseractReader.GetTesseract4OcrEngineProperties();
        // The stream is owned by this test: dispose it even when the expected
        // exception is thrown, so the handle on userwords.txt is not leaked.
        using (FileStream userWordsStream = new FileStream(userWords, FileMode.Open, FileAccess.Read)) {
            properties.SetUserWords("spa", userWordsStream);
            properties.SetLanguages(new List<String>());
        }
    }
    , NUnit.Framework.Throws.InstanceOf<Tesseract4OcrException>().With.Message.EqualTo(MessageFormatUtil.Format(Tesseract4OcrException.LANGUAGE_IS_NOT_IN_THE_LIST, "spa")))
    ;
}
/// <summary>
/// Configures the reader with an explicit path to a user words file, runs OCR on an image
/// and asserts that the user words file still exists afterwards.
/// </summary>
public virtual void TestUserWordsFileNotDeleted() {
    String userWordsPath = TEST_DOCUMENTS_DIRECTORY + "userwords.txt";
    String imagePath = TEST_IMAGES_DIRECTORY + "numbers_01.jpg";

    Tesseract4OcrEngineProperties engineProperties = tesseractReader.GetTesseract4OcrEngineProperties();
    engineProperties.SetPathToUserWordsFile(userWordsPath);
    engineProperties.SetLanguages(JavaUtil.ArraysAsList("eng"));
    tesseractReader.SetTesseract4OcrEngineProperties(engineProperties);

    tesseractReader.DoImageOcr(new FileInfo(imagePath));

    bool userWordsStillThere = new FileInfo(userWordsPath).Exists;
    NUnit.Framework.Assert.IsTrue(userWordsStillThere);
}
/// <summary>
/// Registers user words for "eng" from a stream while the languages list is empty,
/// runs OCR on <c>numbers_01.jpg</c> and asserts that the recognized text starts with "619121".
/// </summary>
public virtual void TestUserWordsWithDefaultLanguageNotInList() {
    String userWords = TEST_DOCUMENTS_DIRECTORY + "userwords.txt";
    String imgPath = TEST_IMAGES_DIRECTORY + "numbers_01.jpg";
    String expectedOutput = "619121";
    Tesseract4OcrEngineProperties properties = tesseractReader.GetTesseract4OcrEngineProperties();
    String result;
    // The stream is owned by this test and was previously never closed. It is kept open
    // until recognition has finished in case the properties read it lazily
    // (NOTE(review): SetUserWords is not visible here - confirm when it consumes the stream).
    using (FileStream userWordsStream = new FileStream(userWords, FileMode.Open, FileAccess.Read)) {
        properties.SetUserWords("eng", userWordsStream);
        properties.SetLanguages(new List<String>());
        tesseractReader.SetTesseract4OcrEngineProperties(properties);
        result = GetRecognizedTextFromTextFile(tesseractReader, imgPath);
    }
    NUnit.Framework.Assert.IsTrue(result.StartsWith(expectedOutput));
}
/// <summary>
/// Supplies two custom user words for "fra", runs OCR on <c>wierdwords.png</c> and asserts
/// that at least one of the words is found in the result and that the generated
/// user words file path ends with ".user-words".
/// </summary>
public virtual void TestCustomUserWords() {
    String imagePath = TEST_IMAGES_DIRECTORY + "wierdwords.png";
    IList<String> customWords = JavaUtil.ArraysAsList("he23llo", "qwetyrtyqpwe-rty");

    Tesseract4OcrEngineProperties engineProperties = tesseractReader.GetTesseract4OcrEngineProperties();
    engineProperties.SetLanguages(JavaUtil.ArraysAsList("fra"));
    engineProperties.SetUserWords("fra", customWords);
    tesseractReader.SetTesseract4OcrEngineProperties(engineProperties);

    String recognized = GetRecognizedTextFromTextFile(tesseractReader, imagePath);
    bool anyWordRecognized = recognized.Contains(customWords[0]) || recognized.Contains(customWords[1]);
    NUnit.Framework.Assert.IsTrue(anyWordRecognized);

    String userWordsFilePath = tesseractReader.GetTesseract4OcrEngineProperties().GetPathToUserWordsFile();
    NUnit.Framework.Assert.IsTrue(userWordsFilePath.EndsWith(".user-words"));
}
/// <summary>
/// Runs the executable engine on an image whose path contains a non-ASCII character,
/// asserts that the HOCR output file was produced, deletes it and asserts that it is gone.
/// </summary>
public virtual void TestDoTesseractOcrForNonAsciiPathForExecutable() {
    String nonAsciiImagePath = TEST_IMAGES_DIRECTORY + "tèst/noisy_01.png";
    FileInfo inputImage = new FileInfo(nonAsciiImagePath);
    FileInfo hocrOutput = new FileInfo(TesseractOcrUtil.GetTempFilePath("test", ".hocr"));

    Tesseract4OcrEngineProperties engineProperties = new Tesseract4OcrEngineProperties();
    engineProperties.SetPathToTessData(GetTessDataDirectory());
    engineProperties.SetPreprocessingImages(false);

    Tesseract4ExecutableOcrEngine executableEngine =
        new Tesseract4ExecutableOcrEngine(GetTesseractDirectory(), engineProperties);
    executableEngine.DoTesseractOcr(inputImage, hocrOutput, OutputFormat.HOCR);

    // Same path is checked before and after deletion.
    String hocrOutputPath = System.IO.Path.Combine(hocrOutput.FullName);
    NUnit.Framework.Assert.IsTrue(File.Exists(hocrOutputPath));

    TesseractHelper.DeleteFile(hocrOutput.FullName);
    NUnit.Framework.Assert.IsFalse(File.Exists(hocrOutputPath));
}
/// <summary>
/// Supplies a custom user word for "eng" while both "fra" and "eng" are enabled, runs OCR on
/// <c>bogusText.jpg</c>, strips line feeds, form feeds and characters outside the listed
/// ranges, and asserts that the cleaned text starts with "B1adeb1ab1a" and that the
/// generated user words file path ends with ".user-words".
/// </summary>
public virtual void TestCustomUserWordsWithListOfLanguages() {
    String imagePath = TEST_IMAGES_DIRECTORY + "bogusText.jpg";
    String expectedPrefix = "B1adeb1ab1a";

    Tesseract4OcrEngineProperties engineProperties = tesseractReader.GetTesseract4OcrEngineProperties();
    engineProperties.SetLanguages(JavaUtil.ArraysAsList("fra", "eng"));
    engineProperties.SetUserWords("eng", JavaUtil.ArraysAsList("b1adeb1ab1a"));
    tesseractReader.SetTesseract4OcrEngineProperties(engineProperties);

    String rawText = GetRecognizedTextFromTextFile(tesseractReader, imagePath);
    String withoutBreaks = rawText.Replace("\n", "").Replace("\f", "");
    String cleanedText = iText.IO.Util.StringUtil.ReplaceAll(
        withoutBreaks, "[^\\u0009\\u000A\\u000D\\u0020-\\u007E]", "");
    NUnit.Framework.Assert.IsTrue(cleanedText.StartsWith(expectedPrefix));

    String userWordsFilePath = tesseractReader.GetTesseract4OcrEngineProperties().GetPathToUserWordsFile();
    NUnit.Framework.Assert.IsTrue(userWordsFilePath.EndsWith(".user-words"));
}
/// <summary>
/// Temporarily overrides the OS-name environment variable ("os.name" when it is set,
/// otherwise "OS") with <paramref name="osName"/>, runs the executable engine on
/// <c>numbers_01.jpg</c> with a <c>null</c> output file, and restores the previous
/// value afterwards.
/// </summary>
/// <param name="osName">value to put into the environment variable for the duration of the OCR call</param>
private void TestSettingOsName(String osName) {
    String imagePath = TEST_IMAGES_DIRECTORY + "numbers_01.jpg";
    FileInfo inputImage = new FileInfo(imagePath);
    String tesseractDirectory = GetTesseractDirectory();

    String osPropertyName;
    if (Environment.GetEnvironmentVariable("os.name") == null) {
        osPropertyName = "OS";
    }
    else {
        osPropertyName = "os.name";
    }

    String previousValue = Environment.GetEnvironmentVariable(osPropertyName);
    Environment.SetEnvironmentVariable(osPropertyName, osName);
    try {
        Tesseract4OcrEngineProperties engineProperties = new Tesseract4OcrEngineProperties();
        engineProperties.SetPathToTessData(GetTessDataDirectory());
        Tesseract4ExecutableOcrEngine executableEngine =
            new Tesseract4ExecutableOcrEngine(tesseractDirectory, engineProperties);
        executableEngine.DoTesseractOcr(inputImage, null, OutputFormat.HOCR);
    }
    finally {
        // Put the original value back whether or not the OCR call threw.
        Environment.SetEnvironmentVariable(osPropertyName, previousValue);
    }
}
/// <summary>
/// Constructs a <see cref="Tesseract4LibOcrEngine"/>: passes the properties to the base
/// constructor and initializes <c>tesseractInstance</c> through
/// <c>TesseractOcrUtil.InitializeTesseractInstance</c>.
/// </summary>
/// <param name="tesseract4OcrEngineProperties">set of properties</param>
public Tesseract4LibOcrEngine(Tesseract4OcrEngineProperties tesseract4OcrEngineProperties)
    : base(tesseract4OcrEngineProperties)
{
    tesseractInstance = TesseractOcrUtil.InitializeTesseractInstance(
        IsWindows(),
        null,
        null,
        null);
}
/// <summary>
/// Constructs the engine and stores the given properties.
/// </summary>
/// <param name="tesseract4OcrEngineProperties">set of properties kept by this engine</param>
public AbstractTesseract4OcrEngine(Tesseract4OcrEngineProperties tesseract4OcrEngineProperties)
{
    this.tesseract4OcrEngineProperties = tesseract4OcrEngineProperties;
}
/// <summary>
/// Replaces the properties held by this
/// <see cref="AbstractTesseract4OcrEngine"/>.
/// </summary>
/// <param name="tesseract4OcrEngineProperties">
/// the <see cref="Tesseract4OcrEngineProperties"/> instance this engine keeps from now on
/// </param>
public void SetTesseract4OcrEngineProperties(Tesseract4OcrEngineProperties tesseract4OcrEngineProperties)
{
    this.tesseract4OcrEngineProperties = tesseract4OcrEngineProperties;
}
/// <summary>
/// Constructs a <see cref="Tesseract4ExecutableOcrEngine"/> that uses the given
/// executable path.
/// </summary>
/// <param name="executablePath">path to tesseract executable</param>
/// <param name="tesseract4OcrEngineProperties">set of properties passed to the base constructor</param>
public Tesseract4ExecutableOcrEngine(
    String executablePath,
    Tesseract4OcrEngineProperties tesseract4OcrEngineProperties)
    : base(tesseract4OcrEngineProperties)
{
    SetPathToExecutable(executablePath);
}
/// <summary>
/// Constructs a <see cref="Tesseract4ExecutableOcrEngine"/> whose executable path is
/// set to "tesseract".
/// </summary>
/// <param name="tesseract4OcrEngineProperties">set of properties passed to the base constructor</param>
public Tesseract4ExecutableOcrEngine(Tesseract4OcrEngineProperties tesseract4OcrEngineProperties)
    : base(tesseract4OcrEngineProperties)
{
    SetPathToExecutable("tesseract");
}