// Runs OCR on the "multilang.jpg" test image with the "eng", "deu" and "spa"
// languages, saves the result as a pdf and asserts that it matches the
// reference pdf by content. Whatever the outcome, the reader must still report
// the text positioning and page segmentation mode configured by this test.
public virtual void CompareMultiLangImage() {
    String caseName = "compareMultiLangImage";
    String baseName = "multilang";
    String referencePdf = TEST_DOCUMENTS_DIRECTORY + baseName + "_" + testFileTypeName + ".pdf";
    String producedPdf = GetTargetDirectory() + baseName + "_" + caseName + "_" + testFileTypeName + ".pdf";
    try {
        // Configure the shared reader: word-level positioning, tess data location, page segmentation mode 3.
        Tesseract4OcrEngineProperties engineProperties = tesseractReader.GetTesseract4OcrEngineProperties();
        engineProperties.SetTextPositioning(TextPositioning.BY_WORDS);
        engineProperties.SetPathToTessData(GetTessDataDirectory());
        engineProperties.SetPageSegMode(3);
        tesseractReader.SetTesseract4OcrEngineProperties(engineProperties);

        String sourceImage = TEST_IMAGES_DIRECTORY + baseName + ".jpg";
        DoOcrAndSavePdfToPath(tesseractReader, sourceImage, producedPdf,
            JavaUtil.ArraysAsList("eng", "deu", "spa"), DeviceCmyk.BLACK);

        // A null comparison result is the only outcome accepted as a match.
        String comparisonResult = new CompareTool().CompareByContent(producedPdf, referencePdf,
            TEST_DOCUMENTS_DIRECTORY, "diff_");
        NUnit.Framework.Assert.IsNull(comparisonResult);
    }
    finally {
        // These checks are executed even when the OCR or the comparison above fails.
        Tesseract4OcrEngineProperties effectiveProperties = tesseractReader.GetTesseract4OcrEngineProperties();
        NUnit.Framework.Assert.AreEqual(TextPositioning.BY_WORDS, effectiveProperties.GetTextPositioning());
        NUnit.Framework.Assert.AreEqual(3, effectiveProperties.GetPageSegMode().Value);
    }
}
// Runs OCR on the "invoice_front_thai.jpg" test image with the "tha" and "eng"
// languages and the Noto Sans Thai / Noto Sans fonts, saves the result as a pdf
// and asserts that it matches at least one of the two reference pdfs by content.
// When neither reference matches, the assertion message carries both comparison
// reports so the failure can be diagnosed from the test output.
public virtual void CompareInvoiceFrontThaiImage() {
    String testName = "compareInvoiceFrontThaiImage";
    String filename = "invoice_front_thai";
    // Tesseract for Java and Tesseract for .NET give different output,
    // so we cannot use one reference pdf file for them.
    String expectedPdfPathJava = TEST_DOCUMENTS_DIRECTORY + filename + "_" + testFileTypeName + "_java.pdf";
    String expectedPdfPathDotNet = TEST_DOCUMENTS_DIRECTORY + filename + "_" + testFileTypeName + "_dotnet.pdf";
    String resultPdfPath = GetTargetDirectory() + filename + "_" + testName + "_" + testFileTypeName + ".pdf";

    Tesseract4OcrEngineProperties properties = tesseractReader.GetTesseract4OcrEngineProperties();
    properties.SetTextPositioning(TextPositioning.BY_WORDS_AND_LINES);
    properties.SetPathToTessData(GetTessDataDirectory());
    properties.SetLanguages(JavaUtil.ArraysAsList("tha", "eng"));
    tesseractReader.SetTesseract4OcrEngineProperties(properties);

    DoOcrAndSavePdfToPath(tesseractReader, TEST_IMAGES_DIRECTORY + filename + ".jpg", resultPdfPath,
        JavaUtil.ArraysAsList("tha", "eng"),
        JavaUtil.ArraysAsList(NOTO_SANS_THAI_FONT_PATH, NOTO_SANS_FONT_PATH), DeviceRgb.RED);

    // Both comparisons are always performed; a null result is treated as a match.
    // The results are kept (instead of being reduced to booleans) so that they
    // can be reported if the assertion fails.
    String javaComparison = new CompareTool().CompareByContent(resultPdfPath, expectedPdfPathJava,
        TEST_DOCUMENTS_DIRECTORY, "diff_");
    String dotNetComparison = new CompareTool().CompareByContent(resultPdfPath, expectedPdfPathDotNet,
        TEST_DOCUMENTS_DIRECTORY, "diff_");

    NUnit.Framework.Assert.IsTrue(javaComparison == null || dotNetComparison == null,
        "Result pdf matches neither reference pdf.\nComparison with Java reference: " + javaComparison
        + "\nComparison with .NET reference: " + dotNetComparison);
}
// Runs OCR on the "scanned_spa_01.png" test image with the "spa" and "spa_old"
// languages, writes the result to a pdf and asserts that the text read back
// from page 1 contains one of the two accepted phrases. Whatever the outcome of
// that check, the reader must still report word-level text positioning.
public virtual void TestSpanishPNG() {
    String caseName = "compareSpanishPNG";
    String baseName = "scanned_spa_01";
    String fullPhrase = "¿Y SI ENSAYARA COMO ACTUAR?";
    String shortPhrase = "¿Y SI ENSAYARA ACTUAR?";
    String producedPdf = GetTargetDirectory() + baseName + "_" + caseName + "_" + testFileTypeName + ".pdf";
    IList<String> ocrLanguages = JavaUtil.ArraysAsList("spa", "spa_old");

    Tesseract4OcrEngineProperties engineProperties = tesseractReader.GetTesseract4OcrEngineProperties();
    // Image preprocessing is switched off only for the executable reader type.
    if (isExecutableReaderType) {
        engineProperties.SetPreprocessingImages(false);
    }
    // Locate text by words.
    engineProperties.SetTextPositioning(TextPositioning.BY_WORDS);
    engineProperties.SetLanguages(ocrLanguages);
    tesseractReader.SetTesseract4OcrEngineProperties(engineProperties);

    OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
    creatorProperties.SetTextColor(DeviceCmyk.BLACK);
    OcrPdfCreator pdfCreator = new OcrPdfCreator(tesseractReader, creatorProperties);

    using (PdfWriter writer = GetPdfWriter(producedPdf)) {
        IList<FileInfo> inputImages = JavaCollectionsUtil.SingletonList<FileInfo>(
            new FileInfo(TEST_IMAGES_DIRECTORY + baseName + ".png"));
        // The document returned by CreatePdf is closed right away.
        pdfCreator.CreatePdf(inputImages, writer).Close();
    }

    try {
        String layerText = GetTextFromPdfLayer(producedPdf, null, 1);
        // Line breaks are replaced with spaces before the phrases are searched for.
        String flattenedText = layerText.Replace("\n", " ");
        bool phraseFound = flattenedText.Contains(fullPhrase) || flattenedText.Contains(shortPhrase);
        NUnit.Framework.Assert.IsTrue(phraseFound);
    }
    finally {
        // This check is executed even when the text assertion above fails.
        TextPositioning effectivePositioning =
            tesseractReader.GetTesseract4OcrEngineProperties().GetTextPositioning();
        NUnit.Framework.Assert.AreEqual(TextPositioning.BY_WORDS, effectivePositioning);
    }
}