// Runs OCR over the "spanish_01" JPG with the "spa" language, writes the result through
// OcrPdfCreator.CreatePdfA using the RGB output intent, and compares the produced file
// with the stored "_a3u" reference PDF.
public virtual void ComparePdfA3uRGBSpanishJPG() {
    String caseName = "comparePdfA3uRGBSpanishJPG";
    String baseName = "spanish_01";
    String referencePdf = TEST_DOCUMENTS_DIRECTORY + baseName + "_a3u.pdf";
    String producedPdf = GetTargetDirectory() + baseName + "_" + caseName + "_a3u.pdf";

    // Work on a copy of the reader's current properties, then install the copy on the reader.
    Tesseract4OcrEngineProperties engineProperties = new Tesseract4OcrEngineProperties(
        tesseractReader.GetTesseract4OcrEngineProperties());
    engineProperties.SetPathToTessData(GetTessDataDirectory());
    engineProperties.SetLanguages(JavaCollectionsUtil.SingletonList<String>("spa"));
    tesseractReader.SetTesseract4OcrEngineProperties(engineProperties);

    // Properties of the PDF that will be created from the OCR result.
    OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
    creatorProperties.SetPdfLang("en-US");
    creatorProperties.SetTitle("");
    creatorProperties.SetTextColor(DeviceRgb.BLACK);

    OcrPdfCreator pdfCreator = new OcrPdfCreator(tesseractReader, creatorProperties);
    var inputImages = JavaCollectionsUtil.SingletonList<FileInfo>(
        new FileInfo(TEST_IMAGES_DIRECTORY + baseName + ".jpg"));
    PdfDocument pdfDocument = pdfCreator.CreatePdfA(inputImages, GetPdfWriter(producedPdf),
        GetRGBPdfOutputIntent());
    NUnit.Framework.Assert.IsNotNull(pdfDocument);
    pdfDocument.Close();

    // A null comparison result is what the test expects.
    String comparisonResult = new CompareTool().CompareByContent(producedPdf, referencePdf,
        GetTargetDirectory(), "diff_");
    NUnit.Framework.Assert.IsNull(comparisonResult);
}
// Installs a fresh set of engine properties on tesseractReader, with the tessdata
// location built from sourceFolder plus the relative "../../tessdata" suffix.
public virtual void InitTesseractProperties() {
    Tesseract4OcrEngineProperties freshProperties = new Tesseract4OcrEngineProperties();
    FileInfo tessDataLocation = new FileInfo(sourceFolder + "../../tessdata");
    freshProperties.SetPathToTessData(tessDataLocation);
    tesseractReader.SetTesseract4OcrEngineProperties(freshProperties);
}
// Installs a fresh set of engine properties on tesseractReader, with the tessdata
// location taken from GetTessDataDirectory().
public virtual void InitTesseractProperties() {
    Tesseract4OcrEngineProperties freshProperties = new Tesseract4OcrEngineProperties();
    var tessDataLocation = GetTessDataDirectory();
    freshProperties.SetPathToTessData(tessDataLocation);
    tesseractReader.SetTesseract4OcrEngineProperties(freshProperties);
}
// Runs OCR over the "multilang" JPG with the "eng", "deu" and "spa" languages and
// compares the produced PDF with the stored reference for the current testFileTypeName.
// Whatever happens during OCR or comparison, the finally block then checks that the
// reader still reports the text positioning and page segmentation mode set here.
public virtual void CompareMultiLangImage() {
    String testName = "compareMultiLangImage";
    String filename = "multilang";
    String expectedPdfPath = TEST_DOCUMENTS_DIRECTORY + filename + "_" + testFileTypeName + ".pdf";
    String resultPdfPath = GetTargetDirectory() + filename + "_" + testName + "_" + testFileTypeName
        + ".pdf";
    try {
        Tesseract4OcrEngineProperties properties = tesseractReader.GetTesseract4OcrEngineProperties();
        properties.SetTextPositioning(TextPositioning.BY_WORDS);
        properties.SetPathToTessData(GetTessDataDirectory());
        properties.SetPageSegMode(3);
        tesseractReader.SetTesseract4OcrEngineProperties(properties);

        DoOcrAndSavePdfToPath(tesseractReader, TEST_IMAGES_DIRECTORY + filename + ".jpg", resultPdfPath,
            JavaUtil.ArraysAsList("eng", "deu", "spa"), DeviceCmyk.BLACK);

        // Comparison artifacts go to the target directory (the same place the result PDF is
        // written), not into the folder holding the reference documents.
        NUnit.Framework.Assert.IsNull(new CompareTool().CompareByContent(resultPdfPath, expectedPdfPath,
            GetTargetDirectory(), "diff_"));
    }
    finally {
        NUnit.Framework.Assert.AreEqual(TextPositioning.BY_WORDS,
            tesseractReader.GetTesseract4OcrEngineProperties().GetTextPositioning());
        NUnit.Framework.Assert.AreEqual(3,
            tesseractReader.GetTesseract4OcrEngineProperties().GetPageSegMode().Value);
    }
}
// Creates the two OCR engines used by the integration tests: the library-based one and
// the executable-based one. Both are constructed from the same properties instance,
// whose tessdata path comes from GetTessDataDirectory().
public IntegrationTestHelper() {
    Tesseract4OcrEngineProperties engineProperties = new Tesseract4OcrEngineProperties();
    engineProperties.SetPathToTessData(GetTessDataDirectory());

    tesseractLibReader = new Tesseract4LibOcrEngine(engineProperties);

    var tesseractLocation = GetTesseractDirectory();
    tesseractExecutableReader = new Tesseract4ExecutableOcrEngine(tesseractLocation, engineProperties);
}
// Runs OCR over the "invoice_front_thai" JPG with the "tha" and "eng" languages and the
// Noto Sans Thai / Noto Sans fonts, then compares the produced PDF with two reference
// files. The test passes when the result matches at least one of them.
public virtual void CompareInvoiceFrontThaiImage() {
    String testName = "compareInvoiceFrontThaiImage";
    String filename = "invoice_front_thai";
    // Tesseract for Java and Tesseract for .NET give different output,
    // so we cannot use one reference pdf file for them.
    String expectedPdfPathJava = TEST_DOCUMENTS_DIRECTORY + filename + "_" + testFileTypeName
        + "_java.pdf";
    String expectedPdfPathDotNet = TEST_DOCUMENTS_DIRECTORY + filename + "_" + testFileTypeName
        + "_dotnet.pdf";
    String resultPdfPath = GetTargetDirectory() + filename + "_" + testName + "_" + testFileTypeName
        + ".pdf";

    Tesseract4OcrEngineProperties properties = tesseractReader.GetTesseract4OcrEngineProperties();
    properties.SetTextPositioning(TextPositioning.BY_WORDS_AND_LINES);
    properties.SetPathToTessData(GetTessDataDirectory());
    properties.SetLanguages(JavaUtil.ArraysAsList("tha", "eng"));
    tesseractReader.SetTesseract4OcrEngineProperties(properties);

    DoOcrAndSavePdfToPath(tesseractReader, TEST_IMAGES_DIRECTORY + filename + ".jpg", resultPdfPath,
        JavaUtil.ArraysAsList("tha", "eng"),
        JavaUtil.ArraysAsList(NOTO_SANS_THAI_FONT_PATH, NOTO_SANS_FONT_PATH), DeviceRgb.RED);

    // Comparison artifacts go to the target directory (the same place the result PDF is
    // written), not into the folder holding the reference documents.
    bool javaTest = new CompareTool().CompareByContent(resultPdfPath, expectedPdfPathJava,
        GetTargetDirectory(), "diff_") == null;
    bool dotNetTest = new CompareTool().CompareByContent(resultPdfPath, expectedPdfPathDotNet,
        GetTargetDirectory(), "diff_") == null;
    NUnit.Framework.Assert.IsTrue(javaTest || dotNetTest);
}