示例#1
0
        /// <summary>
        /// Configures the shared reader for word-level text positioning, passes
        /// <c>multilang.jpg</c> with the "eng", "deu" and "spa" languages to
        /// <c>DoOcrAndSavePdfToPath</c>, and asserts that <c>CompareByContent</c>
        /// returns null when the produced PDF is compared with the reference PDF.
        /// </summary>
        /// <remarks>
        /// The <c>finally</c> section runs whether or not the comparison succeeded; it re-reads
        /// the engine properties from the reader and asserts that the text positioning and the
        /// page segmentation mode set by this test are the values currently reported.
        /// </remarks>
        public virtual void CompareMultiLangImage()
        {
            // Page segmentation mode used by this test; the same value is verified in finally.
            const int pageSegMode = 3;

            string caseName     = "compareMultiLangImage";
            string baseName     = "multilang";
            string referencePdf = TEST_DOCUMENTS_DIRECTORY + baseName + "_" + testFileTypeName + ".pdf";
            string producedPdf  = GetTargetDirectory() + baseName + "_" + caseName + "_" + testFileTypeName + ".pdf";

            try
            {
                // Adjust the reader's current properties and push them back to the reader.
                Tesseract4OcrEngineProperties engineProps = tesseractReader.GetTesseract4OcrEngineProperties();
                engineProps.SetTextPositioning(TextPositioning.BY_WORDS);
                engineProps.SetPathToTessData(GetTessDataDirectory());
                engineProps.SetPageSegMode(pageSegMode);
                tesseractReader.SetTesseract4OcrEngineProperties(engineProps);

                string sourceImage = TEST_IMAGES_DIRECTORY + baseName + ".jpg";
                var ocrLanguages   = JavaUtil.ArraysAsList("eng", "deu", "spa");
                DoOcrAndSavePdfToPath(tesseractReader, sourceImage, producedPdf, ocrLanguages, DeviceCmyk.BLACK);

                // The test passes only if the comparison yields null.
                var mismatch = new CompareTool().CompareByContent(producedPdf, referencePdf, TEST_DOCUMENTS_DIRECTORY, "diff_");
                NUnit.Framework.Assert.IsNull(mismatch);
            }
            finally
            {
                var positioningNow = tesseractReader.GetTesseract4OcrEngineProperties().GetTextPositioning();
                NUnit.Framework.Assert.AreEqual(TextPositioning.BY_WORDS, positioningNow);

                var pageSegModeNow = tesseractReader.GetTesseract4OcrEngineProperties().GetPageSegMode().Value;
                NUnit.Framework.Assert.AreEqual(pageSegMode, pageSegModeNow);
            }
        }
        /// <summary>
        /// Configures the shared reader for word-and-line text positioning with the "tha" and
        /// "eng" languages, passes <c>invoice_front_thai.jpg</c> to
        /// <c>DoOcrAndSavePdfToPath</c>, and asserts that the produced PDF matches at least one
        /// of the two reference PDFs (the "_java" or the "_dotnet" variant).
        /// </summary>
        public virtual void CompareInvoiceFrontThaiImage()
        {
            string caseName = "compareInvoiceFrontThaiImage";
            string baseName = "invoice_front_thai";

            // Tesseract for Java and Tesseract for .NET give different output,
            // so a single reference PDF cannot serve both; one is kept per platform.
            string referencePdfJava   = TEST_DOCUMENTS_DIRECTORY + baseName + "_" + testFileTypeName + "_java.pdf";
            string referencePdfDotNet = TEST_DOCUMENTS_DIRECTORY + baseName + "_" + testFileTypeName + "_dotnet.pdf";
            string producedPdf        = GetTargetDirectory() + baseName + "_" + caseName + "_" + testFileTypeName + ".pdf";

            // Adjust the reader's current properties and push them back to the reader.
            Tesseract4OcrEngineProperties engineProps = tesseractReader.GetTesseract4OcrEngineProperties();
            engineProps.SetTextPositioning(TextPositioning.BY_WORDS_AND_LINES);
            engineProps.SetPathToTessData(GetTessDataDirectory());
            engineProps.SetLanguages(JavaUtil.ArraysAsList("tha", "eng"));
            tesseractReader.SetTesseract4OcrEngineProperties(engineProps);

            string sourceImage = TEST_IMAGES_DIRECTORY + baseName + ".jpg";
            var ocrLanguages   = JavaUtil.ArraysAsList("tha", "eng");
            var fontPaths      = JavaUtil.ArraysAsList(NOTO_SANS_THAI_FONT_PATH, NOTO_SANS_FONT_PATH);
            DoOcrAndSavePdfToPath(tesseractReader, sourceImage, producedPdf, ocrLanguages, fontPaths, DeviceRgb.RED);

            // Both comparisons are always executed; a null result is treated as a match.
            var javaDiff   = new CompareTool().CompareByContent(producedPdf, referencePdfJava, TEST_DOCUMENTS_DIRECTORY, "diff_");
            var dotNetDiff = new CompareTool().CompareByContent(producedPdf, referencePdfDotNet, TEST_DOCUMENTS_DIRECTORY, "diff_");

            bool matchesEitherReference = javaDiff == null || dotNetDiff == null;
            NUnit.Framework.Assert.IsTrue(matchesEitherReference);
        }
示例#3
0
        /// <summary>
        /// Creates a PDF from <c>scanned_spa_01.png</c> through <c>OcrPdfCreator</c> with the
        /// "spa" and "spa_old" languages and black text, then asserts that the text read back
        /// from the PDF (newlines replaced by spaces) contains one of two accepted phrases.
        /// </summary>
        /// <remarks>
        /// Image preprocessing is switched off only when <c>isExecutableReaderType</c> is true.
        /// The <c>finally</c> section asserts that the reader still reports word-level text
        /// positioning, whether or not the text check succeeded.
        /// </remarks>
        public virtual void TestSpanishPNG()
        {
            string caseName           = "compareSpanishPNG";
            string baseName           = "scanned_spa_01";
            string phraseWithComo     = "¿Y SI ENSAYARA COMO ACTUAR?";
            string phraseWithoutComo  = "¿Y SI ENSAYARA ACTUAR?";
            string producedPdf        = GetTargetDirectory() + baseName + "_" + caseName + "_" + testFileTypeName + ".pdf";
            IList<string> ocrLanguages = JavaUtil.ArraysAsList("spa", "spa_old");

            Tesseract4OcrEngineProperties engineProps = tesseractReader.GetTesseract4OcrEngineProperties();
            if (isExecutableReaderType)
            {
                engineProps.SetPreprocessingImages(false);
            }

            // Position the recognized text word by word.
            engineProps.SetTextPositioning(TextPositioning.BY_WORDS);
            engineProps.SetLanguages(ocrLanguages);
            tesseractReader.SetTesseract4OcrEngineProperties(engineProps);

            OcrPdfCreatorProperties creatorProps = new OcrPdfCreatorProperties();
            creatorProps.SetTextColor(DeviceCmyk.BLACK);
            OcrPdfCreator pdfCreator = new OcrPdfCreator(tesseractReader, creatorProps);

            using (PdfWriter writer = GetPdfWriter(producedPdf))
            {
                FileInfo sourceImage = new FileInfo(TEST_IMAGES_DIRECTORY + baseName + ".png");
                var inputImages      = JavaCollectionsUtil.SingletonList<FileInfo>(sourceImage);

                // Close the object returned by CreatePdf before the writer itself is disposed.
                pdfCreator.CreatePdf(inputImages, writer).Close();
            }

            try
            {
                string extractedText = GetTextFromPdfLayer(producedPdf, null, 1).Replace("\n", " ");

                // Either phrase is accepted as a correct recognition result.
                NUnit.Framework.Assert.IsTrue(extractedText.Contains(phraseWithComo) || extractedText.Contains(phraseWithoutComo));
            }
            finally
            {
                var positioningNow = tesseractReader.GetTesseract4OcrEngineProperties().GetTextPositioning();
                NUnit.Framework.Assert.AreEqual(TextPositioning.BY_WORDS, positioningNow);
            }
        }