// Runs OCR on the "multîpage.tiff" test image with the text layer named "Text1" and the
// text color set to CMYK magenta, then re-opens the produced PDF and checks that the fill
// color extracted from that layer on page 1 equals the configured color.
public virtual void TestFontColorInMultiPagePdf()
        {
            String   testName = "testFontColorInMultiPagePdf";
            String   path     = TEST_IMAGES_DIRECTORY + "multîpage.tiff";
            String   pdfPath  = GetTargetDirectory() + testName + ".pdf";
            FileInfo file     = new FileInfo(path);

            // Disable image preprocessing on the shared reader before running OCR.
            tesseractReader.SetTesseract4OcrEngineProperties(tesseractReader.GetTesseract4OcrEngineProperties().SetPreprocessingImages
                                                                 (false));
            OcrPdfCreatorProperties ocrPdfCreatorProperties = new OcrPdfCreatorProperties();

            ocrPdfCreatorProperties.SetTextLayerName("Text1");
            Color color = DeviceCmyk.MAGENTA;

            ocrPdfCreatorProperties.SetTextColor(color);
            OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(tesseractReader, ocrPdfCreatorProperties);
            PdfDocument   doc           = ocrPdfCreator.CreatePdf(JavaCollectionsUtil.SingletonList <FileInfo>(file), GetPdfWriter(
                                                                      pdfPath));

            NUnit.Framework.Assert.IsNotNull(doc);
            doc.Close();

            PdfDocument pdfDocument = new PdfDocument(new PdfReader(pdfPath));

            try
            {
                IntegrationTestHelper.ExtractionStrategy strategy = new IntegrationTestHelper.ExtractionStrategy("Text1");
                PdfCanvasProcessor processor = new PdfCanvasProcessor(strategy);

                processor.ProcessPageContent(pdfDocument.GetPage(1));
                Color fillColor = strategy.GetFillColor();

                // NUnit expects (expected, actual); this order keeps failure messages accurate.
                NUnit.Framework.Assert.AreEqual(color, fillColor);
            }
            finally
            {
                // Close the re-opened document even when processing or the assertion fails.
                pdfDocument.Close();
            }
        }
        // Example no. 2
        // 0
        // Creates a PDF/A document from "spanish_01.jpg" using the "spa" language data and an
        // RGB output intent, then compares the result by content against the stored "_a3u.pdf" file.
        public virtual void ComparePdfA3uRGBSpanishJPG()
        {
            String caseName  = "comparePdfA3uRGBSpanishJPG";
            String baseName  = "spanish_01";
            String cmpPdf    = TEST_DOCUMENTS_DIRECTORY + baseName + "_a3u.pdf";
            String outPdf    = GetTargetDirectory() + baseName + "_" + caseName + "_a3u.pdf";

            // Work on a copy of the reader's current engine properties, then install the copy.
            Tesseract4OcrEngineProperties engineProps =
                new Tesseract4OcrEngineProperties(tesseractReader.GetTesseract4OcrEngineProperties());
            engineProps.SetPathToTessData(GetTessDataDirectory());
            engineProps.SetLanguages(JavaCollectionsUtil.SingletonList <String>("spa"));
            tesseractReader.SetTesseract4OcrEngineProperties(engineProps);

            OcrPdfCreatorProperties creatorProps = new OcrPdfCreatorProperties();
            creatorProps.SetPdfLang("en-US");
            creatorProps.SetTitle("");
            creatorProps.SetTextColor(DeviceRgb.BLACK);

            OcrPdfCreator creator = new OcrPdfCreator(tesseractReader, creatorProps);

            // Arguments are prepared in the same left-to-right order the call would evaluate them.
            FileInfo sourceImage = new FileInfo(TEST_IMAGES_DIRECTORY + baseName + ".jpg");
            PdfDocument pdfA = creator.CreatePdfA(
                JavaCollectionsUtil.SingletonList <FileInfo>(sourceImage),
                GetPdfWriter(outPdf),
                GetRGBPdfOutputIntent());

            NUnit.Framework.Assert.IsNotNull(pdfA);
            pdfA.Close();

            // The test asserts that the comparison result is null.
            String differences = new CompareTool().CompareByContent(outPdf, cmpPdf, GetTargetDirectory(), "diff_");
            NUnit.Framework.Assert.IsNull(differences);
        }
        // Example no. 3
        // 0
        // Runs OCR on "scanned_spa_01.png" with the "spa" and "spa_old" languages and word-level
        // text positioning, then checks that the extracted text contains one of two expected phrases.
        public virtual void TestSpanishPNG()
        {
            String caseName        = "compareSpanishPNG";
            String baseName        = "scanned_spa_01";
            String phraseWithComo  = "¿Y SI ENSAYARA COMO ACTUAR?";
            String phraseSansComo  = "¿Y SI ENSAYARA ACTUAR?";
            String outPdf          = GetTargetDirectory() + baseName + "_" + caseName + "_" + testFileTypeName + ".pdf";
            IList <String> langs   = JavaUtil.ArraysAsList("spa", "spa_old");

            Tesseract4OcrEngineProperties engineProps = tesseractReader.GetTesseract4OcrEngineProperties();
            if (isExecutableReaderType)
            {
                engineProps.SetPreprocessingImages(false);
            }
            // locate text by words
            engineProps.SetTextPositioning(TextPositioning.BY_WORDS);
            engineProps.SetLanguages(langs);
            tesseractReader.SetTesseract4OcrEngineProperties(engineProps);

            OcrPdfCreatorProperties creatorProps = new OcrPdfCreatorProperties();
            creatorProps.SetTextColor(DeviceCmyk.BLACK);
            OcrPdfCreator creator = new OcrPdfCreator(tesseractReader, creatorProps);

            using (PdfWriter writer = GetPdfWriter(outPdf))
            {
                FileInfo sourceImage = new FileInfo(TEST_IMAGES_DIRECTORY + baseName + ".png");
                PdfDocument produced = creator.CreatePdf(JavaCollectionsUtil.SingletonList <FileInfo>(sourceImage), writer);
                produced.Close();
            }

            try
            {
                // Newlines are flattened to spaces so a phrase split across lines still matches.
                String layerText = GetTextFromPdfLayer(outPdf, null, 1);
                String flattened = layerText.Replace("\n", " ");
                bool phraseFound = flattened.Contains(phraseWithComo) || flattened.Contains(phraseSansComo);

                NUnit.Framework.Assert.IsTrue(phraseFound);
            }
            finally
            {
                // Runs regardless of the outcome above: the reader must still report BY_WORDS.
                TextPositioning currentPositioning = tesseractReader.GetTesseract4OcrEngineProperties().GetTextPositioning();
                NUnit.Framework.Assert.AreEqual(TextPositioning.BY_WORDS, currentPositioning);
            }
        }