/// <summary>
        /// Creates a PDF with explicitly named image and text layers, checks that
        /// both layers exist and are switched on, then re-reads the written file
        /// and checks the text extracted from each layer.
        /// </summary>
        public virtual void TestTextFromPdfLayers()
        {
            String   testName = "testTextFromPdfLayers";
            String   path     = PdfHelper.GetDefaultImagePath();
            String   pdfPath  = PdfHelper.GetTargetDirectory() + testName + ".pdf";
            FileInfo file     = new FileInfo(path);
            OcrPdfCreatorProperties properties = new OcrPdfCreatorProperties();

            properties.SetImageLayerName("Image Layer");
            properties.SetTextLayerName("Text Layer");
            OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(new CustomOcrEngine(), properties);
            PdfDocument   doc           = ocrPdfCreator.CreatePdf(JavaCollectionsUtil.SingletonList <FileInfo>(file), PdfHelper.GetPdfWriter
                                                                      (pdfPath));

            NUnit.Framework.Assert.IsNotNull(doc);
            try
            {
                IList <PdfLayer> layers = doc.GetCatalog().GetOCProperties(true).GetLayers();

                NUnit.Framework.Assert.AreEqual(2, layers.Count);
                NUnit.Framework.Assert.AreEqual("Image Layer", layers[0].GetPdfObject().Get(PdfName.Name).ToString());
                NUnit.Framework.Assert.IsTrue(layers[0].IsOn());
                NUnit.Framework.Assert.AreEqual("Text Layer", layers[1].GetPdfObject().Get(PdfName.Name).ToString());
                NUnit.Framework.Assert.IsTrue(layers[1].IsOn());
            }
            finally
            {
                // Always close the document, even when a layer assertion fails,
                // so the document and the writer obtained for pdfPath are not
                // left open for the rest of the test run.
                doc.Close();
            }

            // The document must be closed before the file at pdfPath is read back.
            NUnit.Framework.Assert.AreEqual(PdfHelper.DEFAULT_TEXT, PdfHelper.GetTextFromPdfLayer(pdfPath, "Text Layer"
                                                                                                  ));
            NUnit.Framework.Assert.AreEqual("", PdfHelper.GetTextFromPdfLayer(pdfPath, "Image Layer"));
        }
Exemple #2
0
        /// <summary>
        /// Runs OCR on the image at <paramref name="imgPath"/> and writes the
        /// resulting PDF document to <paramref name="pdfPath"/>.
        /// </summary>
        /// <remarks>
        /// (Method is used for compare tool.)
        /// A <see cref="System.IO.IOException"/> raised while creating or writing
        /// the document is logged through LOGGER and is not rethrown.
        /// </remarks>
        /// <param name="tesseractReader">OCR engine handed to the PDF creator.</param>
        /// <param name="imgPath">Path of the input image.</param>
        /// <param name="pdfPath">Path passed to GetPdfWriter for the output PDF.</param>
        /// <param name="languages">
        /// When not null, set on the engine properties; the engine is then asserted
        /// to report the same number of languages.
        /// </param>
        /// <param name="fonts">
        /// When not null and not empty, each font path is added to a new font
        /// provider under the name found in FONT_PATH_TO_FONT_NAME_MAP.
        /// </param>
        /// <param name="color">When not null, used as the text color.</param>
        protected internal virtual void DoOcrAndSavePdfToPath(AbstractTesseract4OcrEngine tesseractReader, String
                                                              imgPath, String pdfPath, IList <String> languages, IList <String> fonts, Color color)
        {
            bool languagesGiven = languages != null;

            if (languagesGiven)
            {
                // Read, modify and write back the engine properties.
                Tesseract4OcrEngineProperties engineProperties = tesseractReader.GetTesseract4OcrEngineProperties();
                engineProperties.SetLanguages(languages);
                tesseractReader.SetTesseract4OcrEngineProperties(engineProperties);
            }

            OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
            creatorProperties.SetPdfLang("en-US");
            creatorProperties.SetTitle("");

            bool fontsGiven = !(fonts == null || fonts.Count <= 0);
            if (fontsGiven)
            {
                FontProvider provider = new FontProvider();
                foreach (String fontFile in fonts)
                {
                    String fontAlias = FONT_PATH_TO_FONT_NAME_MAP.Get(fontFile);
                    provider.GetFontSet().AddFont(fontFile, PdfEncodings.IDENTITY_H, fontAlias);
                }
                creatorProperties.SetFontProvider(provider);
            }

            if (color != null)
            {
                creatorProperties.SetTextColor(color);
            }

            if (languagesGiven)
            {
                int configuredCount = tesseractReader.GetTesseract4OcrEngineProperties().GetLanguages().Count;
                NUnit.Framework.Assert.AreEqual(languages.Count, configuredCount);
            }

            OcrPdfCreator creator = new OcrPdfCreator(tesseractReader, creatorProperties);

            try
            {
                using (PdfWriter writer = GetPdfWriter(pdfPath))
                {
                    FileInfo imageFile = new FileInfo(imgPath);
                    PdfDocument pdf = creator.CreatePdf(JavaCollectionsUtil.SingletonList <FileInfo>(imageFile), writer);

                    NUnit.Framework.Assert.IsNotNull(pdf);
                    pdf.Close();
                }
            }
            catch (System.IO.IOException ioFailure)
            {
                LOGGER.Error(ioFailure.Message);
            }
        }
        /// <summary>
        /// Creates a PDF from the default image with a default-configured creator
        /// and checks that the first page's size equals the image's width and
        /// height converted through GetPoints, within DELTA.
        /// </summary>
        public virtual void TestOriginalSizeScaleMode()
        {
            FileInfo      imageFile = new FileInfo(PdfHelper.GetDefaultImagePath());
            OcrPdfCreator creator   = new OcrPdfCreator(new CustomOcrEngine());
            PdfDocument   pdf       = creator.CreatePdf(JavaCollectionsUtil.SingletonList <FileInfo>(imageFile),
                                                        PdfHelper.GetPdfWriter());

            NUnit.Framework.Assert.IsNotNull(pdf);

            // Expected page dimensions come from the source image itself.
            ImageData sourceImage    = ImageDataFactory.Create(imageFile.FullName);
            float     expectedWidth  = GetPoints(sourceImage.GetWidth());
            float     expectedHeight = GetPoints(sourceImage.GetHeight());

            // Actual dimensions are read from the first page of the created document.
            float actualWidth  = pdf.GetFirstPage().GetPageSize().GetWidth();
            float actualHeight = pdf.GetFirstPage().GetPageSize().GetHeight();

            NUnit.Framework.Assert.AreEqual(expectedWidth, actualWidth, DELTA);
            NUnit.Framework.Assert.AreEqual(expectedHeight, actualHeight, DELTA);
            pdf.Close();
        }
        /// <summary>
        /// Creates a PDF with only the text layer name configured and checks that
        /// the document reports exactly one layer, named "Text Layer", that is on.
        /// </summary>
        public virtual void TestPdfLayersWithTextLayerOnly()
        {
            FileInfo imageFile = new FileInfo(PdfHelper.GetDefaultImagePath());

            // Only the text layer gets a name; the image layer name is left unset.
            OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
            creatorProperties.SetTextLayerName("Text Layer");

            OcrPdfCreator creator = new OcrPdfCreator(new CustomOcrEngine(), creatorProperties);
            PdfDocument   pdf     = creator.CreatePdf(JavaCollectionsUtil.SingletonList <FileInfo>(imageFile),
                                                      PdfHelper.GetPdfWriter());

            NUnit.Framework.Assert.IsNotNull(pdf);

            IList <PdfLayer> pdfLayers = pdf.GetCatalog().GetOCProperties(true).GetLayers();
            NUnit.Framework.Assert.AreEqual(1, pdfLayers.Count);

            PdfLayer onlyLayer = pdfLayers[0];
            NUnit.Framework.Assert.AreEqual("Text Layer", onlyLayer.GetPdfObject().Get(PdfName.Name).ToString());
            NUnit.Framework.Assert.IsTrue(onlyLayer.IsOn());

            pdf.Close();
        }
        /// <summary>
        /// Creates a PDF without configuring any layer names and checks that the
        /// document reports no layers. Also checks that the creator returns the
        /// engine it was built with and that the engine still reports the single
        /// language "eng" it was configured with.
        /// </summary>
        public virtual void TestPdfLayersWithDefaultNames()
        {
            FileInfo imageFile = new FileInfo(PdfHelper.GetDefaultImagePath());

            // Configure the engine with exactly one language.
            OcrEngineProperties engineProperties = new OcrEngineProperties();
            engineProperties.SetLanguages(JavaCollectionsUtil.SingletonList <String>("eng"));
            CustomOcrEngine customEngine = new CustomOcrEngine(engineProperties);

            OcrPdfCreator creator = new OcrPdfCreator(customEngine);
            PdfDocument   pdf     = creator.CreatePdf(JavaCollectionsUtil.SingletonList <FileInfo>(imageFile),
                                                      PdfHelper.GetPdfWriter());

            NUnit.Framework.Assert.IsNotNull(pdf);

            IList <PdfLayer> pdfLayers = pdf.GetCatalog().GetOCProperties(true).GetLayers();
            NUnit.Framework.Assert.AreEqual(0, pdfLayers.Count);
            pdf.Close();

            // Engine identity and language configuration are checked after the document is closed.
            NUnit.Framework.Assert.AreEqual(customEngine, creator.GetOcrEngine());
            NUnit.Framework.Assert.AreEqual(1, customEngine.GetOcrEngineProperties().GetLanguages().Count);
            NUnit.Framework.Assert.AreEqual("eng", customEngine.GetOcrEngineProperties().GetLanguages()[0]);
        }