Example #1
0
        /// <summary>
        /// Creates a PDF whose image and text layers are explicitly named and verifies that both
        /// layers are present and switched on, and that text is extracted from the text layer
        /// while the image layer yields an empty string.
        /// </summary>
        public virtual void TestTextFromPdfLayers()
        {
            String   testName = "testTextFromPdfLayers";
            String   path     = PdfHelper.GetDefaultImagePath();
            String   pdfPath  = PdfHelper.GetTargetDirectory() + testName + ".pdf";
            FileInfo file     = new FileInfo(path);
            OcrPdfCreatorProperties properties = new OcrPdfCreatorProperties();

            properties.SetImageLayerName("Image Layer");
            properties.SetTextLayerName("Text Layer");
            OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(new CustomOcrEngine(), properties);
            PdfDocument   doc           = ocrPdfCreator.CreatePdf(JavaCollectionsUtil.SingletonList <FileInfo>(file),
                                                                  PdfHelper.GetPdfWriter(pdfPath));

            NUnit.Framework.Assert.IsNotNull(doc);
            try
            {
                IList <PdfLayer> layers = doc.GetCatalog().GetOCProperties(true).GetLayers();

                NUnit.Framework.Assert.AreEqual(2, layers.Count);
                NUnit.Framework.Assert.AreEqual("Image Layer", layers[0].GetPdfObject().Get(PdfName.Name).ToString());
                NUnit.Framework.Assert.IsTrue(layers[0].IsOn());
                NUnit.Framework.Assert.AreEqual("Text Layer", layers[1].GetPdfObject().Get(PdfName.Name).ToString());
                NUnit.Framework.Assert.IsTrue(layers[1].IsOn());
            }
            finally
            {
                // Close even when a layer assertion fails so the document written to pdfPath is not left open.
                doc.Close();
            }

            // The file is re-read from disk, so these checks must run after the document is closed.
            NUnit.Framework.Assert.AreEqual(PdfHelper.DEFAULT_TEXT, PdfHelper.GetTextFromPdfLayer(pdfPath, "Text Layer"));
            NUnit.Framework.Assert.AreEqual("", PdfHelper.GetTextFromPdfLayer(pdfPath, "Image Layer"));
        }
Example #2
0
        /// <summary>
        /// Uses one font provider configuration to produce both a PDF/A file and a regular PDF,
        /// then checks that each output reports an embedded "FreeSans" font and the default text.
        /// </summary>
        public virtual void TestReusingFontProvider()
        {
            String testName = "testReusingFontProvider";
            String path = PdfHelper.GetDefaultImagePath();
            String pdfPathA3u = PdfHelper.GetTargetDirectory() + testName + "_a3u.pdf";
            String pdfPath = PdfHelper.GetTargetDirectory() + testName + ".pdf";
            FileInfo imageFile = new FileInfo(path);

            FontProvider sharedProvider = new FontProvider("FreeSans");
            sharedProvider.AddFont(PdfHelper.GetFreeSansFontPath());
            PdfOcrFontProvider ocrFontProvider = new PdfOcrFontProvider(sharedProvider.GetFontSet(), "FreeSans");

            OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
            creatorProperties.SetPdfLang("en-US");
            creatorProperties.SetFontProvider(ocrFontProvider);

            // Both documents are created from the same properties instance before anything is verified.
            PdfHelper.CreatePdfA(pdfPathA3u, imageFile, creatorProperties, PdfHelper.GetCMYKPdfOutputIntent());
            PdfHelper.CreatePdf(pdfPath, imageFile, creatorProperties);

            // The PDF/A output is verified first, then the regular PDF, with the same set of checks.
            foreach (String producedPdf in new String[] { pdfPathA3u, pdfPath })
            {
                ExtractionStrategy extraction = PdfHelper.GetExtractionStrategy(producedPdf);
                PdfFont usedFont = extraction.GetPdfFont();
                String usedFontName = usedFont.GetFontProgram().GetFontNames().GetFontName();

                NUnit.Framework.Assert.IsTrue(usedFontName.Contains("FreeSans"));
                NUnit.Framework.Assert.IsTrue(usedFont.IsEmbedded());
                NUnit.Framework.Assert.AreEqual(PdfHelper.DEFAULT_TEXT, extraction.GetResultantText());
            }
        }
Example #3
0
        /// <summary>
        /// Creates a PDF/A document from the Thai sample image using the "Kanit" font and checks
        /// the extracted text (with and without actual text) as well as the embedded font.
        /// </summary>
        public virtual void TestCompliantThaiPdfA()
        {
            String testName = "testCompliantThaiPdfA";
            String path = PdfHelper.GetThaiImagePath();
            String pdfPath = PdfHelper.GetTargetDirectory() + testName + ".pdf";

            OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
            creatorProperties.SetPdfLang("en-US");
            creatorProperties.SetTextColor(DeviceRgb.BLACK);

            FontProvider kanitProvider = new FontProvider("Kanit");
            kanitProvider.AddFont(PdfHelper.GetKanitFontPath());
            creatorProperties.SetFontProvider(new PdfOcrFontProvider(kanitProvider.GetFontSet(), "Kanit"));

            PdfHelper.CreatePdfA(pdfPath, new FileInfo(path), creatorProperties, PdfHelper.GetRGBPdfOutputIntent());

            // Both extraction variants must return the Thai reference text, and therefore each other.
            String textUsingActualText = PdfHelper.GetTextFromPdfLayerUseActualText(pdfPath, null);
            NUnit.Framework.Assert.AreEqual(PdfHelper.THAI_TEXT, textUsingActualText);

            String textIgnoringActualText = PdfHelper.GetTextFromPdfLayer(pdfPath, null);
            NUnit.Framework.Assert.AreEqual(PdfHelper.THAI_TEXT, textIgnoringActualText);
            NUnit.Framework.Assert.AreEqual(textIgnoringActualText, textUsingActualText);

            ExtractionStrategy extraction = PdfHelper.GetExtractionStrategy(pdfPath);
            PdfFont usedFont = extraction.GetPdfFont();
            String usedFontName = usedFont.GetFontProgram().GetFontNames().GetFontName();

            NUnit.Framework.Assert.IsTrue(usedFontName.Contains("Kanit"));
            NUnit.Framework.Assert.IsTrue(usedFont.IsEmbedded());
        }
Example #4
0
        /// <summary>
        /// Creates a PDF with <c>ScaleMode.SCALE_WIDTH</c> on a 400x400 pt page and checks the
        /// bounding box of the placed image.
        /// </summary>
        public virtual void TestScaleWidthMode()
        {
            String testName = "testScaleWidthMode";
            String path = PdfHelper.GetDefaultImagePath();
            String pdfPath = PdfHelper.GetTargetDirectory() + testName + ".pdf";
            FileInfo imageFile = new FileInfo(path);

            float pageWidthPt = 400f;
            float pageHeightPt = 400f;

            OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
            creatorProperties.SetScaleMode(ScaleMode.SCALE_WIDTH);
            creatorProperties.SetPageSize(new Rectangle(pageWidthPt, pageHeightPt));

            PdfHelper.CreatePdf(pdfPath, imageFile, creatorProperties);

            Rectangle imageBox = GetImageBBoxRectangleFromPdf(pdfPath);
            ImageData sourceImage = ImageDataFactory.Create(imageFile.FullName);

            // The image height in the PDF must equal the configured page height.
            NUnit.Framework.Assert.AreEqual(pageHeightPt, imageBox.GetHeight(), DELTA);

            // The width/height ratio of the placed image must match that of the original image,
            // i.e. the width was scaled proportionally.
            NUnit.Framework.Assert.AreEqual(sourceImage.GetWidth() / sourceImage.GetHeight(),
                                            imageBox.GetWidth() / imageBox.GetHeight(), DELTA);
        }
Example #5
0
 /// <summary>
 /// Verifies that creating a PDF/A document with a DeviceCmyk text color together with the
 /// RGB output intent fails with a PdfException carrying the expected conformance message.
 /// </summary>
 public virtual void TestIncompatibleOutputIntentAndFontColorSpaceException()
 {
     // The action that is expected to throw; it is only executed by Assert.That below.
     NUnit.Framework.TestDelegate createPdfAWithCmykText = () => {
         String testName = "testIncompatibleOutputIntentAndFontColorSpaceException";
         String imagePath = PdfHelper.GetDefaultImagePath();
         String outputPdf = PdfHelper.GetTargetDirectory() + testName + ".pdf";

         OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
         creatorProperties.SetPdfLang("en-US");
         creatorProperties.SetTextColor(DeviceCmyk.BLACK);

         PdfHelper.CreatePdfA(outputPdf, new FileInfo(imagePath), creatorProperties, PdfHelper.GetRGBPdfOutputIntent());
     };

     NUnit.Framework.Assert.That(
         createPdfAWithCmykText,
         NUnit.Framework.Throws.InstanceOf <PdfException>().With.Message.EqualTo(
             PdfAConformanceException.DEVICECMYK_MAY_BE_USED_ONLY_IF_THE_FILE_HAS_A_CMYK_PDFA_OUTPUT_INTENT_OR_DEFAULTCMYK_IN_USAGE_CONTEXT));
 }
Example #6
0
        /// <summary>
        /// Calls <c>PdfHelper.CreatePdfA</c> with a <c>null</c> output intent and checks that the
        /// default text is extracted and that the configured scale mode is unchanged afterwards.
        /// </summary>
        public virtual void TestPdfA3uWithNullIntent()
        {
            String testName = "testPdfA3uWithNullIntent";
            String imagePath = PdfHelper.GetDefaultImagePath();
            String outputPdf = PdfHelper.GetTargetDirectory() + testName + ".pdf";

            OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
            creatorProperties.SetTextColor(DeviceCmyk.BLACK);
            creatorProperties.SetScaleMode(ScaleMode.SCALE_TO_FIT);

            // No output intent is supplied for this document.
            PdfHelper.CreatePdfA(outputPdf, new FileInfo(imagePath), creatorProperties, null);

            String extractedText = PdfHelper.GetTextFromPdfLayer(outputPdf, null);

            NUnit.Framework.Assert.AreEqual(PdfHelper.DEFAULT_TEXT, extractedText);
            NUnit.Framework.Assert.AreEqual(ScaleMode.SCALE_TO_FIT, creatorProperties.GetScaleMode());
        }
Example #7
0
 /// <summary>
 /// Verifies that creating a PDF/A document from the Thai sample image without configuring a
 /// font provider fails with an OcrException reporting a missing glyph.
 /// </summary>
 public virtual void TestNonCompliantThaiPdfA()
 {
     // The action that is expected to throw; it is only executed by Assert.That below.
     NUnit.Framework.TestDelegate createThaiPdfA = () => {
         String testName = "testNonCompliantThaiPdfA";
         String imagePath = PdfHelper.GetThaiImagePath();
         String outputPdf = PdfHelper.GetTargetDirectory() + testName + ".pdf";

         OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
         creatorProperties.SetPdfLang("en-US");
         creatorProperties.SetTextColor(DeviceRgb.BLACK);

         PdfHelper.CreatePdfA(outputPdf, new FileInfo(imagePath), creatorProperties, PdfHelper.GetRGBPdfOutputIntent());
     };

     // 3611 (0x0E1B) is the character code formatted into the inner "glyph not found" message.
     NUnit.Framework.Assert.That(
         createThaiPdfA,
         NUnit.Framework.Throws.InstanceOf <OcrException>().With.Message.EqualTo(
             MessageFormatUtil.Format(
                 OcrException.CANNOT_CREATE_PDF_DOCUMENT,
                 MessageFormatUtil.Format(PdfOcrLogMessageConstant.COULD_NOT_FIND_CORRESPONDING_GLYPH_TO_UNICODE_CHARACTER, 3611))));
 }
Example #8
0
        /// <summary>
        /// Runs OCR on the image at <c>imgPath</c> and saves the resulting PDF document to <c>pdfPath</c>.
        /// </summary>
        /// <remarks>
        /// The method is used for the compare tool. An <c>IOException</c> raised while writing
        /// the document is logged and not rethrown.
        /// </remarks>
        /// <param name="tesseractReader">OCR engine to use; its languages are replaced when <c>languages</c> is not null</param>
        /// <param name="imgPath">path to the input image</param>
        /// <param name="pdfPath">path the PDF document is written to</param>
        /// <param name="languages">languages to set on the engine, or null to keep the engine's current ones</param>
        /// <param name="fonts">font paths to register in a new font provider; ignored when null or empty</param>
        /// <param name="color">text color to set, or null to leave the color untouched</param>
        protected internal virtual void DoOcrAndSavePdfToPath(AbstractTesseract4OcrEngine tesseractReader, String imgPath,
                                                              String pdfPath, IList <String> languages,
                                                              IList <String> fonts, Color color)
        {
            if (languages != null)
            {
                Tesseract4OcrEngineProperties engineProperties = tesseractReader.GetTesseract4OcrEngineProperties();
                engineProperties.SetLanguages(languages);
                tesseractReader.SetTesseract4OcrEngineProperties(engineProperties);
            }

            OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
            creatorProperties.SetPdfLang("en-US");
            creatorProperties.SetTitle("");

            if (fonts != null && fonts.Count > 0)
            {
                // Each font path is registered under the name looked up in FONT_PATH_TO_FONT_NAME_MAP.
                FontProvider customFonts = new FontProvider();
                foreach (String fontFile in fonts)
                {
                    String fontAlias = FONT_PATH_TO_FONT_NAME_MAP.Get(fontFile);
                    customFonts.GetFontSet().AddFont(fontFile, PdfEncodings.IDENTITY_H, fontAlias);
                }
                creatorProperties.SetFontProvider(customFonts);
            }

            if (color != null)
            {
                creatorProperties.SetTextColor(color);
            }

            if (languages != null)
            {
                // Sanity check: the engine must report as many languages as were requested.
                int configuredLanguages = tesseractReader.GetTesseract4OcrEngineProperties().GetLanguages().Count;
                NUnit.Framework.Assert.AreEqual(languages.Count, configuredLanguages);
            }

            OcrPdfCreator pdfCreator = new OcrPdfCreator(tesseractReader, creatorProperties);

            try
            {
                using (PdfWriter writer = GetPdfWriter(pdfPath))
                {
                    FileInfo inputImage = new FileInfo(imgPath);
                    PdfDocument createdDocument = pdfCreator.CreatePdf(JavaCollectionsUtil.SingletonList <FileInfo>(inputImage), writer);
                    NUnit.Framework.Assert.IsNotNull(createdDocument);
                    createdDocument.Close();
                }
            }
            catch (System.IO.IOException e)
            {
                LOGGER.Error(e.Message);
            }
        }
Example #9
0
        /// <summary>
        /// Creates a PDF/A document with the RGB output intent and no explicit font provider, then
        /// checks that the font used is embedded and its name contains "LiberationSans".
        /// </summary>
        public virtual void TestDefaultFontInPdfARgb()
        {
            // The name matches the test method so the output file is not shared with
            // TestCustomFontInPdf, which previously wrote to the same "testDefaultFontInPdf.pdf".
            String   testName = "testDefaultFontInPdfARgb";
            String   path     = PdfHelper.GetDefaultImagePath();
            String   pdfPath  = PdfHelper.GetTargetDirectory() + testName + ".pdf";
            FileInfo file     = new FileInfo(path);
            OcrPdfCreatorProperties ocrPdfCreatorProperties = new OcrPdfCreatorProperties();

            ocrPdfCreatorProperties.SetPdfLang("en-US");
            ocrPdfCreatorProperties.SetTextColor(DeviceRgb.BLACK);
            PdfHelper.CreatePdfA(pdfPath, file, ocrPdfCreatorProperties, PdfHelper.GetRGBPdfOutputIntent());
            ExtractionStrategy strategy = PdfHelper.GetExtractionStrategy(pdfPath);
            PdfFont            font     = strategy.GetPdfFont();
            String             fontName = font.GetFontProgram().GetFontNames().GetFontName();

            NUnit.Framework.Assert.IsTrue(fontName.Contains("LiberationSans"));
            NUnit.Framework.Assert.IsTrue(font.IsEmbedded());
        }
Example #10
0
        /// <summary>
        /// Creates a PDF from "multipage.tiff" with a cyan text color and checks that the fill
        /// color reported for the "Text1" layer equals the configured color.
        /// </summary>
        public virtual void TestFontColor()
        {
            String testName = "testFontColor";
            String imagePath = PdfHelper.GetImagesTestDirectory() + "multipage.tiff";
            String outputPdf = PdfHelper.GetTargetDirectory() + testName + ".pdf";
            FileInfo imageFile = new FileInfo(imagePath);
            Color expectedColor = DeviceCmyk.CYAN;

            OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
            creatorProperties.SetScaleMode(ScaleMode.SCALE_TO_FIT);
            creatorProperties.SetTextLayerName("Text1");
            creatorProperties.SetTextColor(expectedColor);

            PdfHelper.CreatePdf(outputPdf, imageFile, creatorProperties);

            // The fill color is read back from the layer that was named above.
            Color actualColor = PdfHelper.GetExtractionStrategy(outputPdf, "Text1").GetFillColor();

            NUnit.Framework.Assert.AreEqual(expectedColor, actualColor);
        }
Example #11
0
        /// <summary>
        /// Creates a PDF/A document without setting a title and checks the resulting metadata:
        /// catalog language "en-US", no title, and PDF/A-3U conformance level.
        /// </summary>
        public virtual void TestPdfA3DefaultMetadata()
        {
            // Named after the test method, like the other tests, so the output file is unique to it.
            String   testName = "testPdfA3DefaultMetadata";
            String   path     = PdfHelper.GetDefaultImagePath();
            String   pdfPath  = PdfHelper.GetTargetDirectory() + testName + ".pdf";
            FileInfo file     = new FileInfo(path);
            OcrPdfCreatorProperties ocrPdfCreatorProperties = new OcrPdfCreatorProperties();

            ocrPdfCreatorProperties.SetPdfLang("en-US");
            ocrPdfCreatorProperties.SetTextColor(DeviceRgb.BLACK);
            PdfHelper.CreatePdfA(pdfPath, file, ocrPdfCreatorProperties, PdfHelper.GetRGBPdfOutputIntent());
            PdfDocument pdfDocument = new PdfDocument(new PdfReader(pdfPath));

            try
            {
                NUnit.Framework.Assert.AreEqual("en-US", pdfDocument.GetCatalog().GetLang().ToString());
                NUnit.Framework.Assert.IsNull(pdfDocument.GetDocumentInfo().GetTitle());
                NUnit.Framework.Assert.AreEqual(PdfAConformanceLevel.PDF_A_3U,
                                                pdfDocument.GetReader().GetPdfAConformanceLevel());
            }
            finally
            {
                // Close the document even when an assertion fails so the file opened for reading is not left open.
                pdfDocument.Close();
            }
        }
Example #12
0
        /// <summary>
        /// Creates a PDF with only the text layer name set and verifies that the document
        /// contains exactly one layer, named "Text Layer", that is switched on.
        /// </summary>
        public virtual void TestPdfLayersWithTextLayerOnly()
        {
            String   path = PdfHelper.GetDefaultImagePath();
            FileInfo file = new FileInfo(path);
            OcrPdfCreatorProperties properties = new OcrPdfCreatorProperties();

            properties.SetTextLayerName("Text Layer");
            OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(new CustomOcrEngine(), properties);
            PdfDocument   doc           = ocrPdfCreator.CreatePdf(JavaCollectionsUtil.SingletonList <FileInfo>(file),
                                                                  PdfHelper.GetPdfWriter());

            NUnit.Framework.Assert.IsNotNull(doc);
            try
            {
                IList <PdfLayer> layers = doc.GetCatalog().GetOCProperties(true).GetLayers();

                NUnit.Framework.Assert.AreEqual(1, layers.Count);
                NUnit.Framework.Assert.AreEqual("Text Layer", layers[0].GetPdfObject().Get(PdfName.Name).ToString());
                NUnit.Framework.Assert.IsTrue(layers[0].IsOn());
            }
            finally
            {
                // Close even when an assertion fails so the document is not left open.
                doc.Close();
            }
        }
Example #13
0
 /// <summary>
 /// Verifies that PDF creation fails with an OcrException when the font provider only contains
 /// the font path "font.ttf" registered under the default family "Font".
 /// </summary>
 /// <remarks>
 /// The expected message is CANNOT_CREATE_PDF_DOCUMENT formatted with CANNOT_RESOLVE_PROVIDED_FONTS.
 /// NOTE(review): "font.ttf" is presumably a path that cannot be resolved - confirm.
 /// </remarks>
 public virtual void TestInvalidFontWithInvalidDefaultFontFamily()
 {
     NUnit.Framework.Assert.That(() => {
         String testName = "testInvalidFontWithInvalidDefaultFontFamily";
         String path     = PdfHelper.GetDefaultImagePath();
         String pdfPath  = PdfHelper.GetTargetDirectory() + testName + ".pdf";
         FileInfo file   = new FileInfo(path);
         OcrPdfCreatorProperties properties = new OcrPdfCreatorProperties();
         FontProvider pdfOcrFontProvider    = new FontProvider("Font");
         pdfOcrFontProvider.GetFontSet().AddFont("font.ttf", PdfEncodings.IDENTITY_H, "Font");
         properties.SetFontProvider(pdfOcrFontProvider, "Font");
         properties.SetScaleMode(ScaleMode.SCALE_TO_FIT);
         // CreatePdf is the call expected to throw, so nothing is verified after it:
         // if it returns normally, Assert.That fails because no exception was raised.
         PdfHelper.CreatePdf(pdfPath, file, properties);
     }
                                 , NUnit.Framework.Throws.InstanceOf <OcrException>().With.Message.EqualTo(MessageFormatUtil.Format(OcrException.CANNOT_CREATE_PDF_DOCUMENT, OcrException.CANNOT_RESOLVE_PROVIDED_FONTS)))
     ;
 }
Example #14
0
        /// <summary>
        /// Creates a PDF/A document with a font provider that contains the FreeSans font and checks
        /// that the font used is embedded and its name contains "FreeSans".
        /// </summary>
        public virtual void TestCustomFontInPdf()
        {
            // The name matches the test method so the output file is not shared with
            // TestDefaultFontInPdfARgb, which also writes "testDefaultFontInPdf.pdf".
            String       testName     = "testCustomFontInPdf";
            String       path         = PdfHelper.GetDefaultImagePath();
            String       pdfPath      = PdfHelper.GetTargetDirectory() + testName + ".pdf";
            FileInfo     file         = new FileInfo(path);
            FontProvider fontProvider = new FontProvider("FreeSans");

            fontProvider.GetFontSet().AddFont(PdfHelper.GetFreeSansFontPath(), PdfEncodings.IDENTITY_H, "FreeSans");
            OcrPdfCreatorProperties ocrPdfCreatorProperties = new OcrPdfCreatorProperties();

            ocrPdfCreatorProperties.SetPdfLang("en-US");
            ocrPdfCreatorProperties.SetFontProvider(fontProvider, "FreeSans");
            PdfHelper.CreatePdfA(pdfPath, file, ocrPdfCreatorProperties, PdfHelper.GetCMYKPdfOutputIntent());
            ExtractionStrategy strategy = PdfHelper.GetExtractionStrategy(pdfPath);
            PdfFont            font     = strategy.GetPdfFont();
            String             fontName = font.GetFontProgram().GetFontNames().GetFontName();

            NUnit.Framework.Assert.IsTrue(fontName.Contains("FreeSans"));
            NUnit.Framework.Assert.IsTrue(font.IsEmbedded());
        }
Example #15
0
        /// <summary>
        /// Creates a PDF with <c>ScaleMode.SCALE_HEIGHT</c> on a 400x400 pt page and checks the
        /// bounding box of the placed image.
        /// </summary>
        public virtual void TestScaleHeightMode()
        {
            String testName = "testScaleHeightMode";
            String path = PdfHelper.GetDefaultImagePath();
            String pdfPath = PdfHelper.GetTargetDirectory() + testName + ".pdf";
            FileInfo imageFile = new FileInfo(path);

            float pageWidthPt = 400f;
            float pageHeightPt = 400f;

            OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
            creatorProperties.SetScaleMode(ScaleMode.SCALE_HEIGHT);
            creatorProperties.SetPageSize(new Rectangle(pageWidthPt, pageHeightPt));

            PdfHelper.CreatePdf(pdfPath, imageFile, creatorProperties);

            Rectangle imageBox = GetImageBBoxRectangleFromPdf(pdfPath);
            ImageData sourceImage = ImageDataFactory.Create(imageFile.FullName);

            // The image width in the PDF must equal the configured page width.
            NUnit.Framework.Assert.AreEqual(pageWidthPt, imageBox.GetWidth(), DELTA);

            // The width/height ratio of the placed image must match that of the original image.
            NUnit.Framework.Assert.AreEqual(sourceImage.GetWidth() / sourceImage.GetHeight(),
                                            imageBox.GetWidth() / imageBox.GetHeight(), DELTA);
        }
Example #16
0
        /// <summary>
        /// Creates a PDF/A document from a copy of properties that carry a custom language and
        /// title, then checks that both values and the PDF/A-3U conformance level are present
        /// in the written file.
        /// </summary>
        public virtual void TestPdfCustomMetadata()
        {
            String   testName = "testPdfCustomMetadata";
            String   path     = PdfHelper.GetDefaultImagePath();
            String   pdfPath  = PdfHelper.GetTargetDirectory() + testName + ".pdf";
            FileInfo file     = new FileInfo(path);
            OcrPdfCreatorProperties properties = new OcrPdfCreatorProperties();
            String locale = "nl-BE";

            properties.SetPdfLang(locale);
            String title = "Title";

            properties.SetTitle(title);
            // A copy of the properties is passed on, so the values must survive the copy constructor.
            PdfHelper.CreatePdfA(pdfPath, file, new OcrPdfCreatorProperties(properties),
                                 PdfHelper.GetCMYKPdfOutputIntent());
            PdfDocument pdfDocument = new PdfDocument(new PdfReader(pdfPath));

            try
            {
                NUnit.Framework.Assert.AreEqual(locale, pdfDocument.GetCatalog().GetLang().ToString());
                NUnit.Framework.Assert.AreEqual(title, pdfDocument.GetDocumentInfo().GetTitle());
                NUnit.Framework.Assert.AreEqual(PdfAConformanceLevel.PDF_A_3U,
                                                pdfDocument.GetReader().GetPdfAConformanceLevel());
            }
            finally
            {
                // Close the document even when an assertion fails so the file opened for reading is not left open.
                pdfDocument.Close();
            }
        }
Example #17
0
 /// <summary>
 /// Sets the properties used by this <see cref="OcrPdfCreator"/>.
 /// </summary>
 /// <remarks>
 /// The given reference is stored as-is; no copy is made and no null check is performed here.
 /// </remarks>
 /// <param name="ocrPdfCreatorProperties">
 /// set of properties (<see cref="OcrPdfCreatorProperties"/>) for
 /// <see cref="OcrPdfCreator"/>
 /// </param>
 public void SetOcrPdfCreatorProperties(OcrPdfCreatorProperties ocrPdfCreatorProperties)
 {
     this.ocrPdfCreatorProperties = ocrPdfCreatorProperties;
 }
Example #18
0
 /// <summary>
 /// Creates a new <see cref="OcrPdfCreator"/> instance.
 /// </summary>
 /// <remarks>
 /// Both arguments are passed to the corresponding setters, the engine first and the properties second.
 /// </remarks>
 /// <param name="ocrEngine">
 /// selected OCR reader (<see cref="IOcrEngine"/>)
 /// </param>
 /// <param name="ocrPdfCreatorProperties">
 /// set of properties for
 /// <see cref="OcrPdfCreator"/>
 /// </param>
 public OcrPdfCreator(IOcrEngine ocrEngine, OcrPdfCreatorProperties ocrPdfCreatorProperties)
 {
     SetOcrEngine(ocrEngine);
     SetOcrPdfCreatorProperties(ocrPdfCreatorProperties);
 }