/// <summary>
/// Creates a PDF with a named image layer and a named text layer and checks
/// that the document exposes exactly those two layers (both switched on),
/// that <c>PdfHelper.DEFAULT_TEXT</c> is extracted from the text layer and
/// that an empty string is extracted from the image layer.
/// </summary>
public virtual void TestTextFromPdfLayers() {
    String testName = "testTextFromPdfLayers";
    String path = PdfHelper.GetDefaultImagePath();
    String pdfPath = PdfHelper.GetTargetDirectory() + testName + ".pdf";
    FileInfo file = new FileInfo(path);
    OcrPdfCreatorProperties properties = new OcrPdfCreatorProperties();
    properties.SetImageLayerName("Image Layer");
    properties.SetTextLayerName("Text Layer");
    OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(new CustomOcrEngine(), properties);
    PdfDocument doc = ocrPdfCreator.CreatePdf(JavaCollectionsUtil.SingletonList<FileInfo>(file),
        PdfHelper.GetPdfWriter(pdfPath));
    NUnit.Framework.Assert.IsNotNull(doc);
    try {
        IList<PdfLayer> layers = doc.GetCatalog().GetOCProperties(true).GetLayers();
        NUnit.Framework.Assert.AreEqual(2, layers.Count);
        NUnit.Framework.Assert.AreEqual("Image Layer", layers[0].GetPdfObject().Get(PdfName.Name).ToString());
        NUnit.Framework.Assert.IsTrue(layers[0].IsOn());
        NUnit.Framework.Assert.AreEqual("Text Layer", layers[1].GetPdfObject().Get(PdfName.Name).ToString());
        NUnit.Framework.Assert.IsTrue(layers[1].IsOn());
    }
    finally {
        // Close the document even when a layer assertion fails, so the
        // output file is not left open for the rest of the test run.
        doc.Close();
    }
    // The file is re-read only after the document has been closed.
    NUnit.Framework.Assert.AreEqual(PdfHelper.DEFAULT_TEXT, PdfHelper.GetTextFromPdfLayer(pdfPath, "Text Layer"));
    NUnit.Framework.Assert.AreEqual("", PdfHelper.GetTextFromPdfLayer(pdfPath, "Image Layer"));
}
/// <summary>
/// Uses one <c>PdfOcrFontProvider</c> (built from a "FreeSans" font set) to
/// create first a PDF/A file and then a regular PDF, and checks for each
/// result that the font name contains "FreeSans", that the font is embedded
/// and that the extracted text equals <c>PdfHelper.DEFAULT_TEXT</c>.
/// </summary>
public virtual void TestReusingFontProvider() {
    String testName = "testReusingFontProvider";
    String imagePath = PdfHelper.GetDefaultImagePath();
    String pdfPathA3u = PdfHelper.GetTargetDirectory() + testName + "_a3u.pdf";
    String pdfPath = PdfHelper.GetTargetDirectory() + testName + ".pdf";
    FileInfo image = new FileInfo(imagePath);

    FontProvider baseProvider = new FontProvider("FreeSans");
    baseProvider.AddFont(PdfHelper.GetFreeSansFontPath());
    PdfOcrFontProvider sharedProvider = new PdfOcrFontProvider(baseProvider.GetFontSet(), "FreeSans");

    OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
    creatorProperties.SetPdfLang("en-US");
    creatorProperties.SetFontProvider(sharedProvider);

    // Both documents are produced before any of them is inspected.
    PdfHelper.CreatePdfA(pdfPathA3u, image, creatorProperties, PdfHelper.GetCMYKPdfOutputIntent());
    PdfHelper.CreatePdf(pdfPath, image, creatorProperties);

    // Same checks for the PDF/A output first, then for the plain PDF output.
    foreach (String resultPath in new String[] { pdfPathA3u, pdfPath }) {
        ExtractionStrategy extraction = PdfHelper.GetExtractionStrategy(resultPath);
        PdfFont usedFont = extraction.GetPdfFont();
        String usedFontName = usedFont.GetFontProgram().GetFontNames().GetFontName();
        NUnit.Framework.Assert.IsTrue(usedFontName.Contains("FreeSans"));
        NUnit.Framework.Assert.IsTrue(usedFont.IsEmbedded());
        NUnit.Framework.Assert.AreEqual(PdfHelper.DEFAULT_TEXT, extraction.GetResultantText());
    }
}
/// <summary>
/// Creates a PDF/A from the Thai sample image using the "Kanit" font and an
/// RGB output intent, then checks that the text extracted with and without
/// ActualText both equal <c>PdfHelper.THAI_TEXT</c> and that the font used
/// is an embedded font whose name contains "Kanit".
/// </summary>
public virtual void TestCompliantThaiPdfA() {
    String testName = "testCompliantThaiPdfA";
    String imagePath = PdfHelper.GetThaiImagePath();
    String pdfPath = PdfHelper.GetTargetDirectory() + testName + ".pdf";

    FontProvider kanitProvider = new FontProvider("Kanit");
    kanitProvider.AddFont(PdfHelper.GetKanitFontPath());
    PdfOcrFontProvider ocrFontProvider = new PdfOcrFontProvider(kanitProvider.GetFontSet(), "Kanit");

    OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
    creatorProperties.SetPdfLang("en-US");
    creatorProperties.SetTextColor(DeviceRgb.BLACK);
    creatorProperties.SetFontProvider(ocrFontProvider);

    PdfHelper.CreatePdfA(pdfPath, new FileInfo(imagePath), creatorProperties,
        PdfHelper.GetRGBPdfOutputIntent());

    String textWithActualText = PdfHelper.GetTextFromPdfLayerUseActualText(pdfPath, null);
    NUnit.Framework.Assert.AreEqual(PdfHelper.THAI_TEXT, textWithActualText);
    String textWithoutActualText = PdfHelper.GetTextFromPdfLayer(pdfPath, null);
    NUnit.Framework.Assert.AreEqual(PdfHelper.THAI_TEXT, textWithoutActualText);
    NUnit.Framework.Assert.AreEqual(textWithoutActualText, textWithActualText);

    ExtractionStrategy extraction = PdfHelper.GetExtractionStrategy(pdfPath);
    PdfFont usedFont = extraction.GetPdfFont();
    String usedFontName = usedFont.GetFontProgram().GetFontNames().GetFontName();
    NUnit.Framework.Assert.IsTrue(usedFontName.Contains("Kanit"));
    NUnit.Framework.Assert.IsTrue(usedFont.IsEmbedded());
}
/// <summary>
/// Creates a PDF with <c>ScaleMode.SCALE_WIDTH</c> and a 400x400 pt page
/// size and checks the bounding box of the resulting image.
/// </summary>
public virtual void TestScaleWidthMode() {
    String testName = "testScaleWidthMode";
    String imagePath = PdfHelper.GetDefaultImagePath();
    String pdfPath = PdfHelper.GetTargetDirectory() + testName + ".pdf";
    FileInfo image = new FileInfo(imagePath);

    float pageWidthPt = 400f;
    float pageHeightPt = 400f;
    Rectangle requestedPageSize = new Rectangle(pageWidthPt, pageHeightPt);

    OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
    creatorProperties.SetScaleMode(ScaleMode.SCALE_WIDTH);
    creatorProperties.SetPageSize(requestedPageSize);
    PdfHelper.CreatePdf(pdfPath, image, creatorProperties);

    Rectangle imageBBox = GetImageBBoxRectangleFromPdf(pdfPath);
    ImageData sourceImage = ImageDataFactory.Create(image.FullName);

    // The resulting image height must equal the requested page height ...
    NUnit.Framework.Assert.AreEqual(pageHeightPt, imageBBox.GetHeight(), DELTA);
    // ... and the width must be scaled so that the width/height ratio of the
    // original image is preserved.
    NUnit.Framework.Assert.AreEqual(sourceImage.GetWidth() / sourceImage.GetHeight(),
        imageBBox.GetWidth() / imageBBox.GetHeight(), DELTA);
}
/// <summary>
/// Expects a <c>PdfException</c> with the "DeviceCMYK may be used only if
/// the file has a CMYK PDF/A output intent" message when a PDF/A is created
/// with a CMYK text color and an RGB output intent.
/// </summary>
public virtual void TestIncompatibleOutputIntentAndFontColorSpaceException() {
    String expectedMessage = PdfAConformanceException.DEVICECMYK_MAY_BE_USED_ONLY_IF_THE_FILE_HAS_A_CMYK_PDFA_OUTPUT_INTENT_OR_DEFAULTCMYK_IN_USAGE_CONTEXT;
    NUnit.Framework.Assert.That(() => {
        String testName = "testIncompatibleOutputIntentAndFontColorSpaceException";
        String imagePath = PdfHelper.GetDefaultImagePath();
        String pdfPath = PdfHelper.GetTargetDirectory() + testName + ".pdf";

        OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
        creatorProperties.SetPdfLang("en-US");
        creatorProperties.SetTextColor(DeviceCmyk.BLACK);

        PdfHelper.CreatePdfA(pdfPath, new FileInfo(imagePath), creatorProperties,
            PdfHelper.GetRGBPdfOutputIntent());
    }
    , NUnit.Framework.Throws.InstanceOf<PdfException>().With.Message.EqualTo(expectedMessage));
}
/// <summary>
/// Calls <c>PdfHelper.CreatePdfA</c> with a <c>null</c> output intent and
/// checks that the extracted text equals <c>PdfHelper.DEFAULT_TEXT</c> and
/// that the scale mode of the properties is still
/// <c>ScaleMode.SCALE_TO_FIT</c> afterwards.
/// </summary>
public virtual void TestPdfA3uWithNullIntent() {
    String testName = "testPdfA3uWithNullIntent";
    String imagePath = PdfHelper.GetDefaultImagePath();
    String pdfPath = PdfHelper.GetTargetDirectory() + testName + ".pdf";

    OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
    creatorProperties.SetTextColor(DeviceCmyk.BLACK);
    creatorProperties.SetScaleMode(ScaleMode.SCALE_TO_FIT);

    PdfHelper.CreatePdfA(pdfPath, new FileInfo(imagePath), creatorProperties, null);

    String extractedText = PdfHelper.GetTextFromPdfLayer(pdfPath, null);
    NUnit.Framework.Assert.AreEqual(PdfHelper.DEFAULT_TEXT, extractedText);
    NUnit.Framework.Assert.AreEqual(ScaleMode.SCALE_TO_FIT, creatorProperties.GetScaleMode());
}
/// <summary>
/// Expects an <c>OcrException</c> when a PDF/A is created from the Thai
/// sample image without configuring a font provider: the expected message
/// reports that no glyph was found for the character with code 3611.
/// </summary>
public virtual void TestNonCompliantThaiPdfA() {
    // 3611 == 0x0E1B, the character code reported in the expected message.
    String missingGlyphMessage = MessageFormatUtil.Format(
        PdfOcrLogMessageConstant.COULD_NOT_FIND_CORRESPONDING_GLYPH_TO_UNICODE_CHARACTER, 3611);
    String expectedMessage = MessageFormatUtil.Format(OcrException.CANNOT_CREATE_PDF_DOCUMENT,
        missingGlyphMessage);
    NUnit.Framework.Assert.That(() => {
        String testName = "testNonCompliantThaiPdfA";
        String imagePath = PdfHelper.GetThaiImagePath();
        String pdfPath = PdfHelper.GetTargetDirectory() + testName + ".pdf";

        OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
        creatorProperties.SetPdfLang("en-US");
        creatorProperties.SetTextColor(DeviceRgb.BLACK);

        PdfHelper.CreatePdfA(pdfPath, new FileInfo(imagePath), creatorProperties,
            PdfHelper.GetRGBPdfOutputIntent());
    }
    , NUnit.Framework.Throws.InstanceOf<OcrException>().With.Message.EqualTo(expectedMessage));
}
/// <summary>
/// Runs OCR on the image at <paramref name="imgPath"/> and writes the
/// resulting PDF document to <paramref name="pdfPath"/>.
/// </summary>
/// <remarks>
/// The PDF is created with language "en-US" and an empty title.
/// An <see cref="System.IO.IOException"/> raised while writing is logged
/// and not rethrown.
/// (Method is used for compare tool)
/// </remarks>
/// <param name="tesseractReader">OCR engine used to process the image</param>
/// <param name="imgPath">path to the input image</param>
/// <param name="pdfPath">path of the PDF file to create</param>
/// <param name="languages">
/// languages to set on the engine properties; skipped when <c>null</c>
/// </param>
/// <param name="fonts">
/// font paths to register in a new font provider; skipped when
/// <c>null</c> or empty
/// </param>
/// <param name="color">text color to set; skipped when <c>null</c></param>
protected internal virtual void DoOcrAndSavePdfToPath(AbstractTesseract4OcrEngine tesseractReader, String imgPath,
    String pdfPath, IList<String> languages, IList<String> fonts, Color color) {
    if (languages != null) {
        Tesseract4OcrEngineProperties engineProperties = tesseractReader.GetTesseract4OcrEngineProperties();
        engineProperties.SetLanguages(languages);
        tesseractReader.SetTesseract4OcrEngineProperties(engineProperties);
    }

    OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
    creatorProperties.SetPdfLang("en-US");
    creatorProperties.SetTitle("");

    if (fonts != null && fonts.Count > 0) {
        FontProvider customFonts = new FontProvider();
        foreach (String fontFile in fonts) {
            // The font name registered for each path comes from the shared lookup map.
            String fontAlias = FONT_PATH_TO_FONT_NAME_MAP.Get(fontFile);
            customFonts.GetFontSet().AddFont(fontFile, PdfEncodings.IDENTITY_H, fontAlias);
        }
        creatorProperties.SetFontProvider(customFonts);
    }

    if (color != null) {
        creatorProperties.SetTextColor(color);
    }

    if (languages != null) {
        // The engine must report as many languages as were requested.
        NUnit.Framework.Assert.AreEqual(languages.Count,
            tesseractReader.GetTesseract4OcrEngineProperties().GetLanguages().Count);
    }

    OcrPdfCreator pdfCreator = new OcrPdfCreator(tesseractReader, creatorProperties);
    try {
        using (PdfWriter writer = GetPdfWriter(pdfPath)) {
            IList<FileInfo> inputImages = JavaCollectionsUtil.SingletonList<FileInfo>(new FileInfo(imgPath));
            PdfDocument createdDocument = pdfCreator.CreatePdf(inputImages, writer);
            NUnit.Framework.Assert.IsNotNull(createdDocument);
            createdDocument.Close();
        }
    }
    catch (System.IO.IOException e) {
        LOGGER.Error(e.Message);
    }
}
/// <summary>
/// Creates a PDF/A with an RGB output intent and no explicit font provider
/// and checks that the font used is embedded and that its name contains
/// "LiberationSans".
/// </summary>
public virtual void TestDefaultFontInPdfARgb() {
    String testName = "testDefaultFontInPdf";
    String imagePath = PdfHelper.GetDefaultImagePath();
    String pdfPath = PdfHelper.GetTargetDirectory() + testName + ".pdf";
    FileInfo image = new FileInfo(imagePath);

    OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
    creatorProperties.SetPdfLang("en-US");
    creatorProperties.SetTextColor(DeviceRgb.BLACK);

    PdfHelper.CreatePdfA(pdfPath, image, creatorProperties, PdfHelper.GetRGBPdfOutputIntent());

    ExtractionStrategy extraction = PdfHelper.GetExtractionStrategy(pdfPath);
    PdfFont usedFont = extraction.GetPdfFont();
    String usedFontName = usedFont.GetFontProgram().GetFontNames().GetFontName();
    NUnit.Framework.Assert.IsTrue(usedFontName.Contains("LiberationSans"));
    NUnit.Framework.Assert.IsTrue(usedFont.IsEmbedded());
}
/// <summary>
/// Creates a PDF from "multipage.tiff" with a CMYK cyan text color and a
/// text layer named "Text1", then checks that the fill color reported by
/// the extraction strategy for that layer equals the configured color.
/// </summary>
public virtual void TestFontColor() {
    String testName = "testFontColor";
    String imagePath = PdfHelper.GetImagesTestDirectory() + "multipage.tiff";
    String pdfPath = PdfHelper.GetTargetDirectory() + testName + ".pdf";
    FileInfo image = new FileInfo(imagePath);
    Color expectedColor = DeviceCmyk.CYAN;

    OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
    creatorProperties.SetScaleMode(ScaleMode.SCALE_TO_FIT);
    creatorProperties.SetTextLayerName("Text1");
    creatorProperties.SetTextColor(expectedColor);

    PdfHelper.CreatePdf(pdfPath, image, creatorProperties);

    ExtractionStrategy extraction = PdfHelper.GetExtractionStrategy(pdfPath, "Text1");
    Color actualFillColor = extraction.GetFillColor();
    NUnit.Framework.Assert.AreEqual(expectedColor, actualFillColor);
}
/// <summary>
/// Creates a PDF/A with only the language set to "en-US" (no title) and
/// checks, after re-opening the file, that the catalog language is "en-US",
/// the document title is <c>null</c> and the conformance level is
/// <c>PdfAConformanceLevel.PDF_A_3U</c>.
/// </summary>
public virtual void TestPdfA3DefaultMetadata() {
    String testName = "testPdfDefaultMetadata";
    String path = PdfHelper.GetDefaultImagePath();
    String pdfPath = PdfHelper.GetTargetDirectory() + testName + ".pdf";
    FileInfo file = new FileInfo(path);
    OcrPdfCreatorProperties ocrPdfCreatorProperties = new OcrPdfCreatorProperties();
    ocrPdfCreatorProperties.SetPdfLang("en-US");
    ocrPdfCreatorProperties.SetTextColor(DeviceRgb.BLACK);
    PdfHelper.CreatePdfA(pdfPath, file, ocrPdfCreatorProperties, PdfHelper.GetRGBPdfOutputIntent());
    PdfDocument pdfDocument = new PdfDocument(new PdfReader(pdfPath));
    try {
        NUnit.Framework.Assert.AreEqual("en-US", pdfDocument.GetCatalog().GetLang().ToString());
        NUnit.Framework.Assert.AreEqual(null, pdfDocument.GetDocumentInfo().GetTitle());
        NUnit.Framework.Assert.AreEqual(PdfAConformanceLevel.PDF_A_3U,
            pdfDocument.GetReader().GetPdfAConformanceLevel());
    }
    finally {
        // Close the document even when an assertion fails so the reader
        // does not keep the output file open.
        pdfDocument.Close();
    }
}
/// <summary>
/// Creates a PDF with only a text layer name configured and checks that the
/// document exposes exactly one layer, named "Text Layer" and switched on.
/// </summary>
public virtual void TestPdfLayersWithTextLayerOnly() {
    FileInfo image = new FileInfo(PdfHelper.GetDefaultImagePath());

    OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
    creatorProperties.SetTextLayerName("Text Layer");
    OcrPdfCreator pdfCreator = new OcrPdfCreator(new CustomOcrEngine(), creatorProperties);

    PdfDocument createdDocument = pdfCreator.CreatePdf(JavaCollectionsUtil.SingletonList<FileInfo>(image),
        PdfHelper.GetPdfWriter());
    NUnit.Framework.Assert.IsNotNull(createdDocument);

    IList<PdfLayer> documentLayers = createdDocument.GetCatalog().GetOCProperties(true).GetLayers();
    NUnit.Framework.Assert.AreEqual(1, documentLayers.Count);
    PdfLayer onlyLayer = documentLayers[0];
    NUnit.Framework.Assert.AreEqual("Text Layer", onlyLayer.GetPdfObject().Get(PdfName.Name).ToString());
    NUnit.Framework.Assert.IsTrue(onlyLayer.IsOn());

    createdDocument.Close();
}
/// <summary>
/// Expects an <c>OcrException</c> ("cannot create PDF document" wrapping
/// "cannot resolve provided fonts") when the font provider is configured
/// with the font file "font.ttf" and the default font family "Font".
/// </summary>
public virtual void TestInvalidFontWithInvalidDefaultFontFamily() {
    String expectedMessage = MessageFormatUtil.Format(OcrException.CANNOT_CREATE_PDF_DOCUMENT,
        OcrException.CANNOT_RESOLVE_PROVIDED_FONTS);
    NUnit.Framework.Assert.That(() => {
        String testName = "testInvalidFontWithInvalidDefaultFontFamily";
        String imagePath = PdfHelper.GetDefaultImagePath();
        String pdfPath = PdfHelper.GetTargetDirectory() + testName + ".pdf";
        FileInfo image = new FileInfo(imagePath);

        OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
        FontProvider invalidFontProvider = new FontProvider("Font");
        invalidFontProvider.GetFontSet().AddFont("font.ttf", PdfEncodings.IDENTITY_H, "Font");
        creatorProperties.SetFontProvider(invalidFontProvider, "Font");
        creatorProperties.SetScaleMode(ScaleMode.SCALE_TO_FIT);

        PdfHelper.CreatePdf(pdfPath, image, creatorProperties);

        String extractedText = PdfHelper.GetTextFromPdfLayer(pdfPath, null);
        NUnit.Framework.Assert.AreEqual(PdfHelper.DEFAULT_TEXT, extractedText);
        NUnit.Framework.Assert.AreEqual(ScaleMode.SCALE_TO_FIT, creatorProperties.GetScaleMode());
    }
    , NUnit.Framework.Throws.InstanceOf<OcrException>().With.Message.EqualTo(expectedMessage));
}
/// <summary>
/// Creates a PDF/A with a CMYK output intent and a font provider holding
/// the "FreeSans" font, then checks that the font used is embedded and that
/// its name contains "FreeSans".
/// </summary>
public virtual void TestCustomFontInPdf() {
    // Use a test-specific file name: the previous value "testDefaultFontInPdf"
    // was shared with TestDefaultFontInPdfARgb, so both tests wrote to the
    // same output path.
    String testName = "testCustomFontInPdf";
    String path = PdfHelper.GetDefaultImagePath();
    String pdfPath = PdfHelper.GetTargetDirectory() + testName + ".pdf";
    FileInfo file = new FileInfo(path);
    FontProvider fontProvider = new FontProvider("FreeSans");
    fontProvider.GetFontSet().AddFont(PdfHelper.GetFreeSansFontPath(), PdfEncodings.IDENTITY_H, "FreeSans");
    OcrPdfCreatorProperties ocrPdfCreatorProperties = new OcrPdfCreatorProperties();
    ocrPdfCreatorProperties.SetPdfLang("en-US");
    ocrPdfCreatorProperties.SetFontProvider(fontProvider, "FreeSans");
    PdfHelper.CreatePdfA(pdfPath, file, ocrPdfCreatorProperties, PdfHelper.GetCMYKPdfOutputIntent());
    ExtractionStrategy strategy = PdfHelper.GetExtractionStrategy(pdfPath);
    PdfFont font = strategy.GetPdfFont();
    String fontName = font.GetFontProgram().GetFontNames().GetFontName();
    NUnit.Framework.Assert.IsTrue(fontName.Contains("FreeSans"));
    NUnit.Framework.Assert.IsTrue(font.IsEmbedded());
}
/// <summary>
/// Creates a PDF with <c>ScaleMode.SCALE_HEIGHT</c> and a 400x400 pt page
/// size and checks the bounding box of the resulting image.
/// </summary>
public virtual void TestScaleHeightMode() {
    String testName = "testScaleHeightMode";
    String imagePath = PdfHelper.GetDefaultImagePath();
    String pdfPath = PdfHelper.GetTargetDirectory() + testName + ".pdf";
    FileInfo image = new FileInfo(imagePath);

    float pageWidthPt = 400f;
    float pageHeightPt = 400f;
    Rectangle requestedPageSize = new Rectangle(pageWidthPt, pageHeightPt);

    OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
    creatorProperties.SetScaleMode(ScaleMode.SCALE_HEIGHT);
    creatorProperties.SetPageSize(requestedPageSize);
    PdfHelper.CreatePdf(pdfPath, image, creatorProperties);

    Rectangle imageBBox = GetImageBBoxRectangleFromPdf(pdfPath);
    ImageData sourceImage = ImageDataFactory.Create(image.FullName);

    // The resulting image width must equal the requested page width ...
    NUnit.Framework.Assert.AreEqual(pageWidthPt, imageBBox.GetWidth(), DELTA);
    // ... and the width/height ratio of the original image must be preserved.
    NUnit.Framework.Assert.AreEqual(sourceImage.GetWidth() / sourceImage.GetHeight(),
        imageBBox.GetWidth() / imageBBox.GetHeight(), DELTA);
}
/// <summary>
/// Creates a PDF/A from a copy of properties carrying the language "nl-BE"
/// and the title "Title" and checks, after re-opening the file, that the
/// catalog language and the document title match those values and that the
/// conformance level is <c>PdfAConformanceLevel.PDF_A_3U</c>.
/// </summary>
public virtual void TestPdfCustomMetadata() {
    String testName = "testPdfCustomMetadata";
    String path = PdfHelper.GetDefaultImagePath();
    String pdfPath = PdfHelper.GetTargetDirectory() + testName + ".pdf";
    FileInfo file = new FileInfo(path);
    OcrPdfCreatorProperties properties = new OcrPdfCreatorProperties();
    String locale = "nl-BE";
    properties.SetPdfLang(locale);
    String title = "Title";
    properties.SetTitle(title);
    // The PDF is created from a copy of the configured properties.
    PdfHelper.CreatePdfA(pdfPath, file, new OcrPdfCreatorProperties(properties),
        PdfHelper.GetCMYKPdfOutputIntent());
    PdfDocument pdfDocument = new PdfDocument(new PdfReader(pdfPath));
    try {
        NUnit.Framework.Assert.AreEqual(locale, pdfDocument.GetCatalog().GetLang().ToString());
        NUnit.Framework.Assert.AreEqual(title, pdfDocument.GetDocumentInfo().GetTitle());
        NUnit.Framework.Assert.AreEqual(PdfAConformanceLevel.PDF_A_3U,
            pdfDocument.GetReader().GetPdfAConformanceLevel());
    }
    finally {
        // Close the document even when an assertion fails so the reader
        // does not keep the output file open.
        pdfDocument.Close();
    }
}
/// <summary>
/// Replaces the properties used by this
/// <see cref="OcrPdfCreator"/>.
/// </summary>
/// <param name="ocrPdfCreatorProperties">
/// the
/// <see cref="OcrPdfCreatorProperties"/>
/// instance to store; it is kept by reference, not copied
/// </param>
public void SetOcrPdfCreatorProperties(OcrPdfCreatorProperties ocrPdfCreatorProperties)
{
    this.ocrPdfCreatorProperties = ocrPdfCreatorProperties;
}
/// <summary>
/// Initializes an
/// <see cref="OcrPdfCreator"/>
/// with the given OCR engine and properties.
/// </summary>
/// <param name="ocrEngine">
/// the
/// <see cref="IOcrEngine"/>
/// passed to <c>SetOcrEngine</c>
/// </param>
/// <param name="ocrPdfCreatorProperties">
/// the
/// <see cref="OcrPdfCreatorProperties"/>
/// passed to <c>SetOcrPdfCreatorProperties</c>
/// </param>
public OcrPdfCreator(IOcrEngine ocrEngine, OcrPdfCreatorProperties ocrPdfCreatorProperties)
{
    // Both values are stored through the setters rather than assigned directly.
    SetOcrEngine(ocrEngine);
    SetOcrPdfCreatorProperties(ocrPdfCreatorProperties);
}