/// <summary>
/// Creates a PDF/A file from the image returned by <c>PdfHelper.GetThaiImagePath()</c>
/// using a font provider that has the Kanit font registered, then asserts that the
/// extracted text matches <c>PdfHelper.THAI_TEXT</c> (with and without actual text)
/// and that the font found in the result is an embedded Kanit font.
/// </summary>
public virtual void TestCompliantThaiPdfA() {
    string testName = "testCompliantThaiPdfA";
    string imagePath = PdfHelper.GetThaiImagePath();
    string outputPdf = PdfHelper.GetTargetDirectory() + testName + ".pdf";

    // Creator properties: language tag and an RGB text color.
    OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
    creatorProperties.SetPdfLang("en-US");
    creatorProperties.SetTextColor(DeviceRgb.BLACK);

    // Register the Kanit font and hand it over through the OCR font provider.
    FontProvider kanitSource = new FontProvider("Kanit");
    kanitSource.AddFont(PdfHelper.GetKanitFontPath());
    PdfOcrFontProvider ocrFontProvider = new PdfOcrFontProvider(kanitSource.GetFontSet(), "Kanit");
    creatorProperties.SetFontProvider(ocrFontProvider);

    PdfHelper.CreatePdfA(outputPdf, new FileInfo(imagePath), creatorProperties,
        PdfHelper.GetRGBPdfOutputIntent());

    // Text extracted with "actual text" enabled must match the expected Thai text.
    string textWithActualText = PdfHelper.GetTextFromPdfLayerUseActualText(outputPdf, null);
    NUnit.Framework.Assert.AreEqual(PdfHelper.THAI_TEXT, textWithActualText);

    // The same must hold for plain extraction, and both results must agree.
    string textWithoutActualText = PdfHelper.GetTextFromPdfLayer(outputPdf, null);
    NUnit.Framework.Assert.AreEqual(PdfHelper.THAI_TEXT, textWithoutActualText);
    NUnit.Framework.Assert.AreEqual(textWithoutActualText, textWithActualText);

    // Inspect the font reported by the extraction strategy.
    ExtractionStrategy extraction = PdfHelper.GetExtractionStrategy(outputPdf);
    PdfFont usedFont = extraction.GetPdfFont();
    string usedFontName = usedFont.GetFontProgram().GetFontNames().GetFontName();
    NUnit.Framework.Assert.IsTrue(usedFontName.Contains("Kanit"));
    NUnit.Framework.Assert.IsTrue(usedFont.IsEmbedded());
}
/// <summary>
/// Expects <c>PdfHelper.CreatePdfA</c> to throw a <c>PdfException</c> whose message equals
/// <c>PdfAConformanceException.DEVICECMYK_MAY_BE_USED_ONLY_IF_THE_FILE_HAS_A_CMYK_PDFA_OUTPUT_INTENT_OR_DEFAULTCMYK_IN_USAGE_CONTEXT</c>
/// when the text color is <c>DeviceCmyk.BLACK</c> and the output intent comes from
/// <c>PdfHelper.GetRGBPdfOutputIntent()</c>.
/// </summary>
public virtual void TestIncompatibleOutputIntentAndFontColorSpaceException() {
    // The code under test, wrapped so the assertion below can invoke it.
    NUnit.Framework.TestDelegate createPdfAWithCmykText = () => {
        string testName = "testIncompatibleOutputIntentAndFontColorSpaceException";
        string imagePath = PdfHelper.GetDefaultImagePath();
        string outputPdf = PdfHelper.GetTargetDirectory() + testName + ".pdf";

        OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
        creatorProperties.SetPdfLang("en-US");
        creatorProperties.SetTextColor(DeviceCmyk.BLACK);

        PdfHelper.CreatePdfA(outputPdf, new FileInfo(imagePath), creatorProperties,
            PdfHelper.GetRGBPdfOutputIntent());
    };

    NUnit.Framework.Assert.That(
        createPdfAWithCmykText,
        NUnit.Framework.Throws.InstanceOf<PdfException>().With.Message.EqualTo(
            PdfAConformanceException.DEVICECMYK_MAY_BE_USED_ONLY_IF_THE_FILE_HAS_A_CMYK_PDFA_OUTPUT_INTENT_OR_DEFAULTCMYK_IN_USAGE_CONTEXT));
}
/// <summary>
/// Calls <c>PdfHelper.CreatePdfA</c> with a <c>null</c> output intent and asserts that the
/// text read back from the result equals <c>PdfHelper.DEFAULT_TEXT</c> and that the
/// properties object still reports <c>ScaleMode.SCALE_TO_FIT</c>.
/// </summary>
public virtual void TestPdfA3uWithNullIntent() {
    string testName = "testPdfA3uWithNullIntent";
    string imagePath = PdfHelper.GetDefaultImagePath();
    string outputPdf = PdfHelper.GetTargetDirectory() + testName + ".pdf";

    OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
    creatorProperties.SetTextColor(DeviceCmyk.BLACK);
    creatorProperties.SetScaleMode(ScaleMode.SCALE_TO_FIT);

    // No output intent is supplied here.
    PdfHelper.CreatePdfA(outputPdf, new FileInfo(imagePath), creatorProperties, null);

    string extractedText = PdfHelper.GetTextFromPdfLayer(outputPdf, null);
    NUnit.Framework.Assert.AreEqual(PdfHelper.DEFAULT_TEXT, extractedText);
    NUnit.Framework.Assert.AreEqual(ScaleMode.SCALE_TO_FIT, creatorProperties.GetScaleMode());
}
/// <summary>
/// Expects <c>PdfHelper.CreatePdfA</c> to throw an <c>OcrException</c> for the image returned
/// by <c>PdfHelper.GetThaiImagePath()</c> when no font provider is configured. The expected
/// message is <c>OcrException.CANNOT_CREATE_PDF_DOCUMENT</c> formatted with the
/// "could not find corresponding glyph" log message for character code 3611.
/// </summary>
public virtual void TestNonCompliantThaiPdfA() {
    // Build the expected message up front: the glyph message is nested in the outer one.
    string missingGlyphMessage = MessageFormatUtil.Format(
        PdfOcrLogMessageConstant.COULD_NOT_FIND_CORRESPONDING_GLYPH_TO_UNICODE_CHARACTER, 3611);
    string expectedMessage = MessageFormatUtil.Format(
        OcrException.CANNOT_CREATE_PDF_DOCUMENT, missingGlyphMessage);

    NUnit.Framework.TestDelegate createPdfAWithoutThaiFont = () => {
        string testName = "testNonCompliantThaiPdfA";
        string imagePath = PdfHelper.GetThaiImagePath();
        string outputPdf = PdfHelper.GetTargetDirectory() + testName + ".pdf";

        OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
        creatorProperties.SetPdfLang("en-US");
        creatorProperties.SetTextColor(DeviceRgb.BLACK);

        PdfHelper.CreatePdfA(outputPdf, new FileInfo(imagePath), creatorProperties,
            PdfHelper.GetRGBPdfOutputIntent());
    };

    NUnit.Framework.Assert.That(
        createPdfAWithoutThaiFont,
        NUnit.Framework.Throws.InstanceOf<OcrException>().With.Message.EqualTo(expectedMessage));
}
/// <summary>
/// Runs OCR on the image at <paramref name="imgPath"/> and writes the resulting
/// PDF document to <paramref name="pdfPath"/>.
/// </summary>
/// <remarks>
/// This method is used for the compare tool. An <c>System.IO.IOException</c> raised while
/// writing is logged through <c>LOGGER</c> and not rethrown.
/// </remarks>
/// <param name="tesseractReader">engine passed to the <c>OcrPdfCreator</c>; its languages are
/// replaced when <paramref name="languages"/> is not null</param>
/// <param name="imgPath">path to the input image</param>
/// <param name="pdfPath">path of the PDF file to create</param>
/// <param name="languages">languages to set on the engine properties, or null to leave them untouched</param>
/// <param name="fonts">font file paths to register (looked up in <c>FONT_PATH_TO_FONT_NAME_MAP</c>
/// for their names); null or empty to skip setting a font provider</param>
/// <param name="color">text color to set, or null to skip setting one</param>
protected internal virtual void DoOcrAndSavePdfToPath(AbstractTesseract4OcrEngine tesseractReader, String imgPath, String pdfPath, IList<String> languages, IList<String> fonts, Color color) {
    bool languagesGiven = languages != null;
    if (languagesGiven) {
        // Read-modify-write of the engine properties so only the languages change.
        Tesseract4OcrEngineProperties engineProperties = tesseractReader.GetTesseract4OcrEngineProperties();
        engineProperties.SetLanguages(languages);
        tesseractReader.SetTesseract4OcrEngineProperties(engineProperties);
    }

    OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
    creatorProperties.SetPdfLang("en-US");
    creatorProperties.SetTitle("");

    bool fontsGiven = fonts != null && fonts.Count > 0;
    if (fontsGiven) {
        FontProvider customFonts = new FontProvider();
        foreach (String fontFile in fonts) {
            customFonts.GetFontSet().AddFont(fontFile, PdfEncodings.IDENTITY_H,
                FONT_PATH_TO_FONT_NAME_MAP.Get(fontFile));
        }
        creatorProperties.SetFontProvider(customFonts);
    }

    if (color != null) {
        creatorProperties.SetTextColor(color);
    }

    if (languagesGiven) {
        // The engine must report as many languages as were requested.
        int requestedCount = languages.Count;
        int configuredCount = tesseractReader.GetTesseract4OcrEngineProperties().GetLanguages().Count;
        NUnit.Framework.Assert.AreEqual(requestedCount, configuredCount);
    }

    OcrPdfCreator pdfCreator = new OcrPdfCreator(tesseractReader, creatorProperties);
    try {
        using (PdfWriter writer = GetPdfWriter(pdfPath)) {
            PdfDocument createdDocument = pdfCreator.CreatePdf(
                JavaCollectionsUtil.SingletonList<FileInfo>(new FileInfo(imgPath)), writer);
            NUnit.Framework.Assert.IsNotNull(createdDocument);
            createdDocument.Close();
        }
    }
    catch (System.IO.IOException ioFailure) {
        LOGGER.Error(ioFailure.Message);
    }
}
/// <summary>
/// Creates a PDF/A file without configuring a font provider and asserts that the font
/// reported by the extraction strategy has a name containing "LiberationSans" and is embedded.
/// </summary>
public virtual void TestDefaultFontInPdfARgb() {
    string testName = "testDefaultFontInPdf";
    string imagePath = PdfHelper.GetDefaultImagePath();
    string outputPdf = PdfHelper.GetTargetDirectory() + testName + ".pdf";
    FileInfo imageFile = new FileInfo(imagePath);

    OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
    creatorProperties.SetPdfLang("en-US");
    creatorProperties.SetTextColor(DeviceRgb.BLACK);

    PdfHelper.CreatePdfA(outputPdf, imageFile, creatorProperties,
        PdfHelper.GetRGBPdfOutputIntent());

    // Look up the font that ended up in the produced document.
    ExtractionStrategy extraction = PdfHelper.GetExtractionStrategy(outputPdf);
    PdfFont usedFont = extraction.GetPdfFont();
    string usedFontName = usedFont.GetFontProgram().GetFontNames().GetFontName();

    NUnit.Framework.Assert.IsTrue(usedFontName.Contains("LiberationSans"));
    NUnit.Framework.Assert.IsTrue(usedFont.IsEmbedded());
}
/// <summary>
/// Creates a PDF from "multipage.tiff" with text layer name "Text1" and text color
/// <c>DeviceCmyk.CYAN</c>, then asserts that the fill color reported by the extraction
/// strategy for layer "Text1" equals that color.
/// </summary>
public virtual void TestFontColor() {
    string testName = "testFontColor";
    string imagePath = PdfHelper.GetImagesTestDirectory() + "multipage.tiff";
    string outputPdf = PdfHelper.GetTargetDirectory() + testName + ".pdf";
    FileInfo imageFile = new FileInfo(imagePath);

    OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
    creatorProperties.SetScaleMode(ScaleMode.SCALE_TO_FIT);
    creatorProperties.SetTextLayerName("Text1");
    Color expectedColor = DeviceCmyk.CYAN;
    creatorProperties.SetTextColor(expectedColor);

    PdfHelper.CreatePdf(outputPdf, imageFile, creatorProperties);

    // Read the fill color back from the named text layer.
    ExtractionStrategy extraction = PdfHelper.GetExtractionStrategy(outputPdf, "Text1");
    Color actualColor = extraction.GetFillColor();
    NUnit.Framework.Assert.AreEqual(expectedColor, actualColor);
}
/// <summary>
/// Creates a PDF/A file with language "en-US" and no title, reopens it and asserts that
/// the catalog language is "en-US", the document info title is null and the reader
/// reports conformance level <c>PdfAConformanceLevel.PDF_A_3U</c>.
/// </summary>
public virtual void TestPdfA3DefaultMetadata() {
    String testName = "testPdfDefaultMetadata";
    String path = PdfHelper.GetDefaultImagePath();
    String pdfPath = PdfHelper.GetTargetDirectory() + testName + ".pdf";
    FileInfo file = new FileInfo(path);

    OcrPdfCreatorProperties ocrPdfCreatorProperties = new OcrPdfCreatorProperties();
    ocrPdfCreatorProperties.SetPdfLang("en-US");
    ocrPdfCreatorProperties.SetTextColor(DeviceRgb.BLACK);

    PdfHelper.CreatePdfA(pdfPath, file, ocrPdfCreatorProperties, PdfHelper.GetRGBPdfOutputIntent());

    PdfDocument pdfDocument = new PdfDocument(new PdfReader(pdfPath));
    try {
        NUnit.Framework.Assert.AreEqual("en-US", pdfDocument.GetCatalog().GetLang().ToString());
        NUnit.Framework.Assert.IsNull(pdfDocument.GetDocumentInfo().GetTitle());
        NUnit.Framework.Assert.AreEqual(PdfAConformanceLevel.PDF_A_3U,
            pdfDocument.GetReader().GetPdfAConformanceLevel());
    }
    finally {
        // Close even when an assertion fails so the reopened file is not left open
        // for the tests that run afterwards.
        pdfDocument.Close();
    }
}