// Creates a PDF from the default image with the layers named "Image Layer" and "Text Layer",
// checks that both layers exist (in that order) and are on, then closes the document and
// compares the text extracted from each layer of the saved file: the text layer must yield
// PdfHelper.DEFAULT_TEXT and the image layer must yield an empty string.
public virtual void TestTextFromPdfLayers() {
    String testName = "testTextFromPdfLayers";
    String path = PdfHelper.GetDefaultImagePath();
    String pdfPath = PdfHelper.GetTargetDirectory() + testName + ".pdf";
    FileInfo file = new FileInfo(path);

    OcrPdfCreatorProperties properties = new OcrPdfCreatorProperties();
    properties.SetImageLayerName("Image Layer");
    properties.SetTextLayerName("Text Layer");

    OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(new CustomOcrEngine(), properties);
    PdfDocument doc = ocrPdfCreator.CreatePdf(JavaCollectionsUtil.SingletonList<FileInfo>(file),
        PdfHelper.GetPdfWriter(pdfPath));
    // Checked outside the try block so that a null document is reported as an
    // assertion failure instead of a NullReferenceException raised by the finally block.
    NUnit.Framework.Assert.IsNotNull(doc);
    try {
        IList<PdfLayer> layers = doc.GetCatalog().GetOCProperties(true).GetLayers();
        NUnit.Framework.Assert.AreEqual(2, layers.Count);
        NUnit.Framework.Assert.AreEqual("Image Layer", layers[0].GetPdfObject().Get(PdfName.Name).ToString());
        NUnit.Framework.Assert.IsTrue(layers[0].IsOn());
        NUnit.Framework.Assert.AreEqual("Text Layer", layers[1].GetPdfObject().Get(PdfName.Name).ToString());
        NUnit.Framework.Assert.IsTrue(layers[1].IsOn());
    }
    finally {
        // The document is backed by a writer opened on pdfPath; close it even when an
        // assertion above fails so the output file is not left open.
        doc.Close();
    }

    // The file is read back only after the document has been closed.
    NUnit.Framework.Assert.AreEqual(PdfHelper.DEFAULT_TEXT, PdfHelper.GetTextFromPdfLayer(pdfPath, "Text Layer"));
    NUnit.Framework.Assert.AreEqual("", PdfHelper.GetTextFromPdfLayer(pdfPath, "Image Layer"));
}
/// <summary>
/// Runs OCR on the image at <paramref name="imgPath"/> and saves the resulting
/// PDF document to <paramref name="pdfPath"/>.
/// </summary>
/// <remarks>
/// This method is used for the compare tool.
/// An <see cref="System.IO.IOException"/> raised while writing is logged through
/// <c>LOGGER</c> (message only) and is not rethrown.
/// </remarks>
/// <param name="tesseractReader">engine that performs the OCR</param>
/// <param name="imgPath">path to the input image</param>
/// <param name="pdfPath">path the PDF document is written to</param>
/// <param name="languages">
/// languages to set on the engine properties; when <c>null</c> the engine
/// properties are left untouched
/// </param>
/// <param name="fonts">
/// font paths to register in a new <c>FontProvider</c>; when <c>null</c> or empty
/// no font provider is set
/// </param>
/// <param name="color">text color to set; when <c>null</c> no color is set</param>
protected internal virtual void DoOcrAndSavePdfToPath(AbstractTesseract4OcrEngine tesseractReader, String imgPath,
    String pdfPath, IList<String> languages, IList<String> fonts, Color color) {
    bool languagesProvided = languages != null;
    if (languagesProvided) {
        Tesseract4OcrEngineProperties engineProperties = tesseractReader.GetTesseract4OcrEngineProperties();
        engineProperties.SetLanguages(languages);
        tesseractReader.SetTesseract4OcrEngineProperties(engineProperties);
    }

    OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
    creatorProperties.SetPdfLang("en-US");
    creatorProperties.SetTitle("");

    bool fontsProvided = !(fonts == null || fonts.Count == 0);
    if (fontsProvided) {
        FontProvider provider = new FontProvider();
        foreach (String fontFile in fonts) {
            // Each font is registered under the name mapped to its path.
            String registeredName = FONT_PATH_TO_FONT_NAME_MAP.Get(fontFile);
            provider.GetFontSet().AddFont(fontFile, PdfEncodings.IDENTITY_H, registeredName);
        }
        creatorProperties.SetFontProvider(provider);
    }

    if (color != null) {
        creatorProperties.SetTextColor(color);
    }

    if (languagesProvided) {
        // The engine must report as many languages as were passed in.
        NUnit.Framework.Assert.AreEqual(languages.Count,
            tesseractReader.GetTesseract4OcrEngineProperties().GetLanguages().Count);
    }

    OcrPdfCreator creator = new OcrPdfCreator(tesseractReader, creatorProperties);
    try {
        using (PdfWriter writer = GetPdfWriter(pdfPath)) {
            FileInfo image = new FileInfo(imgPath);
            PdfDocument result = creator.CreatePdf(JavaCollectionsUtil.SingletonList<FileInfo>(image), writer);
            NUnit.Framework.Assert.IsNotNull(result);
            result.Close();
        }
    }
    catch (System.IO.IOException ioError) {
        LOGGER.Error(ioError.Message);
    }
}
// Creates a PDF from the default image using an OcrPdfCreator built without explicit
// creator properties and checks that the first page's width and height equal the image's
// width and height converted with GetPoints, within DELTA.
public virtual void TestOriginalSizeScaleMode() {
    FileInfo imageFile = new FileInfo(PdfHelper.GetDefaultImagePath());
    OcrPdfCreator creator = new OcrPdfCreator(new CustomOcrEngine());

    PdfDocument pdf = creator.CreatePdf(JavaCollectionsUtil.SingletonList<FileInfo>(imageFile),
        PdfHelper.GetPdfWriter());
    NUnit.Framework.Assert.IsNotNull(pdf);

    // Expected page dimensions, derived from the source image.
    ImageData sourceImage = ImageDataFactory.Create(imageFile.FullName);
    float expectedWidth = GetPoints(sourceImage.GetWidth());
    float expectedHeight = GetPoints(sourceImage.GetHeight());

    // Actual dimensions of the first page of the created document.
    float actualWidth = pdf.GetFirstPage().GetPageSize().GetWidth();
    float actualHeight = pdf.GetFirstPage().GetPageSize().GetHeight();

    NUnit.Framework.Assert.AreEqual(expectedWidth, actualWidth, DELTA);
    NUnit.Framework.Assert.AreEqual(expectedHeight, actualHeight, DELTA);

    pdf.Close();
}
// Creates a PDF from the default image with only the text layer name set ("Text Layer")
// and checks that the document has exactly one layer, that it carries that name,
// and that it is on.
public virtual void TestPdfLayersWithTextLayerOnly() {
    FileInfo imageFile = new FileInfo(PdfHelper.GetDefaultImagePath());

    OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
    creatorProperties.SetTextLayerName("Text Layer");
    OcrPdfCreator creator = new OcrPdfCreator(new CustomOcrEngine(), creatorProperties);

    PdfDocument pdf = creator.CreatePdf(JavaCollectionsUtil.SingletonList<FileInfo>(imageFile),
        PdfHelper.GetPdfWriter());
    NUnit.Framework.Assert.IsNotNull(pdf);

    IList<PdfLayer> pdfLayers = pdf.GetCatalog().GetOCProperties(true).GetLayers();
    NUnit.Framework.Assert.AreEqual(1, pdfLayers.Count);

    // Indexed only after the count assertion has passed.
    PdfLayer onlyLayer = pdfLayers[0];
    NUnit.Framework.Assert.AreEqual("Text Layer", onlyLayer.GetPdfObject().Get(PdfName.Name).ToString());
    NUnit.Framework.Assert.IsTrue(onlyLayer.IsOn());

    pdf.Close();
}
// Creates a PDF from the default image using an engine configured with the single language
// "eng" and an OcrPdfCreator built without explicit creator properties. Expects the document
// to contain no layers, the creator to return the same engine instance it was given, and the
// engine properties to still hold exactly the language "eng".
public virtual void TestPdfLayersWithDefaultNames() {
    FileInfo imageFile = new FileInfo(PdfHelper.GetDefaultImagePath());

    OcrEngineProperties engineProperties = new OcrEngineProperties();
    engineProperties.SetLanguages(JavaCollectionsUtil.SingletonList<String>("eng"));
    CustomOcrEngine engine = new CustomOcrEngine(engineProperties);
    OcrPdfCreator creator = new OcrPdfCreator(engine);

    PdfDocument pdf = creator.CreatePdf(JavaCollectionsUtil.SingletonList<FileInfo>(imageFile),
        PdfHelper.GetPdfWriter());
    NUnit.Framework.Assert.IsNotNull(pdf);

    IList<PdfLayer> pdfLayers = pdf.GetCatalog().GetOCProperties(true).GetLayers();
    NUnit.Framework.Assert.AreEqual(0, pdfLayers.Count);
    pdf.Close();

    // Engine-related checks run after the document has been closed.
    NUnit.Framework.Assert.AreEqual(engine, creator.GetOcrEngine());
    NUnit.Framework.Assert.AreEqual(1, engine.GetOcrEngineProperties().GetLanguages().Count);
    NUnit.Framework.Assert.AreEqual("eng", engine.GetOcrEngineProperties().GetLanguages()[0]);
}