/// <summary>
/// Appends one page to the result PDF document for every image page
/// that has an entry in the recognized-text map.
/// </summary>
/// <param name="imagesTextData">
/// map whose keys are the input image files and whose values map a
/// 1-based page number to the text recognized on that page
/// </param>
/// <param name="pdfDocument">
/// result
/// <see cref="iText.Kernel.Pdf.PdfDocument"/>
/// </param>
/// <param name="createPdfA3u">true if PDF/A3u document is being created</param>
private void AddDataToPdfDocument(IDictionary<FileInfo, IDictionary<int, IList<TextInfo>>> imagesTextData,
    PdfDocument pdfDocument, bool createPdfA3u)
{
    foreach (KeyValuePair<FileInfo, IDictionary<int, IList<TextInfo>>> imageEntry in imagesTextData)
    {
        try
        {
            FileInfo sourceFile = imageEntry.Key;
            IList<ImageData> pageImages = PdfCreatorUtil.GetImageData(sourceFile);
            LOGGER.Info(MessageFormatUtil.Format(PdfOcrLogMessageConstant.NUMBER_OF_PAGES_IN_IMAGE,
                sourceFile.ToString(), pageImages.Count));

            IDictionary<int, IList<TextInfo>> textByPage = imageEntry.Value;
            if (textByPage.Keys.Count <= 0)
            {
                // Nothing was recognized for this file, so no pages are added for it.
                continue;
            }

            // The text map is keyed by 1-based page number, the image list is 0-based.
            for (int pageNumber = 1; pageNumber <= pageImages.Count; ++pageNumber)
            {
                ImageData pageImage = pageImages[pageNumber - 1];
                Rectangle pageImageSize = PdfCreatorUtil.CalculateImageSize(pageImage,
                    ocrPdfCreatorProperties.GetScaleMode(), ocrPdfCreatorProperties.GetPageSize());

                IList<TextInfo> recognizedText;
                if (textByPage.TryGetValue(pageNumber, out recognizedText))
                {
                    AddToCanvas(pdfDocument, pageImageSize, recognizedText, pageImage, createPdfA3u);
                }
            }
        }
        catch (System.IO.IOException e)
        {
            // An I/O failure on one image is logged; processing continues with the next image.
            LOGGER.Error(MessageFormatUtil.Format(PdfOcrLogMessageConstant.CANNOT_ADD_DATA_TO_PDF_DOCUMENT,
                e.Message));
        }
    }
}
/// <summary>Draws every recognized text item of a page onto the given canvas.</summary>
/// <remarks>
/// Each item is horizontally scaled so that its rendered width matches the width of
/// its bounding box. When no text color is configured the text is written with the
/// invisible rendering mode.
/// </remarks>
/// <param name="imageSize">
/// size of the image according to the selected
/// <see cref="ScaleMode"/>
/// </param>
/// <param name="pageText">text that was found on this image (or on this page); may be null or empty</param>
/// <param name="pdfCanvas">canvas to place the text</param>
/// <param name="multiplier">coefficient applied to the bounding box coordinates before placing text</param>
/// <param name="pageMediaBox">page parameters</param>
private void AddTextToCanvas(Rectangle imageSize, IList<TextInfo> pageText, PdfCanvas pdfCanvas, float multiplier,
    Rectangle pageMediaBox)
{
    if (pageText == null || pageText.Count <= 0)
    {
        return;
    }

    Point imageOrigin = PdfCreatorUtil.CalculateImageCoordinates(ocrPdfCreatorProperties.GetPageSize(),
        imageSize);

    foreach (TextInfo textItem in pageText)
    {
        string content = textItem.GetText();
        IList<float> bbox = textItem.GetBbox();

        // The bounding box is read as [left, top, right, bottom] and scaled by the multiplier.
        float scaledLeft = bbox[0] * multiplier;
        float scaledRight = (bbox[2] + 1) * multiplier - 1;
        float scaledTop = bbox[1] * multiplier;
        float scaledBottom = (bbox[3] + 1) * multiplier - 1;

        float boxWidthPt = PdfCreatorUtil.GetPoints(scaledRight - scaledLeft);
        float boxHeightPt = PdfCreatorUtil.GetPoints(scaledBottom - scaledTop);

        FontProvider fonts = GetOcrPdfCreatorProperties().GetFontProvider();
        string family = GetOcrPdfCreatorProperties().GetDefaultFontFamily();

        // Kept as a positive test so that NaN extents are skipped as well.
        bool drawable = !string.IsNullOrEmpty(content) && boxHeightPt > 0 && boxWidthPt > 0;
        if (!drawable)
        {
            continue;
        }

        Document layoutDocument = new Document(pdfCanvas.GetDocument());
        layoutDocument.SetFontProvider(fonts);

        // Pick a font size for the bbox, then measure the line at that size.
        float fontSize = PdfCreatorUtil.CalculateFontSize(layoutDocument, content, family, boxHeightPt,
            boxWidthPt);
        float measuredWidth = PdfCreatorUtil.GetRealLineWidth(layoutDocument, content, family, fontSize);

        float offsetX = PdfCreatorUtil.GetPoints(scaledLeft);
        // The vertical offset is taken from the image height, i.e. measured from the opposite edge.
        float offsetY = imageSize.GetHeight() - PdfCreatorUtil.GetPoints(scaledBottom);

        iText.Layout.Canvas layoutCanvas = new iText.Layout.Canvas(pdfCanvas, pageMediaBox);
        layoutCanvas.SetFontProvider(fonts);

        // Stretch or squeeze the glyphs so the rendered line spans the bbox width.
        Text scaledText = new Text(content).SetHorizontalScaling(boxWidthPt / measuredWidth);
        Paragraph textParagraph = new Paragraph(scaledText).SetMargin(0).SetMultipliedLeading(1.2f);
        textParagraph.SetFontFamily(family).SetFontSize(fontSize);
        textParagraph.SetWidth(boxWidthPt * 1.5f);

        if (ocrPdfCreatorProperties.GetTextColor() == null)
        {
            textParagraph.SetTextRenderingMode(PdfCanvasConstants.TextRenderingMode.INVISIBLE);
        }
        else
        {
            textParagraph.SetFontColor(ocrPdfCreatorProperties.GetTextColor());
        }

        float anchorX = offsetX + (float)imageOrigin.x;
        float anchorY = offsetY + (float)imageOrigin.y;
        layoutCanvas.ShowTextAligned(textParagraph, anchorX, anchorY, TextAlignment.LEFT);
        layoutCanvas.Close();
    }
}
/// <summary>Draws the given image on the canvas; does nothing when the image is null.</summary>
/// <remarks>
/// When no page size is configured the image is drawn using
/// <paramref name="imageSize"/>
/// as is. Otherwise it is drawn at the coordinates calculated from the configured
/// page size, keeping the width and height of
/// <paramref name="imageSize"/>.
/// </remarks>
/// <param name="imageData">image data to draw; may be null</param>
/// <param name="imageSize">
/// size of the image according to the selected
/// <see cref="ScaleMode"/>
/// </param>
/// <param name="pdfCanvas">canvas to place the image</param>
private void AddImageToCanvas(ImageData imageData, Rectangle imageSize, PdfCanvas pdfCanvas)
{
    if (imageData == null)
    {
        return;
    }

    Rectangle target;
    if (ocrPdfCreatorProperties.GetPageSize() == null)
    {
        target = imageSize;
    }
    else
    {
        Point origin = PdfCreatorUtil.CalculateImageCoordinates(ocrPdfCreatorProperties.GetPageSize(),
            imageSize);
        target = new Rectangle((float)origin.x, (float)origin.y, imageSize.GetWidth(),
            imageSize.GetHeight());
    }

    pdfCanvas.AddImage(imageData, target, false);
}
/// <summary>
/// Adds a new page to the document and draws the image (or its one page) and the
/// text that was found there, each inside its layer when one is available.
/// </summary>
/// <param name="pdfDocument">
/// result
/// <see cref="iText.Kernel.Pdf.PdfDocument"/>
/// </param>
/// <param name="imageSize">
/// size of the image according to the selected
/// <see cref="ScaleMode"/>
/// </param>
/// <param name="pageText">text that was found on this image (or on this page)</param>
/// <param name="imageData">
/// input image if it is a single page or its one page if
/// this is a multi-page image
/// </param>
/// <param name="createPdfA3u">true if PDF/A3u document is being created</param>
private void AddToCanvas(PdfDocument pdfDocument, Rectangle imageSize, IList<TextInfo> pageText,
    ImageData imageData, bool createPdfA3u)
{
    // The page takes the configured page size, or the image size when none is configured.
    Rectangle pageRectangle;
    if (ocrPdfCreatorProperties.GetPageSize() == null)
    {
        pageRectangle = imageSize;
    }
    else
    {
        pageRectangle = ocrPdfCreatorProperties.GetPageSize();
    }

    PdfPage page = pdfDocument.AddNewPage(new PageSize(pageRectangle));
    PdfCanvas pageCanvas = new OcrPdfCreator.NotDefCheckingPdfCanvas(page, createPdfA3u);

    PdfLayer[] layers = CreatePdfLayers(ocrPdfCreatorProperties.GetImageLayerName(),
        ocrPdfCreatorProperties.GetTextLayerName(), pdfDocument);

    PdfLayer imageLayer = layers[0];
    if (imageLayer != null)
    {
        pageCanvas.BeginLayer(imageLayer);
    }

    AddImageToCanvas(imageData, imageSize, pageCanvas);

    PdfLayer textLayer = layers[1];
    // When both entries are the same layer it stays open across image and text.
    bool separateLayers = imageLayer != textLayer;
    if (imageLayer != null && separateLayers)
    {
        pageCanvas.EndLayer();
    }

    // Ratio between the target image width and the source image width in points.
    float multiplier = 1;
    if (imageData != null)
    {
        multiplier = imageSize.GetWidth() / PdfCreatorUtil.GetPoints(imageData.GetWidth());
    }

    if (textLayer != null && separateLayers)
    {
        pageCanvas.BeginLayer(textLayer);
    }

    try
    {
        AddTextToCanvas(imageSize, pageText, pageCanvas, multiplier, page.GetMediaBox());
    }
    catch (OcrException e)
    {
        LOGGER.Error(MessageFormatUtil.Format(OcrException.CANNOT_CREATE_PDF_DOCUMENT, e.Message));
        throw new OcrException(OcrException.CANNOT_CREATE_PDF_DOCUMENT).SetMessageParams(e.Message);
    }

    if (textLayer != null)
    {
        pageCanvas.EndLayer();
    }
}