/// <summary>
/// Add a text annotation to a pdf, burn it into the page and return the resulting document.
/// </summary>
/// <param name="pdf">Pdf to add annotation to</param>
/// <param name="textAnnotation">
/// Annotation object (page, position, size, text, font and color).
/// A page number of 0 is treated as the first page; the corrected value is written back to this object.
/// </param>
/// <returns>Pdf containing the annotation</returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="pdf"/> or <paramref name="textAnnotation"/> is null.
/// </exception>
public byte[] AddAnnotation(byte[] pdf, TextAnnotation textAnnotation)
{
    if (pdf == null)
    {
        throw new System.ArgumentNullException(nameof(pdf));
    }

    if (textAnnotation == null)
    {
        throw new System.ArgumentNullException(nameof(textAnnotation));
    }

    // Page 0 means "first page". The increment is applied to the caller's object,
    // exactly as before, so callers that read Page afterwards see the same value.
    if (textAnnotation.Page == 0)
    {
        textAnnotation.Page++;
    }

    // The source stream is declared first so that it is disposed last,
    // i.e. only after the pdf instance that reads from it has been disposed.
    using (var sourceStream = new MemoryStream(pdf))
    using (var pdfInstance = GdPictureHelper.GetPDFInstance())
    {
        pdfInstance.LoadFromStream(sourceStream);
        pdfInstance.SelectPage(textAnnotation.Page);

        using (var annotationManager = new AnnotationManager())
        {
            annotationManager.InitFromGdPicturePDF(pdfInstance);

            var annotation = annotationManager.AddTextAnnot(
                textAnnotation.Left,
                textAnnotation.Top,
                textAnnotation.Width,
                textAnnotation.Height,
                textAnnotation.Text);
            annotation.FontSize = textAnnotation.FontSize;
            annotation.FontName = textAnnotation.FontName;
            annotation.ForeColor = textAnnotation.ForeColor;

            annotationManager.BurnAnnotationsToPage(false);

            using (var memoryStream = new MemoryStream())
            {
                annotationManager.SaveDocumentToPDF(memoryStream);

                // ToArray copies the whole buffer regardless of the current position.
                return memoryStream.ToArray();
            }
        }
    }
}
/// <summary>
/// Search text within a pdf, page by page.
/// </summary>
/// <param name="pdf">Pdf as byte array</param>
/// <param name="searchTexts">Search texts; null, empty and whitespace-only entries are ignored</param>
/// <param name="caseSensitive">
/// Whether the search is case sensitive. Matching is ordinal in both modes, so the result
/// does not depend on the current culture.
/// </param>
/// <returns>
/// One entry per (page, search text) pair where the page text contains the search text.
/// Empty when there is nothing to search for.
/// </returns>
public IList<PdfSearchResult> Search(byte[] pdf, IList<string> searchTexts, bool caseSensitive = true)
{
    var terms = searchTexts?.Where(x => !string.IsNullOrWhiteSpace(x)).ToList() ?? new List<string>();
    var result = new List<PdfSearchResult>();
    if (terms.Count == 0)
    {
        return result;
    }

    // Ordinal comparison replaces the former ToLower()/Contains pair: no per-term copy of the
    // page text and no dependency on the current culture's casing rules.
    var comparison = caseSensitive
        ? System.StringComparison.Ordinal
        : System.StringComparison.OrdinalIgnoreCase;

    using (var stream = new MemoryStream(pdf))
    using (var pdfInstance = GdPictureHelper.GetPDFInstance())
    {
        pdfInstance.LoadFromStream(stream);

        var pageCount = pdfInstance.GetPageCount();
        for (int page = 1; page <= pageCount; page++)
        {
            // Pages that cannot be selected are skipped silently.
            if (pdfInstance.SelectPage(page) != GdPictureStatus.OK)
            {
                continue;
            }

            var text = pdfInstance.GetPageText();
            if (string.IsNullOrEmpty(text))
            {
                // Nothing to match against (terms are never empty at this point).
                continue;
            }

            foreach (var term in terms)
            {
                if (text.IndexOf(term, comparison) >= 0)
                {
                    result.Add(new PdfSearchResult { PageNumber = page, SearchText = term });
                }
            }
        }
    }

    return result;
}
/// <summary>
/// Add a layer to a pdf and return it.
/// The first page of the layer document is drawn onto copies of the source pages.
/// </summary>
/// <param name="pdf">Pdf to add the layer to</param>
/// <param name="layer">Layer file as byte array</param>
/// <param name="mode">
/// Add mode: <c>AllPages</c> draws the layer on every page, <c>FirstPage</c> only on page 1.
/// With any other value the pages are copied without a layer.
/// </param>
/// <returns>Pdf with layer</returns>
public byte[] MergeLayer(byte[] pdf, byte[] layer, PdfLayerMode mode)
{
    using (var templateStream = new MemoryStream(layer))
    using (var templatePdf = GdPictureHelper.GetPDFInstance())
    {
        templatePdf.LoadFromStream(templateStream);
        templatePdf.SelectPage(1);

        using (var sourceStream = new MemoryStream(pdf))
        using (var sourcePdf = GdPictureHelper.GetPDFInstance())
        using (var targetPdf = GdPictureHelper.GetPDFInstance())
        {
            targetPdf.NewPDF();
            sourcePdf.LoadFromStream(sourceStream);

            var pageCount = sourcePdf.GetPageCount();
            for (int i = 1; i <= pageCount; i++)
            {
                // Source pages that cannot be selected are left out of the result.
                if (sourcePdf.SelectPage(i) != GdPictureStatus.OK)
                {
                    continue;
                }

                targetPdf.ClonePage(sourcePdf, i);

                // Select the page that was just appended. Its index equals the source index
                // only while no earlier source page has been skipped, so use the target's
                // own page count instead of i.
                targetPdf.SelectPage(targetPdf.GetPageCount());

                // "First page" refers to the first page of the source document.
                if (mode == PdfLayerMode.AllPages || (mode == PdfLayerMode.FirstPage && i == 1))
                {
                    targetPdf.DrawPage(templatePdf, 1, 0, 0, templatePdf.GetPageWidth(), templatePdf.GetPageHeight());
                }
            }

            using (var targetStream = new MemoryStream())
            {
                targetPdf.SaveToStream(targetStream);

                // ToArray copies the whole buffer regardless of the current position.
                return targetStream.ToArray();
            }
        }
    }
}
/// <summary>
/// Merge pdfs and return a single pdf file.
/// </summary>
/// <param name="pdfs">List of pdfs. A single entry is returned as is (same array instance).</param>
/// <param name="options">Pdf merge options; defaults to a new <c>MergeOption</c> when null</param>
/// <returns>Merged pdf</returns>
/// <exception cref="ArgumentNullException"><paramref name="pdfs"/> is null.</exception>
/// <exception cref="InvalidOperationException"><paramref name="pdfs"/> is empty.</exception>
public byte[] Merge(IList<byte[]> pdfs, MergeOption options = null)
{
    if (pdfs == null)
    {
        throw new ArgumentNullException(nameof(pdfs));
    }

    if (pdfs.Count == 0)
    {
        throw new InvalidOperationException("Could not merge 0 pdfs.");
    }

    if (pdfs.Count == 1)
    {
        return pdfs[0];
    }

    // NOTE(review): the options are defaulted here but not read anywhere in this method.
    if (options == null)
    {
        options = new MergeOption();
    }

    var sourceStreams = new List<MemoryStream>(pdfs.Count);
    var sourcePdfs = new List<GdPicturePDF>(pdfs.Count);
    try
    {
        foreach (var pdf in pdfs)
        {
            // Register each resource before using it so the finally block
            // cleans it up even if loading fails half way through the list.
            var sourceStream = new MemoryStream(pdf);
            sourceStreams.Add(sourceStream);

            var sourcePdf = GdPictureHelper.GetPDFInstance();
            sourcePdfs.Add(sourcePdf);

            sourcePdf.LoadFromStream(sourceStream);
        }

        using (var targetPdf = GdPictureHelper.GetPDFInstance())
        using (var mergedPdf = targetPdf.MergeDocuments(sourcePdfs.ToArray()))
        using (var stream = new MemoryStream())
        {
            // The merged document is an instance of its own and is disposed here as well.
            mergedPdf.SaveToStream(stream);
            return stream.ToArray();
        }
    }
    finally
    {
        // Dispose the source documents first, then the streams they were loaded from.
        foreach (var sourcePdf in sourcePdfs)
        {
            sourcePdf.Dispose();
        }

        foreach (var sourceStream in sourceStreams)
        {
            sourceStream.Dispose();
        }
    }
}
/// <summary>
/// Create the service and fetch the pdf instance it keeps in <c>pdfInstance</c>
/// from <c>GdPictureHelper.GetPDFInstance()</c>.
/// </summary>
public PdfContentExtractionService()
{
    var instance = GdPictureHelper.GetPDFInstance();
    pdfInstance = instance;
}
/// <summary>
/// Convert an image (single or multi page) into a pdf with one pdf page per image page.
/// </summary>
/// <param name="data">Image file as byte array</param>
/// <param name="embeddOCRText">
/// When true the pdf is created with PDF/A-1a conformance, otherwise as a plain pdf.
/// No OCR is executed by this method at the moment.
/// </param>
/// <param name="language">OCR language; currently not used because no OCR is executed</param>
/// <returns>Pdf as byte array</returns>
/// <exception cref="InvalidOperationException">
/// The image could not be created from <paramref name="data"/> or the pdf could not be created.
/// </exception>
public byte[] Convert(byte[] data, bool embeddOCRText = true, string language = "deu")
{
    using (var pdfInstance = GdPictureHelper.GetPDFInstance())
    using (var gdPictureImaging = GdPictureHelper.GetImagingInstance())
    {
        int imageId = gdPictureImaging.CreateGdPictureImageFromByteArray(data);
        if (gdPictureImaging.GetStat() != GdPictureStatus.OK)
        {
            // Message kept as is for compatibility; it is raised when the image
            // cannot be created from the given bytes.
            throw new InvalidOperationException("Could not create gdpicture imaging instance");
        }

        try
        {
            GdPictureStatus state = embeddOCRText
                ? pdfInstance.NewPDF(PdfConformance.PDF_A_1a)
                : pdfInstance.NewPDF();
            if (state != GdPictureStatus.OK)
            {
                throw new InvalidOperationException($"Culd not convert document. State: {state}");
            }

            var pageCount = gdPictureImaging.GetPageCount(imageId);
            for (int i = 1; i <= pageCount; i++)
            {
                // Image pages that cannot be selected are skipped.
                if (gdPictureImaging.SelectPage(imageId, i) == GdPictureStatus.OK)
                {
                    pdfInstance.AddImageFromGdPictureImage(imageId, false, true);
                }
            }

            using (var stream = new MemoryStream())
            {
                pdfInstance.SaveToStream(stream);
                return stream.ToArray();
            }
        }
        finally
        {
            // Runs on the failure paths as well, so the image is released and the
            // document is closed even when the conversion throws.
            gdPictureImaging.ReleaseGdPictureImage(imageId);
            pdfInstance.CloseDocument();
        }
    }
}
/// <summary>
/// Execute 1D barcode recognition on a pdf or an image.
/// </summary>
/// <param name="blob">Blob to test</param>
/// <param name="fileExtension">
/// File extension, with or without dots, in any casing. "pdf" selects the pdf path
/// (each page is rendered to an image first); everything else, including null,
/// is treated as an image.
/// </param>
/// <param name="options">
/// Settings/options. When <c>BarcodeTypes</c> is null or empty, Code128, EAN13 and Code39 are scanned.
/// </param>
/// <returns>List of detected barcodes with their page number and barcode type</returns>
/// <exception cref="ArgumentNullException"><paramref name="options"/> is null.</exception>
public IList<BarcodeRecognitionResult> Process(byte[] blob, string fileExtension, BarcodeRecognitionOption options)
{
    if (options == null)
    {
        throw new ArgumentNullException(nameof(options));
    }

    var result = new List<BarcodeRecognitionResult>();

    var barcodeTypes = Barcode1DReaderType.Barcode1DReaderCode128
        | Barcode1DReaderType.Barcode1DReaderEAN13
        | Barcode1DReaderType.Barcode1DReaderCode39;
    if (options.BarcodeTypes != null && options.BarcodeTypes.Any())
    {
        // Explicit types replace the defaults. Each name is mapped to the enum member
        // "Barcode1DReader<name>" (case-insensitive); blank entries are ignored.
        barcodeTypes = Barcode1DReaderType.Barcode1DReaderNone;
        foreach (var type in options.BarcodeTypes)
        {
            if (string.IsNullOrWhiteSpace(type))
            {
                continue;
            }

            barcodeTypes |= (Barcode1DReaderType)Enum.Parse(typeof(Barcode1DReaderType), $"Barcode1DReader{type}", true);
        }
    }

    using (var gdPictureImage = GdPictureHelper.GetImagingInstance())
    {
        // Scans the given image and appends every barcode found to the result,
        // tagged with the given page number. Shared by the pdf and the image path.
        Action<int, int> scanPage = (scanImageId, scanPageNumber) =>
        {
            var status = gdPictureImage.Barcode1DReaderDoScan(scanImageId, Barcode1DReaderScanMode.BestQuality, barcodeTypes, false, 1);
            if (status != GdPictureStatus.OK)
            {
                return;
            }

            var barcodeAmount = gdPictureImage.Barcode1DReaderGetBarcodeCount();
            for (int i = 1; i <= barcodeAmount; i++)
            {
                result.Add(new BarcodeRecognitionResult
                {
                    Barcode = gdPictureImage.Barcode1DReaderGetBarcodeValue(i),
                    Page = scanPageNumber,
                    BarcodeType = gdPictureImage.Barcode1DReaderGetBarcodeType(i).ToString()
                });
            }

            gdPictureImage.Barcode1DReaderClear();
        };

        // Ordinal comparison: the check must not depend on the current culture.
        var isPdf = string.Equals(fileExtension?.Replace(".", ""), "pdf", StringComparison.OrdinalIgnoreCase);
        if (isPdf)
        {
            using (var stream = new MemoryStream(blob))
            using (var gdPicturePdf = GdPictureHelper.GetPDFInstance())
            {
                gdPicturePdf.LoadFromStream(stream);

                var pageCount = gdPicturePdf.GetPageCount();
                for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++)
                {
                    gdPicturePdf.SelectPage(pageNumber);
                    var imageID = gdPicturePdf.RenderPageToGdPictureImageEx(options.PdfToTiffDPI, true);
                    try
                    {
                        if (options.ConvertToBlackAndWhite)
                        {
                            gdPictureImage.FxBlackNWhite(imageID, BitonalReduction.Stucki);
                            gdPictureImage.ConvertTo1BppFast(imageID);
                        }

                        scanPage(imageID, pageNumber);
                    }
                    finally
                    {
                        // Release the rendered page even if the scan throws.
                        gdPictureImage.ReleaseGdPictureImage(imageID);
                    }
                }

                gdPicturePdf.CloseDocument();
            }
        }
        else
        {
            int imageID = gdPictureImage.CreateGdPictureImageFromByteArray(blob);
            try
            {
                gdPictureImage.TiffOpenMultiPageForWrite(true);
                if (!gdPictureImage.TiffIsMultiPage(imageID))
                {
                    // Wrap a single page image into a multi page tiff so that the
                    // page loop below can handle both cases the same way.
                    int singlePageId = imageID;
                    imageID = gdPictureImage.TiffCreateMultiPageFromGdPictureImage(singlePageId);
                    gdPictureImage.ReleaseGdPictureImage(singlePageId);
                }

                // NOTE(review): the conversion is applied once, before any page is selected -
                // confirm that it covers every page of a multi page tiff.
                if (options.ConvertToBlackAndWhite)
                {
                    gdPictureImage.FxBlackNWhite(imageID, BitonalReduction.Stucki);
                    gdPictureImage.ConvertTo1BppFast(imageID);
                }

                int pageCount = gdPictureImage.TiffGetPageCount(imageID);
                for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++)
                {
                    gdPictureImage.TiffSelectPage(imageID, pageNumber);
                    scanPage(imageID, pageNumber);
                }
            }
            finally
            {
                // Release the image even if the conversion or the scan throws.
                gdPictureImage.ReleaseGdPictureImage(imageID);
            }
        }
    }

    return result;
}